# A motion-seg dataset, for Edward's [MotionSeg](https://gitlab.com/edward_chen/joint_segmentation_motion_estimation) code.
Stough 7/20

In [1]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import echonet
import os
from argparse import Namespace
import pandas as pd
from scipy.signal import find_peaks
from torch.utils.data import Dataset
from torch.nn.functional import interpolate
from queue import SimpleQueue as squeue

In [2]:
# Notebook-wide settings. The classes defined below read several of these
# as their constructor defaults, so this cell has to run first.
args = Namespace(
    modelname="deeplabv3_resnet50",
    pretrained=False,
    clip_length=10,
    device="cuda",
    output=None,
    num_workers=6,
    image_size=[256, 256],
    norm=True,
    all_clips=True,
)

# Mean and std as returned by echonet.utils.get_mean_and_std for the "train" split.
mean, std = echonet.utils.get_mean_and_std(echonet.datasets.Echo(split="train"))

# output/segmentation/<modelname>_<pretrained|random>
output = os.path.join(
    "output",
    "segmentation",
    "{}_{}".format(args.modelname, "pretrained" if args.pretrained else "random"),
)

100%|██████████| 16/16 [00:01<00:00, 15.16it/s]


&nbsp;

## We need to define some useful functions.

In [3]:
# New EDESpairs: Start at every systolic, step backwards to the 
# nearest diastolic, and call that a clip. Then for the next one 
# make sure we don't pass the previous systole while stepping back.

def EDESpairs(diastole, systole):
    """Pair systolic frames with the nearest preceding diastolic frame.

    For every systolic frame (in increasing order) the closest diastolic
    frame strictly before it is looked up. Systolic frames with no earlier
    diastolic frame are skipped, and a diastolic frame is used at most once:
    only the first systolic frame that follows it produces a clip.

    Args:
        diastole: iterable of diastolic frame numbers (list, set, tuple or
            array), in any order.
        systole: iterable of systolic frame numbers, in any order.

    Returns:
        List of ``(diastolic_frame, systolic_frame)`` tuples ordered by
        frame number; empty if no pair can be formed.

    Example:
        EDESpairs({38, 73, 96}, {19, 53, 87}) => [(38, 53), (73, 87)]
    """
    # np.searchsorted requires a sorted first argument, so the search and the
    # loop must both run on the sorted copies, not on the raw inputs.
    # list(...) lets sets and other non-sequence iterables through.
    dframes = np.sort(np.asarray(list(diastole)))
    sframes = np.sort(np.asarray(list(systole)))
    clips = []

    # inds[i] == number of diastolic frames strictly smaller than sframes[i].
    inds = np.searchsorted(dframes, sframes, side='left')
    for ind, sf in zip(inds, sframes):
        if ind == 0:  # no prior diastolic frame for this systolic frame
            continue
        best_df = dframes[ind - 1]  # diastolic frame nearest before sf
        # Skip if this diastolic frame already opened the previous clip.
        if not clips or best_df != clips[-1][0]:
            clips.append((best_df, sf))

    return clips



# # Given iterable lists of frame numbers. The first pair 
# # is the first diastole and the first larger systole. 
# # EDESpairs({38, 73, 96}, {19, 53, 87}) 
# #        => [(38, 53), (73, 87)]
# def EDESpairs(diastole, systole):
#     ret = []
#     dq = squeue()
#     sq = squeue()
#     [dq.put(x) for x in sorted(diastole)];
#     [sq.put(x) for x in sorted(systole)];
    
#     while not dq.empty():
#         dframe = dq.get()
#         while not sq.empty():
#             t = sq.get()
#             if t > dframe:
#                 ret.append((dframe, t))
#                 break
#     return ret

In [4]:
class CamusizeVideo(object):
    """Callable that turns a 3-channel video clip into a CAMUS-style clip.

    The clip is converted to a single gray channel, resampled with trilinear
    interpolation to ``clip_length`` frames of ``im_size`` pixels and, when
    ``norm`` is true, each frame is rescaled independently to [-1, 1].

    Args:
        im_size: two-element ``[height, width]`` of the output frames.
        clip_length: number of frames in the output clip.
        norm: whether to apply the per-frame rescaling.
    """

    def __init__(self, im_size=[256,256], clip_length=args.clip_length, norm=True):
        # Copy so neither the shared default list nor the caller's list is aliased.
        self.im_size = list(im_size)
        self.clip_length = clip_length
        self.norm = norm

    def _norm(self, video):
        """Rescale every frame of an ``f x h x w`` tensor to [-1, 1].

        Each frame is shifted by its own minimum and divided by its own
        (shifted) maximum, then mapped from [0, 1] to [-1, 1]. A constant
        frame maps to -1 everywhere instead of NaN. The input is not modified.
        """
        # Tensor version of per-sample 0-1 scaling, see
        # https://discuss.pytorch.org/t/how-to-efficiently-normalize-a-batch-of-tensor-to-0-1/65122/4
        frames = video.clone().reshape(video.shape[0], -1)  # one row per frame
        frames -= frames.min(1, keepdim=True)[0]  # per-frame minimum -> 0
        peak = frames.max(1, keepdim=True)[0]
        peak[peak == 0] = 1.0  # constant frame: avoid 0/0
        frames /= peak  # per-frame maximum -> 1
        return 2.0 * frames.view(video.shape) - 1.0  # [0, 1] -> [-1, 1]

    def _rgb2gray(self, video):
        """Collapse a ``c(3) x f x h x w`` tensor to ``1 x f x h x w``.

        Uses the weights 0.2989 * R + 0.5870 * G + 0.1140 * B.
        """
        # Build the weights on the video's device so non-CPU inputs also work.
        weights = torch.tensor([.2989, .5870, .1140], device=video.device)
        return torch.mul(video, weights[:, None, None, None]).sum(0, keepdim=True)

    def __call__(self, video):
        """Convert one clip.

        Args:
            video: array or tensor laid out as ``c(3) x f x h x w``.

        Returns:
            Tensor of shape ``1 x clip_length x im_size[0] x im_size[1]``,
            with values in [-1, 1] per frame when ``norm`` is true.
        """
        gray = self._rgb2gray(torch.as_tensor(video))  # -> 1 x f x h x w
        resized = interpolate(gray.unsqueeze(0), size=[self.clip_length] + self.im_size,
                              mode='trilinear', align_corners=False)  # 1 x 1 x clip_length x im_size

        # Index the two singleton axes explicitly; a bare squeeze() would also
        # drop the frame axis when clip_length == 1.
        clip = resized[0, 0]  # -> clip_length x im_size
        if self.norm:
            clip = self._norm(clip)

        return clip.unsqueeze(0)  # -> 1 x clip_length x im_size

In [5]:
class StanfordMotionDataset(Dataset):
    """ED-to-ES clips cut from the echonet "test" split.

    For each video, the previously recorded per-frame sizes in
    ``<output>/size.csv`` (columns ``Filename`` and ``Size``) are used to
    locate diastolic and systolic frames. Every (diastole, systole) pair is
    cut out of the video and converted with ``CamusizeVideo``.

    Args:
        modelname, pretrained: select the results directory
            ``output/segmentation/<modelname>_<pretrained|random>``.
        image_size: ``[height, width]`` of the produced clips.
        clip_length: number of frames in each produced clip.
        norm: forwarded to ``CamusizeVideo``.
        all_clips: if true, return every clip of a video; otherwise return
            after the first clip.
        **kwargs: accepted and ignored, so a full settings namespace can be
            splatted into the constructor.
    """

    def __init__(self, 
                 modelname = args.modelname,
                 pretrained = args.pretrained,
                 image_size=args.image_size, 
                 clip_length=args.clip_length, 
                 norm=args.norm,
                 all_clips=args.all_clips,
                 **kwargs
                 ):
        
        mean, std = echonet.utils.get_mean_and_std(echonet.datasets.Echo(split="train"))
        self.output = os.path.join("output", "segmentation", "{}_{}".format(modelname, 
                                                                            "pretrained" if pretrained else "random"))
        self.all_clips = all_clips
        self.image_size = image_size
        self.clip_length = clip_length
        self.norm = norm
        # Need filename for saving, and human-selected frames to annotate
        self.stanford = echonet.datasets.Echo(split="test",
                                              target_type=["Filename", "EF", "EDV", "ESV", \
                                                           "LargeIndex", "SmallIndex", \
                                                           "LargeFrame", "SmallFrame", \
                                                           "LargeTrace", "SmallTrace"],  
                                              mean=mean, std=std,  # Normalization
                                              length=None, max_length=None, period=1  # Take all frames
                                             )
        self.camusizer = CamusizeVideo(im_size=self.image_size, 
                                       clip_length=self.clip_length, 
                                       norm=self.norm
                                      )
        self.sizes = pd.read_csv(os.path.join(self.output, "size.csv"))
        # Unique filenames in order of first appearance in size.csv.
        self.ids = pd.unique(self.sizes.Filename)

    def __len__(self):
        return len(self.ids)

    @staticmethod
    def _find_clip_pairs(size):
        """Return the (diastole, systole) frame pairs for one per-frame size curve."""
        _05cut, _85cut, _95cut = np.percentile(size, [5, 85, 95])

        # Peak prominence is measured against the 5th-95th percentile range.
        trim_range = _95cut - _05cut
        systole = find_peaks(-size, distance=20, prominence=(0.50 * trim_range))[0]
        diastole = find_peaks(size, distance=20, prominence=(0.50 * trim_range))[0]

        # keep only real diastoles: maxima at or above the 85th percentile
        diastole = [x for x in diastole if size[x] >= _85cut]
        # Treat frame 0 as a diastole when the first three frames are already large
        if np.mean(size[:3]) >= _85cut:
            diastole = [0] + diastole
        diastole = np.array(diastole)

        return EDESpairs(diastole, systole)

    def __getitem__(self, idx):
        """Return the clips and annotations for video ``idx``.

        Returns:
            When ``all_clips`` is true, or when the video yields no clip:
            ``(videoclips, clip_pairs, video, filename, ef, edv, esv,
            l_index, s_index, l_frame, s_frame, l_trace, s_trace)``.
            When ``all_clips`` is false and at least one clip exists:
            ``(videoclips, clip_pairs, video, filename, l_index, s_index,
            ef, edv, esv)`` with only the first clip in ``videoclips``.
            ``videoclips`` stacks the converted clips along dim 0 and is an
            empty tensor when ``clip_pairs`` is empty.
        """
        # Get the appropriate info from the Stanford dataset
        video, (filename, ef, edv, esv, l_index, s_index, l_frame, s_frame, l_trace, s_trace) = self.stanford[idx]

        # Now get the clip points for this video using the already recorded sizes.
        # NOTE(review): assumes the order of unique filenames in size.csv matches
        # the order of the echonet test split -- TODO confirm.
        idx_sizes = self.sizes.loc[self.sizes.Filename == self.ids[idx]]
        clip_pairs = self._find_clip_pairs(idx_sizes.Size.values)

        # Collect the converted clips and concatenate once at the end.
        clips = []
        for dframe, sframe in clip_pairs:
            clips.append(self.camusizer(video[:, dframe:sframe, ...]))
            if not self.all_clips:
                videoclips = torch.cat(clips, 0)
                return videoclips, clip_pairs, video, filename, l_index, s_index, ef, edv, esv

        videoclips = torch.cat(clips, 0) if clips else torch.tensor([])
        return videoclips, clip_pairs, video, filename, ef, edv, esv, l_index, s_index, l_frame, s_frame, l_trace, s_trace
        



#     def __getitem__(self, idx):
        
#         # Get the appropriate info from the Stanford dataset
#         video, (filename, ef, edv, esv, l_index, s_index, l_frame, s_frame, l_trace, s_trace) = self.stanford[idx]
        
#         # Now get the clip points for this video using the already recorded sizes.
#         idx_sizes = self.sizes.loc[self.sizes.Filename == self.ids[idx]]
        
#         size = idx_sizes.Size.values
#         trim_min = sorted(size)[round(len(size) ** 0.05)]
#         trim_max = sorted(size)[round(len(size) ** 0.95)]
#         trim_range = trim_max - trim_min
#         systole = set(find_peaks(-size, distance=20, prominence=(0.50 * trim_range))[0])
#         diastole = set(find_peaks(size, distance=20, prominence=(0.50 * trim_range))[0])
        
#         clip_pairs = EDESpairs(diastole, systole)
        
#         assert len(clip_pairs) > 0, f'StanfordMotionDataset clips issue: ' \
#                                     f'Video {self.ids[idx]} had diastole {diastole} and systole {systole}.\n'
        
#         videoclips = torch.tensor([])
#         for dframe, sframe in clip_pairs:
#             videoclips = torch.cat((videoclips,
#                                    self.camusizer(video[:,dframe:sframe,...])), 0)
#             if not self.all_clips:
#                 return videoclips, clip_pairs, video, filename, large_index, small_index, ef, edv, esv
#         return videoclips, clip_pairs, video, filename, ef, edv, esv, l_index, s_index, l_frame, s_frame, l_trace, s_trace


&nbsp;

## Testing our Stanford ED/ES clip dataset...

In [6]:
# Build the dataset from the notebook-wide settings. Namespace entries that the
# constructor does not name (device, output, num_workers) land in its **kwargs.
motionSet = StanfordMotionDataset(**vars(args))

100%|██████████| 16/16 [00:01<00:00, 14.37it/s]


In [7]:
# Fetch sample 112 and unpack the 13-item tuple returned with all_clips=True.
videoclips, clip_pairs, video, filename, ef, edv, esv, \
l_index, s_index, l_frame, s_frame, l_trace, s_trace = motionSet[112]

In [8]:
# Walk the whole dataset and print index and filename of every video for
# which no (diastole, systole) pair was found. Each iteration goes through
# __getitem__ and rebinds the notebook-level sample variables.
for i in range(len(motionSet)):
    videoclips, clip_pairs, video, filename, ef, edv, esv, \
        l_index, s_index, l_frame, s_frame, l_trace, s_trace = motionSet[i]
    if len(clip_pairs) == 0:
        print(f'{i}: {filename}')

95: 0X1ACB73BE8C1F2C0C.avi
390: 0X350E5D4955052AFA.avi


&nbsp;

## Some looking into problem cases
that maybe don't have any systoles etc.

```
0X5F40FC2C2367EA92.avi (one systole, no diastole)
0X47DBEA2F11240016.avi (one systole, no diastole)
0X350E5D4955052AFA.avi (no systole & no diastole)
For the four videos below, each video has one systole frame and one diastole frame, but the systole frame comes before the diastole frame. Thus, the only available clip in each video runs ES-to-ED rather than ED-to-ES.
0X1ACB73BE8C1F2C0C.avi
0X7DA74EAC9DFC2D5B.avi
0X7F058A3503090EC8.avi
0XEC340BEA3298AE3.avi


Edward pointed to these as cases that we do particularly poorly on. Often
the answers don't look incredibly bad, but the numbers are still way off.
For the first one we get an awful answer, and the clip itself also doesn't look
like ED-ES.
812  298*  685  1050  987  916*  511  772*  254*  1053  222*  180*  211*  440  819*
```

In [17]:
# Fetch sample 812 (first entry in the list of problem cases above) and unpack
# the 13-item tuple returned with all_clips=True.
videoclips, clip_pairs, video, filename, ef, edv, esv, \
l_index, s_index, l_frame, s_frame, l_trace, s_trace = motionSet[812]

In [18]:
filename

'0X5FE2CC293BF88CCD.avi'

In [19]:
# Now get the clip points for this video using the already recorded sizes.
# Replay, at notebook level, the clip-finding steps of
# StanfordMotionDataset.__getitem__ for sample 812 so that the intermediate
# values (size, systole, diastole, clip_pairs) can be inspected and plotted.
idx_sizes = motionSet.sizes.loc[motionSet.sizes.Filename == motionSet.ids[812]]

# Recorded per-frame sizes for this video, and the percentiles used as cut-offs.
size = idx_sizes.Size.values
_05cut, _85cut, _95cut = np.percentile(size, [5, 85, 95]) 


# Peak prominence is measured against the 5th-95th percentile range.
trim_min = _05cut
trim_max = _95cut
trim_range = trim_max - trim_min
# Systoles are minima of the size curve (peaks of -size), diastoles are maxima.
systole = find_peaks(-size, distance=20, prominence=(0.50 * trim_range))[0]
diastole = find_peaks(size, distance=20, prominence=(0.50 * trim_range))[0]

# keep only real diastoles: maxima at or above the 85th percentile
diastole = [x for x in diastole if size[x] >= _85cut]
# Add first frame as a diastole when the first three frames are already large
if np.mean(size[:3]) >= _85cut:
    diastole = [0] + diastole
diastole = np.array(diastole)

clip_pairs = EDESpairs(diastole, systole)

In [20]:
clip_pairs

[(0, 24),
 (58, 78),
 (117, 140),
 (174, 197),
 (234, 256),
 (290, 314),
 (346, 359),
 (379, 389),
 (407, 417),
 (433, 447),
 (466, 475)]

In [21]:
l_index # Diastolic

71

In [22]:
len(motionSet)

1276

In [23]:
# Plot the per-frame size curve with the detected frames on top:
# black = every detected diastole/systole, yellow = diastole that starts a
# clip, red = systole that ends a clip.
plt.figure(figsize=(12,3))
plt.scatter(np.arange(len(size)), size, alpha=.6)
plt.scatter(diastole, size[diastole], s=30, c='k')
plt.scatter(systole, size[systole], s=30, c='k')
# Split the (diastole, systole) pairs into their start and end frames.
clip_eds = [x for x,y in clip_pairs]
clip_ess = [y for x,y in clip_pairs]
plt.scatter(clip_eds, size[clip_eds], s=80, c='y')
plt.scatter(clip_ess, size[clip_ess], s=80, c='r')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

<matplotlib.collections.PathCollection at 0x7f8a1c70fad0>

In [None]:
# Distribution of the recorded per-frame sizes for this video (40 bins).
plt.figure(figsize=(6,3))
plt.hist(size, bins=40);

In [None]:
video.shape

In [None]:
%%capture
# Build a viewable video from the first channel of the clip, using a gray colormap.
vid = echonet.utils.makeVideo(video[0,...], cmap='gray')

In [None]:
vid

In [None]:
l_index, s_index

In [None]:
# Rescale the annotated large and small frames to [0, 1] for display.
# Only sf is shown here; lf is used by the overlay cell that follows.
plt.figure()
lf = l_frame - l_frame.min()
lf /= lf.max()
sf = s_frame - s_frame.min()
sf /= sf.max()
# presumably frames are channel-first (c x h x w); move channels last for imshow -- TODO confirm
plt.imshow(sf.transpose(1,2,0))

In [None]:
# Side by side: first channel of the large and small frames, each overlaid
# with the pixels where its trace equals 1 (semi-transparent).
f, ax = plt.subplots(1,2, sharex=True, sharey=True)
ax[0].imshow(lf[0], cmap='gray')
ax[0].imshow(l_trace==1, alpha=.3)
ax[0].set_title('large frame')
ax[1].imshow(sf[0], cmap='gray')
ax[1].imshow(s_trace==1, alpha=.3)
ax[1].set_title('small frame')

In [None]:
video.shape

In [None]:
# Show frame 60 of the first channel of the current video.
plt.figure()
plt.imshow(video[0, 60,...])