# A motion-segmentation dataset for Edward's [MotionSeg](https://gitlab.com/edward_chen/joint_segmentation_motion_estimation) code
Stough 7/20

In [1]:
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchvision
import echonet
import os
from argparse import Namespace
import pandas as pd
from scipy.signal import find_peaks
from torch.utils.data import Dataset
from torch.nn.functional import interpolate
from queue import SimpleQueue as squeue

In [12]:
# Notebook-wide settings; the dataset class below is constructed from these.
args = Namespace(
    modelname="deeplabv3_resnet50",
    pretrained=False,
    clip_length=10,
    device="cuda",
    output=None,
    num_workers=6,
    image_size=[256,256],
    norm=True,
    all_clips=True,
)

# Intensity statistics over the EchoNet training split, as computed by
# echonet.utils.get_mean_and_std.
mean, std = echonet.utils.get_mean_and_std(
    echonet.datasets.Echo(split="train")
)

# Directory named after the segmentation model and how its weights were initialised.
output = os.path.join(
    "output",
    "segmentation",
    f"{args.modelname}_{'pretrained' if args.pretrained else 'random'}",
)

100%|██████████| 16/16 [00:01<00:00, 14.15it/s]


&nbsp;

## We need to define some useful functions.

In [3]:
def EDESpairs(diastole, systole):
    """Pair each diastole frame with the next unused, strictly later systole frame.

    Both inputs are sorted ascending and walked once. For every diastole
    frame (in order), systole frames are consumed until one strictly greater
    than the diastole frame is found; that systole is paired with it and is
    never reused. Systole frames skipped along the way are discarded, and
    pairing stops as soon as the systole frames run out.

        EDESpairs({38, 73, 96}, {19, 53, 87})
               => [(38, 53), (73, 87)]

    Parameters
    ----------
    diastole, systole : iterable
        Frame numbers, in any order (e.g. sets).

    Returns
    -------
    list of tuple
        (diastole_frame, systole_frame) pairs in ascending order.
    """
    pairs = []
    # One shared cursor over the systoles: frames consumed while serving an
    # earlier diastole are never revisited for a later one.
    remaining_systoles = iter(sorted(systole))

    for dframe in sorted(diastole):
        sframe = next((s for s in remaining_systoles if s > dframe), None)
        if sframe is None:
            # Systoles exhausted, so no later diastole can be paired either.
            break
        pairs.append((dframe, sframe))
    return pairs

In [4]:
class CamusizeVideo(object):
    """Convert an RGB video clip into a fixed-size, single-channel clip.

    Calling an instance on a ``c(3) x f x h x w`` video collapses the colour
    channels to grayscale, resamples the clip trilinearly to
    ``clip_length x im_size`` and, when ``norm`` is true, rescales every
    output frame to [0, 1].

    Parameters
    ----------
    im_size : sequence of two ints
        Output frame height and width.
    clip_length : int, optional
        Number of output frames. ``None`` (the default) means
        ``args.clip_length``, read when the object is constructed rather than
        when this cell is run, so re-running the ``args`` cell is picked up.
    norm : bool
        Whether to min-max normalise each output frame.
    """
    def __init__(self, im_size=[256,256], clip_length=None, norm=True):
        # Copy into a fresh list: avoids aliasing the shared default and lets
        # callers pass a tuple (it is concatenated with a list in __call__).
        self.im_size = list(im_size)
        self.clip_length = args.clip_length if clip_length is None else clip_length
        self.norm = norm
        
    def _norm(self, video):
        """Min-max normalise each frame of an ``f x h x w`` tensor to [0, 1].

        Returns a new tensor of the same shape; the input is not modified.
        A constant frame (zero range) comes back as all zeros instead of NaN.
        """
        # https://discuss.pytorch.org/t/how-to-efficiently-normalize-a-batch-of-tensor-to-0-1/65122/4
        flat = video.reshape(video.shape[0], -1).clone() # each frame is vectorized (256x256 -> 65536)
        flat -= flat.min(1, keepdim=True)[0] # subtract frame min from each frame
        peak = flat.max(1, keepdim=True)[0]
        # Divide by the frame max, substituting 1 where the frame is flat so
        # that 0/0 does not produce NaN.
        flat /= torch.where(peak > 0, peak, torch.ones_like(peak))
        return flat.view(video.shape) # reconstitute the frames (65536 -> 256x256)
    
    def _rgb2gray(self, video):
        """Collapse the channel axis of a ``c(3) x f x h x w`` video to size 1.

        Uses the weighting 0.2989 * R + 0.5870 * G + 0.1140 * B and returns a
        ``1 x f x h x w`` tensor.
        """
        # Build the weights on the video's device so the multiply cannot mix devices.
        weights = torch.tensor([.2989, .5870, .1140], device=video.device)
        return torch.mul(video, weights[:, None, None, None]).sum(0, keepdim=True)
        
    def __call__(self, video):
        """Convert one video to a CAMUS-acceptable clip.

        Parameters
        ----------
        video : array-like or torch.Tensor
            A single ``c(3) x f x h x w`` video.

        Returns
        -------
        torch.Tensor
            ``1 x clip_length x im_size`` single-channel clip, with each frame
            in [0, 1] when ``self.norm`` is true.
        """
        # torch.tensor() on an existing tensor warns and copies; tensors are
        # only detached (nothing below writes to the input).
        video = video.detach() if torch.is_tensor(video) else torch.tensor(video)
        gray = self._rgb2gray(video) # -> 1 x f x h x w
        resized = interpolate(gray.unsqueeze(0), size=[self.clip_length] + self.im_size, 
                              mode='trilinear', align_corners=False) # 1 x 1 x clip_length x im_size
        
        # Index the two singleton axes explicitly: a bare squeeze() would also
        # drop the frame axis when clip_length == 1.
        out_video = resized[0, 0] # -> clip_length x im_size
        if self.norm:
            out_video = self._norm(out_video) # -> norm each frame to [0,1]
        
        return out_video.unsqueeze(0) # -> 1 x clip_length x im_size

In [13]:
class StanfordMotionDataset(Dataset):
    """Diastole-to-systole clips cut from the EchoNet ("Stanford") test videos.

    Each item loads one full test video, finds diastole/systole frames as
    peaks of the per-frame ``Size`` trace stored in ``<output>/size.csv``
    (presumably written by an earlier EchoNet segmentation run - confirm),
    pairs them with ``EDESpairs`` and converts every paired frame range into
    a fixed-size grayscale clip with ``CamusizeVideo``.

    Parameters
    ----------
    modelname, pretrained
        Select the ``output/segmentation/<modelname>_<pretrained|random>``
        directory that holds ``size.csv``.
    image_size : sequence of two ints
        Output frame height and width.
    clip_length : int
        Number of frames per output clip.
    norm : bool
        Forwarded to ``CamusizeVideo``.
    all_clips : bool
        If false, only the first clip of each video is returned.
    **kwargs
        Ignored; lets callers pass ``**vars(args)`` with extra keys.
    """
    def __init__(self, 
                 modelname = args.modelname,
                 pretrained = args.pretrained,
                 image_size=args.image_size, 
                 clip_length=args.clip_length, 
                 norm=args.norm,
                 all_clips=args.all_clips,
                 **kwargs
                 ):
        
        mean, std = echonet.utils.get_mean_and_std(echonet.datasets.Echo(split="train"))
        self.output = os.path.join("output", "segmentation", "{}_{}".format(modelname, 
                                                                            "pretrained" if pretrained else "random"))
        self.all_clips = all_clips
        self.image_size = image_size
        self.clip_length = clip_length
        self.norm = norm
        # Need filename for saving, and human-selected frames to annotate
        self.stanford = echonet.datasets.Echo(split="test",
                                              target_type=["Filename", "LargeIndex", "SmallIndex", "EF", "EDV", "ESV"],  
                                              mean=mean, std=std,  # Normalization
                                              length=None, max_length=None, period=1  # Take all frames
                                             )
        self.camusizer = CamusizeVideo(im_size=self.image_size, 
                                       clip_length=self.clip_length, 
                                       norm=self.norm
                                      )
        self.sizes = pd.read_csv(os.path.join(self.output, "size.csv"))
        # Unique filenames in order of first appearance in size.csv.
        self.ids = pd.unique(self.sizes.Filename)
        
    def __len__(self):
        """Number of videos, i.e. the length of the wrapped EchoNet test split."""
        # Needed for len(dataset) and for DataLoader's default samplers.
        return len(self.stanford)
        
    def __getitem__(self, idx):
        """Return the clips and metadata for video ``idx``.

        Returns
        -------
        tuple
            ``(videoclips, clip_pairs, video, filename, large_index,
            small_index, ef, edv, esv)`` where ``videoclips`` is
            ``n_clips x clip_length x image_size`` and ``clip_pairs`` is the
            full list of (diastole, systole) frame pairs, even when
            ``all_clips`` is false and only the first clip is returned.

        Raises
        ------
        AssertionError
            If no diastole/systole pair could be formed for the video.
        """
        # Get the appropriate info from the Stanford dataset
        video, (filename, large_index, small_index, ef, edv, esv) = self.stanford[idx]
        
        # Now get the clip points for this video using the already recorded sizes.
        # Match on the filename of the video actually loaded, so the trace
        # cannot come from a different video if size.csv is ordered differently
        # from the dataset. Fall back to the positional lookup when the names
        # do not match at all.
        video_id = filename
        idx_sizes = self.sizes.loc[self.sizes.Filename == video_id]
        if idx_sizes.empty:
            video_id = self.ids[idx]
            idx_sizes = self.sizes.loc[self.sizes.Filename == video_id]
        
        size = idx_sizes.Size.values
        ordered = sorted(size)
        # NOTE(review): `len(size) ** 0.05` / `** 0.95` raise the length to a
        # power; 5th/95th-percentile indices (`*`) look like the intent. Left
        # as-is because it changes which peaks pass the prominence threshold
        # (it appears to mirror EchoNet's own beat detection - confirm).
        trim_min = ordered[round(len(size) ** 0.05)]
        trim_max = ordered[round(len(size) ** 0.95)]
        trim_range = trim_max - trim_min
        # Systoles are minima of the trace, diastoles are maxima.
        systole = set(find_peaks(-size, distance=20, prominence=(0.50 * trim_range))[0])
        diastole = set(find_peaks(size, distance=20, prominence=(0.50 * trim_range))[0])
        
        clip_pairs = EDESpairs(diastole, systole)
        
        assert len(clip_pairs) > 0, 'StanfordMotionDataset clips issue: ' \
                                    f'Video {video_id} had diastole {diastole} and systole {systole}.\n'
        
        # Convert the selected frame ranges, then concatenate once instead of
        # growing a tensor inside the loop.
        selected_pairs = clip_pairs if self.all_clips else clip_pairs[:1]
        videoclips = torch.cat([self.camusizer(video[:,dframe:sframe,...])
                                for dframe, sframe in selected_pairs], 0)
        return videoclips, clip_pairs, video, filename, large_index, small_index, ef, edv, esv

&nbsp;

## Testing our Stanford ED/ES clip dataset...

In [14]:
# Build the dataset from the notebook settings; keys in args that the
# constructor does not name (e.g. device, num_workers) are absorbed by **kwargs.
motionSet = StanfordMotionDataset(**vars(args))

100%|██████████| 16/16 [00:01<00:00, 13.75it/s]


In [23]:
# Pull one sample (index 112) and unpack everything __getitem__ returns.
videoclips, clip_pairs, video, filename, large_index, small_index, ef, edv, esv = motionSet[112]

In [24]:
# One entry per diastole->systole pair: (n_clips, clip_length, height, width).
videoclips.shape

torch.Size([2, 10, 256, 256])

In [25]:
# (diastole_frame, systole_frame) index pairs the clips were cut from.
clip_pairs

[(38, 53), (73, 87)]

In [26]:
# Clip tensor shape again (repeats the shape check made two cells earlier).
videoclips.shape

torch.Size([2, 10, 256, 256])

In [27]:
# "EF" target returned by the EchoNet dataset for this video (presumably ejection fraction - confirm).
ef

50.308453

In [32]:
# Check the container type and which device the clips live on.
type(videoclips), videoclips.device

(torch.Tensor, device(type='cpu'))

In [30]:
# Simple way to put the channels dim back into the tensor:
# inserts a size-1 axis at position 2, i.e. after the frame axis.
videoclips.unsqueeze(2).shape

torch.Size([2, 10, 1, 256, 256])

In [31]:
# With norm=True each frame is min-max rescaled, so the overall range should be [0, 1].
videoclips.min(), videoclips.max()

(tensor(0.), tensor(1.))

In [35]:
%%capture
# Build a grayscale rendering of the second clip; %%capture (which must stay
# the first line of the cell) hides whatever the cell prints or draws.
# NOTE(review): presumably makeVideo returns a displayable animation - confirm.
vid = echonet.utils.makeVideo(videoclips[1,...], cmap='gray')

In [36]:
# Display the object built in the previous cell.
vid