In [None]:
import os
import sys
import time
import torch
import torchvision
import numpy as np
from glob import glob
from tqdm import tqdm

In [None]:
from dataloader.handhygiene import HandHygiene
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter

from spatial_transforms import Compose
from spatial_transforms import Normalize
from spatial_transforms import Scale
from spatial_transforms import CenterCrop
from spatial_transforms import RandomHorizontalFlip
from spatial_transforms import RandomAffine
from spatial_transforms import RandomRotation
from spatial_transforms import ColorJitter
from spatial_transforms import ToTensor #ExtractSkinColor
from temporal_transforms import TemporalRandomChoice
from temporal_transforms import TemporalRandomCrop
from temporal_transforms import LoopPadding, MirrorPadding, MirrorLoopPadding
from openpose_transforms import MultiScaleTorsoRandomCrop

In [None]:
# Clip / batch configuration for the preview pipeline defined in this cell.
VIDEO_DIR='./data/images' #/data/videos
CLIP_LEN = 16
# Floor division keeps the augmented clip length an int (CLIP_LEN/2 gave 8.0);
# it is handed to TemporalRandomCrop below as a frame count.
CLIP_LEN_AUG = CLIP_LEN // 2
BATCH_SIZE = 16

IMG_SIZE = 224
# Per-channel statistics kept for reference only; they were immediately
# overwritten by the 128/128 pair below, so they are commented out here.
#MEAN=[110.63666788, 103.16065604, 96.29023126]
#STD=[38.7568578, 37.88248729, 40.02898126]
MEAN=[128, 128, 128]
STD=[128, 128, 128]

# `num` must be an integer: np.linspace rejects float sample counts such as 1e3.
scales=np.linspace(1, 1.75, num=1000)
# Midpoint of the [1, 1.75] scale range, used as the single fixed scale for 'val'.
center=((1+1.75)/2)
openpose_transform = {
    'train':MultiScaleTorsoRandomCrop(scales, IMG_SIZE),
    'val':MultiScaleTorsoRandomCrop(np.linspace(center, center, num=1), IMG_SIZE, centercrop=True)
}

# Spatial pipelines; the 'train' augmentations are currently disabled, so both
# phases apply the same Scale -> CenterCrop -> ToTensor -> Normalize chain.
spatial_transform = {
    'train': Compose([Scale(IMG_SIZE),
                      CenterCrop(IMG_SIZE),
                      #RandomHorizontalFlip(),
                      #ColorJitter(brightness=0.1),
                      #RandomAffine(5),
                      #RandomRotation(2.5),
                      ToTensor(1), 
                      Normalize(MEAN, STD)]),
    'val': Compose([Scale(IMG_SIZE), 
                    CenterCrop(IMG_SIZE), 
                    ToTensor(1), 
                    Normalize(MEAN, STD)])}

# Temporal pipeline: crop CLIP_LEN_AUG frames, then apply one randomly chosen
# padding transform constructed with CLIP_LEN.
# NOTE(review): presumably the padding extends the crop back to CLIP_LEN frames - confirm.
temporal_transform = Compose([
            TemporalRandomCrop(CLIP_LEN_AUG),
            TemporalRandomChoice([
                LoopPadding(CLIP_LEN),
                MirrorPadding(CLIP_LEN),
                MirrorLoopPadding(CLIP_LEN)])])

In [None]:
# Held-out test split, built with the deterministic 'val' transforms.
dataset_test = HandHygiene(
    os.path.join(VIDEO_DIR, 'test'),
    spatial_transform=spatial_transform['val'],
    openpose_transform=openpose_transform['val'],
    frames_per_clip=CLIP_LEN,
)

In [None]:
# One dataset per split; 'test' reuses the 'val' spatial transform.
# The temporal transform is intentionally not passed to any split here.
# NOTE(review): I3DDataset is not imported in the visible cells - confirm where
# it is defined before running on a fresh kernel.
dataset = {
    split: I3DDataset(os.path.join(VIDEO_DIR, split),
                      frames_per_clip=CLIP_LEN,
                      spatial_transform=spatial_transform[transform_key])
    for split, transform_key in (('train', 'train'),
                                 ('val', 'val'),
                                 ('test', 'val'))
}

# Loaders exist for 'train' (shuffled) and 'val' (ordered) only.
dataloaders = {
    split: DataLoader(dataset[split],
                      batch_size=BATCH_SIZE,
                      shuffle=do_shuffle,
                      num_workers=16)
    for split, do_shuffle in (('train', True), ('val', False))
}

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
def show_dataset(item): # item: C x D x 224 x 224
    """Plot one sample as a two-row strip of frames.

    The top row shows the frames of ``item[0]`` and the bottom row the frames
    of ``item[1]``, each row concatenated horizontally. ``item[1]`` frames are
    padded with one all-zero channel so both rows can be stacked vertically.

    Args:
        item: indexable sample whose first two elements are tensors that
            support ``transpose(0, 1)``.
            NOTE(review): presumably (rgb, flow) clips laid out C x D x H x W
            with values in [-1, 1] - confirm against the dataset class.

    Returns:
        None. Displays a matplotlib figure and prints ``item[1]``.
    """
    # Swap the first two axes so iterating yields one frame at a time.
    rgbs = item[0].transpose(0, 1)
    flows = item[1].transpose(0, 1)

    # Spatial size of a frame, used to build the zero padding channel.
    height, width = np.asarray(flows[0]).shape[1:3]
    zero_channel = np.zeros((height, width, 1))

    # np.hstack needs a real sequence; generator input is rejected by NumPy.
    # Each frame goes channels-first -> channels-last, then (x + 1) / 2.
    rgb = np.hstack([(np.asarray(frame).transpose(1, 2, 0) + 1) / 2
                     for frame in rgbs])
    flow = np.hstack([(np.dstack((np.asarray(frame).transpose(1, 2, 0), zero_channel)) + 1) / 2
                      for frame in flows])

    img = np.vstack((rgb, flow))
    plt.figure(figsize=(50, 10))
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    # NOTE(review): this prints the whole second element of the sample - confirm
    # that a full dump (rather than a shape or label) is intended.
    print(item[1])
    
# Preview a single sample from the held-out test dataset.
i = 200
phase = 'val'
show_dataset(dataset_test[i])

In [None]:
# Run configuration for the experiment cells that follow.
model_name, data_name = 'i3d', 'anesthesia'
num_classes = 1
batch_size = 16
# The temporal transforms below are sized with sample_duration, which is
# kept equal to clip_len.
clip_len = sample_duration = 16

#torch.manual_seed(100)
# Data root is resolved relative to the current working directory.
dataset_path = os.path.join(os.getcwd(), 'data')

In [None]:
# Transform configuration; this rebinds openpose_transform, spatial_transform
# and temporal_transform, replacing the versions defined in the earlier cell.
sample_size = 224
#mean=[110.63666788, 103.16065604, 96.29023126]
#std=[38.7568578, 37.88248729, 40.02898126]
mean=[128, 128, 128]
std=[128, 128, 128]

# `num` must be an integer: np.linspace rejects float sample counts such as 1e3.
scales=np.linspace(1, 1.75, num=1000)
# Midpoint of the [1, 1.75] scale range, used as the single fixed scale for 'val'.
center=((1+1.75)/2)
openpose_transform = {
    'train':MultiScaleTorsoRandomCrop(scales, sample_size),
    'val':MultiScaleTorsoRandomCrop(np.linspace(center, center, num=1), sample_size, centercrop=True)
}
# 'train' adds flip / colour jitter / affine / rotation on top of the
# Scale -> CenterCrop -> ToTensor -> Normalize chain used for 'val'.
spatial_transform = {
    'train': Compose([Scale(sample_size),
                      CenterCrop(sample_size),
                      RandomHorizontalFlip(),
                      ColorJitter(brightness=0.1),
                      RandomAffine(5),
                      RandomRotation(2.5),
                      ToTensor(1), 
                      Normalize(mean, std)]),
    'val': Compose([Scale(sample_size), 
                    CenterCrop(sample_size), 
                    ToTensor(1), 
                    Normalize(mean, std)])}
# NOTE(review): TemporalBeginCrop is not imported in the visible import cell -
# confirm it is exported by temporal_transforms and add it to that import.
temporal_transform = {
    'train': TemporalRandomChoice([
            TemporalBeginCrop(sample_duration),
            MirrorPadding(sample_duration)]),
    'val':TemporalBeginCrop(sample_duration)}

In [None]:
# One VideoFolder per split; 'test' reuses the 'val' spatial and temporal transforms.
# NOTE(review): VideoFolder is not imported in the visible cells - confirm
# where it is defined before running on a fresh kernel.
dataset = {
    split: VideoFolder(dataset_path, split=split, clip_len=clip_len,
                       spatial_transform=spatial_transform[transform_key],
                       temporal_transform=temporal_transform[transform_key])
    for split, transform_key in (('train', 'train'),
                                 ('val', 'val'),
                                 ('test', 'val'))
}

# Training batches are shuffled; validation is read in order, one clip at a time.
dataloaders = {
    split: DataLoader(dataset[split],
                      batch_size=loader_batch,
                      shuffle=do_shuffle,
                      num_workers=16)
    for split, loader_batch, do_shuffle in (('train', batch_size, True),
                                            ('val', 1, False))
}

# TEST DATA SPLITTING

In [None]:
from sklearn.model_selection import train_test_split
# for path in sorted(glob('./data/videos/simulate/clean/*.mp4'))[-17:]:
#     !mv $path /data/private/minjee-video/handhygiene/data/videos/simulate/test/clean
# Dry run over the last ten 'notclean' entries: the source path and the test
# destination are computed for each entry, but nothing is moved in this cell.
for mod in ('images',):
    # Only the 'videos' modality carries a file extension.
    if mod == 'videos':
        ext = '.mp4'
    else:
        ext = ''
    for path in sorted(glob('./data/{}/simulate/notclean/*{}'.format(mod, ext)))[-10:]:
        path = path.replace('videos', mod)
        dst = '/data/private/minjee-video/handhygiene/data/{}/simulate/test/notclean'.format(mod)

In [None]:
import shutil

# Split every entry of each label folder 80/20 into train/val and move it to
# ./data/<mod>/simulate/<phase>/<label>/.
for mod in ['images']:
    # Only the 'videos' modality carries a file extension.
    ext = '.mp4' if mod == 'videos' else ''
    for label in ['notclean']:
        VIDEOS = sorted(glob('./data/{}/simulate/{}/*{}'.format(mod, label, ext)))
        # The split was commented out, leaving VIDEO_TRAIN / VIDEO_VAL undefined
        # on a fresh kernel; the fixed random_state keeps it reproducible.
        VIDEO_TRAIN, VIDEO_VAL = train_test_split(VIDEOS, test_size=0.2, random_state=42)
        VIDEO_DATA = {'train': VIDEO_TRAIN, 'val': VIDEO_VAL}
        for phase in ['train', 'val']:
            paths = VIDEO_DATA[phase]
            dst = './data/{}/simulate/{}/{}/'.format(mod, phase, label)
            # Create the destination first so each entry is moved INTO it
            # rather than renamed to it.
            os.makedirs(dst, exist_ok=True)
            for path in paths:
                path = path.replace('videos', mod)
                # shutil.move replaces `!mv $path $dst`, which broke on paths
                # containing spaces or shell metacharacters.
                shutil.move(path, dst)

In [None]:
# Rewrite each validation entry: replace 'videos' with 'images' in the path
# and drop any file extension.
VIDEO_VAL = [
    os.path.splitext(video_path.replace('videos','images'))[0]
    for video_path in VIDEO_VAL
]