In [1]:
import os
import sys
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR, MultiStepLR
import torchvision
import numpy as np
from glob import glob
from tqdm.autonotebook import tqdm



In [2]:
from dataloader.videodataset import VideoDataset
from dataloader.handhygiene import HandHygiene
from torch.utils.data import DataLoader
from tensorboardX import SummaryWriter
from spatial_transforms import Compose
from spatial_transforms import Normalize
from spatial_transforms import Scale
from spatial_transforms import CenterCrop
from spatial_transforms import RandomHorizontalFlip
from spatial_transforms import RandomAffine
from spatial_transforms import RandomRotation
from spatial_transforms import ColorJitter
from spatial_transforms import ToTensor #ExtractSkinColor
from temporal_transforms import TemporalRandomChoice
from temporal_transforms import TemporalRandomCrop
from temporal_transforms import LoopPadding, MirrorPadding, MirrorLoopPadding
from openpose_transforms import MultiScaleTorsoRandomCrop

In [3]:
# --- Dataset location and clip geometry -------------------------------------
VIDEO_DIR='./data/images' #/data/videos
CLIP_LEN = 16
# Frame counts must be integers; floor division keeps this an int (8),
# whereas true division would produce the float 8.0.
CLIP_LEN_AUG = CLIP_LEN // 2
BATCH_SIZE = 16
NUM_CLASSES=1
NUM_WORKERS=16

# --- Spatial size and per-channel normalisation statistics ------------------
IMG_SIZE = 224
MEAN=[110.63666788, 103.16065604, 96.29023126]
STD=[38.7568578, 37.88248729, 40.02898126]
#MEAN=[128, 128, 128]
#STD=[128, 128, 128]

# --- Crop scale range --------------------------------------------------------
# Single source of truth for the scale bounds so `scales` and `center`
# cannot drift apart.
SCALE_MIN = 1
SCALE_MAX = 1.75
# `num` must be an integer: recent NumPy versions raise TypeError for a float
# such as 1e3.
scales=np.linspace(SCALE_MIN, SCALE_MAX, num=1000)
# Midpoint of the scale range.
center=((SCALE_MIN+SCALE_MAX)/2)
# Crop transforms from `openpose_transforms`: training draws from the full
# `scales` array, validation is pinned to the single `center` scale with
# `centercrop=True`.
val_scales = np.linspace(center, center, num=1)
openpose_transform = dict(
    train=MultiScaleTorsoRandomCrop(scales, IMG_SIZE),
    val=MultiScaleTorsoRandomCrop(val_scales, IMG_SIZE, centercrop=True),
)

# Per-frame spatial pipelines. Both resize and centre-crop to IMG_SIZE and end
# with ToTensor + Normalize; only the training pipeline adds the random
# flip / colour / affine / rotation steps in between.
train_spatial = Compose([
    Scale(IMG_SIZE),
    CenterCrop(IMG_SIZE),
    RandomHorizontalFlip(),
    ColorJitter(brightness=0.1),
    RandomAffine(5),
    RandomRotation(2.5),
    ToTensor(1),
    Normalize(MEAN, STD),
])
val_spatial = Compose([
    Scale(IMG_SIZE),
    CenterCrop(IMG_SIZE),
    ToTensor(1),
    Normalize(MEAN, STD),
])
spatial_transform = {'train': train_spatial, 'val': val_spatial}

# Temporal pipelines: training picks one of three padding strategies through
# TemporalRandomChoice, validation always uses LoopPadding.
# (TemporalRandomCrop(CLIP_LEN_AUG) is currently disabled for training.)
train_padding_choices = [
    LoopPadding(CLIP_LEN),
    MirrorPadding(CLIP_LEN),
    MirrorLoopPadding(CLIP_LEN),
]
temporal_transform = {
    'train': Compose([TemporalRandomChoice(train_padding_choices)]),
    'val': LoopPadding(CLIP_LEN),
}

In [4]:
# One HandHygiene dataset per split directory under VIDEO_DIR.
# Each pair is (split directory name, key into the transform dictionaries);
# the 'test' split reuses the 'val' transform set.
dataset = {
    split: HandHygiene(
        os.path.join(VIDEO_DIR, split),
        frames_per_clip=CLIP_LEN,
        temporal_transform=temporal_transform[transform_key],
        openpose_transform=openpose_transform[transform_key],
        spatial_transform=spatial_transform[transform_key],
    )
    for split, transform_key in (('train', 'train'), ('val', 'val'), ('test', 'val'))
}


100%|██████████| 10/10 [00:00<00:00, 54.36it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

Number of ./data/images/train video clips: 2025


100%|██████████| 10/10 [00:00<00:00, 59.79it/s]
100%|██████████| 2/2 [00:00<00:00, 10.89it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

Number of ./data/images/val video clips: 190


100%|██████████| 2/2 [00:00<00:00, 11.76it/s]
100%|██████████| 1/1 [00:00<00:00,  5.29it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

Number of ./data/images/test video clips: 291


100%|██████████| 1/1 [00:00<00:00,  7.06it/s]


In [5]:
# Batch loaders for the train and val splits; only the training loader
# shuffles its samples.
dataloaders = {
    split: DataLoader(
        dataset[split],
        batch_size=BATCH_SIZE,
        shuffle=(split == 'train'),
        num_workers=NUM_WORKERS,
    )
    for split in ('train', 'val')
}

In [None]:
# Smoke test: iterate the training loader for 10 full passes to check that
# every batch can be produced without error.
loader = dataloaders['train']
for epoch in range(10):
    for idx, data in enumerate(tqdm(loader)):
        # NOTE(review): `idx` is the batch index, yet it is used here to index
        # the underlying dataset directly; the sample is loaded a second time
        # and the `len(...)` result is discarded. Presumably a debugging
        # leftover - confirm whether this line is still needed.
        len(loader.dataset[idx])

In [None]:
# Iterate the selected loader once and print the epoch / batch index for each
# batch (the batch contents themselves are not inspected).
loader = dataloaders['train']
#loader = dataloaders['val']
for epoch in range(1):
    for idx, data in enumerate(tqdm(loader)):
        print('Epoch {}, idx {}'.format(epoch, idx))

In [None]:
from dataloader.io.video import read_video
# Probe a single validation sample with the project's `read_video` helper.
video_path='./data/images/val/clean/38_20190119_frames003359'
# Only the third returned value (`info`) is kept; the first two are discarded.
# NOTE(review): the meaning of the positional arguments (0, None, True) is not
# visible in this notebook - check the `read_video` signature before changing.
_,_,info = read_video(video_path, 0, None, True)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
def show_dataset(item): # item: C x D x 224 x 224
    """Plot one dataset sample as a two-row image strip and print ``item[2]``.

    The top row shows the frames of ``item[0]`` (RGB) side by side, the bottom
    row the frames of ``item[1]`` (flow), padded with an all-zero extra channel
    so they can be stacked under the RGB row.

    Parameters
    ----------
    item : sequence
        ``item[0]`` and ``item[1]`` are tensors whose first two axes are
        swapped before iterating frame by frame (assumed layout
        channel x frame x height x width - TODO confirm against the dataset).
        ``item[2]`` is printed unchanged (presumably the label).
    """
    # Swap the first two axes so that iterating yields one frame at a time.
    rgbs = item[0].transpose(0, 1)
    flows = item[1].transpose(0, 1)
    shape = np.asarray(flows[0]).shape

    # np.hstack needs a real sequence: generator arguments are deprecated
    # and rejected by newer NumPy releases, so build lists explicitly.
    # Each frame is moved to channel-last; `(x + 1) / 2` rescales values from
    # an assumed [-1, 1] range into [0, 1] for imshow (TODO confirm range).
    rgb = np.hstack([(np.asarray(frame).transpose(1, 2, 0) + 1) / 2
                     for frame in rgbs])

    # Extra zero channel appended to every flow frame so the flow row has the
    # same channel count as the RGB row (assumes 2-channel flow - TODO confirm).
    zero_channel = np.zeros((shape[1], shape[2], 1))
    flow = np.hstack([(np.dstack((np.asarray(frame).transpose(1, 2, 0), zero_channel)) + 1) / 2
                      for frame in flows])

    img = np.vstack((rgb, flow))
    plt.figure(figsize=(50, 10))
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    print(item[2])

# Display the first sample of the training split.
i=0
phase='train'
show_dataset(dataset[phase][i])

In [None]:
# Collect the mean and standard deviation of every clip's flow tensor,
# bucketed by label: label 0 goes to the "clean" lists, anything else to the
# "notclean" lists.
phase = 'train'
flows_clean_mean, flows_clean_std = [], []
flows_notclean_mean, flows_notclean_std = [], []
for i in tqdm(range(len(dataset[phase]))):
    flow = dataset[phase][i][1]
    # Map the clip index back to its source video to look up the label.
    vidx, _ = dataset[phase].video_clips.get_clip_location(i)
    label = dataset[phase].samples[vidx][2]
    if label == 0:
        mean_bucket, std_bucket = flows_clean_mean, flows_clean_std
    else:
        mean_bucket, std_bucket = flows_notclean_mean, flows_notclean_std
    mean_bucket.append(torch.mean(flow))
    std_bucket.append(torch.std(flow))

# Training

In [None]:
from train import get_models
from train import train
from torchsummary import summary

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the RGB and flow models.
i3d_rgb, i3d_flow = get_models(NUM_CLASSES, True, 170, load_pt_weights=True) # unfreeze last mix 170, 152

# Wrap both models for multi-GPU execution when more than one GPU is visible.
gpu_count = torch.cuda.device_count()
if gpu_count > 1:
    print("Let's use", gpu_count, "GPUs!")
    i3d_rgb, i3d_flow = nn.DataParallel(i3d_rgb), nn.DataParallel(i3d_flow)
i3d_rgb.to(device)
i3d_flow.to(device)

# Loss function plus per-stream optimizer / scheduler slots, filled in later.
criterion = F.binary_cross_entropy
optims = dict.fromkeys(('rgb', 'flow'))
schedulers = dict.fromkeys(('rgb', 'flow'))
feature_extract = True

def trainable_params(model, mode='rgb'):
    """Create the SGD optimizer for ``model`` and store it in ``optims[mode]``.

    Prints the name of every parameter with ``requires_grad`` set. When the
    module-level ``feature_extract`` flag is true, only those parameters are
    handed to the optimizer; otherwise all of ``model.parameters()`` is used.

    Parameters
    ----------
    model : torch.nn.Module
        Model whose parameters are to be optimised.
    mode : str
        Key under which the optimizer is stored in the global ``optims`` dict.

    Raises
    ------
    ValueError
        Raised by ``optim.SGD`` if ``feature_extract`` is true and no
        parameter requires gradients (empty parameter list).
    """
    named_trainable = [(name, param)
                       for name, param in model.named_parameters()
                       if param.requires_grad]

    print("Params to learn:")
    for name, _ in named_trainable:
        print("\t",name)

    if feature_extract:
        # Only optimise the unfrozen parameters. Previously this list was
        # built but the optimizer was still given model.parameters().
        params_to_update = [param for _, param in named_trainable]
    else:
        params_to_update = model.parameters()

    optims[mode] = optim.SGD(params_to_update, lr=1e-6, momentum=0.9, weight_decay=1e-7)

# Build one optimizer per stream (stored in `optims` by trainable_params) ...
for mode, model in (('rgb', i3d_rgb), ('flow', i3d_flow)):
    trainable_params(model, mode)

# ... then attach a scheduler to each optimizer: the learning rate is
# multiplied by 0.1 once, at milestone epoch 10.
for mode in ('rgb', 'flow'):
    schedulers[mode] = MultiStepLR(optims[mode], milestones=[10], gamma=0.1)

In [None]:
# Train both streams together for 200 epochs using the project's `train`
# routine; the models are passed as an (rgb, flow) tuple.
two_stream_models = (i3d_rgb, i3d_flow)
train(two_stream_models, dataloaders, optims, criterion, schedulers, device, num_epochs=200)

# TEST DATA SPLITTING

In [None]:
from sklearn.model_selection import train_test_split
# for path in sorted(glob('./data/videos/simulate/clean/*.mp4'))[-17:]:
#     !mv $path /data/private/minjee-video/handhygiene/data/videos/simulate/test/clean
# Pick the last 10 (in sorted order) "notclean" simulate entries per modality
# and compute the test directory they belong to.
# NOTE(review): `path` and `dst` are computed but nothing is moved or copied
# here, and `dst` is a hard-coded absolute path - confirm the intended action.
for mod in ['images']:
    ext = '' if mod != 'videos' else '.mp4'
    notclean_pattern = './data/{}/simulate/notclean/*{}'.format(mod, ext)
    held_out = sorted(glob(notclean_pattern))[-10:]
    for path in held_out:
        path = path.replace('videos', mod)
        dst = '/data/private/minjee-video/handhygiene/data/{}/simulate/test/notclean'.format(mod)

In [None]:
import shutil

# Split the remaining "simulate" entries of each label 80/20 into train/val
# and move them into ./data/<mod>/simulate/<phase>/<label>/.
for mod in ['images']:
    ext = '.mp4' if mod == 'videos' else ''
    for label in ['notclean']:
        VIDEOS = sorted(glob('./data/{}/simulate/{}/*{}'.format(mod, label, ext)))
        if len(VIDEOS) < 2:
            # Nothing (left) to split, e.g. the cell was already run and the
            # entries have been moved. train_test_split would raise here.
            print('Skipping {}/{}: found {} entries to split'.format(mod, label, len(VIDEOS)))
            continue
        # The split must be computed in this cell: without it VIDEO_TRAIN and
        # VIDEO_VAL are undefined on a fresh kernel. The fixed random_state
        # keeps the split reproducible.
        VIDEO_TRAIN, VIDEO_VAL = train_test_split(VIDEOS, test_size=0.2, random_state=42)
        VIDEO_DATA = {'train': VIDEO_TRAIN, 'val': VIDEO_VAL}
        for phase in ['train', 'val']:
            paths = VIDEO_DATA[phase]
            dst = './data/{}/simulate/{}/{}/'.format(mod, phase, label)
            # Make sure the target directory exists so every entry is moved
            # *into* it rather than renamed to it.
            os.makedirs(dst, exist_ok=True)
            for path in paths:
                path = path.replace('videos', mod)
                # Pure-Python move: unlike an interpolated shell `mv`, this is
                # safe for paths containing spaces or shell metacharacters.
                shutil.move(path, dst)

In [None]:
# Rewrite every validation path to its image-folder form: replace 'videos'
# with 'images' and drop the file extension.
VIDEO_VAL = list(map(
    lambda video_path: os.path.splitext(video_path.replace('videos','images'))[0],
    VIDEO_VAL,
))

In [None]:
import time

# Measure data-loading latency of the training loader:
#   time_index_all[idx]    - waiting time for batch `idx`, summed over epochs
#   time_loader_all[epoch] - wall-clock duration of one full pass
N_epoch = 10

loader = dataloaders['train']

# One accumulator per batch index, sized from the loader itself instead of a
# hard-coded count, so every `idx` seen in the loop has a slot.
time_index_all = {batch_idx: 0 for batch_idx in range(len(loader))}
time_loader_all = {epoch_idx: 0 for epoch_idx in range(N_epoch)}


for epoch in range(N_epoch):
    time_start_loader = time.time()
    for idx, data in enumerate(tqdm(loader)):
        # Batch 0 is skipped: no `time_start` reference exists for the first
        # batch of an epoch (and it would include worker start-up time).
        if idx != 0:
            time_end = time.time()
            time_idx = time_end-time_start
            print (idx, ': %.4f' % (time_idx))
            time_index_all[idx] += time_idx
        # Reference point for the next batch's waiting time.
        time_start = time.time()

    time_end_loader = time.time()
    time_loader = time_end_loader - time_start_loader
    time_loader_all[epoch] = time_loader
