In [1]:
from __future__ import print_function, division
from collections import OrderedDict
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from torch import nn, optim, as_tensor
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import sys

from PIL import Image
import os
import pickle
from torch.utils.data import Dataset
from torchvision import transforms as T
from torchvision.models.resnet import resnet18, resnet34, resnet50
import torch.backends.cudnn as cudnn

import matplotlib.pyplot as plt
%matplotlib inline

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Matplotlib created a temporary config/cache directory at /tmp/pbs.2740561.pbsha.ib.sockeye/matplotlib-yfflptp4 because the default path (/home/jovyan/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [2]:
num_gpu = torch.cuda.device_count()
print('num GPUs: {}'.format(num_gpu))

num GPUs: 2


In [3]:
kin_data = pd.read_csv('dataset_path/inputs.csv', index_col = 0)
output_data  = pd.read_csv('dataset_path/outputs.csv', index_col = 0)
frame_data = np.load('dataset_path/video_frames.npy') 
seg_data = pd.read_csv('dataset_path/Segmentation.csv', index_col = 0)

In [4]:
Phase_dict = {output_data['Phase'].unique()[i]:i for i in range(len(output_data['Phase'].unique()))}
Step_dict = {output_data['Step'].unique()[i]:i for i in range(len(output_data['Step'].unique()))}
Verb_Left_dict = {output_data['Verb_Left'].unique()[i]:i for i in range(len(output_data['Verb_Left'].unique()))}
Verb_Right_dict = Verb_Left_dict

print('Output Mapping Dictionary: \n')
print('Phase Dict:', Phase_dict)
print('Step Dict:', Step_dict)
print('Verb Left Dict:', Verb_Left_dict)
print('Verb Right Dict:', Verb_Right_dict)


output_data['Phase'] = output_data['Phase'].map(Phase_dict)
output_data['Step'] = output_data['Step'].map(Step_dict)
output_data['Verb_Left'] = output_data['Verb_Left'].map(Verb_Left_dict)
output_data['Verb_Right'] = output_data['Verb_Right'].map(Verb_Right_dict)

Output Mapping Dictionary: 

Phase Dict: {'Idle': 0, 'Transfer Left to Right': 1, 'Transfer Right to Left': 2}
Step Dict: {'Idle': 0, 'Block 1 L2R': 1, 'Block 2 L2R': 2, 'Block 3 L2R': 3, 'Block 4 L2R': 4, 'Block 5 L2R': 5, 'Block 6 L2R': 6, 'Block 1 R2L': 7, 'Block 2 R2L': 8, 'Block 3 R2L': 9, 'Block 4 R2L': 10, 'Block 5 R2L': 11, 'Block 6 R2L': 12}
Verb Left Dict: {'Idle': 0, 'Catch': 1, 'Extract': 2, 'Hold': 3, 'Drop': 4, 'Touch': 5, 'Insert': 6}
Verb Right Dict: {'Idle': 0, 'Catch': 1, 'Extract': 2, 'Hold': 3, 'Drop': 4, 'Touch': 5, 'Insert': 6}


In [5]:
##### Input Features #####
kin_data.head()

Unnamed: 0,px_left,py_left,pz_left,q1_left,q2_left,q3_left,q4_left,ape_angle_left,lin_velo_x_left,lin_velo_y_left,...,q2_right,q3_right,q4_right,ape_angle_right,lin_velo_x_right,lin_velo_y_right,lin_velo_z_right,ang_velo_x_right,ang_velo_y_right,ang_velo_z_right
0,-1.49995,1.0001,2.68221e-07,-0.353608,-0.35359,-0.146429,0.853547,15.0,-4e-06,-4e-06,...,0.353626,0.146404,0.85356,15.0,-2e-06,-4e-06,-3e-06,0.00065,-0.000821,0.000337
1,-1.49995,1.0001,3.57628e-07,-0.353608,-0.353591,-0.146429,0.853547,15.0,-4e-06,-4e-06,...,0.353627,0.146404,0.85356,15.0,-2e-06,-4e-06,-3e-06,0.00065,-0.000821,0.000337
2,-1.49995,1.0001,1.78814e-07,-0.353608,-0.353591,-0.146429,0.853547,15.0,-3e-06,-4e-06,...,0.353627,0.146404,0.85356,15.0,-2e-06,-4e-06,-3e-06,0.000649,-0.000821,0.000338
3,-1.49995,1.0001,1.78814e-07,-0.353608,-0.353591,-0.146429,0.853547,15.0,-3e-06,-4e-06,...,0.353627,0.146404,0.85356,15.0,-2e-06,-4e-06,-3e-06,0.000648,-0.000823,0.00034
4,-1.49995,1.0001,5.96046e-08,-0.353608,-0.353591,-0.146429,0.853547,15.0,-3e-06,-4e-06,...,0.353627,0.146404,0.85356,15.0,-2e-06,-4e-06,-1e-06,0.000681,-0.000816,0.000321


In [6]:
##### Output Features #####
output_data.head()

Unnamed: 0,Phase,Step,Verb_Left,Verb_Right
0,0,0,0,0
1,0,0,0,0
2,0,0,0,0
3,0,0,0,0
4,0,0,0,0


In [7]:
##### frames #####
print('Total No. of Video Frames: ',len(frame_data))

Total No. of Video Frames:  378522


In [8]:
##### Segmentation map #####
seg_data.head()

Unnamed: 0,file_path,folder_name
0,/arc/project/st-anaray02-1/skumar40/petraw_dat...,1
1,/arc/project/st-anaray02-1/skumar40/petraw_dat...,1
2,/arc/project/st-anaray02-1/skumar40/petraw_dat...,1
3,/arc/project/st-anaray02-1/skumar40/petraw_dat...,1
4,/arc/project/st-anaray02-1/skumar40/petraw_dat...,1


In [9]:
class GeneralVideoDataset(Dataset):
    """Dataset Class for Loading Video"""

    def __init__(
        self,
        seg=None,
        frame=None,
        kin=None,
        output=None,
        time_depth=None,
        train= False,
    ):
        """
        Args:
            seg (data_frame/numpy array):  DF during test time /numpy array containing all
                                            imagenet encoded frames (2048) during training
            frame (data_frame/numpy array): DF during test time /numpy array containing all
                                            imagenet encoded frames (2048) during training
            kin (data_frame): pandas DF containing Kinematic data
            output (data_frame): pandas DF containing Output data
            train (bool): False if using for Test data/ True for Train data
        """
        self.kin_df = kin
        self.output_df = output
        self.train = train
        self.time_depth = time_depth
        
        ### For test dataset ###
        if not self.train:
            self.frame_df = frame
            self.seg_df = seg
            self.transform = T.Compose([T.Resize((224, 224)),
                                        T.ToTensor()])
            assert len(self.seg_df) == len(self.frame_df) == len(self.kin_df) == len(self.output_df)
        ### For train dataset ###
        else:
            self.seg_np = seg
            self.frame_np = frame
            assert len(self.seg_np) == len(self.frame_np) == len(self.kin_df) == len(self.output_df)

    def __len__(self):
        return len(self.kin_df)

    def read_image(self, df, idx):
        # Load the image file
        image = Image.open(df.iloc[idx]['file_path'])
        x = self.transform(image)

        return x

    def __getitem__(self, idx):
        
        if not self.train:
            video_frame = self.read_image(self.frame_df, idx)
#             seg_map = self.read_image(self.seg_df, idx)
        else:
            video_frame = torch.from_numpy(self.frame_np[idx]).float()
#             seg_map = torch.from_numpy(self.seg_np[idx]).float()

        kinematic = torch.from_numpy(self.kin_df.iloc[idx].to_numpy()).float().unsqueeze_(0)
        output = torch.from_numpy(self.output_df.iloc[idx].to_numpy())

        sample = {
            "video": video_frame,
#             "segmenation": seg_map,
            "Kinematic": kinematic,
            "output": output,
        }

        return sample

In [10]:
# class NormalizeLayer(torch.nn.Module):
#     """Standardize the channels of a batch of images by subtracting the dataset mean
#       and dividing by the dataset standard deviation.

#       In order to certify radii in original coordinates rather than standardized coordinates, we
#       add the Gaussian noise _before_ standardizing, which is why we have standardization be the first
#       layer of the classifier rather than as a part of preprocessing as is typical.
#       """

#     def __init__(self, means, sds):
#         """
#         :param means: the channel means
#         :param sds: the channel standard deviations
#         """
#         super(NormalizeLayer, self).__init__()
#         self.means = torch.tensor(means).cuda()
#         self.sds = torch.tensor(sds).cuda()

#     def forward(self, input: torch.tensor):
#         (batch_size, num_channels, height, width) = input.shape
#         means = self.means.repeat((batch_size, height, width, 1)).permute(0, 3, 1, 2)
#         sds = self.sds.repeat((batch_size, height, width, 1)).permute(0, 3, 1, 2)
#         return (input - means)/sds

# class Identity(nn.Module):
#     def __init__(self):
#         super(Identity, self).__init__()
        
#     def forward(self, x):
#         return x
    
    
# norm_layer = NormalizeLayer(means=[0.485, 0.456, 0.406],
#                             sds=[0.229, 0.224, 0.225])

# resnet50_imagenet = resnet50(pretrained=False).cuda()
# resnet50_img_weight_path = 'pretrained_model/checkpoint.pth.tar'
# checkpoint = torch.load(resnet50_img_weight_path)
# new_state_dict = OrderedDict()

# for k, v in checkpoint['state_dict'].items():
#     if k[:1]!=str(0):
#         name = k[9:] # remove `module.`
#         new_state_dict[name] = v

# resnet50_imagenet.load_state_dict(new_state_dict)
# resnet50_imagenet.fc = Identity()
# resnet50_imagenet = torch.nn.Sequential(norm_layer, resnet50_imagenet)

# for param50 in resnet50_imagenet.parameters():
#     param50.requires_grad = False
    
# print('resnet 50 loaded and layers freezed')

In [11]:
class petraw_model(nn.Module):
    def __init__(self, imagenet_extractor=None, input_size=28, hidden_layer_size=64, hidden_size=32, output_size=None, train_p=True):
        super(petraw_model, self).__init__()
        self.train_p = train_p
        if not self.train_p:
            self.imagenet_extractor = imagenet_extractor
        
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size)

        self.linear = nn.Linear(hidden_layer_size, hidden_size)
        self.relu = nn.ReLU()
        
        ### Video Layers
        self.fc_ex1 = nn.Linear(2048, 512)
        self.fc_ex2 = nn.Linear(512, 256)
        self.fc_ex3 = nn.Linear(256, 64)
        

        ## Video Frame-> Imagenet_extractor-> 2048 -> Fc_ex1 -> Fc_ex2 -> Fc_ex3 -> Ourt 1
        
        ### output-layers
        self.fc_F = nn.Linear(hidden_size + 64, output_size)

#         self.fc3 = nn.Linear(hidden_size + 64, 13)

#         self.fc4 = nn.Linear(hidden_size + 64, 7)

#         self.fc5 = nn.Linear(hidden_size + 64, 7)


    def forward(self, input_seq, input_image):
        
        lstm_out, self.hidden_cell = self.lstm(input_seq)
        lin_out = self.linear(lstm_out.view(len(input_seq), -1))
        lin_out = self.relu(lin_out)
        
        if not self.train_p:
            x = self.imagenet_extractor(input_image)
        else:
            x = input_image
        x = self.fc_ex1(x)
        x = self.relu(x)
        x = self.fc_ex2(x)
        x = self.relu(x)
        x = self.fc_ex3(x)
        x = self.relu(x)
        
        y_f = self.fc_F(torch.cat((lin_out, x), dim=1))

        return y_f


In [12]:
kin_train, kin_test, frame_train, frame_test, seg_train, seg_test, output_train, output_test = train_test_split(kin_data, frame_data, seg_data, output_data, test_size=0.3, shuffle=False)

In [13]:
bs = 64
train_data = GeneralVideoDataset(kin=kin_train, 
                                 frame=frame_train, 
                                 seg=seg_train, 
                                 output=output_train,
                                 train=True)
train_loader = DataLoader(train_data, bs, shuffle=False, num_workers=1, pin_memory=True)

### test acts as validation ###
test_data = GeneralVideoDataset(kin=kin_test, 
                                frame=frame_test, 
                                seg=seg_test, 
                                output=output_test,
                                train=True)
test_loader = DataLoader(test_data, bs, shuffle=False, num_workers=1, pin_memory=True)

In [14]:
def calculate_loss(out, y, criterion):
    l1 = torch.tensor(0.0).cuda()
    for (a,b) in zip(out, y.T):
        l1 += criterion(a, b)
    return l1

def calculate_correct(out, y):
    correct = []
    for (a,b) in zip(out, y.T):
        pred = nn.Softmax(dim=1)(a).max(dim=1)[1]
        correct.append(torch.eq(pred, b).sum().item())
    return correct



def train(epochs, train_loader, test_loader, model_list, criterion, print_acc_epoch, name):
    for i in range(epoch):
        
        for m in range(len(model_list)):
            model_list[m][0] = model_list[m][0].train()
            
        epoch_loss_train = [0, 0, 0, 0]
        epoch_loss_test = [0, 0, 0, 0]

        total_train = 0
        correct_train = [0, 0, 0, 0]
        total_test = 0
        correct_test = [0, 0, 0, 0]

        for batch_idx, data_dict in enumerate(train_loader):
            sys.stdout.write("\rBatch: {}".format(batch_idx+1))
            sys.stdout.flush()
            
            X1, X2, y = data_dict['Kinematic'].cuda(), data_dict['video'].cuda(), data_dict['output'].cuda()
            
            output_list = []
            for m in range(len(model_list)):
                model, optimizer = model_list[m]
                out = model(X1, X2)
                loss = criterion(out, y.T[m])
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                epoch_loss_train[m] += loss.item()
                output_list.append(out.detach())
                model_list[m] = [model, optimizer]

            if (i+1)%print_acc_epoch==0:
                total_train += len(y)
                new_corr = calculate_correct(output_list, y)
                correct_train = [correct_train[i] + new_corr[i] for i in range(len(correct_train))]

        epoch_loss_train = [round(epoch_loss_train[k]/(batch_idx+1), 2) for k in range(len(epoch_loss_train))]
    
        model_save_dir = 'trained_models/'
        if not os.path.isdir(model_save_dir):
            os.makedirs(model_save_dir)
            
        model_save_path = model_save_dir + name + '.pth.tar'
        torch.save(model_list, model_save_path)

        if (i+1)%print_acc_epoch!=0:
            sys.stdout.write(' || Epoch: {}, Loss: {}\n'.format(i+1, epoch_loss_train))

        if (i+1)%print_acc_epoch==0:
            with torch.no_grad():
                
                for batch_idx, data_dict in enumerate(test_loader):

                    output_list = []
                    for m in range(len(model_list)):
                        model, optimizer = model_list[m]
                        model.eval()
                        out = model(X1, X2)
                        loss = criterion(out, y.T[m])

                        epoch_loss_test[m] += loss.item()
                        output_list.append(out.detach())

                    total_test += len(y)
                    new_corr = calculate_correct(output_list, y)
                    correct_test = [correct_test[i] + new_corr[i] for i in range(len(correct_test))]

            epoch_loss_test = [round(epoch_loss_test[k]/(batch_idx+1), 2) for k in range(len(epoch_loss_test))]
            test_acc = [round((correct_test[i]/total_test), 2) * 100 for i in range(len(correct_test))]
            train_acc = [round((correct_train[i]/total_train), 2) * 100 for i in range(len(correct_train))]

            sys.stdout.write('\nEpoch: {}\n'.format(i+1))
            sys.stdout.write('Train tasks ACC: {}\n'.format(train_acc))
            sys.stdout.write('Test tasks ACC: {}\n'.format(test_acc))
            sys.stdout.write('Train Loss: {}\n'.format(epoch_loss_train))
            sys.stdout.write('Test Loss: {}\n'.format(epoch_loss_test))




In [15]:
models_opt_list = []
output_size_list = [3,13,7,7]
models_lr_list = [0.01, 0.001, 0.01, 0.01]

for i in range(4):
    kbm = petraw_model(output_size = output_size_list[i]).cuda()
    kbm = nn.DataParallel(kbm)
    cudnn.benchmark = True

    optimizer = optim.Adam(params = kbm.parameters(),
                           lr = models_lr_list[i])
    
    models_opt_list.append([kbm, optimizer])
    

In [16]:
epoch = 100
criterion = torch.nn.CrossEntropyLoss(reduction='mean')


train(epoch, train_loader, test_loader, models_opt_list, criterion, 5, 'multi_model_training')

Batch: 4141 || Epoch: 1, Loss: [2.33, 2.32, 0.63, 0.88]
Batch: 4141 || Epoch: 2, Loss: [0.5, 2.01, 0.42, 0.38]
Batch: 4141 || Epoch: 3, Loss: [0.42, 1.76, 0.39, 0.35]
Batch: 4141 || Epoch: 4, Loss: [0.38, 1.5, 0.36, 0.31]
Batch: 4141
Epoch: 5
Train tasks ACC: [87.0, 50.0, 89.0, 91.0]
Test tasks ACC: [60.0, 100.0, 100.0, 100.0]
Train Loss: [0.36, 1.31, 0.34, 0.29]
Test Loss: [0.65, 0.57, 0.0, 0.15]
Batch: 4141 || Epoch: 6, Loss: [0.35, 1.19, 0.34, 0.28]
Batch: 4141 || Epoch: 7, Loss: [0.33, 1.11, 0.31, 0.27]
Batch: 4141 || Epoch: 8, Loss: [0.31, 1.03, 0.3, 0.26]
Batch: 4141 || Epoch: 9, Loss: [0.3, 0.98, 0.29, 0.25]
Batch: 4141
Epoch: 10
Train tasks ACC: [90.0, 65.0, 91.0, 92.0]
Test tasks ACC: [100.0, 100.0, 100.0, 100.0]
Train Loss: [0.29, 0.94, 0.28, 0.25]
Test Loss: [0.18, 0.16, 0.0, 0.2]
Batch: 4141 || Epoch: 11, Loss: [0.28, 0.89, 0.28, 0.24]
Batch: 4141 || Epoch: 12, Loss: [0.27, 0.86, 0.28, 0.24]
Batch: 4141 || Epoch: 13, Loss: [0.27, 0.83, 0.26, 0.23]
Batch: 4141 || Epoch: 14, 

Exception in thread Thread-752518:
Traceback (most recent call last):
  File "/arc/project/st-anaray02-1/skumar40/jupyter/me_jpy/lib/python3.6/threading.py", line 916, in _bootstrap_inner
    self.run()
  File "/arc/project/st-anaray02-1/skumar40/jupyter/me_jpy/lib/python3.6/threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "/arc/project/st-anaray02-1/skumar40/jupyter/me_jpy/lib/python3.6/site-packages/torch/utils/data/_utils/pin_memory.py", line 25, in _pin_memory_loop
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/arc/project/st-anaray02-1/skumar40/jupyter/me_jpy/lib/python3.6/multiprocessing/queues.py", line 113, in get
    return _ForkingPickler.loads(res)
  File "/arc/project/st-anaray02-1/skumar40/jupyter/me_jpy/lib/python3.6/site-packages/torch/multiprocessing/reductions.py", line 282, in rebuild_storage_fd
    fd = df.detach()
  File "/arc/project/st-anaray02-1/skumar40/jupyter/me_jpy/lib/python3.6/multiprocessing/resource_sha

KeyboardInterrupt: 