# **Clone the PyTorch I3D model**
https://github.com/piergiaj/pytorch-i3d.git

In [1]:
# ! git clone https://github.com/piergiaj/pytorch-i3d.git

In [2]:
! pwd

/home/pjsimmon/ECE285_Final_Dataset/drive-pytorch-i3d


In [3]:
# File name of the kinect_ir training split list; a later cell appends it to
# the data root to build path_to_train_list.
new_kinect_ir_train_list = "kinect_ir_processed_list_rgb_split0train.txt"

# **Test Functionality of PyTorch I3D model**

### Starting with charades_dataset.py:


In [4]:
import torch
import torch.utils.data as data_utl
from torch.utils.data.dataloader import default_collate

import numpy as np
import json
import csv
import h5py
import random
import os
import os.path

import cv2

In [5]:
# Testing getting a tensor from one of our .npy files:
def video_to_tensor(pic):
    """Turn a video stored as a NumPy array into a channel-first torch tensor.

    The axes are permuted from (T x H x W x C) to (C x T x H x W) and the
    permuted array is handed to ``torch.from_numpy``.

    Args:
         pic (numpy.ndarray): Video with axes ordered T x H x W x C.
    Returns:
         Tensor: The same video with axes ordered C x T x H x W.
    """
    channel_first = pic.transpose(3, 0, 1, 2)
    return torch.from_numpy(channel_first)



## Updated with new path to kinect_ir .npy files 

In [6]:
# Old path to .npy files
# NOTE(review): the heading above says the path was updated, but this cell is
# still labelled "old" and all three existence checks below printed False in
# the recorded run — confirm which location is the current one.
root = "../../ece285_sp20_team01/kinect_ir/output/"
path_to_npy_files = root + "video_npy/"
# Training list = data root + the list file name chosen in an earlier cell.
path_to_train_list = root + new_kinect_ir_train_list
# One concrete clip used as a smoke test for loading a .npy file.
path_to_test_npy = "../../ece285_sp20_team01/kinect_ir/output/video_npy/run1_2018-05-03-14-08-31.kinect_ir.mp4_000500_000557.npy"

# Sanity check: each line prints True only if the path is reachable from the
# current working directory.
print(os.path.exists(path_to_npy_files))
print(os.path.exists(path_to_train_list))
print(os.path.exists(path_to_test_npy))


False
False
False


In [7]:
# Smoke test: load one clip and show its array shape.
# NOTE(review): in the recorded run this cell raised PermissionError for
# path_to_test_npy, so the notebook does not survive "Restart & Run All" until
# the data path is readable.
test_npy = np.load(path_to_test_npy)
print(test_npy.shape)

PermissionError: [Errno 13] Permission denied: '../../ece285_sp20_team01/kinect_ir/output/video_npy/run1_2018-05-03-14-08-31.kinect_ir.mp4_000500_000557.npy'

## Adapt make_dataset and charades_dataset to our dataset, Drive&Act:

In [None]:
# Adaptation of make_dataset to use our train_list rather than a .json file:

# Replace charades_dataset.py with this: 
def make_our_dataset(train_list, min_frames=64):
    """Parse a Drive&Act clip list into ``(clip_path, activity, num_frames)`` tuples.

    Each non-blank line of ``train_list`` holds a clip path followed by an
    activity label, separated by whitespace. The clip path embeds its frame
    range after the ``.mp4_`` marker (``..._<start>_<end>``); the frame count
    is computed as ``end - start``.

    Args:
        train_list (str): Path to the text file listing the clips.
        min_frames (int): Clips with fewer frames than this are skipped.
            Defaults to 64, the clip length sampled by the dataset class.

    Returns:
        list[tuple[str, str, int]]: One entry per kept clip, in file order.
        The activity label is kept as the string read from the file.

    Raises:
        ValueError: If a non-blank line has no label field, or its clip path
            does not contain a parsable ``.mp4_<start>_<end>`` frame range.
    """
    frame_marker = ".mp4_"
    dataset = []

    with open(train_list, 'r') as f:
        for line_no, line in enumerate(f, start=1):
            # split() with no argument tolerates tabs and repeated spaces,
            # which split(" ") would turn into empty or merged fields.
            fields = line.split()
            if not fields:
                continue  # blank line
            if len(fields) < 2:
                raise ValueError(
                    "{}:{}: expected '<clip_path> <activity>', got {!r}".format(
                        train_list, line_no, line.strip()))

            vid_path, activity = fields[0], fields[1]

            # The frame range is encoded in the clip name: ....mp4_<start>_<end>
            try:
                frame_range = vid_path.split(frame_marker)[1].split("_")
                start_frame = int(frame_range[0])
                end_frame = int(frame_range[1])
            except (IndexError, ValueError):
                raise ValueError(
                    "{}:{}: cannot read frame range from clip path {!r}".format(
                        train_list, line_no, vid_path))

            num_frames = end_frame - start_frame
            if num_frames < min_frames:
                continue

            dataset.append((vid_path, activity, num_frames))

    return dataset

In [None]:
# Parse the training list into (clip_path, activity, num_frames) tuples and
# report how many clips survived the minimum-length filter.
train_data = make_our_dataset(path_to_train_list)
print(len(train_data))

# Show the first few parsed rows as a quick format check.
for row in train_data[0:5]:
    print(row)

## Helper function to balance the training class samples:

In [None]:
def sample_data(train_data, num_labels=39):
    """Count how many rows of ``train_data`` belong to each activity class.

    Args:
        train_data (iterable): Rows whose second element (``row[1]``) is the
            activity label, given as an int or an int-like string.
        num_labels (int): Number of activity classes. Defaults to 39.

    Returns:
        numpy.ndarray: Float array of length ``num_labels`` where entry ``k``
        is the number of rows labelled ``k``.

    Raises:
        IndexError: If a label falls outside ``[0, num_labels)``. Negative
            labels are rejected explicitly; NumPy indexing would otherwise
            silently count them from the end of the array.
    """
    sample_labels = np.zeros(num_labels)

    for row in train_data:
        label = int(row[1])
        if not 0 <= label < num_labels:
            raise IndexError(
                "activity label {} is outside [0, {})".format(label, num_labels))
        sample_labels[label] += 1

    return sample_labels

In [None]:
# Need to make sure each activity appears at least once in the training set:
# count the clips per activity class so empty classes show up as 0.

train_sampled_data = sample_data(train_data)
#val_sampled_data = sample_data(val_data)

# One count per class index; the split cell below reads these totals.
print(train_sampled_data)
#print(train_sampled_data + val_sampled_data)

In [None]:
# Need to create a *representative* training set: fill each class's training
# quota first, then send the remaining rows to validation or test at random.
import random

all_data = train_data
num_classes = 39

# Per-class count of rows already assigned to the training split.
data_dict = {class_id: 0 for class_id in range(num_classes)}

train_data_sample, val_data_sample, test_data_sample = [], [], []

# the train/val/test split will be 65/25/10
for row in all_data:
    activity = int(row[1])
    if data_dict[activity] < 0.65*train_sampled_data[activity]:
        # This class is still below 65% of its rows: keep the row for training.
        train_data_sample.append(row)
        data_dict[activity] += 1
        continue

    # Remaining rows: random 25/35 chance of validation, otherwise (10/35) test.
    prob = random.random()
    overflow_split = val_data_sample if prob < float(25/35) else test_data_sample
    overflow_split.append(row)

# Split sizes: train, val, test.
for split in (train_data_sample, val_data_sample, test_data_sample):
    print(len(split))

#print(train_data_sample)

train_list_sampled_data = sample_data(train_data_sample)
val_list_sampled_data = sample_data(val_data_sample)
test_list_sampled_data = sample_data(test_data_sample)

# Per-class counts of each split: train, val, test.
for class_counts in (train_list_sampled_data, val_list_sampled_data, test_list_sampled_data):
    print(class_counts)

In [None]:
# Create a train, val, and test list file from each of the sampled splits:

'''
def write_data_to_list(filename, data_list):
    with open(filename, 'w') as f: 
        for row in data_list: 
            f.write(row[0] + " " + row[1])
            f.write("\n")
    
    
write_data_to_list("new_kinect_ir_train.txt", train_data_sample)
write_data_to_list("new_kinect_ir_val.txt", val_data_sample)
write_data_to_list("new_kinect_ir_test.txt", test_data_sample)
'''

In [None]:
# Sample code to create the WeightedRandomSampler
import torch
from torch.utils.data.sampler import Sampler

numDataPoints = 1000
data_dim = 5
bs = 100

# Create dummy data with class imbalance 9 to 1.
# torch.zeros is used because torch.FloatTensor(n, d) returns uninitialized memory.
data = torch.zeros(numDataPoints, data_dim)
target = np.hstack((np.zeros(int(numDataPoints * 0.9), dtype=np.int32),
                    np.ones(int(numDataPoints * 0.1), dtype=np.int32)))

# Number of samples per class, ordered by class id.
class_sample_count = np.array(
    [len(np.where(target == t)[0]) for t in np.unique(target)])
# Inverse-frequency weight per class, then one weight per sample.
weight = 1. / class_sample_count
samples_weight = np.array([weight[t] for t in target])

samples_weight = torch.from_numpy(samples_weight)
# Fixed: the result used to be assigned to the misspelled name `samples_weigth`,
# so the conversion was silently discarded.
samples_weight = samples_weight.double()
sampler = torch.utils.data.sampler.WeightedRandomSampler(samples_weight, len(samples_weight))

print(class_sample_count)
print(weight)
print(samples_weight)

In [None]:
# Logic: we will upsample the small classes and downsample the large classes.
# The classes that are 0 we never see in our train or test sets.

# Link to sampler code: https://discuss.pytorch.org/t/how-to-handle-imbalanced-classes/11264/2

def training_sampler(train_list):
    """Build inverse-frequency sampling weights and a sampler for ``train_list``.

    Args:
        train_list: Rows whose second element (``row[1]``) is the activity label.

    Returns:
        tuple: ``(train_weight, samples_weight, sampler)`` where
        ``train_weight`` is a list with one weight per class (``1 / count``,
        or 0 for a class with no rows), ``samples_weight`` is a double tensor
        holding the weight of each row's class, and ``sampler`` is a
        ``WeightedRandomSampler`` built from those weights that draws
        ``len(samples_weight)`` samples.
    """
    class_counts = sample_data(train_list)

    # Rare classes get large weights; classes that never occur get weight 0.
    train_weight = [0 if count == 0 else 1. / count for count in class_counts]

    # Create the per-sample weights for the WeightedRandomSampler:
    # every row inherits the weight of its activity class.
    per_row_weight = [train_weight[int(row[1])] for row in train_list]

    samples_weight = torch.from_numpy(np.array(per_row_weight)).double()
    sampler = torch.utils.data.sampler.WeightedRandomSampler(samples_weight, len(samples_weight))

    return train_weight, samples_weight, sampler

In [None]:
train_weights, train_samples_weights, train_sampler = training_sampler(train_data_sample)
print(train_weights)
print()
print(len(train_samples_weights), len(train_data_sample))
print(train_samples_weights)
print()
print(train_sampler)

In [None]:
# Randomly select 64 frames from the passed in npy file. 

def random64frames(npy_file):
    """Load a clip from ``npy_file`` and return a random window of 64 consecutive frames.

    The saved array is reshaped to ``(num_frames, 224, 224, 1)``, so its first
    axis must stack whole 224-row frames. Clips with 64 frames or fewer are
    returned in full, starting at frame 0.

    Args:
        npy_file: Path of the ``.npy`` file to load.

    Returns:
        numpy.ndarray: Array of shape ``(min(num_frames, 64), 224, 224, 1)``.

    Raises:
        ValueError: If the first axis is not a multiple of 224, or the array
            cannot be reshaped to ``(num_frames, 224, 224, 1)``.
    """
    frame_size = 224
    clip_len = 64

    img_array = np.load(npy_file)

    # Integer frame count; a float here would break both reshape and randint.
    num_frames, leftover_rows = divmod(img_array.shape[0], frame_size)
    if leftover_rows:
        raise ValueError(
            "first axis of {!r} has length {}, which is not a multiple of {}".format(
                npy_file, img_array.shape[0], frame_size))

    img_array = img_array.reshape(num_frames, frame_size, frame_size, 1)

    # Number of valid start offsets beyond 0; non-positive means the clip is
    # too short to choose from, so the window starts at the first frame.
    choices = num_frames - clip_len
    rand_start = random.randint(0, choices) if choices > 0 else 0

    return img_array[rand_start:rand_start + clip_len, :, :, :]

In [None]:
class DriveAndAct(data_utl.Dataset):
    """Drive&Act clip dataset yielding fixed-length clips with one-hot labels.

    Each item is a random window of up to 64 consecutive frames from one
    ``.npy`` clip, scaled from [0, 255] to [-1, 1], together with a one-hot
    activity vector of length 39. Only the 'ir' and 'depth' modes load data;
    'rgb' and 'flow' are placeholders.

    Args:
        train_list (str): Clip list file parsed with ``make_our_dataset``.
        mode (str): 'ir', 'depth', 'rgb' or 'flow'.
        root (str): Directory containing the ``.npy`` clips. When empty, the
            built-in default directory for the mode is used.
        transforms: Stored on the instance only; currently not applied.
    """

    NUM_CLASSES = 39  # length of the one-hot label vector
    CLIP_LEN = 64     # frames per sampled clip

    # mode -> (default clip directory, channel count assigned to that mode)
    _SOURCES = {
        'ir': ("../../ece285_sp20_team01/kinect_ir/output/", 1),
        'depth': ("/home/yuj010/ECE285/output/", 3),
    }
    # Modes that are named in the notebook but have no data directory yet.
    _UNIMPLEMENTED_MODES = ('rgb', 'flow')

    def __init__(self, train_list, mode, root, transforms=None):

        self.data = make_our_dataset(train_list)
        self.mode = mode # FLOW, RGB, gray
        self.root = root # root_dir that points to mode .npy files
        self.transforms = transforms # which transforms to perform (not applied yet)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (clip, target) where clip is the tensor produced by
            ``video_to_tensor`` and target is a one-hot float tensor of
            length ``NUM_CLASSES``.

        Raises:
            NotImplementedError: For the placeholder modes 'rgb' and 'flow'.
            ValueError: For any other unknown mode.
        """
        vid, label, _ = self.data[index]

        # Fail with a clear message instead of continuing with no images loaded.
        if self.mode in self._UNIMPLEMENTED_MODES:
            raise NotImplementedError(
                "mode {!r} has no data directory configured yet".format(self.mode))
        if self.mode not in self._SOURCES:
            raise ValueError(
                "did not specify correct mode for data: {!r}".format(self.mode))

        default_dir, channels = self._SOURCES[self.mode]
        base_dir = self.root if self.root else default_dir
        # os.path.join(base_dir, '') only guarantees a trailing separator, so
        # the default directories produce exactly the original concatenation.
        imgs = np.load(os.path.join(base_dir, '') + vid + '.npy')
        imgs = imgs.astype(np.float32)
        imgs = (imgs/255.)*2-1 # normalization

        # Randomly select 64 frames (or the whole clip when it is shorter):
        num_frames = int(imgs.shape[0])
        choices = num_frames - self.CLIP_LEN
        rand_start = random.randint(0, choices) if choices > 0 else 0
        clip = imgs[rand_start:rand_start + self.CLIP_LEN, :, :, :]

        # Single-channel clips are repeated to 3 channels for the RGB I3D network.
        if channels == 1:
            clip = np.repeat(clip, 3, axis=3)

        labels = np.zeros((self.NUM_CLASSES,), np.float32)
        labels[int(label)] = 1

        return video_to_tensor(clip), torch.from_numpy(labels)

    def __len__(self):
        return len(self.data)

## **Test functionality of make_our_dataset and DriveAndAct dataset class**


In [None]:
# Create two sample lists: train_list and eval_list: (already done in google drive)
'''
import random
len_ir_dataset = len(drive_data)

original_list = "../drive-pytorch-i3kinect_ir_processed_list_split0train.txt"
with open(original_list, 'r') as f1: 
    with open('../sample_train_list.txt', 'w') as f2: 
        with open('../sample_val_list.txt', 'w') as f3:
            with open('../sample_test_list.txt', 'w') as f4:
                Lines = f1.readlines()
                random.shuffle(Lines)
                for i,line in enumerate(Lines):
                    if i < int(len_ir_dataset * (4 / 5)):
                        f2.write(line) 
                    else:
                        f3.write(line)

!cat ../output/sample_test_list.txt
'''

In [None]:
# Split list files (relative to the current working directory) that the
# training and test functions below also refer to by name.
train_list = "new_kinect_ir_train.txt"
val_list = "new_kinect_ir_val.txt"
test_list = "new_kinect_ir_test.txt"

# Each line prints True only if the corresponding list file exists.
print(os.path.exists(train_list))
print(os.path.exists(val_list))
print(os.path.exists(test_list))

In [None]:
# Need to be in pytorch-i3d directory
import os
import sys
import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable

import torchvision
from torchvision import datasets, transforms

import random
import numpy as np

# This is the I3D model used by the Drive&Act paper
from i3dpt import I3D


In [None]:
# File names of the pretrained I3D weights.
# NOTE(review): drive_run below loads the literal 'model_rgb.pth' rather than
# these variables — confirm whether they are still needed.
model_rgb_path = "model_rgb.pth"
model_flow_path = "model_flow.pth"

In [None]:
# Need to be in pytorch-i3d directory

# Choose a mode: 'ir', 'depth', 'rgb', 'flow'. 

# The batch size defaults to 1.
def drive_run(init_lr=0.01, max_steps=150, train_loss=None, val_loss=None, mode='ir', root="", batch_size=1, save_model='', weighted_sampler=None):
    """Fine-tune I3D on the Drive&Act kinect_ir lists and log train/val losses.

    Gradients are accumulated over 8 mini-batches before each optimizer step
    ("virtual" batch size); ``steps`` counts optimizer steps. The loop
    alternates a full pass over the training list with a full pass over the
    validation list until at least ``max_steps`` optimizer steps have run (a
    pass that is in progress always finishes). The model is moved to the GPU
    with ``.cuda()``, so a CUDA device is required.

    Args:
        init_lr (float): Initial SGD learning rate.
        max_steps (int): Number of optimizer steps after which no new pass starts.
        train_loss (list | None): List that receives the mean training loss
            every 10 optimizer steps. A new list is created when None.
        val_loss (list | None): List that receives the running mean validation
            loss after every validation batch. A new list is created when None.
        mode (str): Data mode; only 'ir' is supported.
        root (str): Passed to ``DriveAndAct`` as its ``root`` argument.
        batch_size (int): Mini-batch size for both data loaders.
        save_model (str): Prefix of checkpoint file names; a checkpoint is
            written every 20 optimizer steps as
            ``save_model + <zero-padded step> + '.pt'``.
        weighted_sampler: Optional sampler for the training loader. When None,
            the training data is shuffled instead.

    Returns:
        tuple[list, list]: ``(train_loss, val_loss)``, the same list objects
        that were passed in (or the newly created ones).

    Raises:
        ValueError: If ``mode`` is not 'ir'.
    """
    # Only the 'ir' branch ever built a model; reject other modes up front
    # instead of failing later on an undefined `i3d`.
    if mode != 'ir':
        raise ValueError("drive_run only supports mode='ir' (got {!r})".format(mode))

    # Fresh lists per call: mutable default arguments would be shared across calls.
    if train_loss is None:
        train_loss = []
    if val_loss is None:
        val_loss = []

    root_path = root
    train_list = "new_kinect_ir_train.txt"
    val_list = "new_kinect_ir_val.txt"

    # create a dataset from our DriveAndAct dataset:
    train_dataset = DriveAndAct(train_list, mode, root=root_path, transforms=None)
    # A sampler and shuffle=True are mutually exclusive in DataLoader; shuffle
    # only when no sampler decides the order.
    # NOTE(review): weighted_sampler must have been built for the same rows as
    # train_list, otherwise its indices and weights will not line up — confirm.
    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=(weighted_sampler is None), num_workers=1, pin_memory=True, sampler=weighted_sampler)

    val_dataset = DriveAndAct(val_list, mode, root=root_path, transforms=None)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True, sampler=None)

    dataloaders = {'train': train_dataloader, 'val': val_dataloader}

    # setup the model: start from the pretrained RGB weights, then swap the
    # 400-way head for the 39 Drive&Act classes.
    i3d = I3D(400, modality = 'rgb')
    i3d.load_state_dict(torch.load('model_rgb.pth'))
    num_classes = 39
    i3d.replace_logits(num_classes)

    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    lr = init_lr
    optimizer = optim.SGD(i3d.parameters(), lr=lr, momentum=0.9, weight_decay=0.0000001)
    lr_sched = optim.lr_scheduler.MultiStepLR(optimizer, [300, 1000])

    num_steps_per_update = 8 # accumulating gradients ("virtual" batch size)
    steps = 0

    # train it
    while steps < max_steps: #for epoch in range(num_epochs):
        print('Step {}/{}'.format(steps, max_steps))
        print('-' * 10)

        # Each pass has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                i3d.train() # Set model to train mode
            else:
                i3d.eval()  # Set model to evaluate mode

            tot_loss = 0.0
            tot_loc_loss = 0.0  # localization loss is not computed; stays 0
            tot_cls_loss = 0.0
            num_iter = 0
            optimizer.zero_grad()

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                num_iter += 1
                inputs = inputs.cuda()
                labels = labels.cuda()

                # No autograd graph is needed (or built) during validation.
                with torch.set_grad_enabled(is_train):
                    # The model returns two outputs; the second one is used as
                    # the B x C classification logits.
                    out, per_frame_logits = i3d(inputs)

                    # compute classification loss (B x C)
                    cls_loss = F.binary_cross_entropy_with_logits(per_frame_logits, labels)

                tot_cls_loss += cls_loss.item()

                loss = (cls_loss)/num_steps_per_update
                tot_loss += loss.item()

                if is_train:
                    loss.backward()

                    if num_iter == num_steps_per_update:
                        steps += 1
                        num_iter = 0

                        # weights don't update until optimizer.step() called
                        optimizer.step()

                        optimizer.zero_grad()
                        lr_sched.step()

                        if steps % 10 == 0:
                            print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/(10*num_steps_per_update), tot_cls_loss/(10*num_steps_per_update), tot_loss/10))
                            # make sure model is being saved in case we get kicked off datahub
                            if steps % 20 == 0:
                                checkpoint_path = save_model+str(steps).zfill(6)+'.pt'
                                torch.save(i3d.module.state_dict(), checkpoint_path)
                                # Only report a save when one actually happened.
                                print("Model saved: ", checkpoint_path)
                            train_loss.append(tot_loss/10)
                            tot_loss = tot_loc_loss = tot_cls_loss = 0.
                else:
                    # NOTE(review): this logs the running mean after every
                    # validation batch, not once per validation phase; kept
                    # as-is because the later plotting cells consume val_loss
                    # in this form — confirm the intent.
                    print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter))
                    val_loss.append((tot_loss*num_steps_per_update)/num_iter)

    return train_loss, val_loss


if __name__ == '__main__':
    # Loss histories are created here and filled in place by drive_run so the
    # plotting cells below can read them.
    t_loss = []
    v_loss = []
    # NOTE(review): save_model keeps its default '' here, so checkpoints are
    # written as '<step>.pt', while test() below loads 'saved_models000420.pt'
    # — confirm the intended prefix and step count.
    drive_run(train_loss = t_loss, val_loss = v_loss, weighted_sampler=train_sampler)


In [None]:
#print(t_loss)
#print(v_loss)

In [None]:
from matplotlib import pyplot as plt

# Raw loss curves: training on top, validation below.
fig, (train_ax, val_ax) = plt.subplots(2, 1, figsize=(15, 12))

train_ax.plot(t_loss)
train_ax.set_title('Training Loss')
train_ax.set_ylabel('Loss')
# One point is logged every 10 optimizer steps (not every 10 epochs).
train_ax.set_xlabel('Optimizer steps (x10)')
train_ax.legend(['Train Loss'], loc='upper left')

val_ax.plot(v_loss)
val_ax.set_title('Validation Loss')
val_ax.set_ylabel('Loss')
val_ax.set_xlabel('Validation Phase')
val_ax.legend(['Val loss'], loc='upper left')

plt.show()

In [None]:
# Smooth out the losses in training and validation:
window = 5


def _moving_average(values, window):
    """Return the trailing moving average of ``values``, same length as the input.

    Entry ``i`` is the mean of the last ``window`` values up to and including
    ``i``; the first entries average over however many values exist so far.
    An empty input gives an empty list.
    """
    smoothed = []
    for i in range(len(values)):
        recent = values[max(0, i - window + 1):i + 1]
        smoothed.append(sum(recent) / len(recent))
    return smoothed


# Each series is smoothed over its own length; the two histories are logged at
# different rates and need not contain the same number of points.
new_t_loss = _moving_average(t_loss, window)
new_v_loss = _moving_average(v_loss, window)

#print(new_t_loss)

plt.figure(figsize=(15,12))
plt.subplot(211)
plt.plot(new_t_loss)
plt.title('Training Loss')
plt.ylabel('Loss')
# One point is logged every 10 optimizer steps (not every 10 epochs).
plt.xlabel('Optimizer steps (x10)')
plt.legend(['Train Loss'], loc='upper left')


plt.subplot(212)
plt.plot(new_v_loss)
plt.title('Validation Loss')
plt.ylabel('Loss')
plt.xlabel('Validation Phase')
plt.legend(['Val loss'], loc='upper left')
plt.show()

## TO DO: Make a new notebook for the testing code: 

In [None]:
from tqdm import tqdm
def test(mode='ir', batch_size=1, accuracy_per_frame = False, checkpoint='saved_models000420.pt', test_list="new_kinect_ir_test.txt"):
    """Evaluate a fine-tuned I3D checkpoint on the Drive&Act test list.

    The predicted class of each clip is the arg-max of the model's logits and
    the true class is the arg-max of the one-hot label. The model is moved to
    the GPU with ``.cuda()``, so a CUDA device is required.

    Args:
        mode (str): Data mode; only 'ir' is supported.
        batch_size (int): Mini-batch size of the test loader.
        accuracy_per_frame (bool): Currently unused; kept for compatibility.
        checkpoint (str): Path of the state dict to load into the model.
        test_list (str): Clip list file to evaluate on.

    Returns:
        tuple: ``(correct, total, preds)`` — number of correctly classified
        clips, number of evaluated clips, and the list of predicted class
        indices in test-list order.

    Raises:
        ValueError: If ``mode`` is not 'ir'.
    """
    # Only the 'ir' branch ever built a model; reject other modes up front
    # instead of failing later on an undefined `i3d`.
    if mode != 'ir':
        raise ValueError("test only supports mode='ir' (got {!r})".format(mode))

    num_classes = 39

    # Only the test split is needed; the training list is no longer opened.
    # shuffle=False keeps `preds` aligned with the order of test_list.
    test_dataset = DriveAndAct(test_list, mode=mode, root='', transforms=None)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True)

    # setup the model: build the 39-way head first so the checkpoint's head
    # weights have somewhere to load into.
    i3d = I3D(400, modality = 'rgb')
    i3d.replace_logits(num_classes)
    i3d.load_state_dict(torch.load(checkpoint))
    # replace_logits is deliberately NOT called again after loading: a second
    # call would presumably swap the loaded head for a new untrained one.

    i3d.cuda()
    i3d = nn.DataParallel(i3d)

    # Evaluate the model since testing
    i3d.eval()

    correct = 0
    total = 0
    preds = []

    with torch.no_grad():
        # Iterate over data.
        for inputs, labels in tqdm(test_dataloader):
            inputs = inputs.cuda()
            labels = labels.cuda()

            # probs and logits are B x C
            probs, logits = i3d(inputs)

            # Predicted and true class index for every clip in the batch.
            _, pred_indices = logits.max(1)
            _, actual_indices = labels.max(1)

            # Batch-safe bookkeeping (the scalar .item() form only worked for
            # batch_size == 1).
            preds.extend(pred_indices.tolist())
            correct += (pred_indices == actual_indices).sum().item()
            total += labels.size(0)

    return correct, total, preds
        
    #print('{} Loc Loss: {:.4f} Cls Loss: {:.4f} Tot Loss: {:.4f}'.format(phase, tot_loc_loss/num_iter, tot_cls_loss/num_iter, (tot_loss*num_steps_per_update)/num_iter))

In [None]:
# Run the evaluation: number of correct predictions, number of evaluated
# clips, and the list of predicted class indices.
correct, total, preds = test(mode='ir')

In [None]:
# Overall accuracy on the evaluated clips.
# NOTE: raises ZeroDivisionError if no clips were evaluated (total == 0).
print(correct / total)

In [None]:
from collections import Counter
# Raw predicted class indices, followed by how often each class was predicted
# (a quick check for a model that collapses onto a few classes).
print(preds)
print(Counter(preds))

In [None]:
# Generate a confusion matrix: 
