In [0]:
cd /content/drive/My Drive/DL/CS 189/hw6/resources/problem4

/content/drive/My Drive/DL/CS 189/hw6/resources/problem4


In [0]:
pip install ipdb



## Data Loader

In [0]:
import torch
from torch.utils import data
import pandas as pd
import random
import json
import numpy as np
from skimage import io, transform
from PIL import Image

class Mds189(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, label_file, loader, transform):
        'Initialization'
        self.label_file = label_file
        self.loader = loader
        self.transform = transform
        self.label_map = ['reach','squat','pushup','inline',
                          'hamstrings','lunge','deadbug','stretch']
        self.data= pd.read_csv(self.label_file,header=None)

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.data)

    def map_label_to_int(self,y):
        'The labels need to be integers'
        label_map = {'reach_both': 0,        # the key frames are labeled with the side
                     'squat_both': 1,
                     'inline_left': 2,
                     'inline_right': 2,
                     'lunge_left': 3,
                     'lunge_right': 3,
                     'hamstrings_left': 4,
                     'hamstrings_right': 4,
                     'stretch_left': 5,
                     'stretch_right': 5,
                     'deadbug_left': 6,
                     'deadbug_right': 6,
                     'pushup_both': 7,
                     'reach': 0,            # the video frames don't have information about which side is moving 
                     'squat': 1,
                     'inline': 2,
                     'lunge': 3,
                     'hamstrings': 4,
                     'stretch': 5,
                     'deadbug': 6,
                     'pushup': 7,
                     'label': -1           # label is the placeholder in `videoframe_data_test.txt` for the kaggle frame labels
                    }
        return label_map[y]

    def __getitem__(self,idx):
        'Generates one sample of data'
        path,target = self.data.iloc[idx]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        movement = self.map_label_to_int(target)

        return sample,movement

## Train

In [0]:
import torch
import torch.nn as nn
from torch.utils import data
import numpy as np
from skimage import io, transform
import ipdb
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import torchvision.models as models
from PIL import Image
import time

import matplotlib.pyplot as plt

start = time.time()

In [0]:
# Helper functions for loading images.
def pil_loader(path):
    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)
    with open(path, 'rb') as f:
        img = Image.open(f)
        return img.convert('RGB')

def accimage_loader(path):
    import accimage
    try:
        return accimage.Image(path)
    except IOError:
        # Potentially a decoding problem, fall back to PIL.Image
        return pil_loader(path)

def default_loader(path):
    from torchvision import get_image_backend
    if get_image_backend() == 'accimage':
        return accimage_loader(path)
    else:
        return pil_loader(path)

Network structure

In [0]:
class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.conv1 = nn.Conv2d(3,8,11,4)
        self.mp1 = nn.MaxPool2d(5, 2)
        self.conv2 = nn.Conv2d(8,16,5,1,2)
        self.mp2 = nn.MaxPool2d(5, 2)
        self.conv3 = nn.Conv2d(16,32,3,1,1)
        self.conv4 = nn.Conv2d(32,64,3,1,1)
        self.mp3 = nn.MaxPool2d(5, 2)        
        self.fc1 = nn.Linear(2816, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024,8)
        nn.init.xavier_normal_(self.conv1.weight)
        nn.init.xavier_normal_(self.conv2.weight)
        nn.init.xavier_normal_(self.conv3.weight)
        nn.init.xavier_normal_(self.conv4.weight)
        nn.init.xavier_normal_(self.fc1.weight)
        nn.init.xavier_normal_(self.fc2.weight)
        nn.init.xavier_normal_(self.fc3.weight)

    def forward(self, x):
        x = F.rrelu(self.mp1(self.conv1(x)))
        x = F.rrelu(self.mp2(self.conv2(x)))
        x = self.mp3(self.conv4(self.conv3(x)))
        x = x.view(int(x.size(0)), -1)
        x = F.dropout(F.rrelu(self.fc1(x)))
        x = F.rrelu(self.fc2(x))
        x = self.fc3(x)
        return x

Load data

In [0]:
def mds_loader(params, is_key_frame=True):
    # Datasets
    # TODO: put the path to your train, test, validation txt files
    if is_key_frame:
        label_file_train =  'dataloader_files/keyframe_data_train.txt'
        label_file_val  =  'dataloader_files/keyframe_data_val.txt'
    else:
        label_file_train = 'dataloader_files/videoframe_data_train.txt'
        label_file_val = 'dataloader_files/videoframe_data_val.txt'
        label_file_test = 'dataloader_files/videoframe_data_test.txt'

    mean_keytrain = [134.010302198,118.599587912,102.038804945]
    std_keytrain = [23.5033438916,23.8827343458,24.5498666589]
    mean_randtrain = [133.714058398,118.396875912,102.262895484]
    std_randtrain = [23.2021839891,23.7064439547,24.3690056102]

    train_dataset = Mds189(label_file_train,loader=default_loader,transform=transforms.Compose([
                                                transforms.ColorJitter(hue=.05, saturation=.05),
                                                transforms.RandomHorizontalFlip(p=0.33),
                                                transforms.RandomRotation(degrees=15),    
                                                transforms.ToTensor(),
                                                transforms.Normalize(mean_keytrain, std_keytrain)
                                            ]))
    train_loader = data.DataLoader(train_dataset, **params)

    val_dataset = Mds189(label_file_val,loader=default_loader,transform=transforms.Compose([
                                                transforms.ToTensor(),
                                                transforms.Normalize(mean_randtrain, std_randtrain)
                                            ]))
    val_loader = data.DataLoader(val_dataset, **params)

    if is_key_frame:
        return train_loader, val_loader

    elif not is_key_frame:
        test_dataset = Mds189(label_file_test,loader=default_loader,transform=transforms.Compose([
                                                    transforms.ToTensor(),
                                                    transforms.Normalize(mean_randtrain, std_randtrain)
                                                ]))
        test_loader = data.DataLoader(test_dataset, **params)
        return train_loader, val_loader, test_loader

Trainning

In [0]:
def train(model, device, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    # Train the model
    # Loop over epochs
    print('Beginning training..')
    total_step = len(train_loader)
    train_losses, val_losses = [], []
    for epoch in range(num_epochs):
        # Training
        print('epoch {}'.format(epoch))
        for i, (local_batch,local_labels) in enumerate(train_loader):
            print(i)
            # Transfer to GPU
            local_ims, local_labels = local_batch.to(device), local_labels.to(device)
            
            # Forward pass
            outputs = model.forward(local_ims)
            loss = criterion(outputs, local_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

            if (i+1) % 4 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                    .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
        for i, (local_batch,local_labels) in enumerate(val_loader):
            local_ims, local_labels = local_batch.to(device), local_labels.to(device)
            outputs = model.forward(local_ims)
            loss = criterion(outputs, local_labels)
            val_losses.append(loss.item())
        print('finished epoch {}, Training Loss: {:.4f}, Validation Loss: {:.4f}'
                .format(epoch+1, train_losses[epoch], val_losses[epoch]))

    end = time.time()
    print('Time: {}'.format(end - start))

    # Save the model checkpoint
    torch.save(model.state_dict(), './model/model.ckpt')

    return train_losses, val_losses

Testing

In [0]:
def test(model, device, test_loader):
    print('Beginning Testing..')
    with torch.no_grad():
        correct = 0
        total = 0
        predicted_list = []
        groundtruth_list = []
        for (local_batch,local_labels) in test_loader:
            # Transfer to GPU
            local_ims, local_labels = local_batch.to(device), local_labels.to(device)

            outputs = model.forward(local_ims)
            _, predicted = torch.max(outputs.data, 1)
            total += local_labels.size(0)
            predicted_list.extend(predicted)
            groundtruth_list.extend(local_labels)
            correct += (predicted == local_labels).sum().item()

        print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

    pl = [p.cpu().numpy().tolist() for p in predicted_list]
    gt = [p.cpu().numpy().tolist() for p in groundtruth_list]


    label_map = ['reach','squat','inline','lunge','hamstrings','stretch','deadbug','pushup']
    for id in range(len(label_map)):
        print('{}: {}'.format(label_map[id],sum([p and g for (p,g) in zip(np.array(pl)==np.array(gt),np.array(gt)==id)])/(sum(np.array(gt)==id)+0.)))

Plot loss

In [0]:
def plot_loss(train,val):
    mt = sum(train)/len(train)
    mv = sum(val)/len(val)
    plt.title(" Avg Train Loss: "+str(round(mt,4))+", Avg Val Loss: "+str(round(mv,4)))
    plt.plot([i+1 for i in range(len(train))], train, 'r', label="train")
    plt.plot([i+1 for i in range(len(val))], val, 'b', label="validation")
    plt.xlabel("steps")
    plt.ylabel("loss")
    plt.legend()
    plt.savefig("loss.jpg")

In [0]:
def main(params, num_epochs, learning_rate, is_train=True, is_key_frame=True):
    
    model_to_load = './model/model.ckpt' 
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    if is_key_frame:
        train_loader, val_loader = mds_loader(params, is_key_frame=is_key_frame)
    if not is_key_frame:
        train_loader, val_loader, test_loader = mds_loader(params, is_key_frame=is_key_frame)

    model = NeuralNet().to(device)

    if is_train:
        # Loss and optimizer
        criterion = nn.CrossEntropyLoss() 
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        train_losses, val_losses = train(model, device, train_loader, val_loader, criterion, optimizer, num_epochs)
        plot_loss(train_losses, val_losses)
    if not is_train:
        num_epochs = 0
        model.load_state_dict(torch.load(model_to_load))
        test(model, device, val_loader)

In [0]:
is_train=True
is_key_frame=True
params = {'batch_size': 64,
      'shuffle': True,
      'num_workers': 1
      }
num_epochs = 10
learning_rate = 1e-4
main(params, num_epochs, learning_rate)

Beginning training..
epoch 0
0
1
2
3
Epoch [1/10], Step [4/46], Loss: 3.0813
4
5
6
