In [1]:
%matplotlib inline
import os
from glob import glob
import numpy as np
import pandas as pd
import h5py
import matplotlib.pyplot as plt
from PIL import Image
# import keras
# import keras.backend as K
# from skimage.util.montage import montage2d
# from skimage.io import imread
# from scipy.io import loadmat # for loading mat files
# from tqdm import tqdm_notebook
# root_mpi_dir = os.path.join('..', 'data', 'MPII')
# data_dir = os.path.join(root_mpi_dir, 'Data')
# annot_dir = os.path.join(root_mpi_dir, 'Annotation Subset') # annotations the important part of the data
# img_dir = os.path.join(data_dir, 'Original')

import torch
import torchvision
from torch.utils import data
from torch.utils.data import Dataset
import os
from os import listdir
import torchvision.transforms as transforms

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import scipy.io as sio
from os import listdir
from os.path import isfile, join
import json

from pathlib import Path

In [2]:
index_to_label = {
    0: 'rowing',
    1: 'badminton',
    2: 'polo',
    3: 'bocce',
    4: 'snowboarding',
    5: 'croquet',
    6: 'sailing',
    7: 'rockclimbing',
}

In [3]:
label_to_index = {v: k for k, v in index_to_label.items()}

In [4]:
# mpii_path = '../data/MPII'
images_path = os.path.join('../data/')

In [5]:
image_files = [f for f in listdir(images_path) if isfile(join(images_path, f))]


## Set up Data

In [6]:
image_dict = {}
label_dict = {}

index = 0
# for i in range(len(image_files)):
for i in range(100):
    im = image_files[i]
    if 'Thumb' in im or '.DS' in im:
        continue
    if 'rar' in im:
        continue
#     print(im)
    label_str = im.split('_')[2].lower()
    image_dict[index] = im
    label_dict[index] = label_to_index[label_str]
    index += 1

In [7]:
class UIUC_Actions_Dataset(data.Dataset):
#       '''Characterizes a dataset for PyTorch'''
    def __init__(self, labels, images, images_path):
        '''Initialization'''
        self.labels = labels
        self.images = images
        
        self.images_path = images_path
        
        self.transform = transforms.Compose(
                [
                    transforms.Resize((96, 96)),
                    transforms.ToTensor(),
#                     transforms.CenterCrop(10),
                 
                 transforms.Normalize((0.5, 0.5, 0.5), 
                                      (0.5, 0.5, 0.5))])

    def __len__(self):
        '''Denotes the total number of samples'''
        return len(self.labels)

    def __getitem__(self, index):
        '''Generates one sample of data'''
        # Select sample
        image_filename = self.images[index]
        path_to_image = os.path.join(self.images_path, image_filename)

        # Load data and get label
        image = Image.open(path_to_image)
        image = self.transform(image).float()
        x = image
#         y = torch.tensor(np.array(self.labels[index])).float()

#         print(y)
        
        y = int(self.labels[index])

        return x, y

## Make Dataset

In [8]:
train_dataset = UIUC_Actions_Dataset(label_dict, 
                              image_dict, images_path)

train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                          batch_size=256,
                                          shuffle=True,
                                         )


In [9]:
print("len(train_dataset) = ", len(train_dataset))

len(train_dataset) =  100


In [14]:

class ActionNet(nn.Module):
    def __init__(self):
        super(ActionNet, self).__init__()
        # torch.Size([256, 3, 96, 96])
        # 3 input image channel (RGB), #6 output channels, 4x4 kernel 
        self.conv1 = nn.Conv2d(3, 32, kernel_size=(3,3), stride=1, 
                               padding=1, dilation=1, groups=1, 
                               bias=True, padding_mode='reflect')
        
        self.conv2 = nn.Conv2d(32, 96, kernel_size=(3,3), stride=1, 
                               padding=1, dilation=1, groups=1, 
                               bias=True, padding_mode='reflect')
        
        self.conv3 = nn.Conv2d(96, 256, kernel_size=(3,3), stride=1, 
                               padding=1, dilation=1, groups=1, 
                               bias=True, padding_mode='reflect')
        
        
        self.drop1 = nn.Dropout(p=0.1)
        self.norm1 = nn.LayerNorm([48, 48])
        self.norm2 = nn.LayerNorm([24, 24])
        
        self.fc1 = nn.Linear(36864, 4096)
        self.fc2 = nn.Linear(4096, 1028)
        self.fc3 = nn.Linear(1028, 8)
        
    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = self.norm1(x)
#         print(x.shape)
        
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        x = self.norm2(x)
#         print(x.shape)
        
        x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
#         print(x.shape)

        x = torch.flatten(x, 1)
#         print(x.shape)
        
        x = self.fc1(x)
        x = F.relu(x)
        x = self.drop1(x)
        
        x = self.fc2(x)
        x = F.relu(x)
        x = self.drop1(x)
        
        x = self.fc3(x)
        x = F.relu(x)
#         x = self.drop1(x)
        
#         output = x
        output = F.log_softmax(x, dim=1)
        return output
        
        

In [11]:
max_epochs = 100 
lr = 0.01
momentum = 0.9

action_net1 = ActionNet().double()
# Try different optimzers here [Adam, SGD, RMSprop]
optimizer = optim.SGD(action_net1.parameters(), lr=lr, momentum=momentum)


In [12]:
# CUDA for PyTorch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

# Parameters
# params = {'batch_size': 64,
#           'shuffle': True,
#           'num_workers': 6}


training_losses = []

# Generators
training_set = train_dataset
training_generator = train_data_loader

loss_fn = torch.nn.NLLLoss()
# loss_fn = nn.CrossEntropyLoss()

action_net1.train()

# Loop over epochs
for epoch in range(max_epochs):
    print("epoch: ", epoch)
    # Training
    total_epoch_loss = 0
    for batch_idx, (batch_data, batch_labels) in enumerate(training_generator):
        
        batch_data = batch_data.double()
        batch_labels = batch_labels
        
        predicted_output = action_net1(batch_data)
                                        
        predicted_output = predicted_output.double()                                
        target_output = batch_labels
        
        print(predicted_output)
        print()
        print(target_output)
        
        
        
        loss = loss_fn(predicted_output, target_output)
#         loss = F.nll_loss(predicted_output, target_output)   # Compute loss

        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()  
        
        total_epoch_loss += loss.item()
    
        if batch_idx % 25 == 0:
            print('Train Epoch: {} \tLoss: {:.6f}'.format(
                epoch, total_epoch_loss))
    
#     if epoch % 100 == 0:
#         with open('../saved_models/pose_network_1.pkl', 'wb') as f:
#             torch.save(pose_net1.state_dict(), f)
            
    training_losses.append(total_epoch_loss)
    
# with open('../saved_models/pose_network_1_final.pkl', 'wb') as f:
#     torch.save(pose_net1.state_dict(), f)
    
# with open('../saved_models/training_losses_1.npy', 'wb') as f:
#     np.save(f, np.array(training_losses))

epoch:  0


KeyboardInterrupt: 

In [15]:
action_net1 = ActionNet().double()
path_model = '../saved_models/uiuc_action_network_1.pkl'
action_net1.load_state_dict(torch.load(path_model))
action_net1.eval()

ActionNet(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=reflect)
  (conv2): Conv2d(32, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=reflect)
  (conv3): Conv2d(96, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=reflect)
  (drop1): Dropout(p=0.1, inplace=False)
  (norm1): LayerNorm((48, 48), eps=1e-05, elementwise_affine=True)
  (norm2): LayerNorm((24, 24), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=36864, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=1028, bias=True)
  (fc3): Linear(in_features=1028, out_features=8, bias=True)
)

# Take a look at output

In [23]:
# Generators
training_set = train_dataset
training_generator = train_data_loader

loss_fn = torch.nn.NLLLoss()


correct = 0
# Loop over epochs
# Training
total_epoch_loss = 0
for batch_idx, (batch_data, batch_labels) in enumerate(training_generator):
        
    batch_data = batch_data.double()
    batch_labels = batch_labels

    predicted_output = action_net1(batch_data)

    predicted_output = predicted_output.double() 
    
    pred = predicted_output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
    
    target_output = batch_labels

    print(pred)
    print()
    print(target_output)



    loss = loss_fn(predicted_output, target_output)
    print(loss)
    break


tensor([[6],
        [3],
        [1],
        [5],
        [0],
        [1],
        [4],
        [1],
        [1],
        [5],
        [0],
        [2],
        [2],
        [0],
        [0],
        [6],
        [0],
        [0],
        [0],
        [7],
        [4],
        [4],
        [7],
        [5],
        [0],
        [2],
        [4],
        [4],
        [2],
        [4],
        [7],
        [1],
        [0],
        [2],
        [4],
        [3],
        [0],
        [6],
        [6],
        [4],
        [3],
        [5],
        [1],
        [7],
        [6],
        [1],
        [2],
        [6],
        [1],
        [6],
        [2],
        [1],
        [3],
        [6],
        [7],
        [7],
        [2],
        [1],
        [3],
        [5],
        [5],
        [1],
        [5],
        [6],
        [6],
        [4],
        [5],
        [3],
        [0],
        [5],
        [7],
        [0],
        [7],
        [1],
        [6],
        [4],
        [6],

In [24]:
print('loss', loss)

loss tensor(0.0003, dtype=torch.float64, grad_fn=<NllLossBackward>)


In [27]:
pred[2]

tensor([1])

In [26]:
target_output[2]

tensor(1)