# Training a ConvNet in PyTorch for Human Action Recognition on UCI dataset


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader,sampler,Dataset
import torchvision.datasets as dset
import torchvision.transforms as T
import timeit
from PIL import Image
import os
import numpy as np
import scipy.io


## Load Datasets

In this part, we will load the action recognition dataset for the neural network. In order to load data from our custom dataset, we need to write a custom Dataloader. If you put q3_2_data.mat, /valClips,/trainClips,/testClips under the folder of ./data/ , you do not need to change anything in this part.

First, load the labels of the dataset, you should write your path of the q3_2_data.mat file.

In [2]:
# Load the MATLAB label file; the 'trLb' and 'valLb' entries hold the labels
# for the training and validation clips (one row per clip folder).
label_mat=scipy.io.loadmat('./data/q3_2_data.mat')
label_train=label_mat['trLb']
print(len(label_train))  # number of training label rows
label_val=label_mat['valLb']
print(len(label_val))  # number of validation label rows

7770
2230


### Dataset class

torch.utils.data.Dataset is an abstract class representing a dataset. The custom dataset should inherit Dataset and override the following methods:

    __len__ so that len(dataset) returns the size of the dataset.
    __getitem__ to support the indexing such that dataset[i] can be used to get ith sample

Let’s create a dataset class for our action recognition dataset. We will read images in __getitem__. This is memory efficient because all the images are not stored in the memory at once but read as required.

Each sample of our dataset will be a dict {'image':image,'img_path':img_path,'Label':Label}. Our dataset will take an optional argument transform so that any required processing can be applied to the sample.

In [3]:

class ActionDataset(Dataset):
    """Frame-level action dataset.

    Every entry of ``root_dir`` is treated as one clip folder named with a
    zero-padded, 1-based index (``00001``, ``00002``, ...) that contains the
    three frames ``1.jpg``, ``2.jpg`` and ``3.jpg``. Each frame is exposed as
    its own sample, so the dataset holds three samples per clip folder.
    """

    def __init__(self,  root_dir,labels=None, transform=None):
        """
        Args:
            root_dir (string): Directory with all the clip folders.
            labels (sequence, optional): Per-clip labels, indexed as
                ``labels[clip][0]`` with 1-based class ids. Leave empty/None
                for an unlabelled (test) set.
            transform (callable, optional): Optional transform to be applied
                on the loaded image.
        """
        self.root_dir = root_dir
        self.transform = transform
        # Number of clips == number of entries found in the root directory.
        self.length = len(os.listdir(self.root_dir))
        # Avoid a shared mutable default; "no labels" is always an empty list.
        self.labels = [] if labels is None else labels

    def __len__(self):
        # Three frames per clip.
        return self.length * 3

    def __getitem__(self, idx):
        """Return the sample dict for frame ``idx``.

        Returns:
            dict: ``{'image', 'img_path'}`` plus ``'Label'`` (0-based class
            id) when labels were supplied.

        Raises:
            IndexError: if ``idx`` is outside ``[0, len(self))``. This is what
                lets plain ``for sample in dataset`` loops terminate.
        """
        idx = int(idx)
        if not 0 <= idx < len(self):
            raise IndexError('index %d is out of range for a dataset of size %d'
                             % (idx, len(self)))

        # Integer arithmetic only: clip_idx selects the folder, frame_idx the frame.
        clip_idx, frame_idx = divmod(idx, 3)
        folder = format(clip_idx + 1, '05d')  # folders are 1-based
        imgname = str(frame_idx + 1) + '.jpg'  # frames are 1-based
        img_path = os.path.join(self.root_dir, folder, imgname)

        image = Image.open(img_path)
        if self.transform:
            image = self.transform(image)

        sample = {'image': image, 'img_path': img_path}
        if len(self.labels) != 0:
            # Stored labels are 1-based; shift them to 0-based class ids.
            sample['Label'] = self.labels[clip_idx][0] - 1
        return sample
  

Iterating over the dataset by a for loop.

In [4]:
# Build the training dataset; ToTensor turns each PIL image into a tensor.
image_dataset = ActionDataset(
    root_dir='./data/trainClips/',
    labels=label_train,
    transform=T.ToTensor(),
)

# Inspect the first ten samples: tensor shape, label and source path.
for i in range(10):
    sample = image_dataset[i]
    print(sample['image'].shape, sample['Label'], sample['img_path'], sep='\n')
     
   

torch.Size([3, 64, 64])
0.0
./data/trainClips/00001/1.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00001/2.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00001/3.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00002/1.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00002/2.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00002/3.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00003/1.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00003/2.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00003/3.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00004/1.jpg


In [5]:
# Wrap the dataset in a DataLoader that yields shuffled batches of 4 samples,
# loaded with 4 workers.
image_dataloader = DataLoader(image_dataset, batch_size=4,
                        shuffle=True, num_workers=4)


# Peek at the first batches to check the batched shapes, paths and labels.
for i,sample in enumerate(image_dataloader):
    sample['image']=sample['image'].cuda()  # move the image batch to the GPU (requires CUDA)
    print(i,sample['image'].shape,sample['img_path'],sample['Label'])
    if i>20: 
        break

0 torch.Size([4, 3, 64, 64]) ['./data/trainClips/05571/1.jpg', './data/trainClips/03060/3.jpg', './data/trainClips/06254/2.jpg', './data/trainClips/00159/1.jpg'] 
 6
 3
 7
 0
[torch.DoubleTensor of size 4]

1 torch.Size([4, 3, 64, 64]) ['./data/trainClips/04265/2.jpg', './data/trainClips/05144/2.jpg', './data/trainClips/02134/3.jpg', './data/trainClips/03774/2.jpg'] 
 5
 6
 2
 4
[torch.DoubleTensor of size 4]

2 torch.Size([4, 3, 64, 64]) ['./data/trainClips/03296/3.jpg', './data/trainClips/05521/2.jpg', './data/trainClips/00811/3.jpg', './data/trainClips/02166/2.jpg'] 
 3
 6
 0
 2
[torch.DoubleTensor of size 4]

3 torch.Size([4, 3, 64, 64]) ['./data/trainClips/03027/1.jpg', './data/trainClips/06928/3.jpg', './data/trainClips/00378/3.jpg', './data/trainClips/05205/3.jpg'] 
 3
 8
 0
 6
[torch.DoubleTensor of size 4]

4 torch.Size([4, 3, 64, 64]) ['./data/trainClips/06140/2.jpg', './data/trainClips/05659/1.jpg', './data/trainClips/00545/2.jpg', './data/trainClips/03807/3.jpg'] 
 7
 7
 0


Dataloaders for the training, validation and testing sets.

In [6]:
# Training split: labelled, reshuffled on every pass.
image_dataset_train = ActionDataset(
    root_dir='./data/trainClips/',
    labels=label_train,
    transform=T.ToTensor(),
)
image_dataloader_train = DataLoader(
    image_dataset_train, batch_size=32, shuffle=True, num_workers=4)

# Validation split: labelled, kept in a fixed order.
image_dataset_val = ActionDataset(
    root_dir='./data/valClips/',
    labels=label_val,
    transform=T.ToTensor(),
)
image_dataloader_val = DataLoader(
    image_dataset_val, batch_size=32, shuffle=False, num_workers=4)

# Test split: no labels, kept in a fixed order so predictions line up with ids.
image_dataset_test = ActionDataset(
    root_dir='./data/testClips/',
    labels=[],
    transform=T.ToTensor(),
)
image_dataloader_test = DataLoader(
    image_dataset_test, batch_size=32, shuffle=False, num_workers=4)

In [7]:
# Tensor type used to cast models and inputs (CPU float tensors).
dtype = torch.FloatTensor
# Print the training loss once every `print_every` iterations.
print_every = 100


def reset(m):
    """Re-initialise a module's parameters if it knows how to.

    Meant to be used with ``model.apply(reset)``: modules that do not define
    ``reset_parameters`` are skipped silently.
    """
    if not hasattr(m, 'reset_parameters'):
        return
    m.reset_parameters()

When I input  data into fully connected affine layers, I want each datapoint to be represented by a single vector -- it's no longer useful to segregate the different channels, rows, and columns of the data. So, I use a "Flatten" operation to collapse the C x H x W values per representation into a single long vector. The Flatten function below first reads in the N, C, H, and W values from a given batch of data, and then returns a "view" of that data. "View" is analogous to numpy's "reshape" method: it reshapes x's dimensions to be N x ??, where ?? is allowed to be anything (in this case, it will be C x H x W, but we don't need to specify that explicitly). 

In [8]:
class Flatten(nn.Module):
    """Collapse a 4-D batch (N, C, H, W) into a 2-D batch (N, C*H*W)."""

    def forward(self, x):
        # Unpacking four sizes doubles as a check that the input is 4-D.
        batch_size, _channels, _height, _width = x.size()
        # -1 lets view() infer the per-sample feature count.
        flat = x.view(batch_size, -1)
        return flat

In [14]:
def train(model, loss_fn, optimizer, dataloader, num_epochs = 1):
    """Train ``model`` on image batches for ``num_epochs`` epochs.

    Args:
        model: network mapping an image batch to class scores.
        loss_fn: callable ``loss_fn(scores, targets)`` returning a scalar loss.
        optimizer: optimizer over ``model``'s parameters.
        dataloader: iterable of batches (dicts) with keys ``'image'`` and
            ``'Label'``.
        num_epochs (int): number of passes over ``dataloader``.

    Prints the current loss every ``print_every`` iterations.
    """
    # Send the batches to wherever the model lives instead of always calling
    # .cuda(): identical on a GPU model, and it also works for a CPU model.
    use_cuda = any(p.is_cuda for p in model.parameters())
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, sample in enumerate(dataloader):
            x_var = Variable(sample['image'])
            # Labels are collated as floating point; the loss needs integer ids.
            y_var = Variable(sample['Label'].long())
            if use_cuda:
                x_var, y_var = x_var.cuda(), y_var.cuda()

            scores = model(x_var)

            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                # loss.data[0] fails on 0-dim losses (newer PyTorch); .item()
                # does not exist on old releases. Support both.
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                print('t = %d, loss = %.4f' % (t + 1, loss_value))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    """Print the classification accuracy of ``model`` over ``loader``.

    Args:
        model: network mapping an image batch to class scores.
        loader: iterable of batches (dicts) with keys ``'image'`` and
            ``'Label'``.

    Prints ``Got <correct> / <total> correct (<percent>)``. An empty loader
    reports 0 / 0 with an accuracy of 0.00 instead of dividing by zero.
    """
    num_correct = 0
    num_samples = 0
    # Feed the batches to wherever the model lives (GPU or CPU).
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for sample in loader:
        x_var = Variable(sample['image'])
        if use_cuda:
            x_var = x_var.cuda()
        scores = model(x_var)
        # Index of the highest score per row is the predicted class.
        _, preds = scores.data.max(1)
        preds = preds.cpu().numpy()
        labels = sample['Label'].cpu().numpy()
        num_correct += (preds == labels).sum()
        num_samples += preds.shape[0]
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    
    



### Check the accuracy of the model.

With this setup I get a training loss of around 1.0-1.2 and a validation accuracy of around 50-60%. If I train for more epochs, the training performance improves past these numbers; however, that is a sign of overfitting.

In [15]:
# NOTE(review): fixed_model, loss_fn and optimizer are not defined in any cell
# visible above this one -- presumably they come from an earlier/removed cell;
# confirm this cell survives "Restart Kernel -> Run All".
# Seed the RNG so that the re-initialisation below is repeatable.
torch.random.manual_seed(12345)
# fixed_model.cpu()
# Re-initialise every sub-module that defines reset_parameters.
fixed_model.apply(reset) 
fixed_model.train() 
train(fixed_model, loss_fn, optimizer,image_dataloader_train, num_epochs=5) 
check_accuracy(fixed_model, image_dataloader_train)# check accuracy on the training set


Starting epoch 1 / 5
t = 100, loss = 2.2323
t = 200, loss = 2.1437
t = 300, loss = 1.7899
t = 400, loss = 1.7177
t = 500, loss = 1.6700
t = 600, loss = 1.3837
t = 700, loss = 1.2119
Starting epoch 2 / 5
t = 100, loss = 1.3968
t = 200, loss = 1.3100
t = 300, loss = 1.2680
t = 400, loss = 1.2891
t = 500, loss = 0.8881
t = 600, loss = 0.7787
t = 700, loss = 1.3149
Starting epoch 3 / 5
t = 100, loss = 0.8357
t = 200, loss = 0.7589
t = 300, loss = 0.8152
t = 400, loss = 0.6862
t = 500, loss = 0.5893
t = 600, loss = 0.5960
t = 700, loss = 0.7713
Starting epoch 4 / 5
t = 100, loss = 0.8090
t = 200, loss = 0.6732
t = 300, loss = 0.5251
t = 400, loss = 0.3244
t = 500, loss = 0.6170
t = 600, loss = 0.5740
t = 700, loss = 0.8652
Starting epoch 5 / 5
t = 100, loss = 0.4717
t = 200, loss = 0.4895
t = 300, loss = 0.3519
t = 400, loss = 0.4562
t = 500, loss = 0.5509
t = 600, loss = 0.3944
t = 700, loss = 0.3992
Got 19835 / 23310 correct (85.09)


In [16]:
# Same model as trained above, now scored on the held-out validation loader.
check_accuracy(fixed_model, image_dataloader_val)#check accuracy on the validation set

Got 3539 / 6690 correct (52.90)


### Training a better  model for action recognition

In [17]:
# VGG-style stack for 3 x 64 x 64 inputs and 10 output scores.
# Each conv takes as input the channel count produced by the previous conv,
# and padding=1 keeps the spatial size unchanged for a 3x3 kernel, so only the
# four 2x2 max-pools shrink the feature map: 64 -> 32 -> 16 -> 8 -> 4.
# (Without padding the map shrinks to 2x2 before the last conv group and the
# forward pass fails.)
model = nn.Sequential( 
    #########1st To Do  (10 points)###################
    # Block 1: 3 -> 64 channels, 64x64 -> 32x32 after pooling
    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Block 2: 64 -> 128 channels, 32x32 -> 16x16 after pooling
    nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Block 3: 128 -> 256 channels, 16x16 -> 8x8 after pooling
    nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),

    # Block 4: 256 -> 512 channels, 8x8 -> 4x4 after pooling
    nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
    nn.ReLU(inplace=True),
    nn.MaxPool2d(kernel_size=2, stride=2),

    Flatten(), # see above for explanation
    nn.ReLU(inplace=True),
    # The flattened feature vector has 512 channels * 4 * 4 positions.
    nn.Linear(512 * 4 * 4, 10), # affine layer
    nn.Linear(10, 10), # affine layer
    nn.Linear(10, 10), # affine layer
    ####################################
    )

# NOTE(review): everything below trains and evaluates `fixed_model_base`, not the
# `model` built at the top of this cell, and `fixed_model_base` is not defined
# in any visible cell -- presumably it comes from an earlier/removed cell.
# Confirm which network is meant to be trained here.
fixed_model = fixed_model_base.type(dtype)
fixed_model = fixed_model.cuda()  # requires a CUDA device
loss_fn = nn.CrossEntropyLoss()
# The optimizer is built over fixed_model_base's parameters.
optimizer = optim.RMSprop(fixed_model_base.parameters(), lr=1e-4)
train(fixed_model_base, loss_fn, optimizer,image_dataloader_train, num_epochs=10) 
check_accuracy(fixed_model, image_dataloader_val)

Starting epoch 1 / 10
t = 100, loss = 0.5821
t = 200, loss = 0.6408
t = 300, loss = 0.4928
t = 400, loss = 0.5139
t = 500, loss = 0.2888
t = 600, loss = 0.7220
t = 700, loss = 0.3304
Starting epoch 2 / 10
t = 100, loss = 0.4616
t = 200, loss = 0.6672
t = 300, loss = 0.4131
t = 400, loss = 0.4303
t = 500, loss = 0.4448
t = 600, loss = 0.1903
t = 700, loss = 0.2739
Starting epoch 3 / 10
t = 100, loss = 0.2287
t = 200, loss = 0.3276
t = 300, loss = 0.2743
t = 400, loss = 0.5678
t = 500, loss = 0.2609
t = 600, loss = 0.2605
t = 700, loss = 0.2144
Starting epoch 4 / 10
t = 100, loss = 0.2837
t = 200, loss = 0.1910
t = 300, loss = 0.2030
t = 400, loss = 0.1438
t = 500, loss = 0.2707
t = 600, loss = 0.1373
t = 700, loss = 0.2113
Starting epoch 5 / 10
t = 100, loss = 0.2774
t = 200, loss = 0.1830
t = 300, loss = 0.2819
t = 400, loss = 0.1720
t = 500, loss = 0.4199
t = 600, loss = 0.0932
t = 700, loss = 0.2310
Starting epoch 6 / 10
t = 100, loss = 0.3639
t = 200, loss = 0.1214
t = 300, loss = 0

### What I tried
I tried the VGG-19 architecture as described in the ResNet paper: https://arxiv.org/pdf/1512.03385.pdf.
The convolutional layers have a kernel_size of 3 throughout. After each group of convolution and activation layers there is a max-pool layer with a kernel size of 2, which halves the spatial size of the output. After every max-pooling layer, the number of filters is doubled. At the end I have used three fully connected layers.

In [18]:
def predict_on_test(model, loader):
    """Write the predicted class of every test image to ``results.csv``.

    Args:
        model: network mapping an image batch to class scores.
        loader: iterable of batches (dicts) with key ``'image'``; it must not
            shuffle, because row ids are assigned in iteration order.

    Returns:
        int: number of predictions written.

    The file has an ``Id,Class`` header followed by one ``<id>,<class>`` row
    per sample, with ids counting up from 0.
    """
    # Feed the batches to wherever the model lives (GPU or CPU).
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    count=0
    # The with-block closes the file even if the model raises mid-way.
    with open('results.csv','w') as results:
        results.write('Id'+','+'Class'+'\n')
        for sample in loader:
            x_var = Variable(sample['image'])
            if use_cuda:
                x_var = x_var.cuda()
            scores = model(x_var)
            _, preds = scores.data.max(1)
            for i in range(len(preds)):
                # int() keeps the row a bare number; str() of a 0-dim tensor
                # would be written as "tensor(k)" on newer PyTorch.
                results.write(str(count)+','+str(int(preds[i]))+'\n')
                count+=1
    return count
    
# Write results.csv for the test loader and show how many rows were written.
count=predict_on_test(fixed_model, image_dataloader_test)
print(count)

9810


### 3D Convolution on video clips
3D convolution is for videos; it has one more dimension than 2D convolution. Documentation: http://pytorch.org/docs/master/nn.html#torch.nn.Conv3d. In our dataset, each clip is a video of 3 frames. Let's classify each clip rather than each image using 3D convolution.

In [26]:
class ActionClipDataset(Dataset):
    """Clip-level action dataset: one sample per clip folder.

    Every entry of ``root_dir`` is treated as one clip folder named with a
    zero-padded, 1-based index (``00001``, ``00002``, ...) that contains the
    three frames ``1.jpg``, ``2.jpg`` and ``3.jpg``.
    """

    def __init__(self,  root_dir,labels=None, transform=None):
        """
        Args:
            root_dir (string): Directory with all the clip folders.
            labels (sequence, optional): Per-clip labels, indexed as
                ``labels[clip][0]`` with 1-based class ids. Leave empty/None
                for an unlabelled (test) set.
            transform (optional): Only its truthiness is used. When truthy
                the frames are stacked into one tensor; the object itself is
                never called. When falsy the clip stays a list of arrays.
        """
        self.root_dir = root_dir
        self.transform = transform
        # Number of clips == number of entries found in the root directory.
        self.length = len(os.listdir(self.root_dir))
        # Avoid a shared mutable default; "no labels" is always an empty list.
        self.labels = [] if labels is None else labels

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        """Return the sample dict for clip ``idx``.

        Returns:
            dict: ``{'clip', 'folder'}`` plus ``'Label'`` (0-based class id)
            when labels were supplied.

        Raises:
            IndexError: if ``idx`` is outside ``[0, len(self))``. This is what
                lets plain ``for sample in dataset`` loops terminate.
        """
        idx = int(idx)
        if not 0 <= idx < len(self):
            raise IndexError('index %d is out of range for a dataset of size %d'
                             % (idx, len(self)))

        folder = format(idx + 1, '05d')  # folders are 1-based
        has_labels = len(self.labels) != 0
        if has_labels:
            # Stored labels are 1-based; shift them to 0-based class ids.
            Label = self.labels[idx][0] - 1

        clip = []
        for frame_no in range(1, 4):
            img_path = os.path.join(self.root_dir, folder, str(frame_no) + '.jpg')
            clip.append(np.array(Image.open(img_path)))

        if self.transform:
            # Stack the frames and move the last array axis to position 1:
            # (frame, H, W, channel) -> (frame, channel, H, W), assuming each
            # image decodes to an H x W x channel array.
            # NOTE(review): nn.Conv3d documents its input as (N, C, D, H, W),
            # i.e. channel before depth/frame. With 3 frames and 3 channels
            # the shapes coincide, so this layout runs without error --
            # confirm that it is the intended one.
            clip = np.transpose(np.asarray(clip), (0, 3, 1, 2))
            clip = torch.from_numpy(clip)

        if has_labels:
            sample = {'clip': clip, 'Label': Label, 'folder': folder}
        else:
            sample = {'clip': clip, 'folder': folder}
        return sample

# Build the clip-level training dataset.
clip_dataset = ActionClipDataset(
    root_dir='./data/trainClips/',
    labels=label_train,
    transform=T.ToTensor(),
)

# Inspect the first ten clips: tensor shape, label and folder name.
for i in range(10):
    sample = clip_dataset[i]
    print(sample['clip'].shape, sample['Label'], sample['folder'], sep='\n')

torch.Size([3, 3, 64, 64])
0.0
00001
torch.Size([3, 3, 64, 64])
0.0
00002
torch.Size([3, 3, 64, 64])
0.0
00003
torch.Size([3, 3, 64, 64])
0.0
00004
torch.Size([3, 3, 64, 64])
0.0
00005
torch.Size([3, 3, 64, 64])
0.0
00006
torch.Size([3, 3, 64, 64])
0.0
00007
torch.Size([3, 3, 64, 64])
0.0
00008
torch.Size([3, 3, 64, 64])
0.0
00009
torch.Size([3, 3, 64, 64])
0.0
00010


In [27]:
# Wrap the clip dataset in a DataLoader that yields shuffled batches of 4 clips,
# loaded with 4 workers.
clip_dataloader = DataLoader(clip_dataset, batch_size=4,
                        shuffle=True, num_workers=4)


# Peek at the first batches to check the batched shapes, folders and labels.
for i,sample in enumerate(clip_dataloader):
    print(i,sample['clip'].shape,sample['folder'],sample['Label'])
    if i>20: 
        break

0 torch.Size([4, 3, 3, 64, 64]) ['06022', '05268', '00255', '04873'] 
 7
 6
 0
 5
[torch.DoubleTensor of size 4]

1 torch.Size([4, 3, 3, 64, 64]) ['02839', '07351', '06344', '02079'] 
 3
 9
 7
 2
[torch.DoubleTensor of size 4]

2 torch.Size([4, 3, 3, 64, 64]) ['01639', '03745', '01607', '03902'] 
 1
 4
 1
 4
[torch.DoubleTensor of size 4]

3 torch.Size([4, 3, 3, 64, 64]) ['00192', '02320', '01268', '06225'] 
 0
 2
 1
 7
[torch.DoubleTensor of size 4]

4 torch.Size([4, 3, 3, 64, 64]) ['05605', '02751', '05921', '06391'] 
 6
 3
 7
 7
[torch.DoubleTensor of size 4]

5 torch.Size([4, 3, 3, 64, 64]) ['02794', '01617', '04111', '02448'] 
 3
 1
 4
 2
[torch.DoubleTensor of size 4]

6 torch.Size([4, 3, 3, 64, 64]) ['06879', '02780', '03076', '01699'] 
 8
 3
 3
 1
[torch.DoubleTensor of size 4]

7 torch.Size([4, 3, 3, 64, 64]) ['03253', '00348', '00244', '02229'] 
 3
 0
 0
 2
[torch.DoubleTensor of size 4]

8 torch.Size([4, 3, 3, 64, 64]) ['06505', '00034', '06407', '03791'] 
 8
 0
 7
 4
[torch

In [28]:
# Training split: labelled, reshuffled on every pass.
clip_dataset_train = ActionClipDataset(
    root_dir='./data/trainClips/',
    labels=label_train,
    transform=T.ToTensor(),
)
clip_dataloader_train = DataLoader(
    clip_dataset_train, batch_size=16, shuffle=True, num_workers=4)

# Validation split: labelled; this loader is shuffled as well.
clip_dataset_val = ActionClipDataset(
    root_dir='./data/valClips/',
    labels=label_val,
    transform=T.ToTensor(),
)
clip_dataloader_val = DataLoader(
    clip_dataset_val, batch_size=16, shuffle=True, num_workers=4)

# Test split: no labels, kept in a fixed order so predictions line up with ids.
clip_dataset_test = ActionClipDataset(
    root_dir='./data/testClips/',
    labels=[],
    transform=T.ToTensor(),
)
clip_dataloader_test = DataLoader(
    clip_dataset_test, batch_size=16, shuffle=False, num_workers=4)

### Flatten function for 3D convolution feature maps.

In [29]:
class Flatten3d(nn.Module):
    """Collapse a 5-D batch (N, C, D, H, W) into a 2-D batch (N, C*D*H*W)."""

    def forward(self, x):
        # Unpacking five sizes doubles as a check that the input is 5-D.
        batch_size, _channels, _depth, _height, _width = x.size()
        # -1 lets view() infer the per-sample feature count.
        flat = x.view(batch_size, -1)
        return flat

Network using 3D convolution on videos for video classification.

In [30]:
# Two Conv3d -> BatchNorm3d -> ReLU -> MaxPool3d stages followed by a linear classifier.
# Size bookkeeping for a (3, 3, 64, 64) sample: a 3x3x3 conv with padding=2 grows
# every dimension by 2 and each pool halves it (rounding down):
#   depth 3 -> 5 -> 2 -> 4 -> 2, height/width 64 -> 66 -> 33 -> 35 -> 17,
# so the flattened size is 16 * 2 * 17 * 17 = 9248.
fixed_model_3d = nn.Sequential( # You fill this in!
    nn.Conv3d(3, 8, kernel_size=3, stride=1, padding=2),
    nn.BatchNorm3d(8),
    nn.ReLU(inplace=True),
    nn.MaxPool3d(kernel_size=2, stride=2),
    nn.Conv3d(8, 16, kernel_size=3, stride=1, padding=2),
    nn.BatchNorm3d(16),
    nn.ReLU(inplace=True),
    nn.MaxPool3d(kernel_size=2, stride=2),
    Flatten3d(),
    nn.ReLU(inplace=True),
    nn.Linear(9248, 10), # affine layer
)

fixed_model_3d = fixed_model_3d.type(dtype).cuda()  # requires a CUDA device
# Smoke test: a random batch of 32 inputs must produce a 32 x 10 score matrix.
x = torch.randn(32,3, 3, 64, 64).type(dtype)
x_var = Variable(x).type(dtype).cuda() # Construct a PyTorch Variable out of your input data
ans = fixed_model_3d(x_var) 
np.array_equal(np.array(ans.size()), np.array([32, 10]))


True

### What I did
I have used a two layer network with a fixed filter size of 3. The number of filters for 1st conv layer is 8 with padding 2, and then it is doubled (16) for the next conv layer, again with padding 2. After each convolutional layer, I have used BatchNorm. After RELU activations, I have used Max Pooling with a kernel size of 2 and stride 2.

In [31]:
# Cross-entropy loss and an RMSprop optimizer over the 3D model's parameters.
# These rebind the loss_fn/optimizer names used for the 2D model earlier.
loss_fn = nn.CrossEntropyLoss().type(dtype)
optimizer = optim.RMSprop(fixed_model_3d.parameters(), lr=1e-4)

In [32]:
def train_3d(model, loss_fn, optimizer,dataloader,num_epochs = 1):
    """Train ``model`` on clip batches for ``num_epochs`` epochs.

    Args:
        model: network mapping a clip batch to class scores.
        loss_fn: callable ``loss_fn(scores, targets)`` returning a scalar loss.
        optimizer: optimizer over ``model``'s parameters.
        dataloader: iterable of batches (dicts) with keys ``'clip'`` and
            ``'Label'``.
        num_epochs (int): number of passes over ``dataloader``.

    Prints the current loss every ``print_every`` iterations.
    """
    # Send the batches to wherever the model lives instead of always calling
    # .cuda(): identical on a GPU model, and it also works for a CPU model.
    use_cuda = any(p.is_cuda for p in model.parameters())
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, sample in enumerate(dataloader):
            # Clips are cast to the float type used by the model.
            x_var = Variable(sample['clip'].type(dtype))
            # The loss needs integer class ids.
            y_var = Variable(sample['Label'].type(dtype).long())
            if use_cuda:
                x_var, y_var = x_var.cuda(), y_var.cuda()

            scores = model(x_var)

            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                # loss.data[0] fails on 0-dim losses (newer PyTorch); .item()
                # does not exist on old releases. Support both.
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                print('t = %d, loss = %.4f' % (t + 1, loss_value))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy_3d(model, loader):
    """Print the classification accuracy of ``model`` over clip batches.

    Args:
        model: network mapping a clip batch to class scores.
        loader: iterable of batches (dicts) with keys ``'clip'`` and
            ``'Label'``.

    Prints ``Got <correct> / <total> correct (<percent>)``. An empty loader
    reports 0 / 0 with an accuracy of 0.00 instead of dividing by zero.
    """
    num_correct = 0
    num_samples = 0
    # Feed the batches to wherever the model lives (GPU or CPU).
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for sample in loader:
        x_var = Variable(sample['clip'].type(dtype))
        if use_cuda:
            x_var = x_var.cuda()
        scores = model(x_var)
        # Index of the highest score per row is the predicted class.
        _, preds = scores.data.max(1)
        preds = preds.cpu().numpy()
        # Labels are only compared on the CPU, so they never need to visit the GPU.
        labels = sample['Label'].cpu().numpy()
        num_correct += (preds == labels).sum()
        num_samples += preds.shape[0]
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [33]:
# Seed the CUDA RNG, re-initialise the 3D model, train it for 10 epochs and
# report the accuracy on the validation clips.
torch.cuda.random.manual_seed(12345)
fixed_model_3d.apply(reset) 
fixed_model_3d.train()  # train_3d also switches to train mode at each epoch
train_3d(fixed_model_3d, loss_fn, optimizer,clip_dataloader_train, num_epochs=10) 
fixed_model_3d.eval()  # check_accuracy_3d also switches to eval mode
check_accuracy_3d(fixed_model_3d, clip_dataloader_val)

Starting epoch 1 / 10
t = 100, loss = 1.2843
t = 200, loss = 1.0668
t = 300, loss = 1.0014
t = 400, loss = 0.5186
Starting epoch 2 / 10
t = 100, loss = 0.6908
t = 200, loss = 0.5488
t = 300, loss = 0.1692
t = 400, loss = 0.3790
Starting epoch 3 / 10
t = 100, loss = 0.7274
t = 200, loss = 0.2435
t = 300, loss = 0.1792
t = 400, loss = 0.2836
Starting epoch 4 / 10
t = 100, loss = 0.3808
t = 200, loss = 0.4830
t = 300, loss = 0.2828
t = 400, loss = 0.1042
Starting epoch 5 / 10
t = 100, loss = 0.1287
t = 200, loss = 0.0491
t = 300, loss = 0.0802
t = 400, loss = 0.1436
Starting epoch 6 / 10
t = 100, loss = 0.0758
t = 200, loss = 0.0392
t = 300, loss = 0.0659
t = 400, loss = 0.0713
Starting epoch 7 / 10
t = 100, loss = 0.0444
t = 200, loss = 0.0638
t = 300, loss = 0.0753
t = 400, loss = 0.0290
Starting epoch 8 / 10
t = 100, loss = 0.0982
t = 200, loss = 0.1066
t = 300, loss = 0.0125
t = 400, loss = 0.2513
Starting epoch 9 / 10
t = 100, loss = 0.0458
t = 200, loss = 0.0170
t = 300, loss = 0.16

In [34]:
def predict_on_test_3d(model, loader):
    """Write the predicted class of every test clip to ``results_3d.csv``.

    Args:
        model: network mapping a clip batch to class scores.
        loader: iterable of batches (dicts) with key ``'clip'``; it must not
            shuffle, because row ids are assigned in iteration order.

    Returns:
        int: number of predictions written.

    The file has an ``Id,Class`` header followed by one ``<id>,<class>`` row
    per clip, with ids counting up from 0.
    """
    # Feed the batches to wherever the model lives (GPU or CPU).
    use_cuda = any(p.is_cuda for p in model.parameters())
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    count=0
    # The with-block closes the file even if the model raises mid-way.
    with open('results_3d.csv','w') as results:
        results.write('Id'+','+'Class'+'\n')
        for sample in loader:
            x_var = Variable(sample['clip'].type(dtype))
            if use_cuda:
                x_var = x_var.cuda()
            scores = model(x_var)
            _, preds = scores.data.max(1)
            for i in range(len(preds)):
                # int() keeps the row a bare number; str() of a 0-dim tensor
                # would be written as "tensor(k)" on newer PyTorch.
                results.write(str(count)+','+str(int(preds[i]))+'\n')
                count+=1
    return count
    
# Write results_3d.csv for the test clips and show how many rows were written.
count=predict_on_test_3d(fixed_model_3d, clip_dataloader_test)
print(count)

3270
