# Training a ConvNet PyTorch

 


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader,sampler,Dataset
import torchvision.datasets as dset
import torchvision.transforms as T
import timeit
from PIL import Image
import os
import numpy as np
import scipy.io
import torchvision.models as models


## Load Datasets



In [1]:
label_mat=scipy.io.loadmat('./data/data.mat')
label_train=label_mat['trLb']
print(len(label_train))
label_val=label_mat['valLb']
print(len(label_val))

NameError: name 'scipy' is not defined

### Dataset class



In [3]:

class ActionDataset(Dataset):
    """Action dataset."""

    def __init__(self,  root_dir,labels=[], transform=T.Resize((224,224))):
        """
        Args:
            root_dir (string): Directory with all the images.
            labels(list): labels if images.
            transform (callable, optional): Optional transform to be applied on a sample.
        """
        self.root_dir = root_dir
        self.transform = transform
        #self.transform_1=T.Resize((224,224))
        self.length=len(os.listdir(self.root_dir))
        self.labels=labels
    def __len__(self):
        return self.length*3

    def __getitem__(self, idx):
        
        folder=int(idx/3)+1
        imidx=idx%3+1
        folder=format(folder,'05d')
        imgname=str(imidx)+'.jpg'
        img_path = os.path.join(self.root_dir,
                                folder,imgname)
        image = Image.open(img_path)
        if len(self.labels)!=0:
            Label=self.labels[int(idx/3)][0]-1
        if self.transform:
            #image = self.transform_1(image)
            image = self.transform(image)
            
        if len(self.labels)!=0:
            sample={'image':image,'img_path':img_path,'Label':Label}
        else:
            sample={'image':image,'img_path':img_path}
        return sample
  

Iterating over the dataset by a for loop.

In [4]:
image_dataset=ActionDataset(root_dir='./data/trainClips/',\
                            labels=label_train,transform=T.ToTensor())

#iterating though the dataset
for i in range(10):
    sample=image_dataset[i]
    print(sample['image'].shape)
    print(sample['Label'])
    print(sample['img_path'])
     
   

torch.Size([3, 64, 64])
0.0
./data/trainClips/00001/1.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00001/2.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00001/3.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00002/1.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00002/2.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00002/3.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00003/1.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00003/2.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00003/3.jpg
torch.Size([3, 64, 64])
0.0
./data/trainClips/00004/1.jpg


We can iterate over the created dataset with a 'for' loop as before. However, we are losing a lot of features by using a simple for loop to iterate over the data. In particular, we are missing out on:

* Batching the data
* Shuffling the data
* Load the data in parallel using multiprocessing workers.

torch.utils.data.DataLoader is an iterator which provides all these features. 

In [15]:
image_dataloader = DataLoader(image_dataset, batch_size=4,
                        shuffle=True, num_workers=4)


for i,sample in enumerate(image_dataloader):
    sample['image']=sample['image']
    print(i,sample['image'].shape,sample['img_path'],sample['Label'])
    if i>20: 
        break

0 torch.Size([4, 3, 64, 64]) ['./data/trainClips/03295/3.jpg', './data/trainClips/02391/3.jpg', './data/trainClips/03242/3.jpg', './data/trainClips/04701/3.jpg'] tensor([3., 2., 3., 5.], dtype=torch.float64)
1 torch.Size([4, 3, 64, 64]) ['./data/trainClips/01994/1.jpg', './data/trainClips/07124/1.jpg', './data/trainClips/03797/3.jpg', './data/trainClips/02245/2.jpg'] tensor([2., 8., 4., 2.], dtype=torch.float64)
2 torch.Size([4, 3, 64, 64]) ['./data/trainClips/07681/2.jpg', './data/trainClips/00900/3.jpg', './data/trainClips/05928/3.jpg', './data/trainClips/02609/3.jpg'] tensor([9., 0., 7., 2.], dtype=torch.float64)
3 torch.Size([4, 3, 64, 64]) ['./data/trainClips/07252/2.jpg', './data/trainClips/05717/3.jpg', './data/trainClips/06010/2.jpg', './data/trainClips/00032/1.jpg'] tensor([9., 7., 7., 0.], dtype=torch.float64)
4 torch.Size([4, 3, 64, 64]) ['./data/trainClips/07203/2.jpg', './data/trainClips/02396/2.jpg', './data/trainClips/02335/1.jpg', './data/trainClips/06890/2.jpg'] tensor

Dataloaders for the training, validationg and testing set. 

In [16]:
image_dataset_train=ActionDataset(root_dir='./data/trainClips/',labels=label_train,transform=T.ToTensor())

image_dataloader_train = DataLoader(image_dataset_train, batch_size=32,
                        shuffle=True, num_workers=4)
image_dataset_val=ActionDataset(root_dir='./data/valClips/',labels=label_val,transform=T.ToTensor())

image_dataloader_val = DataLoader(image_dataset_val, batch_size=32,
                        shuffle=False, num_workers=4)
image_dataset_test=ActionDataset(root_dir='./data/testClips/',labels=[],transform=T.ToTensor())

image_dataloader_test = DataLoader(image_dataset_test, batch_size=32,
                        shuffle=False, num_workers=4)

In [17]:
dtype = torch.FloatTensor # the CPU datatype
# Constant to control how frequently we print train loss
print_every = 100
# This is a little utility that we'll use to reset the model
# if we want to re-initialize all our parameters
def reset(m):
    if hasattr(m, 'reset_parameters'):
        m.reset_parameters()

In [18]:
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size() # read in N, C, H, W
        return x.view(N, -1)  # "flatten" the C * H * W values into a single vector per image

In [19]:
# Here's where we define the architecture of the model... 
simple_model = nn.Sequential(
                nn.Conv2d(3, 32, kernel_size=7, stride=2),
                nn.ReLU(inplace=True),
                Flatten(), # see above for explanation
                nn.Linear(26912, 10), # affine layer
              )

# Set the type of all data in this model to be FloatTensor 
simple_model.type(dtype)

loss_fn = nn.CrossEntropyLoss().type(dtype)
optimizer = optim.Adam(simple_model.parameters(), lr=1e-2) # lr sets the learning rate of the optimizer

## Training a specific model



* 7x7 Convolutional Layer with 8 filters and stride of 1
* ReLU Activation Layer
* 2x2 Max Pooling layer with a stride of 2
* 7x7 Convolutional Layer with 16 filters and stride of 1
* ReLU Activation Layer
* 2x2 Max Pooling layer with a stride of 2
* Flatten the feature map
* ReLU Activation Layer
* Affine layer to map input units to 10 outputs, you need to figure out the input size here.


In [2]:
fixed_model_base = nn.Sequential( 
   
   
     nn.Conv2d(3, 8, kernel_size=7, stride=1),
     nn.ReLU(inplace=True),
     nn.MaxPool2d(2, stride = 2),
     nn.Conv2d(8, 16, kernel_size=7, stride=1),
     nn.ReLU(inplace=True),
     nn.MaxPool2d(2, stride = 2),
     Flatten(),
     nn.ReLU(inplace=True),
     nn.Linear(1936, 10)
    
    
    
            )
fixed_model = fixed_model_base.type(dtype)



NameError: name 'nn' is not defined

To make sure you're doing the right thing, use the following tool to check the dimensionality of your output (it should be 32 x 10, since our batches have size 32 and the output of the final affine layer should be 10, corresponding to our 10 classes):

In [22]:
## Now we're going to feed a random batch into the model you defined and make sure the output is the right size
x = torch.randn(32, 3, 64, 64).type(dtype)
x_var = Variable(x.type(dtype)) # Construct a PyTorch Variable out of your input data
ans = fixed_model(x_var)        # Feed it through the model! 

# Check to make sure what comes out of your model
# is the right dimensionality... this should be True
# if you've done everything correctly
print(np.array(ans.size()))
np.array_equal(np.array(ans.size()), np.array([32, 10]))   


[32 10]


True

### Train the model.



In [23]:

optimizer = torch.optim.RMSprop(fixed_model_base.parameters(), lr = 1e-4)
loss_fn = nn.CrossEntropyLoss()



In [24]:
# This sets the model in "training" mode. 
# This is relevant for some layers that may have different behavior
# in training mode vs testing mode, such as Dropout and BatchNorm. 
fixed_model.train()

# Load one batch at a time.
for t, sample in enumerate(image_dataloader_train):
    x_var = Variable(sample['image'])
    #print(type(x_var.data))
    #print(x_var.shape)
    y_var = Variable(sample['Label']).long()

    # This is the forward pass: predict the scores for each class, for each x in the batch.
    scores = fixed_model(x_var)
    
    # Use the correct y values and the predicted y values to compute the loss.
    loss = loss_fn(scores, y_var)
    
    if (t + 1) % print_every == 0:
        print('t = %d, loss = %.4f' % (t + 1, loss.data[0]))

    # Zero out all of the gradients for the variables which the optimizer will update.
    optimizer.zero_grad()
    
    # This is the backwards pass: compute the gradient of the loss with respect to each 
    # parameter of the model.
    loss.backward()
    
    # Actually update the parameters of the model using the gradients computed by the backwards pass.
    optimizer.step()
   



t = 100, loss = 1.8475
t = 200, loss = 1.6279
t = 300, loss = 1.5965
t = 400, loss = 1.3759
t = 500, loss = 1.5776
t = 600, loss = 1.4893
t = 700, loss = 1.4259


Now you've seen how the training process works in PyTorch. To save you writing boilerplate code, we're providing the following helper functions to help you train for multiple epochs and check the accuracy of your model:

In [25]:
def train(model, loss_fn, optimizer, dataloader, num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, sample in enumerate(dataloader):
            x_var = Variable(sample['image'])
            y_var = Variable(sample['Label'].long())

            scores = model(x_var)
            
            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.data[0]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    '''
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')  
    '''
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for t, sample in enumerate(loader):
        x_var = Variable(sample['image'])
        y_var = sample['Label']
        #y_var=y_var.cpu()
        scores = model(x_var)
        _, preds = scores.data.max(1)#scores.data.cpu().max(1)
        #print(preds)
        #print(y_var)
        num_correct += (preds.numpy() == y_var.numpy()).sum()
        num_samples += preds.size(0)
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))
    
    



### Check the accuracy of the model.



In [26]:
torch.random.manual_seed(12345)
fixed_model.cpu()
fixed_model.apply(reset) 
fixed_model.train() 
train(fixed_model, loss_fn, optimizer,image_dataloader_train, num_epochs=2) 
check_accuracy(fixed_model, image_dataloader_train)# check accuracy on the training set


Starting epoch 1 / 2


  del sys.path[0]


t = 100, loss = 2.2959
t = 200, loss = 2.2642
t = 300, loss = 2.1472
t = 400, loss = 1.8851
t = 500, loss = 1.7492
t = 600, loss = 1.5473
t = 700, loss = 1.5067
Starting epoch 2 / 2
t = 100, loss = 1.7724
t = 200, loss = 1.4399
t = 300, loss = 1.0912
t = 400, loss = 1.0742
t = 500, loss = 1.2031
t = 600, loss = 1.0958
t = 700, loss = 1.0333
Got 14191 / 23310 correct (60.88)


In [16]:
check_accuracy(fixed_model, image_dataloader_val)#check accuracy on the validation set

Process Process-11:
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

KeyboardInterrupt
Traceback (most recent call last):
  File "/home/parth/anaconda3/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/parth/anaconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/parth/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 106, in _worker_loop
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "/home/parth/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 106, in <listcomp>
    samples = collate_fn([dataset[i] for i in batch_indices])
  File "<ipython-input-3-c1173a00001a>", line 27, in __getitem__
    image = Image.open(img_path)
  File "/home/parth/anaconda3/lib/python3.6/site-packages/PIL/Image.py", line 2552, in open
    prefix = fp.read(16)
Pro

Traceback (most recent call last):
  File "/home/parth/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-16-428707e4970d>", line 1, in <module>
    check_accuracy(fixed_model, image_dataloader_val)#check accuracy on the validation set
  File "<ipython-input-15-9cf324fe90e7>", line 29, in check_accuracy
    for t, sample in enumerate(loader):
  File "/home/parth/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 330, in __next__
    idx, batch = self._get_batch()
  File "/home/parth/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 309, in _get_batch
    return self.data_queue.get()
  File "/home/parth/anaconda3/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/home/parth/anaconda3/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = se

KeyboardInterrupt: 

##### Train a better  model for action recognition!



In [29]:
new_model_base = nn.Sequential( 
                nn.Conv2d(3, 64, kernel_size=3, stride=1),
                nn.BatchNorm2d(64),
                nn.Conv2d(64, 64, kernel_size=3, stride=1),
                nn.BatchNorm2d(64),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, stride = 2),
    
                nn.Conv2d(64, 128, kernel_size=3, stride=1),
                nn.BatchNorm2d(128),
                nn.Conv2d(128, 128, kernel_size=3, stride=1),
                nn.BatchNorm2d(128),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, stride = 2),
    
                nn.Conv2d(128, 256, kernel_size=3, stride=1),
                nn.BatchNorm2d(256),
                nn.Conv2d(256, 256, kernel_size=3, stride=1),
                nn.BatchNorm2d(256),
                nn.Conv2d(256, 256, kernel_size=1, stride=1),
                nn.BatchNorm2d(256),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, stride = 2),
                
                nn.Conv2d(256, 512, kernel_size=1, stride=1),
                nn.BatchNorm2d(512),
                nn.Conv2d(512, 512, kernel_size=1, stride=1),
                nn.BatchNorm2d(512),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, stride = 2),
               
                Flatten(),
                nn.Linear(2048, 1024),
                nn.Linear(1024, 10),
                nn.LogSoftmax()
            )

new_model = new_model_base.type(dtype)

optimizer = torch.optim.SGD(new_model.parameters(), lr = 0.0001)
loss_fn = nn.CrossEntropyLoss()
train(new_model, loss_fn, optimizer,image_dataloader_train, num_epochs=10) 
check_accuracy(new_model, image_dataloader_val)

Starting epoch 1 / 10


  input = module(input)
Process Process-36:
Process Process-33:
Process Process-34:
Process Process-35:

KeyboardInterrupt



### Describe what you did 

In the cell below you should write an explanation of what you did, any additional features that you implemented, and any visualizations or graphs that you make in the process of training and evaluating your network.

### Testing the model 

In [None]:
def predict_on_test(model, loader):
    '''
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')  
    '''
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    results=open('results.csv','w')
    count=0
    results.write('Id'+','+'Class'+'\n')
    for t, sample in enumerate(loader):
        x_var = Variable(sample['image'])
        scores = model(x_var)
        _, preds = scores.data.max(1)
        for i in range(len(preds)):
            results.write(str(count)+','+str(preds[i])+'\n')
            count+=1
    results.close()
    return count
    
count=predict_on_test(fixed_model, image_dataloader_test)
print(count)

GPU 

In [None]:
# Verify that CUDA is properly configured and you have a GPU available

torch.cuda.is_available()

In [None]:
import copy
gpu_dtype = torch.cuda.FloatTensor

fixed_model_gpu = copy.deepcopy(fixed_model_base)#.type(gpu_dtype)
fixed_model_gpu.cuda()
x_gpu = torch.randn(4, 3, 64, 64).cuda()#.type(gpu_dtype)
x_var_gpu = Variable(x_gpu)#type(gpu_dtype)) # Construct a PyTorch Variable out of your input data
ans = fixed_model_gpu(x_var_gpu)        # Feed it through the model! 

# Check to make sure what comes out of your model
# is the right dimensionality... this should be True
# if you've done everything correctly
np.array_equal(np.array(ans.size()), np.array([4, 10]))


Run the following cell to evaluate the performance of the forward pass running on the CPU:

In [None]:
%%timeit 
ans = fixed_model(x_var)

... and now the GPU:

In [None]:
%%timeit 
torch.cuda.synchronize() # Make sure there are no pending GPU computations
ans = fixed_model_gpu(x_var_gpu)        # Feed it through the model! 
torch.cuda.synchronize() # Make sure there are no pending GPU computations

In [None]:
loss_fn = nn.CrossEntropyLoss().cuda()
optimizer = optim.RMSprop(fixed_model_gpu.parameters(), lr=1e-4)

In [None]:
def train(model, loss_fn, optimizer, dataloader, num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, sample in enumerate(dataloader):
            x_var = Variable(sample['image'].cuda())
            y_var = Variable(sample['Label'].cuda().long())

            scores = model(x_var)
            
            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.data[0]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    '''
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')  
    '''
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for t, sample in enumerate(loader):
        x_var = Variable(sample['image'].cuda())
        y_var = sample['Label'].cuda()
        y_var=y_var.cpu()
        scores = model(x_var)
        _, preds = scores.data.cpu().max(1)
        #print(preds)
        #print(y_var)
        num_correct += (preds.numpy() == y_var.numpy()).sum()
        num_samples += preds.size(0)
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

Run on GPU!

In [None]:
torch.cuda.random.manual_seed(12345)

fixed_model_gpu.apply(reset) 
fixed_model_gpu.train() 
train(fixed_model_gpu, loss_fn, optimizer,image_dataloader_train, num_epochs=1) 
check_accuracy(fixed_model_gpu, image_dataloader_train)# check accuracy on the training set


### 3D Convolution on video clips 

In [4]:
class ActionClipDataset(Dataset):
    """Action Landmarks dataset."""

    def __init__(self,  root_dir,labels=[], transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        
        self.root_dir = root_dir
        self.transform = transform
        self.length=len(os.listdir(self.root_dir))
        self.labels=labels

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        
        folder=idx+1
        folder=format(folder,'05d')
        clip=[]
        if len(self.labels)!=0:
            Label=self.labels[idx][0]-1
        for i in range(3):
            imidx=i+1
            imgname=str(imidx)+'.jpg'
            img_path = os.path.join(self.root_dir,
                                    folder,imgname)
            image = Image.open(img_path)
            image=np.array(image)
            clip.append(image)
        if self.transform:
            clip=np.asarray(clip)
            clip=np.transpose(clip, (0,3,1,2))
            clip = torch.from_numpy(np.asarray(clip))
        if len(self.labels)!=0:
            sample={'clip':clip,'Label':Label,'folder':folder}
        else:
            sample={'clip':clip,'folder':folder}
        return sample

clip_dataset=ActionClipDataset(root_dir='./data/trainClips/',\
                               labels=label_train,transform=T.ToTensor())#/home/tqvinh/Study/CSE512/cse512-s18/hw2data/trainClips/
for i in range(10):
    sample=clip_dataset[i]
    print(sample['clip'].shape)
    print(sample['Label'])
    print(sample['folder'])

torch.Size([3, 3, 64, 64])
0.0
00001
torch.Size([3, 3, 64, 64])
0.0
00002
torch.Size([3, 3, 64, 64])
0.0
00003
torch.Size([3, 3, 64, 64])
0.0
00004
torch.Size([3, 3, 64, 64])
0.0
00005
torch.Size([3, 3, 64, 64])
0.0
00006
torch.Size([3, 3, 64, 64])
0.0
00007
torch.Size([3, 3, 64, 64])
0.0
00008
torch.Size([3, 3, 64, 64])
0.0
00009
torch.Size([3, 3, 64, 64])
0.0
00010


In [5]:
clip_dataloader = DataLoader(clip_dataset, batch_size=4,
                        shuffle=True, num_workers=4)


for i,sample in enumerate(clip_dataloader):
    print(i,sample['clip'].shape,sample['folder'],sample['Label'])
    if i>20: 
        break

0 torch.Size([4, 3, 3, 64, 64]) ['02342', '05494', '06004', '04818'] tensor([2., 6., 7., 5.], dtype=torch.float64)
1 torch.Size([4, 3, 3, 64, 64]) ['00930', '02848', '02892', '00980'] tensor([1., 3., 3., 1.], dtype=torch.float64)
2 torch.Size([4, 3, 3, 64, 64]) ['01885', '02775', '02211', '04220'] tensor([2., 3., 2., 5.], dtype=torch.float64)
3 torch.Size([4, 3, 3, 64, 64]) ['04939', '06908', '07512', '05743'] tensor([5., 8., 9., 7.], dtype=torch.float64)
4 torch.Size([4, 3, 3, 64, 64]) ['07242', '06811', '07282', '03199'] tensor([9., 8., 9., 3.], dtype=torch.float64)
5 torch.Size([4, 3, 3, 64, 64]) ['06964', '01432', '00998', '03533'] tensor([8., 1., 1., 4.], dtype=torch.float64)
6 torch.Size([4, 3, 3, 64, 64]) ['01655', '00484', '05357', '03996'] tensor([1., 0., 6., 4.], dtype=torch.float64)
7 torch.Size([4, 3, 3, 64, 64]) ['07747', '04355', '07478', '02768'] tensor([9., 5., 9., 3.], dtype=torch.float64)
8 torch.Size([4, 3, 3, 64, 64]) ['03529', '01874', '00866', '02525'] tensor([4.,

In [6]:
clip_dataset_train=ActionClipDataset(root_dir='./data/trainClips/',labels=label_train,transform=T.ToTensor())

clip_dataloader_train = DataLoader(clip_dataset_train, batch_size=16,
                        shuffle=True, num_workers=4)
clip_dataset_val=ActionClipDataset(root_dir='./data/valClips/',labels=label_val,transform=T.ToTensor())

clip_dataloader_val = DataLoader(clip_dataset_val, batch_size=16,
                        shuffle=True, num_workers=4)
clip_dataset_test=ActionClipDataset(root_dir='./data/testClips/',labels=[],transform=T.ToTensor())

clip_dataloader_test = DataLoader(clip_dataset_test, batch_size=16,
                        shuffle=False, num_workers=4)

Write the Flatten for 3d covolution feature maps.

In [30]:
class Flatten3d(nn.Module):
    def forward(self, x):
        
        N, C, D, H, W = x.size() 
        return x.view(N, -1) 

Design a network using 3D convolution on videos for video classification.

In [2]:
fixed_model_3d = nn.Sequential( # You fill this in!
#     ###############7th TODO (20 points)#########################
    nn.Conv3d(3, 64, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm3d(64),
    nn.ReLU(inplace=True),
    nn.MaxPool3d(kernel_size=(1,2,2)),
    
    nn.Conv3d(64, 128, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm3d(128),
    nn.ReLU(inplace=True),
    nn.MaxPool3d(kernel_size=(1,2,2)),
    
    nn.Conv3d(128, 256, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm3d(256),
    nn.ReLU(inplace=True),
    nn.Dropout3d(0.2),
    nn.MaxPool3d(kernel_size=(1,2,2)),
    
    nn.Conv3d(256, 512, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm3d(512),
    nn.ReLU(inplace=True),
    nn.Dropout3d(0.2),
    nn.MaxPool3d(kernel_size=(1,2,2),stride=(1,2,2)),
    
    nn.Conv3d(512, 1024, kernel_size=3, stride=1, padding=1),
    nn.BatchNorm3d(1024),
    nn.ReLU(inplace=True),
    nn.Dropout3d(0.2),
    nn.MaxPool3d(kernel_size=(2,2,2),stride=(1,2,2)),
    
    
    
    Flatten3d(),
    nn.Linear(8192, 4096),
    nn.Linear(4096,10)

)

fixed_model_3d = fixed_model_3d.type(dtype)
x = torch.randn(32,3, 3, 64, 64).type(dtype)
x_var = Variable(x).type(dtype) # Construct a PyTorch Variable out of your input data
ans = fixed_model_3d(x_var) 
np.array_equal(np.array(ans.size()), np.array([32, 10]))


NameError: name 'nn' is not defined

In [18]:
loss_fn = nn.CrossEntropyLoss().type(dtype)
optimizer = optim.SGD(fixed_model_3d.parameters(), lr=1e-4)


In [19]:
def train_3d(model, loss_fn, optimizer,dataloader,num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, sample in enumerate(dataloader):
            x_var = Variable(sample['clip'].type(dtype))
            y_var = Variable(sample['Label'].type(dtype).long())

            scores = model(x_var)
            
            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.data[0]))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy_3d(model, loader):
    '''
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')  
    '''
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    for t, sample in enumerate(loader):
        x_var = Variable(sample['clip'].type(dtype))
        y_var = sample['Label'].type(dtype)
        y_var=y_var.cpu()
        scores = model(x_var)
        _, preds = scores.data.cpu().max(1)
        #print(preds)
        #print(y_var)
        num_correct += (preds.numpy() == y_var.numpy()).sum()
        num_samples += preds.size(0)
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [20]:
torch.cuda.random.manual_seed(12345)
fixed_model_3d.apply(reset) 
fixed_model_3d.train() 
train_3d(fixed_model_3d, loss_fn, optimizer,clip_dataloader_train, num_epochs=1) 
fixed_model_3d.eval() 
check_accuracy_3d(fixed_model_3d, clip_dataloader_val)

Starting epoch 1 / 1


  del sys.path[0]


t = 100, loss = 1.0273
t = 200, loss = 1.1261
t = 300, loss = 0.5587
t = 400, loss = 0.8737
Got 1308 / 2230 correct (58.65)


In [None]:
def predict_on_test_3d(model, loader):
    '''
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')  
    '''
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    results=open('results_3d.csv','w')
    count=0
    results.write('Id'+','+'Class'+'\n')
    for t, sample in enumerate(loader):
        x_var = Variable(sample['clip'].type(dtype))
        scores = model(x_var)
        _, preds = scores.data.max(1)
        for i in range(len(preds)):
            results.write(str(count)+','+str(preds[i])+'\n')
            count+=1
    results.close()
    return count
    
count=predict_on_test_3d(fixed_model_3d, clip_dataloader_test)
print(count)

In [1]:
for t, sample in enumerate(image_dataloader_train):
    x_var = Variable(sample['image'])
    print(type(x_var.data))
    print(x_var.shape)
    y_var = Variable(sample['Label']).long()
    print(y_var.shape)
    p=T.Resize((224,224))
    for i in range(0,3):
        x_var[0,:,:]

NameError: name 'image_dataloader_train' is not defined