In [2]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from PIL import Image
import skvideo.io

# Put matplotlib into interactive mode so figures update without blocking.
plt.ion()

# Use the GPU only when CUDA is actually usable on this machine.
use_gpu = torch.cuda.is_available()

In [3]:
%matplotlib inline

FOLDER_DATASET = "data/"

transformation = transforms.Compose([
transforms.RandomCrop(224),
transforms.ToTensor(),
])

class FrameData(Dataset):
    """Video dataset yielding temporally subsampled clips and their labels.

    The list file ``file_name`` (located in ``FOLDER_DATASET``) holds one
    sample per line in the form ``<relative video path> <label>``. Video
    paths are resolved relative to ``<FOLDER_DATASET>/UCF101/``. Blank lines
    in the list file are ignored.

    Args:
        FOLDER_DATASET: Dataset root directory (with or without a trailing
            path separator).
        file_name: Name of the list file inside ``FOLDER_DATASET``.
        timestep: Temporal stride; every ``timestep``-th frame of a video is
            kept by ``__getitem__``.
        transform: Stored as ``self.transform``. NOTE: ``__getitem__`` does
            not apply it.

    Raises:
        ValueError: If a non-blank line of the list file has fewer than two
            whitespace-separated fields.
    """

    def __init__(self, FOLDER_DATASET, file_name, timestep = 12,transform=None):
        self.__xs = []  # full paths of the video files
        self.__ys = []  # integer class labels, aligned with self.__xs
        self.transform = transform
        self.timestep = timestep

        list_path = os.path.join(FOLDER_DATASET, file_name)
        with open(list_path) as f:
            for line_no, line in enumerate(f, 1):
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                if len(fields) < 2:
                    raise ValueError(
                        "{}:{}: expected '<video path> <label>', got {!r}".format(
                            list_path, line_no, line))
                self.__xs.append(os.path.join(FOLDER_DATASET, "UCF101", fields[0]))
                # Parse via float so labels written as "3" or "3.0" both work.
                # NOTE(review): labels are used as-is. If the list files are
                # 1-based they must be shifted before being used as class
                # indices -- confirm against the list files.
                self.__ys.append(int(float(fields[1])))

    # Override to give PyTorch access to any video of the dataset
    def __getitem__(self, index):
        """Return ``(clip, label)`` for sample ``index``.

        ``clip`` is a float tensor holding every ``timestep``-th frame with
        its axes reordered by ``transpose(0, 3, 1, 2)``; ``label`` is a long
        tensor of shape ``(1,)``.
        """
        # Per the scikit-video docs, vread returns an array laid out as
        # (frames, height, width, channels).
        vid = skvideo.io.vread(self.__xs[index])
        # Subsample in time, then move the last (channel) axis to position 1.
        sampled = vid[::self.timestep].transpose(0, 3, 1, 2)
        vid = torch.from_numpy(sampled).float()
        label = torch.from_numpy(np.asarray([self.__ys[index]], dtype=np.int64))
        return vid, label

    # Override to give PyTorch size of dataset
    def __len__(self):
        """Return the number of samples listed in the list file."""
        return len(self.__xs)


In [4]:
# One FrameData per split; each split reads the list file "<split>1.txt"
# located in FOLDER_DATASET.
data_set = dict(
    (split, FrameData(FOLDER_DATASET, split + "1.txt"))
    for split in ('train', 'validation')
)

In [None]:
def training_bceloss(data, optimizer, model, criterion, batch_size=1):
    """Run a single optimisation step on a two-input model.

    Args:
        data: Tuple ``(input1, input2, labels)`` of tensors.
        optimizer: Optimizer over the parameters of ``model``.
        model: Module called as ``model(input1, input2)``.
        criterion: Loss called as ``criterion(outputs.view(-1), labels)``
            with ``labels`` flattened to 1-D. NOTE(review): a BCE-style loss
            expects floating-point targets -- confirm the label dtype
            supplied by the caller.
        batch_size: When 1, the leading batch axis of both inputs is dropped
            before the forward pass.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()
    input1, input2, labels = data
    if batch_size == 1:
        # Drop only the leading batch axis; a bare squeeze() would also remove
        # any other axis that happens to have length 1.
        input1 = input1.squeeze(0)
        input2 = input2.squeeze(0)

    # Follow the device of the model instead of moving to CUDA whenever it is
    # available: that mismatches a model that was left on the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    input1 = input1.to(device)
    input2 = input2.to(device)
    labels = labels.view(-1).to(device)

    optimizer.zero_grad()
    outputs = model(input1, input2)
    loss = criterion(outputs.view(-1), labels)
    loss.backward()
    optimizer.step()
    # The loss is a 0-dim tensor; .item() extracts it as a Python number.
    return loss.item()


In [10]:
class CNNGRU(nn.Module):
    """Frame-wise ResNet-18 features followed by a GRU and a linear classifier.

    Every frame is encoded by a ResNet-18 whose parameters are frozen
    (``requires_grad = False``); the per-frame feature vectors are fed to a
    GRU as one sequence, and the GRU output at the last time step is mapped
    to class scores by a linear layer.

    Args:
        hidden_layers: Hidden size of the recurrent layers.
        rnn_layers: Number of stacked recurrent layers.
        classes: Number of output classes.
        sample_rate: Stored as ``self.sample_rate``; not used by this class.
        pretrained: Forwarded to ``torchvision.models.resnet18``.

    NOTE(review): the backbone parameters are frozen, but ``model.train()``
    still switches the backbone modules to training mode -- consider keeping
    ``self.conv`` in eval mode if its normalisation statistics should stay
    fixed.
    """

    def __init__(self, hidden_layers=200, rnn_layers=2, classes=101,
                 sample_rate=12, pretrained=True):
        super(CNNGRU, self).__init__()
        self.hidden_layers = hidden_layers
        self.rnn_layers = rnn_layers
        self.classes = classes
        self.sample_rate = sample_rate

        self.conv = torchvision.models.resnet18(pretrained=pretrained)
        for param in self.conv.parameters():
            param.requires_grad = False
        # Feature size seen by the recurrent layers = width of the backbone's
        # final fully connected layer (1000 for the stock ResNet-18).
        self.input_dim = self.conv.fc.out_features

        # Not used by forward(); kept so that parameter order and state_dict
        # keys stay compatible with previously saved checkpoints.
        self.lstm = nn.LSTM(self.input_dim, self.hidden_layers, self.rnn_layers)
        self.gru = nn.GRU(self.input_dim, self.hidden_layers, self.rnn_layers)
        self.linear = nn.Linear(
            in_features=self.hidden_layers, out_features=self.classes)

    def forward(self, video):
        """Classify one clip.

        Args:
            video: Batch of frames accepted by the backbone; the leading
                (frame) axis becomes the GRU sequence axis.

        Returns:
            Tensor of shape ``(1, classes)`` with unnormalised class scores.
        """
        frame_features = self.conv(video)  # one feature vector per frame
        # Insert a batch axis of size 1: (frames, features) -> (frames, 1, features)
        frame_features = torch.unsqueeze(frame_features, 1)

        out, _ = self.gru(frame_features)  # run the GRU over the frame sequence
        last_step = out[-1, :, :]  # GRU output at the final time step
        output = self.linear(last_step)  # class scores
        return output

In [11]:
def train_model(model, criterion, optimizer, scheduler, data_set, use_gpu, num_epochs=25,
                num_workers=1):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a training pass over ``data_set['train']`` and an
    evaluation pass over ``data_set['validation']``, one sample per step
    (``batch_size=1``). The weights of the epoch with the highest validation
    accuracy are restored before returning.

    Args:
        model: Module called as ``model(inputs)`` that returns class scores.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer over the trainable parameters of ``model``.
        scheduler: Learning-rate scheduler; stepped once per epoch, after the
            training pass.
        data_set: Mapping with the keys ``'train'`` and ``'validation'``.
        use_gpu: If true, every batch is moved to the GPU with ``.cuda()``.
        num_epochs: Number of epochs to run.
        num_workers: Worker processes used by each ``DataLoader``.

    Returns:
        ``model`` (the same object) with the best validation weights loaded.
    """
    import copy  # local import keeps this cell self-contained

    since = time.time()
    phases = ['train', 'validation']

    data_loader = {
        'train': DataLoader(data_set['train'], batch_size=1, shuffle=True,
                            num_workers=num_workers),
        'validation': DataLoader(data_set['validation'], batch_size=1, shuffle=False,
                                 num_workers=num_workers),
    }
    dataset_sizes = {x: len(data_set[x]) for x in phases}

    # state_dict() hands out references to the live tensors, so a snapshot
    # has to be an explicit deep copy.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in phases:
            is_train = phase == 'train'
            model.train(is_train)  # training mode vs. evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in data_loader[phase]:
                # Drop only the batch axis added by the loader; a bare
                # squeeze() would also drop the frame axis of a one-frame clip.
                inputs = inputs.squeeze(0)
                labels = labels.view(-1)
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; no autograd graph is recorded during validation
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics, accumulated as plain Python numbers
                running_loss += loss.item()
                running_corrects += (preds == labels).sum().item()

            if is_train:
                # Step the schedule after this epoch's optimizer updates.
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = float(running_corrects) / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model whenever validation accuracy improves
            if phase == 'validation' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [12]:
# Instantiate the network and move it to the GPU when one is in use.
model_ft = CNNGRU()
if use_gpu:
    model_ft = model_ft.cuda()

# Classification loss applied to the class scores produced by the model.
criterion = nn.CrossEntropyLoss()

# The backbone (model_ft.conv) is excluded from optimisation: record the ids
# of its parameters and lazily select every other parameter of the model.
ignored_params = [id(p) for p in model_ft.conv.parameters()]
base_params = (p for p in model_ft.parameters() if id(p) not in ignored_params)

# SGD with momentum over the non-backbone parameters only.
optimizer_ft = optim.SGD([{'params': base_params}], lr=0.001, momentum=0.9)

# Multiply the learning rate by 0.1 every 7 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)


In [None]:
# Train for a single epoch and keep the model returned by train_model.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    data_set=data_set,
    use_gpu=use_gpu,
    num_epochs=1,
)

Epoch 1/1
----------


In [None]:
# Ids of the backbone (model_ft.conv) parameters, which are to be left out.
ignored_params = list(map(id, model_ft.conv.parameters()))
# Materialise the selection as a list: in Python 3 `filter` returns a one-shot
# iterator that has no len() and is empty after a single pass.
base_params = [p for p in model_ft.parameters() if id(p) not in ignored_params]

In [None]:
# Number of parameter tensors collected in base_params. This needs base_params
# to be a sized collection such as a list; len() is not defined for a lazy
# `filter` iterator.
len(base_params)

In [None]:
# SGD with momentum 0.9 and two parameter groups: `base_params` uses the
# optimizer-wide default rate `opt.lr*0.1`, while the parameters of `model.fc`
# use `opt.lr`.
# NOTE(review): `model` and `opt` are not defined in any cell visible here
# (the network built above is named `model_ft`) -- confirm where they come
# from before running this cell.
# NOTE(review): if `base_params` also contains the parameters passed in the
# second group, the optimizer will reject the duplicate -- verify that the two
# groups are disjoint.
optimizer = torch.optim.SGD([
            {'params': base_params},
            {'params': model.fc.parameters(), 'lr': opt.lr}
        ], lr=opt.lr*0.1, momentum=0.9)
