In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import glob
import os
from skimage.transform import resize
import cv2

In [2]:
ucf_data_path = "UCF-101"
train_video_path = ucf_data_path+"/Train"
test_video_path = ucf_data_path+"/Test"

train_archery = train_video_path + "/Archery"
train_makeup = train_video_path + "/Makeup"

test_archery = test_video_path + "/Archery"
test_makeup = test_video_path + "/Makeup"



In [None]:
def im_toarray(im):
    array = plt.imread(im)
    array = resize(array,(224,224))
    return array
    

def create_dataset():
    Y_archery = []
    archery_whole_frames = []
    for folders in glob.glob(train_archery+"/*"):
        one_video_frames = []
        Y_archery.append(1)
        for jpgs in glob.glob(folders+"/*.jpg"):
            arch_im = im_toarray(jpgs)
            one_video_frames.append(arch_im)
        
        archery_whole_frames.append(one_video_frames)
    
    Y_makeup = []
    makeup_whole_frames = []
    for folders in glob.glob(train_makeup+"/*"):
        one_video_frames = []
        Y_makeup.append(0)
        for jpgs in glob.glob(folders+"/*.jpg"):
            makeup_im = im_toarray(jpgs)
            one_video_frames.append(makeup_im)
        
        makeup_whole_frames.append(one_video_frames)
        
    X_train = np.concatenate((np.array(archery_whole_frames),np.array(makeup_whole_frames)),axis = 0)
    Y_train = np.concatenate((np.array(Y_archery),np.array(Y_makeup)))
    
    m = X_train.shape[0]
    nf = X_train.shape[1]
    nw = X_train.shape[2]
    nh = X_train.shape[3]
    nc = X_train.shape[4]
    
    return X_train.reshape(m,nf,nc,nw,nh),Y_train.reshape(16,1)
    
X_train,Y_train = create_dataset()
X_train.shape


In [4]:
device = torch.device("cuda:0")
torch.set_default_tensor_type('torch.cuda.FloatTensor')
model_cnn = models.vgg16(pretrained=True)
for param in model_cnn.parameters():
    param.requires_grad = False


class CNN_LSTM(nn.Module):
    def __init__(self, original_model):
        super(CNN_LSTM, self).__init__()
        self.feature_extractor = original_model.features
        self.rnn =  nn.GRUCell(512*7*7,1)
        self.hidden = torch.randn(1,1)
        self.linear = nn.Linear(9,1,bias=True)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        x = self.feature_extractor(x)
        
        
        o = torch.Tensor()
        for i in range(9):
            inp = x[i,:,:,:].view(-1,512*7*7)
            self.hidden = self.rnn(inp,self.hidden)
            o = torch.cat((o,self.hidden))
            
        x = self.linear(o.view(1,9))
        x = self.sigmoid(x)
        return x
        
cnn_lstm = CNN_LSTM(model_cnn).to(device)


In [5]:
for param in cnn_lstm.rnn.parameters():
    param.requires_grad = True
for param in cnn_lstm.feature_extractor[23:30].parameters():
    param.requires_grad = True
for param in cnn_lstm.linear.parameters():
    param.requires_grad = True

In [None]:
use_gpu = torch.cuda.is_available()
if use_gpu:
    print("using gpu")
    out = cnn_lstm(X[0,:,:,:,:])
else:
    out = cnn_lstm(X[0,:,:,:,:])
out


In [6]:
def compute_loss(out,Y):
    loss = torch.mean((out-Y)**2)
    return loss
    

In [8]:
def train(model,X_train,Y_train,nb_epoch,lr):
    
    optimizer = optim.Adam(filter(lambda p: p.requires_grad,model.parameters()),lr=lr)
    
    X_train, Y_train = torch.from_numpy(X_train).to(device).float(), torch.from_numpy(Y_train).to(device).float()
    
    for epoch in range(1,nb_epoch+1):
        epoch_loss = 0.0
        for video_no in range(16):
            X_batch, Y_batch = X_train[video_no,:,:,:,:], Y_train[video_no,0]
            
            optimizer.zero_grad()
            out = model(X_batch)
            loss = compute_loss(out,Y_batch)
            loss.backward(retain_graph=True)
            optimizer.step()
            
            epoch_loss += loss.item()
            
        print("epoch: "+str(epoch))
        print("loss: "+str(epoch_loss/16.0))
        print(out)
        print(Y_batch)
            
        
        

In [10]:
train(cnn_lstm,X_train,Y_train,3,5.e-2)

epoch: 1
loss: 0.554722058965126
tensor([[ 0.9419]])
tensor(0.)
epoch: 2
loss: 0.2724910478864331
tensor([[ 0.3023]])
tensor(0.)
epoch: 3
loss: 0.33887993649113923
tensor([[ 0.1575]])
tensor(0.)


In [None]:
Y_train.shape