In [1]:
from reader import readShortVideo
from reader import getVideoList
import matplotlib.pyplot as plt
from os import listdir
import os
import pandas as pd
import numpy as np
import pickle

import torchvision
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
%matplotlib inline
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


# read pre-defined feature and train RNN

In [2]:
with open("../train_features_d12.pkl", "rb") as f:
    train_features = pickle.load(f)
with open("../valid_features_d12.pkl", "rb") as f:
    valid_features = pickle.load(f)
    
with open("../train_y.pkl", "rb") as f:
    train_y = pickle.load(f)
with open("../valid_y.pkl", "rb") as f:
    valid_y = pickle.load(f)

In [19]:
class GRU(nn.Module):
    def __init__(self, input_size, hidden_size=512, n_layers=2, dropout=0.2):
        super(GRU, self).__init__()
        self.hidden_size =  hidden_size
        self.gru = nn.GRU(input_size, self.hidden_size, n_layers,
                          dropout=(0 if n_layers == 1 else dropout), bidirectional=False)
        self.bn_0 = nn.BatchNorm1d(self.hidden_size)
        self.fc_1 = nn.Linear(self.hidden_size, int(self.hidden_size/2))
        self.bn_1 = nn.BatchNorm1d(int(self.hidden_size/2))
        self.fc_2 = nn.Linear(int(self.hidden_size), 11)
        self.softmax = nn.Softmax(1)
        self.relu = nn.ReLU()
    def forward(self, padded_sequence, input_lengths, hidden=None):
        packed = torch.nn.utils.rnn.pack_padded_sequence(padded_sequence, input_lengths)
        outputs, hidden = self.gru(packed, hidden) # output: (seq_len, batch, hidden*n_dir)
#         outputs, _ = torch.nn.utils.rnn.pad_packed_sequence(outputs)
#         outputs = outputs[:, :, :self.hidden_size] + outputs[:, : ,self.hidden_size:] # Sum bidirectional outputs (1, batch, hidden)
        outputs = self.bn_0(hidden[-1])
#         print(hidden.size())
        outputs = self.softmax(self.fc_2(outputs))
        return outputs


In [20]:
def single_batch_padding(train_X_batch, train_y_batch, test = False):
    if test==True:
        padded_sequence = nn.utils.rnn.pad_sequence(train_X_batch)
        label = torch.LongTensor(train_y_batch)
        length = [len(train_X_batch[0])]
    else:
        length = [len(x) for x in train_X_batch]
        perm_index = np.argsort(length)[::-1]

        # sort by sequence length
        train_X_batch = [train_X_batch[i] for i in perm_index]
        length = [len(x) for x in train_X_batch]
        padded_sequence = nn.utils.rnn.pad_sequence(train_X_batch)
        label = torch.LongTensor(np.array(train_y_batch)[perm_index])
    return padded_sequence, label, length

In [21]:
input_valid_X, input_valid_y, valid_lengths = single_batch_padding([valid_features[0]], [valid_y[0]],
                                                                  test=True)

In [22]:
feature_size = 1024*7*7
model = rnn = GRU(feature_size,hidden_size=512).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
BATCH_SIZE = 64
loss_function = nn.CrossEntropyLoss()
max_accuracy = 0
model.train()
for epoch in range(15):
    print("Epoch:", epoch+1)
    CE_loss = 0.0
    total_length = len(train_features)
    # shuffle
    perm_index = np.random.permutation(len(train_features))
    train_X_sfl = [ train_features[i] for i in perm_index]
    train_y_sfl = np.array(train_y)[perm_index]
    # construct training batch
    for index in range(0,total_length ,BATCH_SIZE):
        if index+BATCH_SIZE > total_length:
            break
            
        # zero the parameter gradients
        optimizer.zero_grad()
        input_X = train_X_sfl[index:index+BATCH_SIZE]
        input_y = train_y_sfl[index:index+BATCH_SIZE]
        
        # pad the sequence
        input_X, input_y, length = single_batch_padding(input_X, input_y)
        
        # use GPU
        input_X = input_X.cuda()
        # forward + backward + optimize
        output = model(input_X, length)
        loss = loss_function(output, input_y.cuda())
        loss.backward()
        optimizer.step()
        CE_loss += loss.cpu().data.numpy()
    print("training loss",CE_loss)
    
    # validation
    same_difference = []
    with torch.no_grad():
        model.eval()
        for i in range(len(valid_y)):
            input_valid_X, input_valid_y, valid_lengths = single_batch_padding([valid_features[i]], 
                                                                               [valid_y[i]],
                                                                               test=True)
            output = model(input_valid_X.cuda(),valid_lengths)
            output_label = torch.argmax(output,1).cpu().data
            same_difference.append((output_label == input_valid_y).numpy())
        accuracy = np.mean(same_difference)
        print("validation accuracy: ",accuracy)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        torch.save(model.state_dict(), "../models/RNN_FC_model.pkt")
    model.train()

Epoch: 1
training loss 115.2590401172638
validation accuracy:  0.30754352030947774
Epoch: 2
training loss 106.55433082580566
validation accuracy:  0.38878143133462284
Epoch: 3
training loss 100.63670098781586
validation accuracy:  0.4758220502901354
Epoch: 4
training loss 94.97878885269165
validation accuracy:  0.4584139264990329
Epoch: 5
training loss 90.07201898097992
validation accuracy:  0.46421663442940037
Epoch: 6
training loss 86.38283801078796
validation accuracy:  0.4951644100580271
Epoch: 7
training loss 84.02891755104065
validation accuracy:  0.47195357833655704
Epoch: 8
training loss 82.30059111118317
validation accuracy:  0.4506769825918762
Epoch: 9
training loss 81.10160660743713
validation accuracy:  0.4584139264990329
Epoch: 10
training loss 80.1844789981842
validation accuracy:  0.4584139264990329
Epoch: 11
training loss 79.35345506668091
validation accuracy:  0.4796905222437137
Epoch: 12
training loss 78.8776183128357
validation accuracy:  0.4448742746615087
Epoch: 13

In [None]:
torch.cuda.empty_cache()

In [None]:
# 先加 hidden dimension
# 加 fc layer
# 加 dropout