# In[2]:

# coding: utf-8

# In[42]:


import torch.nn as nn
from torch.autograd import Variable as V
import torch as th
from torchvision import models
import os
import torch.optim as optim
import random
import numpy as np
import cv2 as cv2

class AlexLSTM(nn.Module):
    """AlexNet convolutional features fed frame-by-frame into an LSTM regressor.

    Each frame of a clip is passed through the convolutional part of a
    pretrained AlexNet, flattened, and the resulting sequence is processed by
    a multi-layer LSTM.  A small fully connected head maps every LSTM output
    to a single scalar, so the network predicts one value per frame.

    Args:
        n_layers: Number of stacked LSTM layers.
        h_size: Hidden size of the LSTM.
        feature_size: Length of the flattened convolutional feature vector
            for one frame.  The default (68096 = 256 * 14 * 19) corresponds
            to 480x640 input frames.
    """

    def __init__(self, n_layers=2, h_size=420, feature_size=68096):
        super(AlexLSTM, self).__init__()
        self.h_size = h_size
        self.n_layers = n_layers

        alexnet = models.alexnet(pretrained=True)
        # Select the convolutional trunk by name rather than by slicing
        # ``children()``: newer torchvision releases add an ``avgpool`` child
        # between ``features`` and ``classifier``, which would otherwise be
        # picked up and change the flattened feature size.  Wrapping it in a
        # Sequential keeps the ``conv.0.*`` parameter names of older
        # checkpoints.
        self.conv = nn.Sequential(alexnet.features)

        self.lstm = nn.LSTM(feature_size, h_size, dropout=0.2,
                            num_layers=n_layers)
        self.fc = nn.Sequential(
            nn.Linear(h_size, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        """Predict one scalar per frame.

        Args:
            x: Tensor laid out as (batch, channels, time, height, width).

        Returns:
            Tensor of shape (batch, time).

        Note:
            A fresh random initial LSTM state is drawn on every call (see
            ``_init_state``), so repeated calls on the same input are not
            bit-identical.
        """
        batch_size, timesteps = x.size(0), x.size(2)
        state = self._init_state(b_size=batch_size)

        # Run the CNN on each time slice and flatten to (batch, feature_size).
        frame_features = [
            self.conv(x[:, :, t, :, :]).view(batch_size, -1)
            for t in range(timesteps)
        ]
        # nn.LSTM expects (seq_len, batch, input_size) by default.
        convs = th.stack(frame_features, 0)

        lstm, _ = self.lstm(convs, state)  # (seq_len, batch, h_size)
        logit = self.fc(lstm)              # (seq_len, batch, 1)

        # Return batch-major output without the trailing singleton dimension.
        return logit.transpose(1, 0).squeeze(2)

    def _init_state(self, b_size=1):
        """Return a random ``(h_0, c_0)`` pair for the LSTM.

        Both tensors have shape (n_layers, b_size, h_size), are drawn from
        N(0, 0.01) and are created from an existing parameter so they share
        its tensor type.
        """
        weight = next(self.parameters()).data
        shape = (self.n_layers, b_size, self.h_size)
        return (
            V(weight.new(*shape).normal_(0.0, 0.01)),
            V(weight.new(*shape).normal_(0.0, 0.01))
        )
    
# Mini-batch geometry: each batch holds `batch_size` clips of `time_stamp`
# consecutive frames.
batch_size = 5
time_stamp = 20

# The number of entries in the frame directory is used as the dataset size.
train_dataset = os.listdir("../img/")
total_img_num = len(train_dataset)

# Whole batches that fit into the dataset (any remainder is dropped).
iteration_per_epoch = total_img_num // (batch_size * time_stamp)

def fetch_image_and_label(batch_size, time_stamp):
    """Load a random batch of consecutive-frame clips with their labels.

    ``batch_size`` distinct start indices are drawn at random; for each one,
    ``time_stamp`` consecutive frames ``../img/frame<idx>.jpg`` are read and
    paired with the label on line ``idx`` of ``../data/train.txt``.

    Args:
        batch_size: Number of clips in the batch.
        time_stamp: Number of consecutive frames per clip.

    Returns:
        Tuple ``(x, y)`` of numpy arrays: ``x`` has shape
        (batch_size, 3, time_stamp, 480, 640) and ``y`` has shape
        (batch_size, time_stamp).

    Raises:
        ValueError: If there are fewer valid start indices than
            ``batch_size`` (the original sampling loop would never finish).
        IOError: If a frame image cannot be read.
    """
    # A clip starting at `s` uses frames s .. s + time_stamp - 1, so the
    # valid start indices are 0 .. total_img_num - time_stamp inclusive.
    n_starts = total_img_num - time_stamp + 1
    if batch_size > n_starts:
        raise ValueError(
            "cannot draw %d distinct clips of %d frames from %d images"
            % (batch_size, time_stamp, total_img_num)
        )
    numbers = random.sample(range(n_starts), batch_size)

    # Close the label file deterministically instead of leaking the handle.
    with open('../data/train.txt', 'r') as file_in:
        label = [float(line) for line in file_in]

    x = np.zeros((batch_size, time_stamp, 480, 640, 3))
    y = np.zeros((batch_size, time_stamp))
    for i, start in enumerate(numbers):
        for j in range(time_stamp):
            img_name = start + j
            image_path = '../img/frame' + str(img_name) + ".jpg"
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals failure by returning None; assigning
                # that into the float array would silently store NaNs.
                raise IOError("could not read image: " + image_path)
            x[i, j] = img
            y[i, j] = label[img_name]

            print(img_name, y[i, j])
    # Move channels forward: (batch_size, 3, time_stamp, 480, 640)
    x = x.transpose(0, 4, 1, 2, 3)
    return x, y

def train():
    """Train an ``AlexLSTM`` on randomly sampled clips and save checkpoints.

    Runs 20 epochs of ``iteration_per_epoch`` iterations each, using the
    module-level ``batch_size`` and ``time_stamp``.  A checkpoint is written
    to ``../weight/`` whenever an iteration's loss is the lowest seen so far,
    and once more at the end of every epoch.
    """
    net = AlexLSTM()
    # Summed (not averaged) squared error; `reduction='sum'` is the
    # replacement for the deprecated positional `size_average=False`.
    criterion = nn.MSELoss(reduction='sum')
    lr = 0.0001
    # Build the optimizer once: re-creating Adam inside the loop would reset
    # its running moment estimates on every step.
    optimizer = optim.Adam(net.parameters(), lr=lr)
    min_loss = 100
    for epoch in range(20):  # loop over the dataset multiple times
        for i in range(iteration_per_epoch):
            x, y = fetch_image_and_label(batch_size, time_stamp)

            x = th.from_numpy(x).float()
            y = th.from_numpy(y).float()

            optimizer.zero_grad()  # zero the parameter gradients
            # forward + backward + optimize
            predict = net(x)

            print("predict shape : ", predict.size())
            print("label shape : ", y.size())
            print("------ PREDICT start------")
            print(predict)
            print("------ PREDICT   end------")
            print("------ LABEL start------")
            print(y)
            print("------ LABEL   end------")
            loss = criterion(predict, y)
            loss.backward()
            optimizer.step()

            # print statistics
            print("loss shape : ", loss.data.size())
            # `.item()` extracts the Python float from the 0-dim loss tensor
            # (indexing it with [0] fails on current PyTorch).
            loss_value = loss.item()
            if loss_value <= min_loss:
                min_loss = loss_value
                print("Saving model ...")
                th.save(net.state_dict(), '../weight/%d_%s.p' % (i, epoch))
            # Report the loss of this iteration as-is; it is a single
            # iteration's value, so no averaging divisor applies.
            print('[epoch : %d, iteration : %5d] loss: %.3f' % (epoch + 1, i + 1, loss_value))
        print("Saving model ...")
        th.save(net.state_dict(), '../weight/epoch_%s.p' % (epoch))

    print('Finished Training')


if __name__ == "__main__":
    # Only the batch loader is exercised here; training stays disabled.
    # train()
    fetch_image_and_label(batch_size=batch_size, time_stamp=time_stamp)

15459 2.232426
15460 2.128592
15461 2.037299
15462 2.062311
15463 1.986056
15464 1.879097
15465 1.91522
15466 1.817642
15467 1.729141
15468 1.753493
15469 1.668238
15470 1.638134
15471 1.634793
15472 1.533554
15473 1.51216
15474 1.500449
15475 1.452962
15476 1.448734
15477 1.421214
15478 1.329434
2968 22.17625
2969 22.133444
2970 22.149298
2971 22.100728
2972 22.060368
2973 21.988514
2974 21.975832
2975 21.958813
2976 21.906158
2977 21.90997
2978 21.863792
2979 21.854736
2980 21.83847
2981 21.78667
2982 21.804916
2983 21.792359
2984 21.774163
2985 21.761638
2986 21.775377
2987 21.753186
8837 10.205999
8838 10.182577
8839 10.158643
8840 10.182892
8841 10.127035
8842 10.115971
8843 10.113069
8844 10.082145
8845 10.039093
8846 10.041261
8847 10.026185
8848 9.975841
8849 9.951443
8850 9.945091
8851 9.940427
8852 9.886067
8853 9.888366
8854 9.881791
8855 9.856683
8856 9.843443
12782 9.374195
12783 9.299342
12784 9.301712
12785 9.17977
12786 9.229411
12787 9.136078
12788 9.112845
12789 9.134