In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import os
import os.path as path
import copy

from gait_analysis import AnnotationsCasia as Annotations
from gait_analysis import CasiaDataset
from gait_analysis.Config import Config
from gait_analysis import Composer


# Settings 
## Dataset

In [2]:
crop_im_size = [186,250]
c = Config()
c.config['indexing']['grouping'] = 'person_sequence_angle'
c.config['pose']['load'] = True
c.config['flow']['load'] = True
c.config['heatmaps']['load'] = True
c.config['dataset_output'] = {
        'data': ["scenes","flows","heatmaps_LAnkle","heatmaps_RAnkle"],
        'label': "annotations"}
composer = Composer()
transformer = composer.compose()
dataset = CasiaDataset(transform=transformer)


loading configuration  default


## neural network

In [13]:
#DESIGN PARAMETERS FOR NEURAL NETWORK
NR_LSTM_UNITS = 2
IMAGE_INPUT_SIZE_W = 640
IMAGE_INPUT_SIZE_H = 480

IMAGE_AFTER_CONV_SIZE_W = 18
IMAGE_AFTER_CONV_SIZE_H = 13
#for 3x3 kernels, n=num_layers: len_in = 2^n*len_out + sum[i=1..n](2^i)
#CONV_LAYER_LENGTH = 5

LSTM_IO_SIZE = 18*13
LSTM_HIDDEN_SIZE = 18*13

RGB_CHANNELS = 3
TIMESTEPS = 10
BATCH_SIZE = 5 #until now just batch_size = 1

#USE RANDOM IMAGES TO SET UP WORKING EXAMPLE
class TEST_CNN_LSTM(nn.Module):
    def __init__(self):
        super(TEST_CNN_LSTM, self).__init__()
        self.conv1 = nn.Conv2d(3,6,3) #input 640x480
        self.pool1 = nn.MaxPool2d(2,2) #input 638x478 output 319x239
        self.conv2 = nn.Conv2d(6,16,3) # input 319x239 output 317x237
        self.pool2 = nn.MaxPool2d(2,2) # input 317x237 output 158x118
        self.conv3 = nn.Conv2d(16,6,3) # input 158x118 output 156x116
        self.pool3 = nn.MaxPool2d(2,2) # input 156x116 output 78x58
        self.conv4 = nn.Conv2d(6,3,3)  # input 78x58 output 76x56
        self.pool4 = nn.MaxPool2d(2,2) # input 76x56 output 39x29
        self.conv5 = nn.Conv2d(3,1,3)  # input 39x29 output 37x27
        self.pool5 = nn.MaxPool2d(2,2) #output 37x27 output 18x13
        self.lstm = nn.LSTM(LSTM_IO_SIZE,
                            LSTM_HIDDEN_SIZE,
                            NR_LSTM_UNITS)
        self.fc1 = nn.Linear(LSTM_IO_SIZE,120)
        self.fc2 = nn.Linear(120,20)
        self.fc3 = nn.Linear(20,3)
        
        #initialize hidden states of LSTM
        self.hidden = self.init_hidden()
        #print("Hidden:", _hidden)
    def init_hidden(self):
        return (torch.randn(NR_LSTM_UNITS, BATCH_SIZE, LSTM_HIDDEN_SIZE), 
                torch.randn(NR_LSTM_UNITS, BATCH_SIZE, LSTM_HIDDEN_SIZE))
    def forward(self,x):
        # print("Input:", x.size())
        x_arr = torch.zeros(TIMESTEPS,BATCH_SIZE,1,IMAGE_AFTER_CONV_SIZE_H,IMAGE_AFTER_CONV_SIZE_W)
        ## print("X arr size", x_arr.size())
        for i in range(TIMESTEPS):#parallel convolutions which are later concatenated for LSTM
            x_tmp_c1 = self.pool1(F.relu(self.conv1(x[i].float())))
            x_tmp_c2 = self.pool2(F.relu(self.conv2(x_tmp_c1)))
            x_tmp_c3 = self.pool3(F.relu(self.conv3(x_tmp_c2)))
            x_tmp_c4 = self.pool4(F.relu(self.conv4(x_tmp_c3)))
            x_tmp_c5 = self.pool5(F.relu(self.conv5(x_tmp_c4)))
            x_arr[i] = x_tmp_c5 # torch.squeeze(x_tmp_c5)
        # x_tmp_c1 = self.pool1(F.relu(self.conv1(x)))
        # x_tmp_c2 = self.pool2(F.relu(self.conv2(x_tmp_c1)))
        # x_tmp_c3 = self.pool3(F.relu(self.conv3(x_tmp_c2)))
        # x_tmp_c4 = self.pool4(F.relu(self.conv4(x_tmp_c3)))
        # x_tmp_c5 = self.pool5(F.relu(self.conv5(x_tmp_c4)))
        # x_arr = x_tmp_c5 # torch.squeeze(x_tmp_c5)
        
        x, hidden = self.lstm(x_arr.view(TIMESTEPS,BATCH_SIZE,-1), self.hidden)
        # the reshaping was taken from the documentation... and makes scense
        x = x.view(TIMESTEPS,5,1,LSTM_HIDDEN_SIZE) #output.view(seq_len, batch, num_directions, hidden_size)
        x = torch.squeeze(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = x.permute(1,2,0)
        return x
print("Class defined")

Class defined


## training addaptation

In [None]:
#rand_arr = np.random.rand(TIMESTEPS,RGB_CHANNELS,IMAGE_INPUT_SIZE,IMAGE_INPUT_SIZE)
arr_1 = np.full((1,RGB_CHANNELS,IMAGE_INPUT_SIZE_H,IMAGE_INPUT_SIZE_W),0)
arr_2 = np.full((1,RGB_CHANNELS,IMAGE_INPUT_SIZE_H,IMAGE_INPUT_SIZE_W),1)
arr_3 = np.full((1,RGB_CHANNELS,IMAGE_INPUT_SIZE_H,IMAGE_INPUT_SIZE_W),2)
arr_full = np.concatenate((arr_1, arr_2, arr_3, arr_1, arr_2, arr_3, arr_1, arr_2, arr_3, arr_1))
print("Shape", np.shape(arr_full))
test_images = torch.from_numpy(arr_full)
test_labels = torch.tensor([0,1,2,0,1,2,0,1,2,0]) #DIFFICULT
#test_labels = torch.tensor([0,0,0,1,1,1,2,2,2,2])#EASY

#TRAINING
test_net = TEST_CNN_LSTM()
criterion = nn.CrossEntropyLoss()
#criterion = nn.BCELoss()
optimizer = optim.SGD(test_net.parameters(), lr=0.1, momentum=0.9)


print('Start training...')
for epoch in range(100): 
    print("Epoch:", epoch)
    running_loss = 0.0
    #for i in range(TIMESTEP):
    inputs = test_images
    labels = test_labels

    optimizer.zero_grad() 
    outputs = test_net(inputs)
    #print("Out:", len(outputs), outputs)
    #print("Labels:", len(labels), labels)
    loss = criterion(outputs, labels)
    loss.backward() 
    
    optimizer.step()

    running_loss += loss.item()
    print("Loss:", running_loss)
print('...Training finished')

## Training actual data

In [14]:
#TRAINING
test_net = TEST_CNN_LSTM()
criterion = nn.CrossEntropyLoss()
#criterion = nn.BCELoss()
optimizer = optim.SGD(test_net.parameters(), lr=0.1, momentum=0.9)

dataloader = DataLoader(dataset, batch_size=5)
n_batches = len(dataloader)

#Time for printing
training_start_time = time.time()
    
print('Start training...')
for epoch in range(100): 
    running_loss = 0.0
    print_every = n_batches // 10
    start_time = time.time()
    total_train_loss = 0
    
    print("Epoch:", epoch)
    running_loss = 0.0
    for i, batch in enumerate(dataloader):
        
        inputs, labels_all = batch
        labels = labels_all[:,0:TIMESTEPS]
        optimizer.zero_grad() 
        outputs = test_net(inputs['scenes'])
        # print("Out:", len(outputs), outputs.size())
        # print("Labels_all:", len(labels_all), labels_all.size())
        # print("Labels:", len(labels), labels.size())
        loss = criterion(outputs,labels.long() )
        loss.backward() 
        optimizer.step()
        
        #Print statistics
        # print(loss.data.item())
        running_loss += loss.data.item()
        total_train_loss += loss.data.item()
        
        #Print every 10th batch of an epoch
        if (i + 1) % (print_every + 1) == 0:
            print("Epoch {}, {:d}% \t train_loss: {:.2f} took: {:.2f}s".format(
                    epoch+1, int(100 * (i+1) / n_batches), running_loss / print_every, time.time() - start_time))
            #Reset running loss and time
            running_loss = 0.0
            start_time = time.time()

print('...Training finished')

Start training...
Epoch: 0
Epoch 1, 11% 	 train_loss: 2.13 took: 36.04s
Epoch 1, 22% 	 train_loss: 1.96 took: 39.29s
Epoch 1, 33% 	 train_loss: 2.34 took: 42.42s
Epoch 1, 44% 	 train_loss: 2.95 took: 41.19s
Epoch 1, 55% 	 train_loss: 1.86 took: 32.58s
Epoch 1, 66% 	 train_loss: 2.13 took: 37.13s
Epoch 1, 77% 	 train_loss: 2.30 took: 41.78s
Epoch 1, 88% 	 train_loss: 1.88 took: 35.47s
Epoch 1, 100% 	 train_loss: 2.11 took: 42.99s
Epoch: 1
Epoch 2, 11% 	 train_loss: 2.04 took: 36.51s
Epoch 2, 22% 	 train_loss: 1.54 took: 39.71s
Epoch 2, 33% 	 train_loss: 2.58 took: 42.68s


KeyboardInterrupt: 