# Baseline - LSTM


"""
Author: ZHANG Yu

The code is used to do calsssification of quickdraw dataset using LSTM. 
The data used here has been dealt with by generate_data.py

The simplified process is:
1. Get train data and test data as well as their label
2. Zero padding the data to the same length
3. Choose hyperparameters
4. Construct and build the LSTM network
5. Train the network
6. Evaluate the network using test data

To run the code:
1. Download quick_draw_output file
2. Change path and parameters
3. Move away '#' before the code containing 'device' if you want to run on gpu
4. Run the code

"""

In [79]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import math
import time
import pandas as pd
import pickle
import os.path as path
import numpy as np

### maybe use GPU


In [None]:
#device= torch.device("cuda")
#device= torch.device("cpu")
#print(device)

### Dataset
### Prepare train data and test data



In [48]:

data_path='C:/Users/YU007/CE7454_2018/project/quick_draw_output' # change path here

with open(path.join(data_path,'data_X'),'rb') as f:
    X=pickle.load(f)

with open(path.join(data_path,'data_Y_int'),'rb') as f:
    Y=pickle.load(f)

len_train_X=int(len(X)*0.8)



train_X=np.array(X[:len_train_X]) # data
train_Y=np.array(Y[:len_train_X]) # label
test_X=np.array(X[len_train_X:])
test_Y=np.array(Y[len_train_X:])



### zero padding train data to the same length

* final train data: train_Xdata_tensor
* train label: train_Y

In [49]:
stroke_no = np.zeros(len(train_X))
point_no = []

for i in range(len(train_X)): # number of pictures, ith picture
    stroke_no[i] = len(train_X[i])
    for j in range(len(train_X[i])): # number of strokes for each picture, jth stroke
        point_no.append(len(train_X[i][j][0]))

stroke_no_max = int(max(stroke_no))
point_no_max = int(max(point_no))

print ('max stroke number in an image =',stroke_no_max,'\n max point number in a stroke =', point_no_max)
print ('training image number =', len(train_X),'\n testing image number =', len(test_X))
train_Xdata = np.zeros((len(train_X),stroke_no_max,2,point_no_max))

for i in range(len(train_X)):  
    for j in range(len(train_X[i])):        
        train_Xdata[i][j][0][:len(train_X[i][j][0])]=train_X[i][j][0][:]
        train_Xdata[i][j][1][:len(train_X[i][j][0])]=train_X[i][j][1][:]



max stroke number in an image = 47 
 max point number in a stroke = 123
training image number = 3657 
 testing image number = 915


### zero padding test data to the same length
* final test data: test_Xdata_tensor
* test label: test_Y

In [50]:
stroke_no_test = np.zeros(len(test_X))
point_no_test = []

for i in range(len(test_X)): # number of pictures, ith picture
    stroke_no_test[i] = len(test_X[i])
    for j in range(len(test_X[i])): # number of strokes for each picture, jth stroke
        point_no_test.append(len(test_X[i][j][0]))

stroke_no_maxtest = int(max(stroke_no_test))
point_no_maxtest = int(max(point_no_test))

print ('max stroke number in an image =',stroke_no_maxtest,'\n max point number in a stroke =', point_no_maxtest)

test_Xdata = np.zeros((len(test_X),stroke_no_maxtest,2,point_no_maxtest))

for i in range(len(test_X)):  
    for j in range(len(test_X[i])):        
        test_Xdata[i][j][0][:len(test_X[i][j][0])]=test_X[i][j][0][:]
        test_Xdata[i][j][1][:len(test_X[i][j][0])]=test_X[i][j][1][:]
        
test_Xdata_tensor = torch.Tensor(test_Xdata)
test_Xdata_tensor = test_Xdata_tensor.permute(0, 1, 3, 2)
test_Xdata_tensor = test_Xdata_tensor.reshape(len(test_X), stroke_no_maxtest*point_no_maxtest, 2)
print (test_Xdata_tensor.shape)

max stroke number in an image = 32 
 max point number in a stroke = 66
torch.Size([915, 2112, 2])


### Hyper parameters

In [51]:
bs = 20 # batch size, each batch has n images
seq_len = point_no_max * stroke_no_max # 47*123, number of feature points in each image
input_size = 2 # number of features,  point
hidden_size = 200
output_size = 5 # n calsses
num_layers = 1 # number of recurrent layers
EPOCH = 3 # train the training data n times


### Make a recurrent net class

In [71]:
class LSTM_net(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM_net, self).__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM( input_size , hidden_size , num_layers, batch_first=True  ) # recurrent layer, batch first
        self.fc = nn.Linear(    hidden_size , output_size   ) # linear layer

        
    def forward(self, X, h0, c0 ):
        # X shape: bs * seq_len * input_size
          
        h_seq , _  =   self.lstm( X , (h0, c0) )      # bs*seq_len*hidden_size
        out  =   self.fc( h_seq[:, -1, :] )   # bs * output_size, use last feature
        
        return out

### Build the net.

In [72]:
net = LSTM_net(input_size, hidden_size, num_layers, output_size)

print(net)


LSTM_net(
  (lstm): LSTM(2, 200, batch_first=True)
  (fc): Linear(in_features=200, out_features=5, bias=True)
)


### Send the weights of the networks to the GPU

In [None]:
#net = net.to(device)

### Set up manually the weights of the Linear module

In [62]:
net.fc.weight.data.uniform_(-0.1, 0.1)

print('')




### Choose the criterion, as well as the following important hyperparameters: 
* initial learning rate: my_lr

In [63]:
criterion = nn.CrossEntropyLoss()

my_lr = 1

### evaluate the network on the test data

In [76]:
def eval_on_test_set():
    
    running_loss=0
    num_batches=0  
    
    correct_no = 0
    total_no = 0
       
    # set the initial h and c to be the zero vector
    h = torch.zeros( num_layers, bs, hidden_size)
    c = torch.zeros( num_layers, bs, hidden_size)

    # send them to the gpu    
    # h=h.to(device)
    # c=c.to(device)
       
    for count in range( 0 , len(test_X)-1-bs ,  bs) :
               
        minibatch_data =  test_Xdata_tensor[ count : count+bs ]
        minibatch_label = test_Y[ count : count+bs ]
        minibatch_label = torch.LongTensor(minibatch_label)
        
        # minibatch_data=minibatch_data.to(device)
        # minibatch_label=minibatch_label.to(device)
                                  
        scores  = net( minibatch_data, h , c )
         
        
        loss = criterion(  scores ,  minibatch_label )    
        
        h=h.detach()
        c=c.detach()
            
        running_loss += loss.item()
        num_batches += 1        
        
        _, predicted = torch.max(scores.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    total_loss = running_loss/num_batches 
    print('test: exp(loss) = ', math.exp(total_loss)  )
    print ('Test accuracy:{}%'.format(100 * correct / total))


### Do EPOCH passes through the training set.

In [78]:
start=time.time()

for epoch in range(EPOCH):
    
    # divide the learning rate by 3 except after the first epoch
    if epoch >= 2:
        my_lr = my_lr / 3
    
    # create a new optimizer at the beginning of each epoch: give the current learning rate.   
    optimizer=torch.optim.SGD( net.parameters() , lr=my_lr )
        
    # set the running quatities to zero at the beginning of the epoch
    running_loss=0
    num_batches=0    
       
    # set the initial h and c to be the zero vector
    h = torch.zeros( num_layers, bs, hidden_size)
    c = torch.zeros( num_layers, bs, hidden_size)

    # send them to the gpu    
    # h=h.to(device)
    # c=c.to(device)
    
    for count in range( 0 , len(train_X)-1-bs ,  bs):
        
        # Set the gradients to zeros
        optimizer.zero_grad()
        
        # create a minibatch
        minibatch_data =  train_Xdata_tensor[ count : count+bs ] # bs*seq_len*2
        minibatch_label = train_Y[ count : count+bs ]    
        minibatch_label = torch.LongTensor(minibatch_label)

        
        # send them to the gpu
        # minibatch_data=minibatch_data.to(device)
        # minibatch_label=minibatch_label.to(device)
        
        # Detach to prevent from backpropagating all the way to the beginning
        # Then tell Pytorch to start tracking all operations that will be done on h and c
        h=h.detach()
        c=c.detach()
        h=h.requires_grad_()
        c=c.requires_grad_()
                       
        # forward the minibatch through the net        
        scores = net( minibatch_data, h , c )
        
       
        
        # Compute the average of the losses of the data points in this huge batch
        loss = criterion(  scores ,  minibatch_label )
        
        # backward pass to compute dL/dR, dL/dV and dL/dW
        loss.backward()

       
        optimizer.step()
        
            
        # update the running loss  
        running_loss += loss.item()
        num_batches += 1
        
        
        
    # compute stats for the full training set
    total_loss = running_loss/num_batches
    elapsed = time.time()-start
    
    print('')
    print('epoch=',epoch, '\t time=', elapsed,'\t lr=', my_lr, '\t exp(loss)=',  math.exp(total_loss))
    eval_on_test_set()
   


epoch= 0 	 time= 3161.2105412483215 	 lr= 1 	 exp(loss)= 4.898336101568618

epoch= 1 	 time= 6364.750329017639 	 lr= 1 	 exp(loss)= 4.888860776750387

epoch= 2 	 time= 9352.907896518707 	 lr= 0.3333333333333333 	 exp(loss)= 4.842653726983257
