# Baseline - LSTM

In [8]:
"""
Author: ZHANG Yu

The code is used to do classification of the quickdraw dataset using an LSTM.
The data used here has been preprocessed by generate_data.py

"""

'\nAuthor: ZHANG Yu\n\nThe code is used to do classification of the quickdraw dataset using an LSTM.\nThe data used here has been preprocessed by generate_data.py\n\n'

### The simplified process is:
1. Get train data and test data as well as their label
2. Zero padding the data to the same length
3. Choose hyperparameters
4. Construct and build the LSTM network
5. Train the network
6. Evaluate the network using test data

### To run the code:
1. Download quick_draw_output file
2. Change path and parameters
3. Remove the '#' in front of the lines containing 'device' if you want to run on the GPU
4. Run the code

In [9]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import math
import time
import pandas as pd
import pickle
import os.path as path
import numpy as np

### maybe use GPU


In [3]:
#device= torch.device("cuda")
#device= torch.device("cpu")
#print(device)

### Dataset
### Prepare train data and test data



In [10]:

# Folder containing the 'data_X' and 'data_Y_int' pickle files.
data_path='C:/Users/YU007/CE7454_2018/project/quick_draw_output' # change path here

# NOTE(review): pickle.load can execute arbitrary code -- only load files you generated yourself.
with open(path.join(data_path, 'data_X'), 'rb') as samples_file:
    X = pickle.load(samples_file)
with open(path.join(data_path, 'data_Y_int'), 'rb') as labels_file:
    Y = pickle.load(labels_file)

# Sequential (unshuffled) split -- traindata : test data = 4:1.
len_train_X = int(len(X) * 0.8)

# Leading 80% of the samples and labels are used for training ...
train_X = np.array(X[:len_train_X])  # data
train_Y = np.array(Y[:len_train_X])  # label
# ... and the remaining 20% for testing.
test_X = np.array(X[len_train_X:])
test_Y = np.array(Y[len_train_X:])



In [11]:
def data_convert(Xdata):
    """Flatten every drawing in ``Xdata`` into one sequence of ``[x, y]`` points.

    Each drawing is a sequence of strokes; stroke ``j`` of drawing ``i`` is read as
    ``Xdata[i][j][0][k]`` (x coordinates) and ``Xdata[i][j][1][k]`` (y coordinates).
    Stroke boundaries are discarded: the points of all strokes are concatenated in
    stroke order.

    Args:
        Xdata: sequence of drawings, each a sequence of strokes as described above.

    Returns:
        list: one entry per drawing, each of the form ``[points]`` where ``points``
        is a list of ``[float(x), float(y)]`` pairs. The extra wrapping list gives
        every entry a leading dimension of size 1.

    Raises:
        IndexError: if a stroke has fewer y values than x values.
    """
    data = []
    for strokes in Xdata:  # one drawing
        points = []
        for stroke in strokes:  # one stroke of that drawing
            # y values are looked up by index (not zipped) so that a stroke with
            # too few y values raises instead of being silently truncated.
            points.extend(
                [float(x), float(stroke[1][k])] for k, x in enumerate(stroke[0])
            )
        data.append([points])

    return data

### train data

* final train data: train_data
* train label: train_Y

In [12]:
# Keep the training labels under their own name, then flatten each training drawing
# into a point sequence.
train_label = train_Y
train_data = data_convert(train_X)
print('training image number =', len(train_X))


training image number = 3657


### test data
* final test data: test_data
* test label: test_Y

In [13]:
# Keep the test labels under their own name, then flatten each test drawing
# into a point sequence.
test_label = test_Y
test_data = data_convert(test_X)

print('testing image number =', len(test_X))


testing image number = 915


### Hyper parameters

In [29]:
bs = 1 # batch size: number of images per batch
#seq_len = point_no_max * stroke_no_max # 47*123, number of feature points in each image
input_size = 2 # features per time step: the (x, y) coordinates of one point
hidden_size = 200 # size of the LSTM hidden state
output_size = 5 # number of classes
num_layers = 1 # number of recurrent layers
EPOCH = 8 # number of passes over the training data


### Make a recurrent net class

In [15]:
class LSTM_net(nn.Module):
    """Sequence classifier: a batch-first LSTM followed by a linear read-out layer."""

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Create the recurrent layer and the linear layer that maps to class scores."""
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # recurrent layer; inputs and outputs are laid out as bs * seq_len * features
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        # linear layer applied to the hidden features of a single time step
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, X, h0, c0):
        """Return class scores (bs * output_size) for ``X`` (bs * seq_len * input_size).

        ``h0`` and ``c0`` are the initial hidden and cell states handed to the LSTM.
        """
        initial_state = (h0, c0)
        outputs, _final_state = self.lstm(X, initial_state)  # bs * seq_len * hidden_size

        # only the features of the last time step are used for classification
        last_step = outputs[:, -1, :]
        return self.fc(last_step)

### Build the net.

In [16]:
# Instantiate the classifier from the hyperparameters and show its layer summary.
net = LSTM_net(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)
print(net)


LSTM_net(
  (lstm): LSTM(2, 200, batch_first=True)
  (fc): Linear(in_features=200, out_features=5, bias=True)
)


### Send the weights of the networks to the GPU

In [None]:
#net = net.to(device)

### Set up manually the weights of the Linear module

In [17]:
# Re-draw the linear layer's weights uniformly from [-0.1, 0.1]; the in-place update
# is done outside of autograd tracking.
with torch.no_grad():
    net.fc.weight.uniform_(-0.1, 0.1)

# The cell's only output is an empty line.
print('')




### Choose the criterion, as well as the following important hyperparameters: 
* initial learning rate: my_lr

In [30]:
# Cross-entropy loss, applied to the raw class scores returned by the network.
criterion = nn.CrossEntropyLoss()

# Initial learning rate handed to the SGD optimizer in the training loop.
# NOTE(review): the run recorded in this notebook reports exp(loss) on the order of
# 1e27 during the first epochs, which suggests this value is too large -- confirm
# and consider a much smaller learning rate.
my_lr = 5

### evaluate the network on the test data

In [31]:
def eval_on_test_set(num_samples=50):
    """Print the loss and accuracy of ``net`` on the first ``num_samples`` test drawings.

    Reads the module-level ``net``, ``criterion``, ``test_data``, ``test_label``,
    ``num_layers``, ``bs`` and ``hidden_size``. Each test drawing is fed through
    the network as its own batch.

    Args:
        num_samples: number of leading test samples to evaluate. Defaults to 50,
            the value that used to be hard-coded; it is clamped to
            ``len(test_data)`` so a small test set no longer raises IndexError.

    Returns:
        None. Prints ``exp(mean loss)`` and the accuracy in percent.
    """
    num_samples = min(num_samples, len(test_data))
    if num_samples <= 0:
        print('test: no samples to evaluate')
        return

    running_loss = 0
    correct = 0
    total = 0

    # The network does not return its recurrent state, so h and c stay zero and
    # every sequence starts from the same initial state.
    h = torch.zeros(num_layers, bs, hidden_size)
    c = torch.zeros(num_layers, bs, hidden_size)

    # send them to the gpu
    # h=h.to(device)
    # c=c.to(device)

    # No gradients are needed for evaluation; skipping the autograd graph saves
    # time and memory without changing the computed scores.
    with torch.no_grad():
        for count in range(num_samples):

            minibatch_data = torch.Tensor(test_data[count])  # bs*seq_len*2
            minibatch_label = torch.tensor([test_label[count].item()])

            # minibatch_data=minibatch_data.to(device)
            # minibatch_label=minibatch_label.to(device)

            scores = net(minibatch_data, h, c)
            loss = criterion(scores, minibatch_label)

            running_loss += loss.item()

            _, predicted = torch.max(scores, 1)
            total += minibatch_label.size(0)
            correct += (predicted == minibatch_label).sum().item()

    total_loss = running_loss / num_samples
    try:
        exp_loss = math.exp(total_loss)
    except OverflowError:
        # math.exp raises for very large mean losses; report infinity instead of crashing
        exp_loss = float('inf')

    print('test: exp(loss) = ', exp_loss)
    print ('Test accuracy:{}%'.format(100 * correct / total))


### Do EPOCH passes through the training set.

In [32]:
start=time.time()

# Each epoch visits only the first `num_train_samples` training drawings, in order.
# Clamped so a training set smaller than 200 does not raise IndexError.
num_train_samples = min(200, len(train_data))
if num_train_samples == 0:
    raise ValueError('train_data is empty')

# Decay a local copy of the learning rate so that `my_lr` keeps its configured
# value and re-running this cell starts again from the same learning rate.
current_lr = my_lr

for epoch in range(EPOCH):

    # keep the initial learning rate for the first two epochs, then divide it by 3 each epoch
    if epoch >= 2:
        current_lr = current_lr / 3

    # create a new optimizer at the beginning of each epoch: give the current learning rate.
    optimizer = torch.optim.SGD(net.parameters(), lr=current_lr)

    # set the running quantities to zero at the beginning of the epoch
    running_loss = 0
    num_batches = 0

    # The network does not return its recurrent state, so h and c stay zero:
    # every sequence starts from the same initial state and nothing has to be
    # detached or tracked between steps.
    h = torch.zeros(num_layers, bs, hidden_size)
    c = torch.zeros(num_layers, bs, hidden_size)

    # send them to the gpu
    # h=h.to(device)
    # c=c.to(device)

    for count in range(num_train_samples):

        # Set the gradients to zeros
        optimizer.zero_grad()

        # create a minibatch holding a single drawing
        minibatch_data = torch.Tensor(train_data[count])  # bs*seq_len*2
        minibatch_label = torch.tensor([train_label[count].item()])

        # send them to the gpu
        # minibatch_data=minibatch_data.to(device)
        # minibatch_label=minibatch_label.to(device)

        # forward the minibatch through the net
        scores = net(minibatch_data, h, c)

        # cross-entropy loss of this minibatch
        loss = criterion(scores, minibatch_label)

        # backward pass, then one SGD step on the network parameters
        loss.backward()
        optimizer.step()

        # update the running loss
        running_loss += loss.item()
        num_batches += 1

    # compute stats for the samples visited in this epoch
    total_loss = running_loss/num_batches
    elapsed = time.time()-start

    try:
        exp_loss = math.exp(total_loss)
    except OverflowError:
        # math.exp raises for very large mean losses; report infinity instead of crashing
        exp_loss = float('inf')

    print('')
    print('epoch=',epoch, '\t time=', elapsed,'\t lr=', current_lr, '\t exp(loss)=',  exp_loss)

    eval_on_test_set()



epoch= 0 	 time= 5.373099088668823 	 lr= 5 	 exp(loss)= 3.879211123449398e+27
test: exp(loss) =  1.4332634818086385e+24
Test accuracy:24.0%

epoch= 1 	 time= 12.796756982803345 	 lr= 5 	 exp(loss)= 6.653434829159341e+21
test: exp(loss) =  4.783384357049398e+27
Test accuracy:24.0%

epoch= 2 	 time= 20.453410625457764 	 lr= 1.6666666666666667 	 exp(loss)= 429465661.6957357
test: exp(loss) =  6340189.552171972
Test accuracy:24.0%

epoch= 3 	 time= 29.54923677444458 	 lr= 0.5555555555555556 	 exp(loss)= 7416.534777913035
test: exp(loss) =  26113.835449791568
Test accuracy:24.0%

epoch= 4 	 time= 42.730584383010864 	 lr= 0.1851851851851852 	 exp(loss)= 280.6540479000455
test: exp(loss) =  3787.407860818083
Test accuracy:6.0%

epoch= 5 	 time= 51.02922606468201 	 lr= 0.0617283950617284 	 exp(loss)= 107.03575011306961
test: exp(loss) =  1555.0556400832231
Test accuracy:20.0%

epoch= 6 	 time= 60.394447565078735 	 lr= 0.0205761316872428 	 exp(loss)= 81.82388563963734
test: exp(loss) =  1274.5

In [None]:
# Save the model checkpoint
# The network trained in this notebook is bound to `net`; `model` is never defined,
# so the previous call raised NameError.
torch.save(net.state_dict(), 'model.ckpt')