# CNN(embedding)+RNN model

Author: Liu Chang

Description: 

This notebook will demonstrate CNN+RNN hybrid model performance 

Date:
Week 9

In [1]:
import os.path as path
import pickle
import torch
from torch import nn
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

## Read Data

Our data has 5 calsses, overall 633244 items.

The 5 classes are:  'calendar', 'snowman', 'penguin', 'blackberry', 'teddy-bear'

In [None]:
data_path='/raid5/liuchang/quick_draw_output'
from read_data import get_dataset

_,train_X,train_Y,_,test_X,test_Y=get_dataset(data_path,'1102_05b633244')

In [None]:
labels_count=len(set(test_Y))
print("The number of classes=",labels_count)
print("The number of items=",len(train_X)+len(test_X))

### Data Preprocessing Method 

Most of the data preprocessing is done when generating our dataset.

`unpack()` is a data padding method, it is ued on minibatched data.

It pads each data item(one paint) so that the size is exactly 30x200 (which is the maximal size in our data)

In [None]:
def unpack(x,max_strock,max_len):
    x_new=torch.zeros(torch.Size([len(x),max_strock,3,max_len]))
    for i,item in enumerate(x):
        for j,strock in enumerate(item):
            strock=torch.Tensor(strock)
            x_new[i,j,0,:len(strock[0])]=strock[0]
            x_new[i,j,1,:len(strock[0])]=strock[1]
        x_new[i,0,2,0]=len(item)
    return x_new

def get_max_len(x,xx):
    max_len=0
    max_strock=0
    for i,item in enumerate(x):
        max_strock=max(max_strock,len(item))
        for j,strock in enumerate(item):
            max_len=max(max_len,len(strock[0])) 
    for i,item in enumerate(xx):
        max_strock=max(max_strock,len(item))
        for j,strock in enumerate(item):
            max_len=max(max_len,len(strock[0])) 
    return max_strock,max_len

max_strock,max_len=get_max_len(train_X,test_X)
print("Maximal Size=",max_strock,"x",max_len)

Method for obtaining accuracy score

In [None]:
def get_acc(test_X,test_Y):
    cur_len=0
    acc=0
    while cur_len<len(test_X):
        model.zero_grad()
        minibatch_X=test_X[cur_len:cur_len+batch_size]
        minibatch_X=unpack(minibatch_X,max_strock,max_len).cuda()
        minibatch_Y=test_Y[cur_len:cur_len+batch_size]
        minibatch_Y=torch.LongTensor(minibatch_Y).cuda()
        y_predict=model(minibatch_X)
        y_predict=torch.argmax(y_predict,dim=1)
        cur_len+=batch_size
        acc+=(y_predict==minibatch_Y).sum().item()
    return acc/cur_len

## Model Define

Here we define out CRNN model.

First we define our CNN. it is a 4-layer CNN model as a embedding method, to encode one stroke with size (200,), to a vector with size (64,).

During tunning process, I find that 
1. **The final convolution layer should have a big reception field (kerenel size) to guarantee a better model performance**
2. **The linear (dense) vector as a final layer is needed. Ending with convolution layer can not have a good perfomance**

In [None]:
class CNN(nn.Module):
    def __init__(self,output_size):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(
                in_channels=3,      # input height
                out_channels=6,    # n_filters
                kernel_size=3      # filter size
            ),
            nn.ReLU(),    # activation
            nn.MaxPool1d(kernel_size=2)
        )
        self.conv2 = nn.Sequential(
             nn.Conv1d(6,12, 3),
             nn.ReLU(),  # activation
             nn.MaxPool1d(kernel_size=2)
         )
        self.conv3 = nn.Sequential(
             nn.Conv1d(12,32, 20),
             nn.ReLU(),  # activation
             nn.MaxPool1d(kernel_size=10)
         )
        self.out = nn.Linear(64, output_size)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x=self.conv3(x)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

Here we define our CRNN model

In [None]:
class CRNN(nn.Module):
    def __init__(self):
        super(CRNN, self).__init__()
        self.middle_size=middle_size=40
        self.gru=nn.GRU(input_size=middle_size,hidden_size=5,num_layers=3,bias=True)
        # self.gru=nn.GRU(input_size=3,hidden_size=5,num_layers=1,bias=True)
        self.cnn=CNN(output_size=middle_size)
        self.out=nn.Softmax(dim=-1)
    def forward(self, x):
        x=x.view(-1,3,max_len)
        xx = self.cnn(x)
        xx=xx.view(-1,max_strock,self.middle_size).transpose(0,1)
        output,_ = self.gru(xx)
        to_send=torch.Tensor(torch.Size([xx.shape[1],5])).cuda()
        for i in range(xx.shape[1]):
            to_send[i]=output[int(x[i*max_strock,2,0])-1,i]
        output = self.out(to_send)
        return output

## Training model

Here we set our batch_size to be very big because each data do not have a big size (just 30x200))

We use Checkpoint to save the current best model. Also by using this method we don't have to keep this notebook open to ensure the ouptus been saved.

We train our model on NVIDIA Tesla K80 with 11GB memory, training about 12 hours.

In [1]:
import torch.optim as optim
import pickle
print(0)
print(1)
model=CRNN().cuda()
batch_size=512
loss=torch.nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())
EPOCH=200

bst_acc=0.00
for i in range(EPOCH):
    total_loss=0
    cur_len=0       
    while cur_len<len(train_X):
        model.zero_grad()
        minibatch_X=train_X[cur_len:cur_len+batch_size]
        minibatch_X=unpack(minibatch_X,max_strock,max_len).cuda()
        
        minibatch_Y=train_Y[cur_len:cur_len+batch_size]
        minibatch_Y=torch.LongTensor(minibatch_Y).cuda()
        y_predict=model(minibatch_X)
        
        
        output_loss=loss(y_predict,minibatch_Y)
        output_loss.backward()
        optimizer.step()
        cur_len+=batch_size
        total_loss+=output_loss.item()
    if i%1==0:
        acc=get_acc(test_X,test_Y)
        record_file=open('record','a')
        record_file.write("{}/{}, loss={},acc={}".format(i,EPOCH,total_loss,acc))
        record_file.close()
        if acc>bst_acc:
            torch.save(model,"checkpoint.pkl")
            bst_acc=acc

 0/200, loss=1392.487428188324,acc=0.7514569682459677
 1/200, loss=1278.0826328992844,acc=0.8673056325604839
 2/200, loss=1254.408109664917,acc=0.8779926915322581
 3/200, loss=1245.3379324674606,acc=0.8845766129032258
 4/200, loss=1240.8834501504898,acc=0.8928065146169355
 5/200, loss=1237.5869688987732,acc=0.898894279233871
 6/200, loss=1234.294924736023,acc=0.8970435357862904
 7/200, loss=1231.2056339979172,acc=0.9066437752016129
 8/200, loss=1229.548694729805,acc=0.9063208795362904
 9/200, loss=1228.1950545310974,acc=0.912077872983871
 10/200, loss=1226.7557901144028,acc=0.9128890498991935
 11/200, loss=1226.493235707283,acc=0.9126449092741935
 12/200, loss=1223.6590828895569,acc=0.9166456653225806
 13/200, loss=1223.37473154068,acc=0.9122275075604839
 14/200, loss=1222.13602745533,acc=0.9184412802419355
 15/200, loss=1221.5133837461472,acc=0.9183940272177419
 16/200, loss=1221.1248507499695,acc=0.9180553805443549
 17/200, loss=1220.1645115613937,acc=0.9139522429435484
 18/200, loss