Quick Drawing is a Kaggle challenge for classifying sketch pictures. There are 340 categories in total and 
50M pictures in the dataset. Each category has 25,000 pictures. The difficulty is that a drawing might be 
incomplete or might not match its label. We are trying to develop a CNN to solve this problem.   

According to the challenge, there are 4 kinds of datasets. 

    - Raw data (.ndjson)
    - Simplified drawings (.ndjson)
    - Binary data (.bin)
    - Bitmap data (.npy)
Raw data and Simplified drawings are provided as vector information of strokes. Bitmap data is rendered into 
a 28 * 28 grayscale bitmap in the numpy format.  

Maybe we could use the raw data to improve the performance, since it includes the sequence information.


In [1]:
import torch
import time
import torch
import os
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler
import torchvision.datasets as dset
import torchvision.transforms as T
import numpy as np
from torch.utils import data
import QDdata as QD
import matplotlib.pyplot as plt
%matplotlib inline
from torch.utils.data import DataLoader
import resnet as RN
from torchvision import transforms
import csv

In [2]:
# Set to False to force CPU execution even when CUDA is present.
USE_GPU = True

# Floating-point type used for the input tensors in this notebook.
dtype = torch.float32

# CUDA is selected only when it is both requested and actually available.
device = torch.device('cuda' if (USE_GPU and torch.cuda.is_available()) else 'cpu')

# Number of iterations between two training-loss printouts.
print_every = 75

print('using device:', device)


using device: cuda


# Data Augmentation

In [3]:
# Training-time augmentation: random horizontal flip with probability 0.5,
# followed by conversion to a tensor.
trans = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
    ]
)

# Center Feature Per Class

In [4]:
# Scalar multiplier applied to the center-loss term during training.
alpha = 0.01
# Center features loaded from disk, then moved to the active device as float32.
cf_class = torch.from_numpy(np.load("center_feature.npy")).to(
    device=device, dtype=torch.float32
)

In [5]:
# Hyper-parameters handed to the training helpers; currently only the base learning rate.
args = dict(learning_rate=3e-3)

In [6]:
# The following code comes from https://github.com/pytorch/examples/blob/master/imagenet/main.py
class AverageMeter(object):
    """Keep the most recent value of a metric together with its running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset the last value, the weighted sum, the sample count and the mean to 0."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed over ``n`` samples and refresh the mean.

        ``val`` is weighted by ``n`` in the running sum, so ``avg`` is the
        sample-weighted average of everything recorded since the last reset.
        """
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

def adjust_learning_rate(optimizer, epoch, args, decay=0.5):
    """Set the learning rate of every param group to ``base_lr * decay ** epoch``.

    With the default ``decay`` of 0.5 the base rate is halved once per epoch;
    epoch 0 uses the base rate unchanged.

    Inputs:
    - optimizer: object exposing ``param_groups``, a list of dicts with an 'lr' key.
    - epoch: zero-based epoch index used as the exponent.
    - args: dict holding the base rate under the key 'learning_rate'.
    - decay: multiplicative factor applied once per epoch (default 0.5).

    Returns: Nothing; the optimizer's param groups are updated in place.
    """
    lr = args['learning_rate'] * (decay ** epoch)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy (in percent) for each k in ``topk``.

    Inputs:
    - output: 2-D tensor of per-class scores, one row per sample.
    - target: 1-D tensor of integer class labels, one per sample.
    - topk: iterable of k values to evaluate.

    Returns: a list with one single-element tensor per k, holding the
    percentage of samples whose label is among the k highest scores.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # Indices of the maxk largest scores per sample, transposed so that
        # row r holds every sample's rank-r prediction.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # `correct` inherits the non-contiguous layout of the transposed
            # `pred`; reshape() copies when needed, whereas view() raises.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

# Resnet50 96X96

In [7]:
# The following code comes from CS231n.
from average_precision import mapk
def mean_ap_torch(y_val,y_pred_score,k):
    """Mean average precision at ``k`` computed from torch score tensors.

    Inputs:
    - y_val: per-sample ground-truth labels, passed straight to ``mapk``.
    - y_pred_score: iterable of 1-D score tensors, one per sample.
    - k: number of top-ranked classes to keep per sample.

    Returns: whatever ``mapk`` returns for the top-k class indices.
    """
    # torch.sort returns (values, indices). Element 1 holds the class indices
    # in descending-score order, which is what mapk must receive; the previous
    # `[::-1][1]` reversed the pair and selected the sorted scores instead.
    y_pred = [torch.sort(c, descending=True)[1][:k] for c in y_pred_score]
    return mapk(y_val,y_pred,k)

def mean_ap_3(y_val,y_pred_score,k):
    """Mean average precision at ``k`` computed from array-like score rows.

    Each row of ``y_pred_score`` is ranked by descending score with
    ``np.argsort`` and its first ``k`` class indices are handed to ``mapk``
    together with ``y_val``.
    """
    top_k_per_sample = []
    for scores in y_pred_score:
        ranked_desc = np.argsort(scores)[::-1]
        top_k_per_sample.append(ranked_desc[:k])
    return mapk(y_val, top_k_per_sample, k)

def check_accuracy(loader, model):
    """Score ``model`` on the leading batches of ``loader`` and checkpoint on improvement.

    At most 102 batches are evaluated. For every batch the MAP@1 and MAP@3
    (in percent) are computed with ``mean_ap_3``; the per-batch values are
    averaged and printed. When the averaged MAP@3 exceeds both 20 and the best
    value stored in the module-level ``QDtrain`` dict by more than 1, the
    previously saved checkpoint file (if any) is removed, the model weights are
    saved under a new name, and ``QDtrain`` is updated.

    The model is left in evaluation mode. Relies on the module-level
    ``device``, ``dtype`` and ``QDtrain``.
    """
    model.eval()  # evaluation mode

    top1_scores, top3_scores = [], []
    with torch.no_grad():
        for batch_idx, (x, y) in enumerate(loader):
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            # The model returns a pair; only the class scores are needed here.
            scores, _ = model(x)
            labels = y.reshape(-1, 1)
            scores = scores.data.cpu().numpy()

            top1_scores.append(100 * mean_ap_3(labels, scores, 1))
            top3_scores.append(100 * mean_ap_3(labels, scores, 3))

            # Cap the amount of validation work per call.
            if batch_idx > 100:
                break

        m1 = np.mean(top1_scores)
        m3 = np.mean(top3_scores)

        improved = (m3 > QDtrain["mAP3"] + 1) and (m3 > 20)
        if improved:
            previous_file = QDtrain["file"]
            if os.path.exists(previous_file):
                os.remove(previous_file)
            QDtrain["file"] = "model" + QDtrain["prefix"] + str(m3)
            torch.save(model.state_dict(), QDtrain["file"])
            QDtrain["mAP3"] = m3
            QDtrain["mAP"] = m1
        print('Got correct (%.2f,%.2f)' % (m1, m3))
        
        
def train(model, optimizer, epochs,args):
    """
    Train a model on quickdrawing.

    The model is trained with cross-entropy plus ``alpha`` times an MSE
    "center" loss between the feature returned by the model and the
    ``cf_class`` entry selected by each sample's label.

    Inputs:
    - model: A PyTorch Module giving the model to train. Its forward pass must
      return a pair ``(scores, feature)``.
    - optimizer: An Optimizer object we will use to train the model
    - epochs: A Python integer giving the number of epochs to train for
    - args: arguments for the learning rate; forwarded to adjust_learning_rate

    Relies on the module-level names ``device``, ``dtype``, ``trans``,
    ``alpha``, ``cf_class`` and ``print_every``.

    Returns: Nothing, but prints model accuracies during training.
    """

    model = model.to(device=device)  # move the model parameters to CPU/GPU
    # Validation set, built once and reused for every check_accuracy call.
    loader_val = QD.QDloadStrokeData(val = True)
    loader_val = DataLoader(dataset=loader_val, batch_size=256,shuffle=False)
    
    # We try to aggregate several batches together 
    # so that we can have a big batch size to fill the GPU.
    # real_batch size = aggregated_batches * batch_size
    aggregated_batches = 1
    
    for e in range(epochs):

        # Running training metrics, restarted at the beginning of every epoch.
        train_map1 = AverageMeter()
        train_map3 = AverageMeter()
        
        # Learning rate decay
        adjust_learning_rate(optimizer, e, args)
        
        # We split the whole train dataset into 100 segments.
        for i in range(100):
            t1 = time.time()
            total_loss = 0
            train_dataset = QD.QDloadStrokeData(no=i,val = False,transforms = trans)          
            train_loader = DataLoader(dataset=train_dataset, batch_size=512,shuffle=False)
            for t, (x, y) in enumerate(train_loader):
                # check_accuracy() below switches the model to eval mode, so
                # training mode is re-enabled on every iteration.
                model.train()  # put model to training mode
                x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
                y = y.to(device=device, dtype=torch.long)

                # the model also returns the center feature
                scores,cf_pred = model(x)
                
                # Calculate the cross-entropy loss
                entropy_loss = F.cross_entropy(scores, y)
                
                # Calculate the center loss
                center_loss = F.mse_loss(cf_pred,cf_class[y])
                
                loss = entropy_loss + alpha * center_loss
                
                # Accumulates the loss tensor itself (not .item()), so backward()
                # below can propagate through every accumulated batch.
                total_loss += loss
                # Calculate train accuracy
                y_pred = scores.data.cpu().numpy()
                y_val = y.reshape(-1,1)
                
                mAP = 100 * mean_ap_3(y_val,y_pred,1)
                mAP3 = 100 * mean_ap_3(y_val,y_pred,3)
             
                train_map1.update(mAP,x.size(0))
                train_map3.update(mAP3,x.size(0))         
                #acc1,acc3 = accuracy(scores, y, topk=(1, 3))
                #train_map1.update(acc1,x.size(0))
                #train_map3.update(acc3,x.size(0))
                                  
                # NOTE(review): this condition is also true at t == 0, i.e. after
                # a single batch. With aggregated_batches == 1 that is harmless,
                # but for larger values the first step would divide one batch's
                # loss by aggregated_batches - confirm before raising it.
                if t % aggregated_batches == 0:                    
                    avg_loss = total_loss / aggregated_batches

                    optimizer.zero_grad()
                    avg_loss.backward()
                    optimizer.step()
                    total_loss = 0

                    t2 = time.time()

                    # Periodic progress report followed by a validation pass.
                    if t % (print_every) == 0:
                        print('Epoch %d,Iteration %d,loss = %.4f,time %.4f,train accuracy(%.2f,%.2f)' % 
                              (e,t, avg_loss.item(),t2-t1,train_map1.avg,train_map3.avg))
                        check_accuracy(loader_val, model)
                        # Restart the timer so validation time is not counted.
                        t1 = time.time()
                        

In [8]:
# ResNet-50 from the local ``resnet`` module, built with a 40-way output.
model = RN.resnet50(num_classes=40)
# Adam over all model parameters, starting from the configured base learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=args['learning_rate'])
# Best-checkpoint bookkeeping: best scores so far, saved file name, file-name prefix.
QDtrain = dict(mAP=0, mAP3=0, file="", prefix="resnet50")

In [9]:
# Train for 5 epochs; progress and validation scores are printed along the way.
train(model, optimizer, epochs=5,args=args)

No = 0 and total number of items 20480
No = 0 and total number of items 42546
Epoch 0,Iteration 0,loss = 3.9462,time 6.1264,train accuracy(3.12,5.11)
Got correct (2.50,4.58)
Epoch 0,Iteration 75,loss = 2.7906,time 51.8443,train accuracy(8.06,12.90)
Got correct (19.00,28.26)
No = 1 and total number of items 42279
Epoch 0,Iteration 0,loss = 2.6064,time 6.0869,train accuracy(9.57,14.97)
Got correct (21.71,32.86)
Epoch 0,Iteration 75,loss = 1.6249,time 52.5151,train accuracy(24.69,33.21)
Got correct (49.34,60.86)


KeyboardInterrupt: 

1. batch size = 64. count =  340 * 10240.
   Iteration 29400, loss = 0.5610
   Got 64 correct (80.22,85.62)
2. batch size =128 count = 340 *10240 * 2
    Iteration 37800, loss = 0.9535
    Got 128 correct (77.65,83.64)
3. batch size =128 count = 340 *10240 * 2 
    train on 28 * 28 drawing transfered from stroke
    Iteration 45900, loss = 0.9149
    Got correct (57.55,65.98)

## RNN Model

In [None]:
import torch.nn.functional as F
class SketchRNN(nn.Module):
    """Sequence classifier: three 1-D convolutions followed by an LSTM and a linear head.

    Inputs to the constructor:
    - input_size: number of features per time step (channels of the first conv).
    - hidden_size: LSTM hidden state size.
    - output_size: number of classes produced by the final linear layer.
    - dropout: dropout between stacked LSTM layers. PyTorch only applies it
      when ``n_layers > 1``, so it is forced to 0 for a single layer to avoid
      the corresponding warning.
    - n_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, dropout=0.3, n_layers=1):
        super(SketchRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers

        self.conv1d_1 = nn.Conv1d(input_size, 48, 5)
        self.dropout_1 = nn.Dropout(0.3)
        self.conv1d_2 = nn.Conv1d(48, 64, 5)
        self.dropout_2 = nn.Dropout(0.3)
        self.conv1d_3 = nn.Conv1d(64, 96, 3)
        self.dropout_3 = nn.Dropout(0.3)
        # `dropout` must be passed by keyword: the fourth positional argument
        # of nn.LSTM is `bias`, not `dropout`.
        lstm_dropout = dropout if n_layers > 1 else 0.0
        self.lstm_1 = nn.LSTM(96, hidden_size, n_layers,
                              batch_first=True, dropout=lstm_dropout)
        self.fc_mu = nn.Linear(hidden_size, output_size)

    def forward(self, inputs, hidden=None):
        """Classify a batch of sequences.

        Inputs:
        - inputs: tensor laid out as (time, batch, features). The unpadded
          convolutions (kernel sizes 5, 5, 3) shorten the time axis by 10, so
          at least 11 time steps are required.
        - hidden: optional initial LSTM state passed straight to ``nn.LSTM``;
          ``None`` lets the LSTM start from zeros.

        Returns: ``(log_probs, h_n)`` where ``log_probs`` has shape
        (batch, output_size) and ``h_n`` is the LSTM's final hidden state.
        """
        # (time, batch, features) -> (batch, features, time), as Conv1d expects.
        inputs = inputs.transpose(0, 1).transpose(1, 2)
        output = self.dropout_1(self.conv1d_1(inputs))
        output = self.dropout_2(self.conv1d_2(output))
        output = self.dropout_3(self.conv1d_3(output))
        # Back to (batch, time, channels) for the batch_first LSTM.
        output = output.transpose(1, 2)
        output, (hidden, _) = self.lstm_1(output, hidden)
        # Only the last time step feeds the classifier.
        output_in_last_timestep = output[:, -1, :]
        output = self.fc_mu(output_in_last_timestep)
        output = F.log_softmax(output, dim=1)
        return output, hidden

# input_size=3, hidden_size=128, output_size=340. The constructor's keyword is
# `dropout`; `drop` is not a parameter and raises a TypeError.
model = SketchRNN(3, 128, 340, dropout=0.3).to(device)
optimizer = optim.Adam(model.parameters(),weight_decay = 1e-5,lr=0.1)

# Run test samples

In [None]:
# Save the current model weights (state_dict only) to a file in the working directory.
torch.save(model.state_dict(),"center-loss93.93")

In [None]:
def evaluate_test(test_data, model):
    """Score ``model`` on every batch of ``test_data`` and collect its features.

    Inputs:
    - test_data: iterable yielding ``(x, y)`` batches.
    - model: module whose forward pass returns ``(scores, feature)``.

    Returns: ``(m1, m3, center_feature)`` where ``m1`` / ``m3`` are the means
    of the per-batch MAP@1 / MAP@3 (in percent) and ``center_feature`` is a
    list with one numpy feature array per evaluated sample.

    Relies on the module-level ``device`` and ``dtype``.
    """
    model.eval()

    top1_scores = []
    top3_scores = []
    center_feature = []
    with torch.no_grad():
        for x, y in test_data:
            x = x.to(device=device, dtype=dtype)
            y = y.to(device=device, dtype=torch.long)

            scores, features = model(x)
            labels = y.reshape(-1, 1)
            scores = scores.data.cpu().numpy()
            features = features.data.cpu().numpy()

            top1_scores.append(100 * mean_ap_3(labels, scores, 1))
            top3_scores.append(100 * mean_ap_3(labels, scores, 3))

            # One entry per sample: extend() iterates over the batch axis.
            center_feature.extend(features)

        m1 = np.mean(top1_scores)
        m3 = np.mean(top3_scores)
        print(len(center_feature),center_feature[0].shape)
        return (m1, m3, center_feature)

# Restore the saved weights; map_location lets a checkpoint saved on another
# device be loaded onto whichever device is active now.
model = model.to(device=device)
checkpoint = torch.load("modelresnet5093.680016", map_location=device)
model.load_state_dict(checkpoint)

class_name = QD.qd_names

# One (name, MAP@1, MAP@3) row per class, written to CSV below.
class_accuracy = []

# One mean feature vector per class, saved to disk below.
center_feature = []

val_accuracy_map1 = AverageMeter()
val_accuracy_map3 = AverageMeter()

batch_size = 128

for name in tqdm(class_name):
    t1 = time.time()
    data_file = os.path.join("./train",name+".csv")
    test_data = QD.QDloadStrokeData(data_file = data_file)
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size, num_workers=2, shuffle=False)

    m1,m3,feature_list = evaluate_test(test_loader,model)

    feature_list = np.array(feature_list)
    print(feature_list.shape)

    #feature_list is N * 2048

    center_feature.append(np.mean(feature_list,axis = 0))

    class_accuracy.append((name,str(m1),str(m3)))

    # Weight each class by its number of samples (rows of feature_list);
    # passing feature_list[0] would use a feature vector as the count.
    num_samples = feature_list.shape[0]
    val_accuracy_map1.update(m1,num_samples)
    val_accuracy_map3.update(m3,num_samples)

    print(name,str(m1),str(m3))
    t2 = time.time()
    print("Time:",t2-t1)

# newline='' is what the csv module expects, so no blank rows appear on
# platforms that translate line endings.
with open('class_accuracy.csv',"w+", newline='') as fp:
    writer = csv.writer(fp, delimiter=',')
    writer.writerow(["class", "meanAP1", "meanAP3"])  # write header
    writer.writerows(class_accuracy)

# The format string has to be applied with %; passing the values as extra
# print() arguments leaves the placeholders unformatted.
print("The average accuracy %.4f %.4f" % (val_accuracy_map1.avg,val_accuracy_map3.avg))

np.save("center_feature",center_feature)



1) 96*96, resnet50, epoch 10, SGD. 75.2%
2) 96*96, resnet50, epoch 10, SGD  79.9% model_parametertensor(91.6667)
3) 96*96, resnet50, epoch 10, Adam model_resnet_92.317_90


* convert simplified test dataset to pixel drawings

Check the accuracy based on the class.

Create test_simplified.
load compressed(96*96) 1.968482494354248
load non compressed(96*96)  0.31299567222595215
load non compressed(28*28) 0.04715704917907715


In [None]:
# Only chunk index 10 is processed; each chunk starts at a multiple of 3000.
# The creator is bound to its own name: calling it `data` would overwrite the
# `from torch.utils import data` import, and the `del` would then remove it.
for chunk_index in range(10, 11):
    creator = QD.QDcreateData()
    creator.create(start=chunk_index * 3000, dir_name="../cs230/pic96")
    # Drop the reference before the next chunk is created.
    del creator

In [None]:
# Release the unused memory held by PyTorch's CUDA caching allocator.
torch.cuda.empty_cache()