## Deep Sketch: Deep Convolutional Neural Networks for Sketch Recognition and Similarity

### Download and unzip TU Berlin Sketch Dataset

In [0]:
!wget http://cybertron.cg.tu-berlin.de/eitz/projects/classifysketch/sketches_png.zip
!unzip sketches_png.zip

### Importing required modules

In [0]:
import os
import time
from glob import glob

import numpy as np
import sklearn.metrics as sm
import torch
import torch.nn as nn
import torch.nn.functional as f
import torch.optim as optim
import torch.utils.data
import torchvision
from PIL import Image, ImageOps

PATH=''

### Preprocess image files
* Resize each image to 180×180 using bicubic interpolation
* Invert colors
* Binarize: set every non-zero pixel to full intensity (255)

In [0]:
# Preprocess every sketch in place: resize, invert, then binarize.
#
# The images are overwritten on disk, so the cell must be safe to re-run:
# a file that already has the target size is treated as processed and skipped
# (otherwise a second run would invert the colors back).
# NOTE(review): this assumes the raw dataset images are not already 180x180.
TARGET_SIZE=(180,180)

files=glob(PATH+'png/*/*',recursive=True)

for file in sorted(files):
    # The context manager releases the file handle before the same path is
    # rewritten below.
    with Image.open(file) as src:
        if src.size==TARGET_SIZE:
            continue
        img=src.resize(TARGET_SIZE,Image.BICUBIC)
    img=ImageOps.invert(img)
    pixels=np.array(img)
    # Any non-zero pixel becomes full intensity.
    pixels[pixels>0]=255
    img=Image.fromarray(pixels)
    img.save(file)
    print("Saved File: %s\r"%(file))

### Create a map for labels

In [0]:
# Build the index -> class-name table and store it next to the data.
#
# Only sub-directories of png/ are classes. Filtering on "is a directory"
# (instead of removing 'filelist.txt' by name) does not fail when that file is
# absent and ignores any other stray file. torchvision's ImageFolder also
# derives its class indices from the sorted sub-directory names, so position i
# in this list corresponds to label i produced by the data loader.
folders=sorted(
    os.path.basename(entry)
    for entry in glob(PATH+'png/*')
    if os.path.isdir(entry)
)
np.save(PATH+'labels.npy',folders)

### Define the model

In [0]:
class CNN(nn.Module):
    """Fully convolutional sketch classifier.

    Six convolution layers with leaky-ReLU activations, three max-pooling
    stages and dropout before the final 1x1 convolution that produces the
    class scores.

    For a single-channel 224x224 input (a 180x180 sketch padded by 22 pixels
    on each side) the spatial size shrinks to 1x1 before the last layer, so
    the output can be reshaped to ``(batch, num_classes)``.

    Args:
        num_classes: Number of output classes (output channels of the last
            convolution). Defaults to 250.
    """

    def __init__(self,num_classes=250):
        super().__init__()
        self.num_classes=num_classes
        # Arguments are (in_channels, out_channels, kernel, stride, padding).
        self.conv1=nn.Conv2d(1,64,7,2,0)
        self.conv2=nn.Conv2d(64,128,5,2,2)
        self.conv3=nn.Conv2d(128,256,3,1,1)
        self.conv4=nn.Conv2d(256,512,3,1,0)
        self.conv5=nn.Conv2d(512,4096,5,1,0)
        self.conv6=nn.Conv2d(4096,num_classes,1,1,0)
        # One pooling module (kernel 3, stride 2) is shared by all stages; it
        # has no parameters.
        self.maxpool=nn.MaxPool2d(3,2,0)
        self.dropout=nn.Dropout()

    def forward(self,x):
        """Run the network.

        Args:
            x: Input batch with one channel, ``(batch, 1, H, W)``.

        Returns:
            Tuple ``(scores, a, b, c)`` where ``scores`` is the class score
            tensor reshaped to ``(-1, num_classes)`` and ``a``, ``b``, ``c``
            are intermediate feature maps: after the second pooling, after the
            third pooling, and after the activated fifth convolution. For a
            224x224 input their shapes are ``(batch, 128, 13, 13)``,
            ``(batch, 512, 5, 5)`` and ``(batch, 4096, 1, 1)``.
        """
        x = self.conv1(x)
        x = f.leaky_relu(x)
        x = self.maxpool(x)

        x = self.conv2(x)
        x = f.leaky_relu(x)
        x = a = self.maxpool(x)

        x = self.conv3(x)
        x = f.leaky_relu(x)

        x = self.conv4(x)
        x = f.leaky_relu(x)
        x = b = self.maxpool(x)

        x = self.conv5(x)
        x = c = f.leaky_relu(x)

        # Dropout is applied after `c` is captured, so the returned feature
        # map is never affected by it.
        x = self.dropout(x)

        x = self.conv6(x)
        x = x.view(-1,self.num_classes)

        return (x,a,b,c)

### Function for dataset generation

In [0]:
def get_data_loader(root,batch_size,augment=True,shuffle=True):
    """Create a DataLoader over an ImageFolder-style directory tree.

    Every image is padded by 22 pixels on each side and converted to a
    tensor. With ``augment`` enabled a random affine transform with
    ``degrees=35`` is applied between those two steps.

    Args:
        root: Directory containing one sub-directory per class.
        batch_size: Number of samples per batch.
        augment: Apply the random affine augmentation. Defaults to True;
            pass False for a deterministic loader (e.g. for evaluation).
        shuffle: Reshuffle the samples on every pass. Defaults to True.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(images, labels)``.
    """
    steps=[torchvision.transforms.Pad(22)]
    if augment:
        steps.append(torchvision.transforms.RandomAffine(degrees=35))
    steps.append(torchvision.transforms.ToTensor())
    img_transform=torchvision.transforms.Compose(steps)
    dataset=torchvision.datasets.ImageFolder(root,img_transform)
    loader=torch.utils.data.DataLoader(dataset,batch_size=batch_size,shuffle=shuffle)
    return loader

### Training the model
* Adam Optimization Algorithm
* Cross Entropy Loss function for multi-class classification
* CUDA on GPU for speed

In [0]:
# Train the classifier with Adam and cross-entropy loss, logging per-epoch
# totals to '<model name>_train.txt' and saving the weights at the end.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:',device)

model_name='model_1.pt'

model=CNN().to(device)
# Make the training mode explicit so dropout is active.
model.train()

lossfunc=nn.CrossEntropyLoss()
optimizer=optim.Adam(model.parameters())

batch_size=64
epochs=10
printfreq=60

train_loader=get_data_loader(PATH+'png/',batch_size)

# The `with` block guarantees the log is flushed and closed even if training
# is interrupted or raises.
with open(model_name[:-3]+'_train.txt','w') as logger:
    logger.write(str(model)+'\n\n')
    logger.write("Device : %s\n\n"%(device))
    logger.write("Optimizer : ADAM\n\n")
    logger.write("Batch Size : %d\n\n"%(batch_size))
    logger.write("Epochs : %d\n\n"%(epochs))

    print("\nTraining...\n")

    start_time=time.time()

    for ep in range(epochs):

        # running_loss: sum over the last `printfreq` batches (console only).
        # train_loss:   sum over all batches of the epoch (log file).
        running_loss=0.0
        train_loss=0.0
        # NOTE(review): no evaluation is run inside this loop, so the
        # "Test Loss" written to the log is always 0.0.
        test_loss=0.0

        for i,data in enumerate(train_loader):
            x,y = data

            # The loader yields multi-channel tensors; keep the first channel
            # only because the network takes single-channel input.
            x=x[:,0].unsqueeze(1)

            x,y = x.to(device),y.to(device)

            optimizer.zero_grad()

            output = model(x)

            # output[0] holds the class scores; the other entries are
            # intermediate feature maps.
            loss = lossfunc(output[0],y)
            batch_loss=loss.item()
            running_loss+=batch_loss
            train_loss+=batch_loss

            loss.backward()
            optimizer.step()

            if (i+1)%printfreq==0:
                print("**** Epoch %d ****\nBatch: %d\tRunning Loss: %.4f\n"%(ep+1,i+1,running_loss))
                running_loss=0.0

        logger.write("**** Epoch %d ****\nTrain Loss: %.4f\nTest Loss: %.4f\n\n"%((ep+1),train_loss,test_loss))

    end_time=time.time()
    print("Training Complete. Time Taken: %.4f\n"%(end_time-start_time))

# Store CPU tensors so the checkpoint can be loaded on any machine, and write
# it under PATH because the later cells load it from PATH+model name.
model.cpu()
torch.save(model.state_dict(),PATH+model_name)

### Classifier

In [0]:
def classifier(image,model,labels):
    """Predict the class name of a single sketch.

    Args:
        image: PIL image of a preprocessed sketch. It is padded by 22 pixels
            on each side before being fed to the network.
        model: Network whose forward pass returns a tuple with the class
            scores as first element. The caller is expected to have put it
            in evaluation mode (``model.eval()``); this function does not
            change the mode.
        labels: Sequence mapping class index to class name.

    Returns:
        ``labels[idx]`` for the index with the highest score.
    """
    img_transform=torchvision.transforms.Compose([
            torchvision.transforms.Pad(22),
            torchvision.transforms.ToTensor()
        ])
    x=img_transform(image)
    # Keep only the first channel, mirroring `x[:,0].unsqueeze(1)` in the
    # training loop, so multi-channel images work with the 1-channel network.
    x=x[:1].unsqueeze(0)
    # Run on whatever device the model's weights live on.
    x=x.to(next(model.parameters()).device)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        y=model(x)
    idx=torch.argmax(y[0]).item()
    return labels[idx]

### Examples

In [0]:
# Load the trained weights and classify one sample sketch.
model=CNN()
# map_location keeps the load working on a CPU-only machine even if the
# checkpoint was written from GPU tensors.
model.load_state_dict(torch.load(PATH+'model_1.pt',map_location='cpu'))
# Evaluation mode disables dropout.
model.eval()

labels=np.load(PATH+'labels.npy')
# The context manager closes the image file once the prediction is made.
with Image.open(PATH+'png/airplane/63.png') as img:
    label=classifier(img,model,labels)
print(label)

### Performance Evaluation

In [0]:
# Evaluate the saved model and write a per-class report to
# '<model name>_performance.txt'.
#
# NOTE(review): the loader below reads the same PATH+'png/' directory that is
# used for training and applies the same random augmentation and shuffling,
# so these numbers are not measured on held-out data and vary between runs.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model_name='model_1.pt'
model=CNN()
model.load_state_dict(torch.load(PATH+model_name,map_location='cpu'))
model.to(device)
model.eval()

batch_size=64

test_loader=get_data_loader(PATH+'png/',batch_size)

# Collect per-batch arrays and join them once at the end; appending to a
# growing array inside the loop copies everything on each iteration and
# turns the integer labels into floats.
target_batches=[]
prediction_batches=[]

print("\nTesting...\n")

start_time=time.time()

# Inference only: skip building autograd graphs.
with torch.no_grad():
    for i,data in enumerate(test_loader):
        x,y = data
        # Keep the first channel only; the network takes 1-channel input.
        x=x[:,0].unsqueeze(1)
        x = x.to(device)
        output = model(x)
        idx=torch.argmax(output[0],1).cpu().numpy()
        prediction_batches.append(idx)
        target_batches.append(y.numpy())
        print("Batch: %d"%(i))

end_time=time.time()
print("Testing Complete. Time Taken: %.4f\n"%(end_time-start_time))

target=np.concatenate(target_batches)
prediction=np.concatenate(prediction_batches)

report=sm.classification_report(target,prediction)

print(report)
print(sm.accuracy_score(target,prediction))

# Open the log only when there is something to write; `with` closes it even
# if the write fails.
with open(model_name[:-3]+'_performance.txt','w') as logger:
    logger.write(report)