In [73]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import torch, torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
from sklearn import preprocessing
from sklearn.model_selection import StratifiedKFold, KFold
import torchvision.models, torchvision.transforms as transforms

torch.manual_seed(0)
np.random.seed(0)

device = torch.device('cpu')

In [74]:
from PIL import Image, ImageDraw
import numpy as np
import json

def vector_to_numpy(drawing, side=256):
    image = vector_to_image(drawing, side)
    image_array = np.array(image)
    return image_array

def vector_to_image(drawing, side=256):
    drawing = json.loads(drawing)
    min_x, min_y, max_x, max_y = calculate_bounding_box(drawing)

    # Calculate the offset to center the drawing within the canvas
    offset_x = (side - (max_x - min_x + 1)) // 2
    offset_y = (side - (max_y - min_y + 1)) // 2

    image = Image.new('L', (side, side), color='white')  # Create a white canvas
    draw = ImageDraw.Draw(image)

    for x, y in drawing:
        xy = [(x0 - min_x + offset_x, y0 - min_y + offset_y) for x0, y0 in zip(x, y)]
        draw.line(xy, fill='black', width=1)

    return image

def calculate_bounding_box(drawing):
    all_x = [x for x, _ in drawing]
    all_y = [y for _, y in drawing]

    min_x = min(min(x) for x in all_x)
    min_y = min(min(y) for y in all_y)
    max_x = max(max(x) for x in all_x)
    max_y = max(max(y) for y in all_y)

    return min_x, min_y, max_x, max_y

In [75]:
df = pd.read_csv('doodle-detectives-aiclubiitm/train.csv') #, dtype={'drawing': np.array})
class_list = df['word'].unique()
classes = {word: index for index, word in enumerate(class_list)}
def prediction_to_words(prediction):
    return ' '.join((class_list[p] for p in prediction))
df = df.sample(n=10000).reset_index().drop('index', axis=1)
# df['drawing'] = df['drawing'].map(vector_to_numpy)

In [76]:
transform = transforms.Compose(
    [
    #  transforms.Resize((224, 224)),
     transforms.Lambda(lambda x: x.repeat(1,3, 1, 1)),
    #  transforms.Lambda(lambda x: print(x.shape)),
    #  transforms.Grayscale(num_output_channels=3),
    #  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
     ])
vgg16 = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights)




In [77]:
class BasicCNN(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.convolutions = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            # 1 input image. If we had an RGB image, it would be Conv2d(3, 32, 3, padding=1)
            # 32 output images, i.e, 32 kernels and 32 output images are produced
            nn.ReLU(),
            # The activation function
            nn.MaxPool2d(4),
            # Pooling with 2 x 2 blocks
            nn.Conv2d(32, 64, 3, padding=1),
            # Now we have those 32 images and we make 64 from them
            nn.ReLU(),
            nn.MaxPool2d(4)
            # Pooling again
        )
        self.fully_connected = nn.Sequential(
            nn.Flatten(),
            nn.Linear((224*224*64)//256, 600),
            # The image shape was initially 28 x 28, by pooling we've made it 7 x 7, so we divide by 16
            # We multiply by 64 because the model has learnt 64 features.
            nn.Linear(600, 128),
            nn.Linear(128, 101)
            # We have 10 output neurons (1 for each class)
        )
    def forward(self, inputs):
        # inputs = inputs.reshape([inputs.shape[1], inputs.shape[0], 256, 256])
        # print(type(inputs))
        x = self.convolutions(inputs)
        # Functions in convolution layers are run
        x = self.fully_connected(x)
        # Functions in fully connected layer are run
        return x
    def predict(self, test_loader, out='SUBMISSION.csv', out_small='sub', i_0=0): # out = None may not be implemented
        """
        Returns the predictions in a csv chosen by out, i_0 is in case you crash and have already done some stuff
        """
        self.eval()
        if not out: total_predictions=[]
        for i, (data, ids) in enumerate(iter(test_loader)):
            if i_0 > i: continue
            predictions = torch.topk(self.forward(data), 3, dim=1)[1]
            predictions = (prediction_to_words(p) for p in predictions)
            if out:
                df = pd.DataFrame({'key_id': ids, 'word': predictions})# dtype={'key_id': np.int64, 'word': np.array})
                # df['predictions'] = df['predictions'].map(prediction_to_words)
                df.to_csv(f'{out_small}_{i}.csv', index=False)
            else: total_predictions.append(predictions)
        if not out: return total_predictions
        else:
            total_predictions = []
            for j in range(i+1):
                total_predictions.append(pd.read_csv(f'{out_small}_{j}.csv'))
            total_predictions = pd.concat(total_predictions)
            total_predictions.to_csv(out, index=False)

# le = preprocessing.LabelEncoder()
class MyDataset():
    def __init__(self, data, targets=None, ids=None, train=True):
        self.data = data
        self.train = train
        if train: 
            self.targets = targets.map(lambda target: classes[target])
        if ids is not None: self.ids = ids
    def __len__(self):
        return len(self.data)
    def __getitem__(self, i):
        img = 1 - vector_to_numpy(self.data.loc[i], side=224)//255
        if self.train: return (torch.tensor(img, dtype=torch.float32).reshape((1, *img.shape))), torch.tensor(self.targets[i], dtype=torch.int64)
        if self.ids is not None: return (torch.tensor(img, dtype=torch.float32).reshape((1, *img.shape))), torch.tensor(self.ids[i], dtype=torch.int64)
        return (torch.tensor(img, dtype=torch.float32).reshape((1, *img.shape)))

In [78]:
i = 0
vgg_length = len(list(vgg16.features.parameters()))
for param in vgg16.features.parameters():
    if i < vgg_length - 2: param.requires_grad = False

In [79]:
class VGG16(nn.Module):
    def __init__(self):
        nn.Module.__init__(self)
        self.convolutions = vgg16
        self.extra = nn.Sequential(
            nn.Linear(1000, 101), # This was my 1st guess at the no of outputs in VGG16, I was so surprised when it worked
            nn.Softmax(dim=1)
        )
    def forward(self, inputs):
        # inputs = inputs.reshape([inputs.shape[1], inputs.shape[0], 256, 256])
        # print(type(inputs))
        inputs = transform(inputs).to(device=device)
        x = self.convolutions(inputs)
        x = self.extra(x)
        # Functions in fully connected layer are run
        return x
    def predict(self, test_loader, out='SUBMISSION.csv', out_small='sub', i_0=0): # out = None may not be implemented
        """
        Returns the predictions in a csv chosen by out, i_0 is in case you crash and have already done some stuff
        """
        self.eval()
        if not out: total_predictions=[]
        for i, (data, ids) in enumerate(iter(test_loader)):
            if i_0 > i: continue
            predictions = torch.topk(self.forward(data), 3, dim=1)[1]
            predictions = (prediction_to_words(p) for p in predictions)
            if out:
                df = pd.DataFrame({'key_id': ids, 'word': predictions})# dtype={'key_id': np.int64, 'word': np.array})
                # df['predictions'] = df['predictions'].map(prediction_to_words)
                df.to_csv(f'{out_small}_{i}.csv', index=False)
            else: total_predictions.append(predictions)
        if not out: return total_predictions
        else:
            total_predictions = []
            for j in range(i+1):
                total_predictions.append(pd.read_csv(f'{out_small}_{j}.csv'))
            total_predictions = pd.concat(total_predictions)
            total_predictions.to_csv(out, index=False)

In [80]:
# Defining one epoch of training
def train(model, train_loader, optimizer, loss, ps=50):
    # We train the appropriate model with the input data and the appropriate optimizer
    # ps is how often we print the accuracy
    train_iter = iter(train_loader)
    model.train()
    # Puts model in train mode
    for i, (data, targets) in enumerate(train_iter):
        # i is iteration, data = 1 mini batch of images, targets = 1 mini batch target values
        # This repeats for all mini batches 
        # print(targets)
        targets = targets.to(device)
        outputs = model.forward(data) # Forward pass
        loss_val = loss(outputs, targets) # Loss computation
        # print(targets)
        optimizer.zero_grad()  # Ensures gradients stored in optimizer are reset before each backward pass
        loss_val.backward() # Backward pass
        optimizer.step() # Backward pass

        if ps and i % ps == 0:
            model.eval()
            # Puts model in evaluation mode, so we 
            with torch.no_grad():
                print(f"Loss is {loss_val}")
                predicted = outputs.max(1)[1]
                correct = (predicted == targets).sum().item()
                accuracy = correct/len(targets)
                print(f"Train accuracy is {accuracy*100:.3f}%")
def accuracy(model, test):
    # Evaluate a model given a test loader
    model.eval()
    with torch.no_grad():
        count = 0
        correct = 0
        for data, targets in iter(test):
            targets = targets.to(device)
            outputs = model.forward(data)
            predicted = outputs.max(1)[1] # Maximum output is predicted class
            count += len(targets) # Total length of datasetS
            correct += (predicted == targets).sum().item()
            # This gives a tensor of True and False values and adds no. of True values to correct each iteration
        # print((predicted == targets).sum().item())
        accuracy = correct/count
        return accuracy

In [81]:
basic_cnn = BasicCNN()
cnn_optimizer = torch.optim.Adam(basic_cnn.parameters(), lr=2e-3)
loss_fn = nn.CrossEntropyLoss()

In [82]:
for train_index, val_index in KFold(n_splits=5).split(df['drawing'], df['word']):
    df_train = df.loc[train_index].reset_index().drop('index', axis=1)
    df_val = df.loc[val_index].reset_index().drop('index', axis=1)
    break
train_dataset = MyDataset(df_train['drawing'], df_train['word'])

In [83]:
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

In [84]:
# print(next(iter(train_loader)))

In [85]:
# train_loader.targets.loc[0]
df_train

Unnamed: 0,drawing,key_id,word,recognized
0,"[[[255, 137, 0], [0, 82, 214]]]",5940930873917440,lightning,True
1,"[[[24, 70, 75, 74, 66, 26, 163, 255, 228, 211,...",6284814711783424,pillow,False
2,"[[[3, 3, 37, 110, 146, 120, 53, 31, 0], [53, 2...",5973976620728320,lightning,False
3,"[[[80, 72, 51, 36, 19, 6, 1, 0, 9, 16, 34, 82,...",6722227842056192,apple,True
4,"[[[1, 27, 57, 99, 136, 196, 175, 149, 116, 98,...",5491753161326592,pillow,False
...,...,...,...,...
7995,"[[[69, 71, 80], [129, 176, 255]], [[82, 77, 76...",5642310320652288,sword,False
7996,"[[[13, 0], [99, 155]], [[83, 66], [117, 184]],...",6698949417107456,bed,False
7997,"[[[74, 55, 49, 50, 59, 65, 74, 102, 121, 159, ...",6553356954763264,elephant,False
7998,"[[[15, 8, 1], [22, 43, 100]], [[7, 24, 15], [6...",6261791757697024,helmet,False


In [86]:
# train(basic_cnn, train_loader, cnn_optimizer, loss_fn)
# torch.save(basic_cnn.state_dict(), 'first_model.pt')

In [87]:
val_set = MyDataset(df_val['drawing'], df_val['word'])
val_loader = DataLoader(val_set, batch_size=128)
# print(accuracy(basic_cnn, val_loader))/

In [88]:
dft = pd.read_csv('doodle-detectives-aiclubiitm/test.csv')#, dtype={'drawing': np.array})
# dft['drawing'] = dft['drawing'].map(vector_to_numpy)


In [89]:
test_dataset = MyDataset(dft['drawing'], train=False, ids=dft['key_id'])
test_loader = DataLoader(test_dataset, batch_size=128, drop_last=False)

In [90]:
vgg16_model = VGG16()
vgg16_model.to(device)
vgg16_optim = torch.optim.Adam(vgg16_model.parameters(), lr=2e-3)
vgg_loss = nn.CrossEntropyLoss().to(device)

In [91]:
train(vgg16_model, train_loader, vgg16_optim, vgg_loss)

Loss is 4.616662979125977
Train accuracy is 0.781%


KeyboardInterrupt: 

In [None]:
vgg16_model.predict(test_loader)

In [None]:
print(accuracy(vgg16_model, val_loader))

0.0115


In [None]:
basic_cnn_2 = BasicCNN()
basic_cnn_2.load_state_dict(torch.load('first_model.pt'))
basic_cnn_2.predict(test_loader)
basic_cnn_3 = BasicCNN()
# train(basic_cnn_3, train_loader, cnn_optimizer, loss_fn, ps=2)

# with torch.no_grad():
#     print(basic_cnn_2.forward(next(iter(test_loader))[0]))
# # basic_cnn.eval()
# with torch.no_grad():
#     count = 0
#     correct = 0
#     for data, targets in iter(dataloader):
#         outputs = model.forward(data)
#         predicted = outputs.max(1)[1] # Maximum output is predicted class
#         count += len(targets) # Total length of datasetS
#         correct += (predicted == targets).sum().item()
#         # This gives a tensor of True and False values and adds no. of True values to correct each iteration
#     print((predicted == targets).sum().item())
#     accuracy = correct/count
#     return accuracy


KeyboardInterrupt: 

In [None]:
# basic_cnn_3.predict(test_loader)

KeyboardInterrupt: 