## Import Data

In [15]:
import pickle
import torch
import torch.nn as nn
from datetime import datetime 
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

## Hyper Parameters

In [3]:
learning_rate = 0.001
epochs = 100
batch_size = 15

## Setup Code

In [4]:
TRAIN_IMAGE_PATH = "images_l.pkl"
TRAIN_LABEL_PATH = "labels_l.pkl"

with open(TRAIN_IMAGE_PATH, 'rb') as f: 
    training_images = pickle.load(f)

with open(TRAIN_LABEL_PATH, 'rb') as f: 
    training_labels = pickle.load(f)

In [5]:
training_batch = int(0.8 * training_images.shape[0])
validation_batch = training_images.shape[0] - training_batch

In [6]:
train_X = torch.from_numpy(training_images)
train_y = torch.from_numpy(training_labels)

data = torch.utils.data.TensorDataset(train_X, train_y)

train_data, val_data = torch.utils.data.random_split(data, [training_batch, validation_batch])
trainloader = torch.utils.data.DataLoader(dataset = train_data, shuffle = True, batch_size = batch_size)
validation_loader = torch.utils.data.DataLoader(dataset = val_data, shuffle = False, batch_size = batch_size)

## CNN Architecture

In [11]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(p=0.2)
        self.batchnorm32 = nn.BatchNorm2d(32)
        self.batchnorm64 = nn.BatchNorm2d(64)
        self.batchnorm128 = nn.BatchNorm2d(128)
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 32, 3, padding='same')
        self.conv3 = nn.Conv2d(32, 32, 3, padding='same')
        self.conv4 = nn.Conv2d(32, 64, 3, padding='same')
        self.conv5 = nn.Conv2d(64, 64, 3, padding='same')
        self.conv6 = nn.Conv2d(64, 128, 3, padding='same')
        self.conv7 = nn.Conv2d(128, 128, 3, padding='same')
        self.conv8 = nn.Conv2d(128, 128, 3, padding='same')
        self.fc1 = nn.Linear(128 * 3 * 3, 144)
        self.fc2 = nn.Linear(144, 108)
        self.fc3 = nn.Linear(108, 36)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.batchnorm32(x)
        x = self.pool(F.relu(self.conv2(x)))
        x = self.batchnorm32(x)
        x = F.relu(self.conv3(x))
        x = self.batchnorm32(x)
        x = self.dropout(x)
        x = self.pool(F.relu(self.conv4(x)))
        x = self.batchnorm64(x)
        x = F.relu(self.conv5(x))
        x = self.batchnorm64(x)
        x = self.pool(F.relu(self.conv6(x)))
        x = self.batchnorm128(x)
        x = F.relu(self.conv7(x))
        x = self.pool(F.relu(self.conv8(x)))
        x = self.batchnorm128(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x

### Training the Network

In [9]:
def validate():
    correct = 0
    correctNums = 0
    correctLetters = 0
    total = 0

  # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in validation_loader:
            images, labels = data
            images = images.reshape(batch_size, 1, 56, 56)
          # calculate outputs by running images through the network
            outputs = net(images.float())
          # the class with the highest energy is what we choose as prediction
          #_, predicted = torch.max(outputs.data, 1)
            numbers = outputs.data[:, :10]
            _, predictedNums = torch.max(numbers, 1)
            _, actualNums = torch.max(labels[:, :10], 1)

            letters = outputs.data[:, 10:]
          #print(predictedNums, actualNums)

            _, predictedLetters = torch.max(letters, 1)
            _, actualLetters = torch.max(labels[:, 10:], 1)
          #print(predictedLetters, actualLetters)

            correctNums += (predictedNums == actualNums).sum().item()

            correctLetters += (predictedLetters == actualLetters).sum().item()

            total += predictedNums.shape[0]
            correct += ((predictedNums == actualNums) & (predictedLetters == actualLetters)).sum().item()

        ovr_acc = correct / total
        num_acc = correctNums / total
        let_acc = correctLetters / total

        print(f'Total Accuracy: {ovr_acc:.4f}\t'
              f'Number Accuracy: {num_acc:.4f}\t'
              f'Letter Accuracy: {let_acc:.2f}')

        final_value = [ovr_acc, num_acc, let_acc]

        return final_value

In [17]:
net = Net()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr = learning_rate)

net = net.float() 

cross_validation = {}
for epoch in range(epochs):
    running_loss = 0.0
    i = 0
    for images, labels in trainloader:
        images = images.reshape(batch_size, 1, 56, 56)
        optimizer.zero_grad()
        outputs = net(images.float())
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
        i += 1

    print(f'{datetime.now().time().replace(microsecond=0)} --- '
              f'Epoch: {epoch}')
    x = validate()
    cross_validation[epoch] = x
print("Finished Training")

02:45:34 --- Epoch: 0
Total Accuracy: 0.1435	Number Accuracy: 0.6388	Letter Accuracy: 0.23
02:48:33 --- Epoch: 1
Total Accuracy: 0.4602	Number Accuracy: 0.8192	Letter Accuracy: 0.53
02:51:26 --- Epoch: 2
Total Accuracy: 0.6723	Number Accuracy: 0.8818	Letter Accuracy: 0.72
02:53:41 --- Epoch: 3
Total Accuracy: 0.7152	Number Accuracy: 0.8912	Letter Accuracy: 0.77
02:56:31 --- Epoch: 4
Total Accuracy: 0.7738	Number Accuracy: 0.9183	Letter Accuracy: 0.81
02:58:48 --- Epoch: 5
Total Accuracy: 0.7958	Number Accuracy: 0.9195	Letter Accuracy: 0.83
03:01:44 --- Epoch: 6
Total Accuracy: 0.8045	Number Accuracy: 0.9213	Letter Accuracy: 0.84
03:04:40 --- Epoch: 7
Total Accuracy: 0.8270	Number Accuracy: 0.9255	Letter Accuracy: 0.86
03:07:39 --- Epoch: 8
Total Accuracy: 0.8417	Number Accuracy: 0.9357	Letter Accuracy: 0.87
03:10:37 --- Epoch: 9
Total Accuracy: 0.8533	Number Accuracy: 0.9383	Letter Accuracy: 0.88
03:13:37 --- Epoch: 10
Total Accuracy: 0.8573	Number Accuracy: 0.9343	Letter Accuracy: 0.8

Total Accuracy: 0.9135	Number Accuracy: 0.9578	Letter Accuracy: 0.93
06:54:28 --- Epoch: 90
Total Accuracy: 0.9155	Number Accuracy: 0.9603	Letter Accuracy: 0.93
06:57:13 --- Epoch: 91
Total Accuracy: 0.8958	Number Accuracy: 0.9468	Letter Accuracy: 0.91
06:59:48 --- Epoch: 92
Total Accuracy: 0.9102	Number Accuracy: 0.9598	Letter Accuracy: 0.92
07:02:26 --- Epoch: 93
Total Accuracy: 0.9072	Number Accuracy: 0.9557	Letter Accuracy: 0.92
07:05:09 --- Epoch: 94
Total Accuracy: 0.9115	Number Accuracy: 0.9570	Letter Accuracy: 0.92
07:07:32 --- Epoch: 95
Total Accuracy: 0.8975	Number Accuracy: 0.9550	Letter Accuracy: 0.91
07:10:24 --- Epoch: 96
Total Accuracy: 0.9078	Number Accuracy: 0.9568	Letter Accuracy: 0.92
07:12:54 --- Epoch: 97
Total Accuracy: 0.9082	Number Accuracy: 0.9570	Letter Accuracy: 0.92
07:15:45 --- Epoch: 98
Total Accuracy: 0.9062	Number Accuracy: 0.9513	Letter Accuracy: 0.92
07:18:14 --- Epoch: 99
Total Accuracy: 0.9090	Number Accuracy: 0.9570	Letter Accuracy: 0.92
Finished Tr

In [16]:
cross_validation

In [18]:
PATH = './mnist_net1.pth'
torch.save(net.state_dict(), PATH)

### Running on the Test set

In [19]:
def test():
    predictions = []
    with torch.no_grad():
        for images in testloader:
            images = images.reshape(batch_size, 1, 56, 56)
          # calculate outputs by running images through the network
            outputs = net(images.float()).data
        
            for output in outputs:
                numbers = output[:10]
                letters = output[10:]
                output[:10] = torch.where(numbers == torch.max(numbers, 0)[0], 1, 0)
                output[10:] = torch.where(letters == torch.max(letters, 0)[0], 1, 0)
                predictions.append(output)

    return predictions

In [20]:
TEST_IMAGE_PATH = "images_test.pkl"
with open(TEST_IMAGE_PATH, 'rb') as f: 
    testing_images = pickle.load(f)

test_X = torch.from_numpy(testing_images)

testloader = torch.utils.data.DataLoader(dataset = test_X, shuffle = False, batch_size = batch_size)

predictions = test()

In [22]:
import csv

pred_strings = []
for pred in predictions:
    st = "".join([str(int(x)) for x in pred.numpy()])
    pred_strings.append(st)

id = list(range(15000))
ss = list(zip(id,pred_strings))

with open('submission_log.csv', 'w', newline = '') as f:
    writer = csv.writer(f, delimiter=',')
    writer.writerow(['# Id', 'Category'])
    writer.writerows(ss)

In [22]:
values = {
    0:'0', 
    1:'1',
    2:'2', 
    3:'3', 
    4:'4',
    5:'5', 
    6:'6', 
    7:'7', 
    8:'8', 
    9:'9', 
    10:'A', 
    11:'B', 
    12:'C', 
    13:'D', 
    14:'E',
    15:'F',
    16:'G',
    17:'H',
    18:'I',
    19:'J',
    20:'K',
    21:'L',
    22:'M',
    23:'N',
    24:'O',
    25:'P',
    26:'Q',
    27:'R',
    28:'S',
    29:'T',
    30:'U',
    31:'V',
    32:'W',
    33:'X',
    34:'Y',
    35:'Z'
}
for pred in predictions[:100]:
  pred = pred.numpy().tolist()
  num = pred.index(1)
  let = pred[10:].index(1) + 10
  print(values[num], values[let])

0 T
4 L
5 J
0 M
6 J
9 Y
3 O
2 S
1 W
6 G
6 T
9 E
7 G
9 E
9 Q
5 V
8 J
7 Z
1 I
3 F
3 Q
0 I
6 Y
6 V
1 I
5 Q
6 C
6 I
4 L
8 B
7 D
6 W
2 C
0 G
4 V
4 H
0 S
4 J
5 S
8 R
4 L
3 X
4 J
1 D
8 D
8 T
6 Q
9 F
2 U
5 I
1 F
7 M
2 L
0 J
4 R
1 T
0 O
3 B
7 C
7 J
8 B
9 A
8 G
7 Y
1 B
8 R
2 L
5 N
2 J
4 J
4 D
3 X
9 J
0 Q
2 M
1 D
5 C
4 M
2 U
3 S
1 M
4 P
7 H
9 S
8 N
3 U
0 J
7 G
5 E
1 W
5 A
3 Z
7 P
1 L
0 A
2 G
4 T
5 S
8 Q
1 X
