In [1]:
import time
import copy
import random
import glob

import cv2
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision

from tqdm import tqdm
import matplotlib.pyplot as plt

In [2]:
# Load the training digits. Expected layout: one sub-directory per class
# ('0' .. '9') under `path`, each containing grayscale PNG crops.
path = './digits2/train/'

# Glob every class directory exactly once and sort the result so the sample
# order does not depend on the filesystem's listing order.
train_files = [
    (p, digit)
    for digit in range(10)
    for p in sorted(glob.glob(path + str(digit) + '/*.png'))
]
le = len(train_files)
if le == 0:
    raise FileNotFoundError(f'no PNG files found under {path}')

# Storage is pre-allocated for single-channel images of shape (12, 10);
# an image with any other shape fails loudly on assignment below.
x_train = torch.zeros((le, 1, 12, 10), dtype=torch.float32)
y_train = torch.zeros(le, dtype=torch.long)

for idx, (p, digit) in enumerate(train_files):
    img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports an unreadable/corrupt file by returning None
        # instead of raising, so turn that into an explicit error here.
        raise OSError(f'could not read image: {p}')

    # Scale pixel intensities from [0, 255] down to [0, 1].
    img = np.true_divide(img, 255.0)

    x_train[idx, 0] = torch.tensor(img)
    y_train[idx] = digit

In [3]:
# Load the held-out test digits. Expected layout: one sub-directory per class
# ('0' .. '9') under `path`, each containing grayscale PNG crops.
path = './digits2/test/'

# Glob every class directory exactly once and sort the result so the sample
# order does not depend on the filesystem's listing order.
test_files = [
    (p, digit)
    for digit in range(10)
    for p in sorted(glob.glob(path + str(digit) + '/*.png'))
]
le = len(test_files)
if le == 0:
    raise FileNotFoundError(f'no PNG files found under {path}')

# Storage is pre-allocated for single-channel images of shape (12, 10);
# an image with any other shape fails loudly on assignment below.
x_test = torch.zeros((le, 1, 12, 10), dtype=torch.float32)
y_test = torch.zeros(le, dtype=torch.long)

for idx, (p, digit) in enumerate(test_files):
    img = cv2.imread(p, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports an unreadable/corrupt file by returning None
        # instead of raising, so turn that into an explicit error here.
        raise OSError(f'could not read image: {p}')

    # Scale pixel intensities from [0, 255] down to [0, 1].
    img = np.true_divide(img, 255.0)

    x_test[idx, 0] = torch.tensor(img)
    y_test[idx] = digit

In [4]:
# Sanity check: show the shape of every tensor built above, in the order
# train inputs, test inputs, train labels, test labels.
for split_tensor in (x_train, x_test, y_train, y_test):
    print(split_tensor.shape)

torch.Size([47, 1, 12, 10])
torch.Size([33, 1, 12, 10])
torch.Size([47])
torch.Size([33])


In [5]:
# Default to the CPU and switch to the GPU only when CUDA is usable.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

In [6]:
class Net(torch.nn.Module):
    """Small two-stage CNN mapping a batch of images to per-class logits.

    Pipeline: conv(2x2) -> batch norm -> ReLU -> 2x2 max pool ->
    conv(2x2) -> batch norm -> ReLU -> adaptive (1x1) max pool -> linear.

    The final pooling step reduces every feature map to a single value, so
    the linear layer always receives 64 features per sample.

    Args:
        in_channels: number of channels of the input images (default 1).
        num_classes: number of logits produced per sample (default 10).
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()

        # Layers are created in a fixed order so parameter registration
        # (and therefore state_dict key order) stays stable.
        self.conv1 = nn.Conv2d(in_channels, 32, 2)
        self.bn1 = nn.BatchNorm2d(32)

        self.conv2 = nn.Conv2d(32, 64, 2)
        self.bn2 = nn.BatchNorm2d(64)

        # NOTE: despite the attribute name this is a *max* pool, not an
        # average pool. The name is kept so code that accesses `avgpool`
        # keeps working.
        self.avgpool = nn.AdaptiveMaxPool2d(output_size=(1, 1))

        self.fc = nn.Linear(64, num_classes)
        self.act = nn.ReLU()
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        """Return raw (un-normalised) class logits for the batch `x`.

        Args:
            x: tensor of shape (batch, in_channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.act(x)
        x = self.pool(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.act(x)
        x = self.avgpool(x)

        # (batch, 64, 1, 1) -> (batch, 64)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

In [7]:
class DS(torch.utils.data.Dataset):
    """Map-style dataset that pairs `x[i]` with `y[i]`.

    Args:
        x: indexable collection of samples.
        y: indexable collection of labels, same length as `x`.

    Raises:
        ValueError: if `x` and `y` do not have the same length.
    """

    def __init__(self, x, y):
        # Fail at construction time rather than with an IndexError (or with
        # silently unused labels) in the middle of an epoch.
        if len(x) != len(y):
            raise ValueError(
                f'x and y must have the same length, got {len(x)} and {len(y)}'
            )
        self.x = x
        self.y = y

    def __getitem__(self, index):
        """Return the `(sample, label)` pair stored at `index`."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

In [8]:
# Wrap the in-memory tensors as datasets.
train_dataset = DS(x_train, y_train)
test_dataset = DS(x_test, y_test)

# num_workers=0: the data already lives in memory, so worker processes only
# add start-up cost on every epoch; they can also fail in notebooks on
# platforms that start workers with "spawn", because a Dataset class defined
# in a notebook cell may not be importable from the worker.
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=4, num_workers=0, shuffle=True)
# Shuffling gives no benefit for evaluation; keep the test order deterministic.
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=4, num_workers=0, shuffle=False)

In [9]:
# Seed torch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable across "Restart Kernel -> Run All". Some CUDA kernels may
# still be non-deterministic.
torch.manual_seed(0)

model = Net().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0146)
# CrossEntropyLoss consumes raw logits together with integer class indices.
criteria = nn.CrossEntropyLoss()

In [10]:
# Put the model in training mode explicitly: if this cell is re-run after the
# model was switched to eval mode, BatchNorm would otherwise keep using its
# running statistics instead of batch statistics.
model.train()

for e in range(96):
    avg_loss = 0
    for imgs, labels in train_dataloader:
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        imgs = imgs.to(device)
        labels = labels.to(device)

        output = model(imgs)

        loss = criteria(output, labels)
        avg_loss += loss.item()

        loss.backward()
        optimizer.step()

    # Report the mean of the per-batch losses for this epoch.
    print(f'{e} - {round(avg_loss / len(train_dataloader), 5)}')

0 - 3.00963
1 - 2.14787
2 - 1.2614
3 - 0.80654
4 - 0.53728
5 - 0.3269
6 - 0.3044
7 - 0.17165
8 - 0.12322
9 - 0.05366
10 - 0.03847
11 - 0.04301
12 - 0.04559
13 - 0.04108
14 - 0.02565
15 - 0.03066
16 - 0.02364
17 - 0.01847
18 - 0.0244
19 - 0.01012
20 - 0.01559
21 - 0.00631
22 - 0.00437
23 - 0.00591
24 - 0.00324
25 - 0.0027
26 - 0.00445
27 - 0.00278
28 - 0.00185
29 - 0.00177
30 - 0.008
31 - 0.0026
32 - 0.0049
33 - 0.01243
34 - 0.01067
35 - 0.006
36 - 0.01344
37 - 0.00786
38 - 0.00727
39 - 0.00434
40 - 0.00376
41 - 0.00399
42 - 0.00341
43 - 0.0012
44 - 0.00167
45 - 0.0016
46 - 0.00105
47 - 0.00132
48 - 0.00139
49 - 0.00141
50 - 0.00086
51 - 0.00079
52 - 0.00048
53 - 0.00078
54 - 0.00082
55 - 0.00129
56 - 0.00566
57 - 0.00383
58 - 0.00147
59 - 0.00153
60 - 0.00089
61 - 0.00378
62 - 0.00148
63 - 0.00139
64 - 0.00054
65 - 0.00169
66 - 0.0044
67 - 0.01647
68 - 0.02885
69 - 0.30065
70 - 0.55125
71 - 0.82748
72 - 0.51953
73 - 0.18739
74 - 0.55689
75 - 0.05049
76 - 0.05685
77 - 0.05378
78 - 0.053

In [11]:
# Move the model to the CPU (and switch to inference mode) *before* saving,
# so the checkpoint holds CPU tensors and can be loaded on a machine without
# CUDA without needing `map_location`.
model = model.cpu().eval()
torch.save(model.state_dict(), 'digit_classifier.pt')

In [12]:
from sklearn import metrics

In [13]:
# Collect the true and predicted digit for every test image.
y_test_real = []
y_test_pred = []

# Make sure BatchNorm uses its running statistics even if this cell is run
# without the preceding one.
model.eval()

# Inference only: disable autograd so no computation graph is built per sample.
with torch.no_grad():
    for img, label in test_dataset:
        # Add a leading batch dimension before the forward pass.
        r = model(img.unsqueeze(0))
        # Index of the largest logit is the predicted class.
        r = torch.argmax(r)

        y_test_real.append(label.item())
        y_test_pred.append(r.item())

In [14]:
# Fraction of test images whose predicted digit matches the true label.
metrics.accuracy_score(y_true=y_test_real, y_pred=y_test_pred)

1.0

In [15]:
# Per-class precision / recall / F1; the report is a multi-line string, so it
# is printed rather than shown as a repr.
print(metrics.classification_report(y_true=y_test_real, y_pred=y_test_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         4
           2       1.00      1.00      1.00         3
           3       1.00      1.00      1.00         4
           4       1.00      1.00      1.00         3
           5       1.00      1.00      1.00         3
           6       1.00      1.00      1.00         4
           7       1.00      1.00      1.00         3
           8       1.00      1.00      1.00         2
           9       1.00      1.00      1.00         4

    accuracy                           1.00        33
   macro avg       1.00      1.00      1.00        33
weighted avg       1.00      1.00      1.00        33



In [16]:
# Rows are true digits, columns are predicted digits.
print(metrics.confusion_matrix(y_true=y_test_real, y_pred=y_test_pred))

[[3 0 0 0 0 0 0 0 0 0]
 [0 4 0 0 0 0 0 0 0 0]
 [0 0 3 0 0 0 0 0 0 0]
 [0 0 0 4 0 0 0 0 0 0]
 [0 0 0 0 3 0 0 0 0 0]
 [0 0 0 0 0 3 0 0 0 0]
 [0 0 0 0 0 0 4 0 0 0]
 [0 0 0 0 0 0 0 3 0 0]
 [0 0 0 0 0 0 0 0 2 0]
 [0 0 0 0 0 0 0 0 0 4]]
