In [None]:
import torchvision.transforms.functional as TF
import torch.nn.functional as F
import os
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm 
from sklearn import metrics
from PIL import Image
from torch import nn 
from torch.utils.data import Dataset, DataLoader 

In [None]:
# --- Experiment configuration ------------------------------------------------
img_type = "png"        # value stored in the "type" column of the image index
batch_size = 8          # samples per mini-batch for the data loaders
num_epochs = 10         # full passes over the training set
learning_rate = 1e-03   # step size handed to the optimizer
classes = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # display name for each class index

# Seed the random number generators so weight initialisation and batch
# shuffling are repeatable across "Restart Kernel -> Run All" executions.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
def getImages(path, image_type=None):
    """Index an image folder laid out as ``<path>/<class>/<image file>``.

    Args:
        path: Directory (absolute, or relative to the current working
            directory) whose sub-directories are named after integer class ids.
        image_type: Value stored in the ``type`` column of every row. When
            omitted, the notebook-level ``img_type`` setting is used.

    Returns:
        pandas.DataFrame with the columns ``path`` (``path`` joined with the
        class folder and the file name), ``type`` and ``class`` (the folder
        name converted to ``int``). There is one row per file, ordered by
        class folder name and then by file name.

    Raises:
        ValueError: If a sub-directory name cannot be converted to ``int``.
    """
    if image_type is None:
        image_type = img_type

    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the index
    # identical from run to run.
    for _class in sorted(os.listdir(path)):
        # os.path.join picks the right separator for the host OS instead of
        # hard-coding the Windows-only "\\".
        img_dir = os.path.join(path, _class)
        if not os.path.isdir(img_dir):
            # Stray files next to the class folders are not classes.
            continue
        label = int(_class)
        for img in sorted(os.listdir(img_dir)):
            images.append([os.path.join(img_dir, img), image_type, label])

    return pd.DataFrame(images, columns=["path", "type", "class"])

# Build the dataset locations with os.path.join so the path literals do not
# embed the Windows-only "\\" separator.
train_images = getImages(os.path.join("dataset", "mnist_png", "training"))
test_images = getImages(os.path.join("dataset", "mnist_png", "testing"))

# Last expression of the cell: rich display of the training index.
train_images

In [None]:
class Img(Dataset):
    """Dataset that loads the images listed in a DataFrame on demand.

    The frame must provide a ``path`` column (image file location) and a
    ``class`` column (label). Items are addressed by row position.
    """

    def __init__(self, img_df):
        """Keep a reference to the image index ``img_df``."""
        self.img_df = img_df

    def __len__(self):
        """Return the number of rows (images) in the index."""
        return len(self.img_df)

    def __getitem__(self, idx):
        """Return ``(image_tensor, label)`` for the row at position ``idx``."""
        img_path = self.img_df['path'].iloc[idx]
        # The context manager releases the file handle as soon as the pixel
        # data has been converted, instead of leaving it to garbage collection.
        with Image.open(img_path) as image:
            image_tensor = TF.to_tensor(image)

        label = self.img_df['class'].iloc[idx]

        return image_tensor, label

# Wrap both image indexes in datasets.
train_img = Img(img_df=train_images)
test_img = Img(img_df=test_images)

# Only the training data is shuffled. Accuracy and the confusion matrix do not
# depend on sample order, so the test loader keeps a fixed, repeatable order.
train_loader = DataLoader(dataset=train_img, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_img, batch_size=batch_size, shuffle=False)

In [None]:
class LeNet(nn.Module):
    """LeNet-style CNN for single-channel images.

    Two 5x5 convolutions, each followed by 2x2 max pooling, feed a hidden
    linear layer with ReLU and a final linear layer producing 10 scores.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head. For a 28x28 input the second pooling stage leaves
        # 16 feature maps of 4x4, hence 16 * 4 * 4 input features.
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=10)

    def forward(self, x):
        """Map a batch of images ``x`` to unnormalised class scores."""
        pooled = F.max_pool2d(self.conv1(x), kernel_size=2)
        pooled = F.max_pool2d(self.conv2(pooled), kernel_size=2)
        # Keep the batch dimension and flatten everything after it.
        hidden = F.relu(self.fc1(pooled.flatten(start_dim=1)))
        return self.fc2(hidden)

# Instantiate the network on the selected device, together with the
# classification loss and the Adam optimizer that updates its weights.
model = LeNet()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
def predictDataset(loader, model, savefig=False):
    """Run ``model`` over every batch of ``loader`` and collect the results.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: ``nn.Module`` producing one score per class along dimension 1.
        savefig: Unused; kept so existing calls keep working.

    Returns:
        Tuple ``(actual, prediction)`` of 1-D NumPy arrays holding the labels
        from the loader and the predicted class indices, in iteration order.
        The arrays keep the dtype of the underlying tensors, so integer labels
        stay integers. Both arrays are empty when ``loader`` yields nothing.

    The model is evaluated in eval mode under ``torch.no_grad()`` and is put
    back into whichever mode (train/eval) it was in before the call.
    """
    # Feed the batches to the device that holds the model's weights; only a
    # parameterless model falls back to the notebook-level ``device``.
    try:
        target_device = next(model.parameters()).device
    except StopIteration:
        target_device = device

    was_training = model.training
    model.eval()

    # Collect per-batch arrays and concatenate once at the end: np.append
    # copies the whole array on every call, which is quadratic overall.
    actual_batches = []
    prediction_batches = []
    try:
        with torch.no_grad():
            for x, y in loader:
                scores = model(x.to(device=target_device))
                _, predictions = scores.max(1)
                actual_batches.append(y.cpu().numpy())
                prediction_batches.append(predictions.cpu().numpy())
    finally:
        # Do not leak eval mode into a training loop that runs afterwards.
        model.train(was_training)

    if not actual_batches:
        return np.array([]), np.array([])

    return np.concatenate(actual_batches), np.concatenate(prediction_batches)

# Labels and predictions on the training set from the model as it stands at
# this point in the notebook; the pair is shown as the cell output.
actual, prediction = predictDataset(loader=train_loader, model=model)
actual, prediction

In [None]:
def drawConfusionMatrix(actual, prediction, normalize=True, savefig=False):
    """Plot the confusion matrix of ``prediction`` against ``actual``.

    Args:
        actual: 1-D array-like of true labels.
        prediction: 1-D array-like of predicted labels, same length.
        normalize: When True, each row is divided by its total, so a cell
            holds the fraction of that true class assigned to each predicted
            class. When False, raw counts are shown.
        savefig: When True, the figure is also written to
            ``confusion_matrix.png`` in the current working directory.
    """
    confusion_matrix = metrics.confusion_matrix(actual, prediction)

    if normalize:
        row_totals = confusion_matrix.sum(axis=1, keepdims=True)
        # A label that only occurs in ``prediction`` has a zero row total;
        # leave that row at 0 instead of dividing by zero and plotting NaN.
        confusion_matrix = np.divide(
            confusion_matrix.astype('float'),
            row_totals,
            out=np.zeros(confusion_matrix.shape, dtype=float),
            where=row_totals != 0,
        )

    cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confusion_matrix)
    cm_display.plot()

    # Save before plt.show(): once the figure has been shown, pyplot no longer
    # has it as the current figure and plt.savefig would write an empty image.
    if savefig:
        cm_display.figure_.savefig('confusion_matrix.png')

    plt.show()
        
# Row-normalised confusion matrix for the predictions collected above.
drawConfusionMatrix(actual=actual, prediction=prediction, normalize=True)

In [None]:
def displayImgs(imgs, labels, classes, n, save=False):
    """Show up to 32 images in a 4 x 8 grid, each titled with its class name.

    Args:
        imgs: Batch of image tensors. Each item must reduce to a 2-D image
            once size-1 dimensions are dropped, e.g. ``(1, H, W)``.
        labels: Integer class index per image (e.g. a 1-D tensor).
        classes: Sequence mapping a class index to the title to display.
        n: Number of images to show; capped at 32, the capacity of the grid.
        save: When True, the figure is also written to ``fig.png`` in the
            current working directory.
    """
    n = min(n, 32)
    fig = plt.figure(figsize=(20, 11))
    for i, tensor in enumerate(imgs[:n]):
        # Drop the channel axis and bring the data to host memory so that
        # matplotlib can draw it, whatever the image size or device.
        img = tensor.detach().cpu().squeeze().numpy()
        ax = fig.add_subplot(4, 8, i + 1)
        ax.title.set_text(classes[labels[i]])
        # Draw on this subplot explicitly rather than on pyplot's current axes.
        ax.imshow(img)
    fig.subplots_adjust(wspace=0.2)
    # Save before plt.show(): a figure saved after being shown can come out
    # blank, depending on the backend.
    if save:
        fig.savefig('fig.png')
    plt.show()

# Take only the first batch from the training loader and display its images,
# titled with the classes the model predicts for them.
with torch.no_grad():
    first_batch = next(iter(train_loader), None)
    if first_batch is not None:
        x, y = first_batch
        x, y = x.to(device=device), y.to(device=device)
        x_ = model(x)

        # Index of the highest score per sample.
        model_pred = x_.max(1)[1]

        displayImgs(x, model_pred, classes=classes, n=batch_size)

In [None]:
def evaluateTorch(loader, model):
    """Return the overall accuracy of ``model`` on ``loader``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` tensor batches.
        model: ``nn.Module`` producing one score per class along dimension 1.

    Returns:
        Fraction of samples whose highest-scoring class equals the label, as
        a Python ``float``. ``nan`` when ``loader`` yields no samples.

    The model is evaluated in eval mode under ``torch.no_grad()`` and is
    always switched back to training mode afterwards, because the training
    loop calls this function at the end of every epoch.
    """
    # Feed the batches to the device that holds the model's weights; only a
    # parameterless model falls back to the notebook-level ``device``.
    try:
        target_device = next(model.parameters()).device
    except StopIteration:
        target_device = device

    correct = 0
    total = 0
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=target_device)
                y = y.to(device=target_device)

                scores = model(x)
                _, model_pred = scores.max(1)

                # Running counts avoid holding every batch in memory and keep
                # the final ratio in double precision.
                correct += (model_pred == y).sum().item()
                total += y.numel()
    finally:
        model.train()

    if total == 0:
        # Accuracy is undefined without samples; torch.cat on an empty list
        # used to raise here.
        return float("nan")

    return correct / total

In [None]:
accuracy = []   # test-set accuracy after each epoch
epochs = []     # matching epoch indices, used as the x-axis of the plot
for epoch in range(num_epochs):
    # Earlier evaluation cells may have left the model in eval mode; training
    # must always run in training mode.
    model.train()

    batch_loss = []
    loop = tqdm(
        enumerate(train_loader),
        total=len(train_loader),
        desc=f"Epoch[{epoch}/{num_epochs - 1}]",
    )
    for batch_idx, (data, targets) in loop:
        data = data.to(device=device)
        targets = targets.to(device=device)

        # Forward pass and loss.
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass: clear stale gradients, backpropagate, update weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss.append(loss.item())

    accuracy.append(evaluateTorch(test_loader, model))
    epochs.append(epoch)

    # Report the mean loss over the epoch; the loss of the last mini-batch
    # alone is too noisy to be informative.
    print("loss: ", float(np.mean(batch_loss)) if batch_loss else float("nan"))
    print("acc: ", accuracy[-1])

# Accuracy on the test set as a function of the epoch.
fig, ax = plt.subplots()
ax.plot(epochs, accuracy, label='accuracy')
ax.set_ylim(0, 1.1)
ax.set_xlabel("epoch")
ax.set_ylabel("accuracy")
ax.set_title("Overall accuracy")
ax.legend()
plt.show()

In [None]:
# Labels and predictions on the test set, followed by the row-normalised
# confusion matrix built from them.
actual, prediction = predictDataset(loader=test_loader, model=model)

drawConfusionMatrix(actual=actual, prediction=prediction, normalize=True)

In [None]:
# Fetch a single batch from the training loader and show its images with the
# model's predicted classes as titles.
with torch.no_grad():
    first_batch = next(iter(train_loader), None)
    if first_batch is not None:
        x, y = first_batch
        x = x.to(device=device)
        y = y.to(device=device)
        x_ = model(x)

        # Position of the largest score in each row is the predicted class.
        model_pred = x_.max(1)[1]

        displayImgs(x, model_pred, classes=classes, n=batch_size)