In [1]:
import numpy as np
from sklearn.metrics import f1_score
import pandas as pd

Load MNIST data, including rotated one

In [2]:
data = pd.read_csv("../data/Mnist/mnist_train.csv")
test_data = pd.read_csv("../data/Mnist/mnist_test.csv")
test_rotated_data = pd.read_csv("../data/Mnist/mnist_test_rotated.csv")

In [3]:
from numpy import float32
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.functional import relu

# Define your CNN model
class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.linear1 = nn.Linear(in_features = 784, out_features = 250)
        self.linear2 = nn.Linear(in_features = 250, out_features = 100)
        self.linear3 = nn.Linear(in_features = 100, out_features = 10)

    def forward(self, x):
        x = relu(self.linear1(x))
        x = relu(self.linear2(x))
        x = self.linear3(x)
        return x

# Define a custom dataset class
class ImageDFDataset(Dataset):
    def __init__(self, df:pd.DataFrame):
        self.images = df.drop(columns= 'label').to_numpy(float32)
        self.labels = df['label'].to_numpy()

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx:int):
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

In [4]:
# Convert the pandas DataFrame into a PyTorch dataset
dataset = ImageDFDataset(data)

# Create a dataloader for batching and shuffling the data
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Create an instance of your CNN model
model = CNNClassifier()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs, labels = batch

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the loss for monitoring the training progress
        print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

Epoch: 1, Loss: 11.738248825073242
Epoch: 1, Loss: 15.029151916503906
Epoch: 1, Loss: 10.117426872253418
Epoch: 1, Loss: 9.983976364135742
Epoch: 1, Loss: 8.015982627868652
Epoch: 1, Loss: 4.279629230499268
Epoch: 1, Loss: 4.565290451049805
Epoch: 1, Loss: 2.3067567348480225
Epoch: 1, Loss: 2.7459073066711426
Epoch: 1, Loss: 4.46047830581665
Epoch: 1, Loss: 3.579508066177368
Epoch: 1, Loss: 2.2302181720733643
Epoch: 1, Loss: 1.0562212467193604
Epoch: 1, Loss: 1.3400907516479492
Epoch: 1, Loss: 2.1255152225494385
Epoch: 1, Loss: 1.6428207159042358
Epoch: 1, Loss: 1.2575607299804688
Epoch: 1, Loss: 1.2997863292694092
Epoch: 1, Loss: 1.7129155397415161
Epoch: 1, Loss: 1.116442322731018
Epoch: 1, Loss: 1.4313509464263916
Epoch: 1, Loss: 0.8922855854034424
Epoch: 1, Loss: 0.43140387535095215
Epoch: 1, Loss: 1.028207778930664
Epoch: 1, Loss: 1.794409990310669
Epoch: 1, Loss: 0.6433624029159546
Epoch: 1, Loss: 1.1504825353622437
Epoch: 1, Loss: 0.9220772981643677
Epoch: 1, Loss: 0.47056728601

KeyboardInterrupt: 

In [None]:
# Convert the pandas DataFrame into a PyTorch dataset
test_dataset = ImageDFDataset(test_data)

# Create a dataloader for batching and shuffling the data
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [None]:
# prepare to count predictions for each class
classes = tuple([x for x in range(0,10,1)])
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1

# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname} is {accuracy:.1f} %')

Accuracy for class: 0 is 98.0 %
Accuracy for class: 1 is 98.1 %
Accuracy for class: 2 is 96.2 %
Accuracy for class: 3 is 96.6 %
Accuracy for class: 4 is 93.9 %
Accuracy for class: 5 is 95.7 %
Accuracy for class: 6 is 97.9 %
Accuracy for class: 7 is 97.4 %
Accuracy for class: 8 is 97.5 %
Accuracy for class: 9 is 97.2 %


In [None]:
# Convert the pandas DataFrame into a PyTorch dataset
test_rotated_dataset = ImageDFDataset(test_rotated_data)

# Create a dataloader for batching and shuffling the data
test_dataloader = DataLoader(test_rotated_dataset, batch_size=32, shuffle=True)

In [None]:
# prepare to count predictions for each class
classes = tuple([x for x in range(0,10,1)])
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in test_dataloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1

# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname} is {accuracy:.1f} %')

Accuracy for class: 0 is 51.9 %
Accuracy for class: 1 is 0.1 %
Accuracy for class: 2 is 10.0 %
Accuracy for class: 3 is 6.3 %
Accuracy for class: 4 is 3.1 %
Accuracy for class: 5 is 3.4 %
Accuracy for class: 6 is 19.6 %
Accuracy for class: 7 is 4.1 %
Accuracy for class: 8 is 20.9 %
Accuracy for class: 9 is 8.0 %


In [None]:
torch.save(model,"CNN_base.torch")

In [6]:
def unpickle(file:str):
    import pickle
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

def cifarToDf (path:str = "../data/cifar-10-python\data_batch_1"):
    cifar = unpickle(path)
    cifar_df = pd.DataFrame(cifar[b'data'])
    cifar_df['label'] = cifar[b'labels']

    return cifar_df

In [7]:
cifarToDf()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3063,3064,3065,3066,3067,3068,3069,3070,3071,label
0,59,43,50,68,98,119,139,145,149,149,...,58,65,59,46,57,104,140,84,72,6
1,154,126,105,102,125,155,172,180,142,111,...,42,67,101,122,133,136,139,142,144,9
2,255,253,253,253,253,253,253,253,253,253,...,83,80,69,66,72,79,83,83,84,9
3,28,37,38,42,44,40,40,24,32,43,...,39,59,42,44,48,38,28,37,46,4
4,170,168,177,183,181,177,181,184,189,189,...,88,85,82,83,79,78,82,78,80,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,18,18,21,23,24,23,24,25,22,19,...,24,33,41,45,39,39,39,34,37,3
9996,235,240,249,253,254,253,254,254,179,108,...,89,113,139,163,174,179,182,181,181,9
9997,71,60,74,73,87,125,179,231,250,252,...,70,67,66,70,81,78,68,69,68,1
9998,250,254,211,64,61,60,55,54,55,59,...,134,132,130,127,132,133,215,255,254,1


55.42562584220407

In [8]:
from numpy import float32
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torch.nn.functional import relu

# Define your CNN model
class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        self.linear1 = nn.Linear(in_features = 784, out_features = 250)
        self.linear2 = nn.Linear(in_features = 250, out_features = 100)
        self.linear3 = nn.Linear(in_features = 100, out_features = 10)

    def forward(self, x):
        x = relu(self.linear1(x))
        x = relu(self.linear2(x))
        x = self.linear3(x)
        return x

# Define a custom dataset class
class ImageDFDataset(Dataset):
    def __init__(self, df:pd.DataFrame):
        self.images = df.drop(columns= 'label').to_numpy(float32)
        self.labels = df['label'].to_numpy()

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx:int):
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

In [9]:
# Convert the pandas DataFrame into a PyTorch dataset
dataset = ImageDFDataset(data)

# Create a dataloader for batching and shuffling the data
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Create an instance of your CNN model
model = CNNClassifier()

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 10

# Training loop
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs, labels = batch

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the loss for monitoring the training progress
        print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

Epoch: 1, Loss: 11.591334342956543
Epoch: 1, Loss: 11.382132530212402
Epoch: 1, Loss: 6.035768508911133
Epoch: 1, Loss: 5.080824375152588
Epoch: 1, Loss: 4.796090126037598
Epoch: 1, Loss: 3.1108062267303467
Epoch: 1, Loss: 3.3209681510925293
Epoch: 1, Loss: 3.955085277557373
Epoch: 1, Loss: 3.001190185546875
Epoch: 1, Loss: 1.8426002264022827
Epoch: 1, Loss: 2.727224111557007
Epoch: 1, Loss: 2.3276119232177734
Epoch: 1, Loss: 2.358344316482544
Epoch: 1, Loss: 0.875190794467926
Epoch: 1, Loss: 1.5460623502731323
Epoch: 1, Loss: 2.2510955333709717
Epoch: 1, Loss: 1.100217580795288
Epoch: 1, Loss: 0.7142276167869568
Epoch: 1, Loss: 0.8241528272628784
Epoch: 1, Loss: 0.6280162334442139
Epoch: 1, Loss: 1.7715513706207275
Epoch: 1, Loss: 0.7781990766525269
Epoch: 1, Loss: 0.8507561683654785
Epoch: 1, Loss: 1.185301661491394
Epoch: 1, Loss: 1.2569687366485596
Epoch: 1, Loss: 0.68179851770401
Epoch: 1, Loss: 0.22071276605129242
Epoch: 1, Loss: 0.5721359252929688
Epoch: 1, Loss: 0.3771985769271

KeyboardInterrupt: 

In [None]:
import torchvision
c = torchvision.datasets.CIFAR10

In [None]:
c._check_integrity

<function torchvision.datasets.cifar.CIFAR10._check_integrity(self) -> bool>