In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

import torch
import torch.nn as nn
from torchvision import datasets
import torchvision.transforms as transforms
from torchvision.models import resnet34
from torch.utils.data import DataLoader

from sklearn.metrics import confusion_matrix, f1_score
import math
from tqdm import tqdm

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
# convert data to a normalized torch.FloatTensor
transform = transforms.Compose([    transforms.ToTensor(), transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010])
])


In [4]:
# loading the train data
batch_size = 100
#drop_last=True
train_data = datasets.CIFAR10('data', train=True,
                              download=True, transform=transform)
train_dataloader = DataLoader(train_data, batch_size=batch_size,shuffle=True )

#loading the test data
test_data = datasets.CIFAR10('data', train=False,
                             download=True, transform=transform)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True )
# You should define x_train and y_train

Files already downloaded and verified
Files already downloaded and verified


### Dense (fully connected) layer

In [5]:
class Dense:
    def __init__(self, n_inputs, n_neurons):
        # He Weight Initialization
        self.weights = torch.randn(n_inputs, n_neurons) * torch.sqrt(torch.tensor(2. / n_inputs))
        self.biases = torch.zeros((1, n_neurons))
        self.weights = self.weights.to(device)
        self.biases = self.biases.to(device)
    
    def forward(self, inputs):
        self.inputs = inputs
        return torch.matmul(self.inputs, self.weights) + self.biases

    def backward(self, output_error):
        # calculating errors
        self.inputs_error = torch.matmul(output_error, self.weights.T)
        self.weights_grad = torch.matmul(self.inputs.T, output_error)
        self.biases_grad = torch.sum(output_error, axis=0, keepdims=True)
        return self.inputs_error


### Activation Layers


In [6]:
class ReLU:
    def forward(self, inputs):
        self.inputs = inputs.clone()
        return torch.maximum(torch.zeros_like(inputs), inputs)

    def backward(self, output_error):
        self.inputs_error = output_error.clone()
        self.inputs_error[self.inputs <= 0] = 0
        return self.inputs_error


In [7]:
class Sigmoid:
    def forward(self,inputs):
        self.outputs = 1 / (1 + np.exp(-inputs))
        return self.outputs

    def backward(self,output_error):
        self.outputs_grad = output_error * (1-self.outputs) * self.outputs 
        return self.outputs_grad

In [8]:
class Softmax:
    def forward(self, inputs):
        self.inputs = inputs.clone()
        exp_inputs = torch.exp(self.inputs - torch.max(self.inputs, dim=1, keepdim=True).values)
        self.outputs = exp_inputs / torch.sum(exp_inputs, dim=1, keepdim=True)
        return self.outputs



### Loss function

In [9]:
class Categorical_Cross_Entropy_loss:
    
    def forward(self, softmax_output, class_label):
        # Apply softmax function to the output
        self.softmax_output = torch.clamp(softmax_output, 1e-12, 1. - 1e-12)
        N = softmax_output.shape[0]
        # Convert the class label to one-hot encoding
        self.class_label = torch.zeros_like(softmax_output)
        self.class_label[torch.arange(N), class_label] = 1
        # Calculate the cross-entropy loss
        self.loss = -torch.sum(self.class_label * torch.log(self.softmax_output)) / N
        return self.loss
    
    def backward(self, class_label):
        # Calculate the derivative of the loss with respect to the softmax output
        N = self.softmax_output.shape[0]
        self.d_inputs = self.softmax_output.clone()
        self.d_inputs[torch.arange(N), class_label] -= 1
        self.d_inputs = self.d_inputs / N
        return self.d_inputs


### Optimizer

In [10]:
class FactorScheduler:
    def __init__(self, factor=1, stop_factor_lr=1e-7, base_lr=0.01):
        self.factor = factor
        self.stop_factor_lr = stop_factor_lr
        self.base_lr = base_lr

    def __call__(self, num_update):
        self.base_lr = max(self.stop_factor_lr, self.base_lr * self.factor)
        return self.base_lr

In [11]:
class SGD:
    def __init__(self, learning_rate=0.01):
        self.learning_rate=learning_rate
        self.scheduler = FactorScheduler(factor=0.9, stop_factor_lr=1e-2, base_lr=learning_rate)
    
    def __call__(self, layer, num_epoch):
        device = layer.weights.device
        layer.weights = layer.weights.to(device)
        layer.biases = layer.biases.to(device)
        layer.weights -= self.scheduler(num_epoch) * layer.weights_grad.to(device)
        layer.biases -= self.scheduler(num_epoch) * layer.biases_grad.to(device)


### Architecture

In [12]:
feature_extractor = resnet34(pretrained=True)
num_features = feature_extractor.fc.in_features

for param in feature_extractor.parameters():
    param.requires_grad = False

feature_extractor.fc = nn.Identity()
feature_extractor.to(device)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [13]:
#model
Layer1 = Dense(num_features,20)
Act1 = ReLU()
Layer2 = Dense(20,10)
Act2 = Softmax()
Loss = Categorical_Cross_Entropy_loss()
Optimizer = SGD()

### Train

In [16]:


epochs = 20

for epoch in range(epochs):
    epoch_loss = 0
    epoch_accuracy = 0
    for x_train, y_train in tqdm(train_dataloader, desc=f"Epoch {epoch+1}", colour="blue"):
        # Forward pass
        x_train, y_train = x_train.to(device), y_train.to(device)
        x = feature_extractor(x_train)
        x = Layer1.forward(x)
        x = Act1.forward(x)
        x = Layer2.forward(x)
        x = Act2.forward(x)
        loss = Loss.forward(x, y_train)

        # Report batch metrics
        y_predict = torch.argmax(x, dim=1)
        accuracy = torch.mean((y_train == y_predict).float())
        epoch_loss += loss.item()
        epoch_accuracy += accuracy.item()

        # Backward pass
        x = Loss.backward(y_train)
        #x = Act2.backward(x)
        x = Layer2.backward(x)
        x = Act1.backward(x)
        x = Layer1.backward(x)

        # Update parameters
        Optimizer(Layer1, epoch)
        Optimizer(Layer2, epoch)

    # Report epoch metrics
    epoch_loss /= len(train_dataloader)
    epoch_accuracy /= len(train_dataloader)
    print(f'Epoch: {epoch+1}')
    print(f'Loss: {epoch_loss:.7f}')
    print(f'Accuracy: {epoch_accuracy:.7f}')
    print('--------------------------')


Epoch 1: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.73it/s][0m


Epoch: 1
Loss: 1.4843126
Accuracy: 0.4802200
--------------------------


Epoch 2: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 38.25it/s][0m


Epoch: 2
Loss: 1.4841807
Accuracy: 0.4837600
--------------------------


Epoch 3: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.60it/s][0m


Epoch: 3
Loss: 1.4787741
Accuracy: 0.4836400
--------------------------


Epoch 4: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:12<00:00, 38.80it/s][0m


Epoch: 4
Loss: 1.4815349
Accuracy: 0.4824000
--------------------------


Epoch 5: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:12<00:00, 38.84it/s][0m


Epoch: 5
Loss: 1.4781501
Accuracy: 0.4807800
--------------------------


Epoch 6: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.17it/s][0m


Epoch: 6
Loss: 1.4756937
Accuracy: 0.4822200
--------------------------


Epoch 7: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.81it/s][0m


Epoch: 7
Loss: 1.4730776
Accuracy: 0.4851600
--------------------------


Epoch 8: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.39it/s][0m


Epoch: 8
Loss: 1.4694630
Accuracy: 0.4869600
--------------------------


Epoch 9: 100%|[34m███████████████████████████████████████████████████████████████████████[0m| 500/500 [00:12<00:00, 38.60it/s][0m


Epoch: 9
Loss: 1.4715542
Accuracy: 0.4869600
--------------------------


Epoch 10: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:12<00:00, 38.80it/s][0m


Epoch: 10
Loss: 1.4744176
Accuracy: 0.4833200
--------------------------


Epoch 11: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:15<00:00, 31.43it/s][0m


Epoch: 11
Loss: 1.4664201
Accuracy: 0.4867200
--------------------------


Epoch 12: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:14<00:00, 33.54it/s][0m


Epoch: 12
Loss: 1.4683230
Accuracy: 0.4859200
--------------------------


Epoch 13: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.06it/s][0m


Epoch: 13
Loss: 1.4670257
Accuracy: 0.4851800
--------------------------


Epoch 14: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:12<00:00, 38.86it/s][0m


Epoch: 14
Loss: 1.4643761
Accuracy: 0.4912000
--------------------------


Epoch 15: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.42it/s][0m


Epoch: 15
Loss: 1.4616177
Accuracy: 0.4915000
--------------------------


Epoch 16: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.05it/s][0m


Epoch: 16
Loss: 1.4631858
Accuracy: 0.4884800
--------------------------


Epoch 17: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 37.72it/s][0m


Epoch: 17
Loss: 1.4546152
Accuracy: 0.4911600
--------------------------


Epoch 18: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:12<00:00, 38.76it/s][0m


Epoch: 18
Loss: 1.4594784
Accuracy: 0.4895600
--------------------------


Epoch 19: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:12<00:00, 39.41it/s][0m


Epoch: 19
Loss: 1.4539921
Accuracy: 0.4950000
--------------------------


Epoch 20: 100%|[34m██████████████████████████████████████████████████████████████████████[0m| 500/500 [00:13<00:00, 36.44it/s][0m

Epoch: 20
Loss: 1.4597016
Accuracy: 0.4905600
--------------------------





### Evaluation

In [15]:
#Confusion Matrix for the training set
cm_train = confusion_matrix(y_train, y_predict)
plt.subplots(figsize=(10, 6))
sb.heatmap(cm_train, annot = True, fmt = 'g')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix for the training set")
plt.show()

#Confusion Matrix for the test set
# // To Do

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.