<a href="https://colab.research.google.com/github/sancoap/pytorch1/blob/main/ANN_for_Image_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
#We will run the model on a GPU when one is available
#We will use torchvision
#We will use mini-batch gradient descent instead of full-batch gradient descent
#torchvision includes the MNIST dataset and utilities for handling images

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision as torchvision
import torchvision.transforms as transforms

In [6]:
#Load the MNIST training split
#  root      - directory the data is downloaded to
#  train     - True selects the training split
#  transform - pre-processing applied to every image when it is read (here: conversion to a tensor)
#  download  - fetch the files into root
train_dataset = torchvision.datasets.MNIST(root='.', train=True, transform=transforms.ToTensor(), download=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


HTTPError: ignored

In [3]:
#raw input images stored as a 3-D uint8 tensor (samples, height, width)
#a large percentage of the pixels are 0 (background); only the digit strokes are bright, up to 255 for white
train_dataset.data


NameError: ignored

In [4]:
#check the max value of the raw tensor: the images are uint8, so the brightest (white) pixel is 255
train_dataset.data.max()


tensor(255, dtype=torch.uint8)

In [5]:
#(number of samples, height, width): 60000 training images of 28x28 pixels
train_dataset.data.shape

torch.Size([60000, 28, 28])

In [6]:
#one integer class label per training image; the digits range over 0-9 (ten classes)
train_dataset.targets

tensor([5, 0, 4,  ..., 5, 6, 8])

In [7]:
#Load the MNIST test split: same arguments as the training split, except train=False
test_dataset = torchvision.datasets.MNIST(root='.', train=False, transform=transforms.ToTensor(), download=True)

In [8]:
#the test split holds 10000 images of 28x28 pixels (the training split holds 60000)
test_dataset.data.shape

torch.Size([10000, 28, 28])

In [10]:
#Build the model: Linear -> ReLU -> Linear, wrapped in nn.Sequential
#  784 = 28*28, the length of one flattened input image
#  128 is the width of the hidden layer; it is a hyperparameter we are free to choose
#  the second Linear layer must take 128 inputs to match the output of the first one
#  10 outputs, one per digit class
#No softmax at the end: nn.CrossEntropyLoss applies it internally, so the model emits raw logits
model = nn.Sequential(
    nn.Linear(in_features=784, out_features=128),
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=10),
)

In [1]:
#GPUs were originally built for gaming, but deep learning relies on the same matrix algebra
#If a CUDA GPU is available we select it as "cuda:0", otherwise we fall back to the CPU
#model.to(device) moves the model parameters to that device;
#every batch of inputs and targets has to be moved to the same device before it is fed to the model
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")
print(device)
model.to(device)

NameError: ignored

In [None]:
#Loss and Optimizer
#Adam updates every parameter of the model
optimizer = torch.optim.Adam(params=model.parameters())
#cross entropy loss is meant for classification over MULTIPLE categories
criterion = nn.CrossEntropyLoss()

In [None]:
#Data Loader
#Automatically generates batches inside the training loop
#Only the training data is shuffled: we want random batches while training,
#but the test data is only used to measure loss/accuracy, so its order does not matter
batch_size = 128
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
#the test loader must wrap test_dataset (not train_dataset), otherwise every "test" metric is measured on training data
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
#Peek at one sample exactly as a DataLoader delivers it
#the pixel values have been rescaled into the range [0, 1]; that normalization comes from ToTensor()
#the data has shape (batch size, color, height, width)
tmp_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=1, shuffle=True)
x, y = next(iter(tmp_loader))
print(x)
print(x.shape)
print(y.shape)

In [None]:
#sanity check of the dataset transform (ToTensor()): apply it to the raw uint8 array and look at the max value
#presumably 1.0, i.e. 255 rescaled - confirm by running the cell
#NOTE(review): the whole 3-D array is passed in as if it were a single image, so only the value range of the result is meaningful - confirm
train_dataset.transform(train_dataset.data.numpy()).max()

In [None]:
#Train the model with mini-batch gradient descent
#Each iteration (one optimizer step) only looks at a single batch, but one epoch loops over
#every batch, so we DO look at the entire dataset once per epoch
#number of iterations = number of epochs * number of batches
#We have 2 nested loops: the loss is collected for each BATCH and the batch losses are then
#averaged, which gives one train loss and one test loss per EPOCH
#view(-1, 784) flattens each 28x28 image; -1 lets PyTorch infer the batch dimension

n_epochs = 10
train_losses = np.zeros(n_epochs)
test_losses = np.zeros(n_epochs)

for it in range(n_epochs):
  train_loss = []
  for inputs, targets in train_loader:
    #move data to the same device as the model
    inputs, targets = inputs.to(device), targets.to(device)

    #reshape the input
    inputs = inputs.view(-1,784)

    #zero the parameter gradients
    optimizer.zero_grad()

    #Forward Pass
    outputs = model(inputs)
    loss = criterion(outputs,targets)

    #backward pass
    loss.backward()
    optimizer.step()

    train_loss.append(loss.item())

  #average the batch losses of this epoch
  train_loss = np.mean(train_loss)

  #evaluation pass: no parameter update happens here, so gradient tracking is switched off
  test_loss = []
  with torch.no_grad():
    for inputs, targets in test_loader:
      inputs, targets = inputs.to(device), targets.to(device)
      inputs = inputs.view(-1,784)
      outputs = model(inputs)
      loss = criterion(outputs,targets)
      test_loss.append(loss.item())
  test_loss = np.mean(test_loss)

  #record the loss
  train_losses[it] = train_loss
  test_losses[it] = test_loss

  #report the epoch averages (not the loss of whichever batch happened to be last)
  print(f'Epoch {it+1}/{n_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')


In [None]:
#draw the per-epoch training and test loss curves on one set of axes
for curve, curve_label in ((train_losses, "Training Loss"), (test_losses, "Test Loss")):
  plt.plot(curve, label=curve_label)
plt.legend()
plt.show()


In [None]:
#since we're using the data loader, we have to loop through the data batch by batch
#(unlike an array where we could do the computation directly)
#the same loop is needed for the train and the test data, so it lives in one helper function

def compute_accuracy(model, data_loader, device):
  """Return the fraction of samples yielded by data_loader that model classifies correctly.

  model       -- network that maps a (batch, 784) tensor to (batch, 10) logits
  data_loader -- iterable of (inputs, targets) batches
  device      -- device the model lives on; each batch is moved there before the forward pass
  """
  n_correct = 0.
  n_total = 0.
  #no gradients are needed to count correct predictions
  with torch.no_grad():
    for inputs, targets in data_loader:
      inputs, targets = inputs.to(device), targets.to(device)
      inputs = inputs.view(-1,784)
      #these outputs are LOGITS and NOT probabilities
      outputs = model(inputs)
      #torch.max returns the max value and the corresponding index in each row; the index is the predicted class
      _,predictions = torch.max(outputs,1)
      #.item() converts the one-element tensor into a plain Python number
      n_correct += (predictions == targets).sum().item()
      n_total += targets.shape[0]
  return n_correct / n_total

train_acc = compute_accuracy(model, train_loader, device)
test_acc = compute_accuracy(model, test_loader, device)

print(f'Train acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')


In [None]:
#plot confusion matrix
from sklearn.metrics import confusion_matrix
import itertools

def plot_confusion_matrix(cm, classes, normalize=False, title = 'Confusion Matrix', cmap=plt.cm.Blues):
  """Print a confusion matrix and draw it as an annotated heat map.

  cm        -- 2-D numpy array; rows are drawn on the 'True Label' axis, columns on the 'Predicted Label' axis
  classes   -- tick labels, one per class, used on both axes
  normalize -- if True, every row is divided by its row sum before printing and plotting
  title     -- title of the figure
  cmap      -- matplotlib colormap used for the heat map
  """
  if normalize:
    cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    print('Normalized Confusion Matrix')
  else:
    print('Without Normalization Confusion Matrix')

  print(cm)

  plt.imshow(cm, interpolation='nearest', cmap=cmap)
  plt.title(title)
  plt.colorbar()
  #np.arange is the correct spelling; np.arrange does not exist and raises AttributeError
  tick_marks = np.arange(len(classes))
  plt.xticks(tick_marks, classes, rotation=45)
  plt.yticks(tick_marks, classes)

  #normalized cells hold fractions, raw cells hold integer counts
  fmt= '.2f' if normalize else 'd'
  #cells darker than half of the maximum get white text so the number stays readable
  thresh = cm.max()/2.
  for i,j in itertools.product(range(cm.shape[0]),range(cm.shape[1])):
    plt.text(j, i, format(cm[i,j],fmt),
    horizontalalignment = "center",
    color = 'white' if cm[i,j] > thresh else 'black')

  plt.tight_layout()
  plt.ylabel('True Label')
  plt.xlabel('Predicted Label')
  plt.show()
  

In [None]:
#get all predictions in an array
#test_loader has to iterate test_dataset in order (shuffle=False) so that p_test lines up with y_test
x_test = test_dataset.data.numpy()
y_test = test_dataset.targets.numpy()
batch_predictions = []
#inference only, so gradient tracking is switched off
with torch.no_grad():
  for inputs, targets in test_loader:
    inputs = inputs.to(device)
    inputs = inputs.view(-1,784)
    outputs = model(inputs)
    _,predictions = torch.max(outputs,1)
    #bring the predictions back to the CPU before converting them to numpy
    batch_predictions.append(predictions.cpu().numpy())
#join the per-batch arrays once instead of re-allocating p_test on every batch
p_test = np.concatenate(batch_predictions)

cm = confusion_matrix(y_test,p_test)
plot_confusion_matrix(cm, list(range(10)))

#the confusion matrix tells us, for EACH true label, how the predictions are spread over the labels
#most of the entries should lie ALONG the diagonal
#off-diagonal entries are mistakes, e.g. you might have a 4 misclassified as a 9


In [None]:
#show some misclassified examples
#indices of the test samples whose prediction differs from the true label
misclassified_idx = np.where(p_test != y_test)[0]
#pick one of them at random
i = np.random.choice(misclassified_idx)
plt.imshow(x_test[i],cmap='gray')
#the format string needs one placeholder per value, otherwise the % operator raises a TypeError
plt.title("True Label: %s Predicted: %s" % (y_test[i], int(p_test[i])))