In [1]:
##Exercise NN Network using torch.nn module
##Classification
##Reference : CoderzColumn Tutorial

##Load Data
##Normalize Data
##Define Neural Network Model
##Train Model
##Make Predictions
##Evaluate Performance of Model
##Train Model in Batches
##Make Predictions in Batches
##Evaluate Performance of Model

import torch #root package
import pandas as pd
import numpy as np

from torch.utils.data import Dataset, DataLoader #dataset representation and loading

print("PyTorch Version : {}".format(torch.__version__))

PyTorch Version : 2.3.0+cu118


In [2]:
print("PyTorch Version : {}".format(torch.__version__))

PyTorch Version : 2.3.0+cu118


In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"

print("Device : {}".format(device))

Device : cpu


In [4]:
## Load Data
## For Classification use Breast Cancer Dataset

from sklearn import datasets
from sklearn.model_selection import train_test_split

X, Y = datasets.load_breast_cancer(return_X_y=True)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, stratify=Y, random_state=123)

X_train, X_test, Y_train, Y_test = torch.tensor(X_train, dtype=torch.float32),\
                                   torch.tensor(X_test, dtype=torch.float32),\
                                   torch.tensor(Y_train, dtype=torch.long),\
                                   torch.tensor(Y_test, dtype=torch.long)

samples, features = X_train.shape
classes = Y_test.unique()

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape


(torch.Size([455, 30]),
 torch.Size([114, 30]),
 torch.Size([455]),
 torch.Size([114]))

In [5]:
samples, features, classes

(455, 30, tensor([0, 1]))

In [6]:
##Normalize Data

mean = X_train.mean(axis=0)
std = X_train.std(axis=0)

X_train = (X_train - mean)/ std
X_test = (X_test - mean)/ std


In [7]:
##Define Classification Neural Network

from torch import nn

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.first_layer = nn.Linear(features, 5)
        self.second_layer = nn.Linear(5, 10)
        self.third_layer = nn.Linear(10, 15)
        self.final_layer = nn.Linear(15,2)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, X_batch):
        layer_out = self.relu(self.first_layer(X_batch))
        layer_out = self.relu(self.second_layer(layer_out))
        layer_out = self.relu(self.third_layer(layer_out))

        return self.softmax(self.final_layer(layer_out))

In [8]:
classifier = Classifier()

preds = classifier(X_train[:5])

preds

tensor([[0.4483, 0.5517],
        [0.4635, 0.5365],
        [0.4413, 0.5587],
        [0.4411, 0.5589],
        [0.4457, 0.5543]], grad_fn=<SoftmaxBackward0>)

In [9]:
##Train Neural Network Model

def TrainModel(model, loss_func, optimizer, X, Y, epochs=500):
    for i in range(epochs):
        preds = model(X) ## Make Predictions by forward pass through network

        loss = loss_func(preds, Y) ## Calculate Loss

        optimizer.zero_grad() ## Zero weights before calculating gradients
        loss.backward() ## Calculate Gradients
        optimizer.step() ## Update Weights

        if i % 100 == 0: ## Print MSE every 100 epochs
            print("NegLogLoss : {:.2f}".format(loss))


from torch.optim import SGD

torch.manual_seed(42) ##For reproducibility.This will make sure that same random weights are initialized each time.

epochs = 1500
learning_rate = torch.tensor(1/1e2) # 0.01

classifier = Classifier()
nll_loss = nn.NLLLoss()
optimizer = SGD(params=classifier.parameters(), lr=learning_rate)

TrainModel(classifier, nll_loss, optimizer, X_train, Y_train, epochs=epochs)

NegLogLoss : -0.49
NegLogLoss : -0.50
NegLogLoss : -0.51
NegLogLoss : -0.53
NegLogLoss : -0.54
NegLogLoss : -0.55
NegLogLoss : -0.56
NegLogLoss : -0.58
NegLogLoss : -0.61
NegLogLoss : -0.65
NegLogLoss : -0.71
NegLogLoss : -0.78
NegLogLoss : -0.82
NegLogLoss : -0.85
NegLogLoss : -0.88


In [10]:
##Make Predictions

test_preds = classifier(X_test) ## Make Predictions on test dataset

test_preds = torch.argmax(test_preds, axis=1) ## Convert Probabilities to class type

train_preds = classifier(X_train) ## Make Predictions on train dataset

train_preds = torch.argmax(train_preds, axis=1) ## Convert Probabilities to class type

test_preds[:5], train_preds[:5]

(tensor([1, 0, 1, 1, 1]), tensor([1, 1, 1, 1, 1]))

In [11]:
##Evaluate Performance of Model

from sklearn.metrics import accuracy_score

print("Train Accuracy : {:.2f}".format(accuracy_score(Y_train, train_preds)))
print("Test  Accuracy : {:.2f}".format(accuracy_score(Y_test, test_preds)))

Train Accuracy : 0.95
Test  Accuracy : 0.96


In [12]:
from sklearn.metrics import classification_report

print("Test Data Classification Report : ")
print(classification_report(Y_test, test_preds))

Test Data Classification Report : 
              precision    recall  f1-score   support

           0       0.97      0.90      0.94        42
           1       0.95      0.99      0.97        72

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [13]:
##Train Model on Train Dataset in Batches

def TrainModelInBatches(model, loss_func, optimizer, X, Y, batch_size=32, epochs=500):
    for i in range(epochs):
        batches = torch.arange((X.shape[0]//batch_size)+1) ### Batch Indices

        losses = [] ## Record loss of each batch
        for batch in batches:
            if batch != batches[-1]:
                start, end = int(batch*batch_size), int(batch*batch_size+batch_size)
            else:
                start, end = int(batch*batch_size), None

            X_batch, Y_batch = X[start:end], Y[start:end] ## Single batch of data

            preds = model(X_batch) ## Make Predictions by forward pass through network

            loss = loss_func(preds, Y_batch) ## Calculate Loss 
            losses.append(loss) ## Record Loss

            optimizer.zero_grad() ## Zero weights before calculating gradients
            loss.backward() ## Calculate Gradients
            optimizer.step() ## Update Weights

        if i % 100 == 0: ## Print MSE every 100 epochs
            print("NegLogLoss : {:.2f}".format(torch.tensor(losses).mean()))



In [14]:
from torch.optim import SGD

torch.manual_seed(42) ##For reproducibility.This will make sure that same random weights are initialized each time.

epochs = 1500
learning_rate = torch.tensor(1/1e3) # 0.001
batch_size = 32

classifier = Classifier()
nll_loss = nn.NLLLoss()
optimizer = SGD(params=classifier.parameters(), lr=learning_rate)

TrainModelInBatches(classifier, nll_loss, optimizer, X_train, Y_train, batch_size=batch_size, epochs=epochs)

NegLogLoss : -0.49
NegLogLoss : -0.51
NegLogLoss : -0.53
NegLogLoss : -0.55
NegLogLoss : -0.57
NegLogLoss : -0.60
NegLogLoss : -0.63
NegLogLoss : -0.71
NegLogLoss : -0.81
NegLogLoss : -0.86
NegLogLoss : -0.89
NegLogLoss : -0.91
NegLogLoss : -0.93
NegLogLoss : -0.94
NegLogLoss : -0.94


In [15]:
##Make Predictions

def MakePredictions(model, input_data, batch_size=32):
    batches = torch.arange((input_data.shape[0]//batch_size)+1) ### Batch Indices

    with torch.no_grad():
        preds = []
        for batch in batches:
            if batch != batches[-1]:
                start, end = int(batch*batch_size), int(batch*batch_size+batch_size)
            else:
                start, end = int(batch*batch_size), None

            X_batch = input_data[start:end]

            preds.append(model(X_batch))

    return preds



In [16]:
test_preds = MakePredictions(classifier, X_test) ## Make Predictions on test dataset

test_preds = torch.cat(test_preds) ## Combine all batch predictions

test_preds = torch.argmax(test_preds, axis=1) ## Convert Probabilities to class type

train_preds = MakePredictions(classifier, X_train) ## Make Predictions on train dataset

train_preds = torch.cat(train_preds) ## Combine all batch predictions

train_preds = torch.argmax(train_preds, axis=1) ## Convert Probabilities to class type

In [17]:
##Evaluate Performance of Model

from sklearn.metrics import accuracy_score

print("Train Accuracy : {:.2f}".format(accuracy_score(Y_train, train_preds)))
print("Test  Accuracy : {:.2f}".format(accuracy_score(Y_test, test_preds)))



Train Accuracy : 0.98
Test  Accuracy : 0.98


In [18]:
from sklearn.metrics import classification_report

print("Test Data Classification Report : ")
print(classification_report(Y_test, test_preds))

Test Data Classification Report : 
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        42
           1       0.99      0.99      0.99        72

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

