<a href="https://colab.research.google.com/github/nirmit27/ds-and-ml/blob/main/DL/PyTorch/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# The `pytorch` **Pipeline**

We will be working with the **Breast Cancer** dataset and solve a **Binary Classification** problem.

### Importing the necessary libraries

In [1]:
import os
from pprint import pprint

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.preprocessing import LabelEncoder as LE
from sklearn.preprocessing import StandardScaler as SS
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score

import torch
from torch import nn

### Importing the dataset

In [2]:
df = pd.read_csv(r"https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv")
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


## **Preprocessing**

### Dropping the unwanted features

In [3]:
df.drop(columns=["id", "Unnamed: 32"], inplace=True)
df.columns

Index(['diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst'],
      dtype='object')

### Splitting the dataset

In [4]:
test_size, random_state = 0.2, 44
X_train, X_test, y_train, y_test = tts(df.iloc[:, 1:], df.iloc[:, 0], test_size=test_size, random_state=random_state)

X_train.shape, X_test.shape

((455, 30), (114, 30))

### Scaling the features

In [5]:
ss = SS()
X_train = ss.fit_transform(X_train)
X_test = ss.fit_transform(X_test)

### One-hot Encoding the target labels

In [6]:
le = LE()
y_train = le.fit_transform(y_train)
y_test = le.fit_transform(y_test)

y_test

array([1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1])

### Converting `numpy` arrays to `torch` tensors

In [7]:
X_train, X_test, y_train, y_test = torch.from_numpy(X_train), torch.from_numpy(X_test), torch.from_numpy(y_train), torch.from_numpy(y_test)
X_train.shape

torch.Size([455, 30])

In [8]:
y_test

tensor([1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0,
        1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1])

## **Modelling**
Designing a single neuron for modelling the following function: $$y = wx + b$$

### Defining the model

In [9]:
class Model_0():
  def __init__(self, X):
    self.weights = torch.rand(X.shape[1], 1, dtype=torch.float64, requires_grad=True)
    self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

  def forward(self, X):
    """Forward pass"""
    z = torch.matmul(X, self.weights) + self.bias   # y = wx + b
    y_pred = torch.sigmoid(z)
    return y_pred

  def loss_fn(self, y_pred, y):
    """Binary Cross-Entropy loss"""
    criterion = nn.BCELoss()
    loss = criterion(y_pred, y.type(torch.float64).reshape(y_pred.shape))
    return loss

### Defining the hyperparameters

In [10]:
learning_rate = torch.tensor(0.1)
epochs = 200

## Training **pipeline**
![pipeline.png](/resources/pipeline.png)

In [None]:
# Instantiating the model
model = Model_0(X_train)

# Training
for epoch in range(epochs):
  y_pred = model.forward(X_train) # Forward pass
  loss = model.loss_fn(y_pred, y_train) # Loss function

  loss.backward() # Backpropagation

  # Updating the model parameters
  with torch.no_grad():
    model.weights -= learning_rate.multiply(model.weights.grad)
    model.bias -= learning_rate.multiply(model.bias.grad)

  # Update gradients for next epoch
  model.weights.grad.zero_()
  model.bias.grad.zero_()

  print(f"Epoch : {epoch + 1}\tLoss : {loss:.4f}")

### Model evaluation

In [12]:
with torch.no_grad():
  y_hat = (model.forward(X_test) > 0.5).float()

  score = accuracy_score(y_test, y_hat.round())
  print(f"Accuracy score : {(score * 100):.4f}%")

Accuracy score : 98.2456%
