### Steps for training pipeline

1) Load the dataset  
2) Basic preprocessing  
3) Training process  
    a) Create the model  
    b) Forward pass  
    c) Loss calculation  
    d) Backpropagation  
    e) Parameters update  
4) Model evaluation

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
df.drop(columns=['id', 'Unnamed: 32'], inplace=True)

In [4]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [5]:
df.shape[1]

31

#### Train test split

In [6]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:,1:],df.iloc[:,0],test_size=0.2)

In [7]:
X_train.shape

(455, 30)

#### Scaling

In [8]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [9]:
X_train

array([[-0.3965102 , -1.30238424, -0.41617838, ..., -0.76074799,
        -0.84448116, -0.81388616],
       [-0.77812283, -0.41175982, -0.79322207, ..., -0.40899834,
         0.10996912, -0.36073083],
       [ 2.5469578 ,  1.79717673,  2.49464762, ...,  2.16482325,
         0.31539716,  0.15244142],
       ...,
       [-0.96051122,  0.25092114, -0.91457413, ..., -0.42363591,
        -0.47155025,  0.16565616],
       [-0.99979487, -0.23551488, -1.02334454, ..., -1.37627252,
        -0.32300998, -0.91630036],
       [-1.10361595, -0.43290921, -1.09518171, ..., -1.17478196,
        -0.05595353, -0.32549153]], shape=(455, 30))

In [10]:
y_train

294    B
410    B
236    M
116    B
226    B
      ..
74     B
392    M
269    B
443    B
217    B
Name: diagnosis, Length: 455, dtype: object

#### Label Encoding

In [11]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [12]:
y_train

array([0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0,

#### Numpy array to Pytorch tensors

In [13]:
import torch
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)


#### Defining the model

In [14]:
class MySimpleNN():
    def __init__(self,X):
        self.weights = torch.rand(X.shape[1],1,dtype=torch.float64,requires_grad=True)
        self.bias = torch.zeros(1,dtype=torch.float64,requires_grad=True)
        
    def forward(self,X):
        z = torch.matmul(X,self.weights) + self.bias
        y_pred = torch.sigmoid(z)
        return y_pred
    
    def binary_cross_entropy(self, y_pred, y):
        # Clamp predictions to avoid log(0)
        epsilon = 1e-7 
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
        
        # Calculate loss
        loss = -(y * torch.log(y_pred) + (1 - y) * torch.log(1 - y_pred))
        return loss.mean()
        
        

#### Important parameters

In [15]:
learning_rate = 0.1
epochs = 25

#### Training Pipeline

In [16]:
# Create model
model = MySimpleNN(X_train_tensor)

# Define loop

for epoch in range(epochs):
    # Forward pass
    y_pred = model.forward(X_train_tensor)
    
    # Compute loss
    loss = model.binary_cross_entropy(y_pred, y_train_tensor)
    
    
    # Backward pass
    loss.backward()
    
    # Update weights and bias
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad
    
    #Zero gradients
    model.weights.grad.zero_()
    model.bias.grad.zero_()
    
    # Print loss in each epoch 
    print(f'Epoch: {epoch+1}, Loss: {loss.item()}')
    
    

Epoch: 1, Loss: 3.2381496477534313
Epoch: 2, Loss: 3.0988611050758363
Epoch: 3, Loss: 2.9565787612025782
Epoch: 4, Loss: 2.808670014878629
Epoch: 5, Loss: 2.656943283246977
Epoch: 6, Loss: 2.5057263062420163
Epoch: 7, Loss: 2.3561987874187724
Epoch: 8, Loss: 2.204741508468022
Epoch: 9, Loss: 2.0556770686456978
Epoch: 10, Loss: 1.9062522195636917
Epoch: 11, Loss: 1.7654593101044727
Epoch: 12, Loss: 1.6311546426630115
Epoch: 13, Loss: 1.5074173274874065
Epoch: 14, Loss: 1.3903053292745924
Epoch: 15, Loss: 1.2870583450826283
Epoch: 16, Loss: 1.1981785079763698
Epoch: 17, Loss: 1.1234253315875398
Epoch: 18, Loss: 1.0615591370993362
Epoch: 19, Loss: 1.0105645647496742
Epoch: 20, Loss: 0.9682977009413316
Epoch: 21, Loss: 0.9329457070397802
Epoch: 22, Loss: 0.9031067792498244
Epoch: 23, Loss: 0.8777136242054606
Epoch: 24, Loss: 0.8559467200779979
Epoch: 25, Loss: 0.8371687647319325


#### Model Evaluation

In [18]:
with torch.no_grad():
    y_pred = model.forward(X_test_tensor)
    y_pred = (y_pred > 0.9).float()
    accuracy = (y_pred == y_test_tensor).float().mean()
    print(f'Accuracy: {accuracy.item() * 100:.2f}%')
    

Accuracy: 60.40%
