In [68]:
import torch
import torch.nn as nn

## Code flow

1) Load dataset
2) Basic preprocessing
3) Training process \
        - Create the model \
        - Forward pass \
        - Loss calculation \
        - Backpropagation \
        - Parameters update 
4) Model evaluation

In [69]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [70]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [71]:
df.shape

(569, 33)

In [72]:
df.drop(columns =['id', 'Unnamed: 32'], inplace = True)

In [73]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


### Train - Test Split

In [74]:
X_train, X_test,y_train,y_test = train_test_split(df.iloc[:,1:], df.iloc[:,0], test_size = 0.2)

In [75]:
scaling = StandardScaler()
X_train = scaling.fit_transform(X_train)
X_test = scaling.transform(X_test)

In [76]:
X_train

array([[ 0.73055923,  0.47611596,  0.75643972, ...,  1.45752436,
         1.18352438,  0.5881677 ],
       [ 1.70425168,  1.04799232,  1.77133716, ...,  1.19342391,
         0.65619229,  0.53796848],
       [-0.74130145,  0.38157188, -0.73056993, ..., -0.81849953,
        -0.22324555,  0.2114093 ],
       ...,
       [ 0.92869432,  0.86351608,  0.79752869, ..., -0.49482292,
        -0.73569993, -1.02296327],
       [-0.65921691, -1.06656665, -0.62784752, ..., -0.13782666,
        -0.48443197,  0.75461777],
       [ 0.4503396 ,  0.89579942,  0.76465752, ...,  2.02104104,
         0.62808996,  0.91895418]])

In [77]:
y_train

24     M
302    M
80     B
416    B
545    B
      ..
232    B
319    B
261    M
510    B
258    M
Name: diagnosis, Length: 455, dtype: object

In [78]:
# Label Encoding

encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

In [79]:
y_train

array([1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1,

In [80]:
# Changing to tensor ( Numpy )
x_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [81]:
x_train_tensor

tensor([[ 0.7306,  0.4761,  0.7564,  ...,  1.4575,  1.1835,  0.5882],
        [ 1.7043,  1.0480,  1.7713,  ...,  1.1934,  0.6562,  0.5380],
        [-0.7413,  0.3816, -0.7306,  ..., -0.8185, -0.2232,  0.2114],
        ...,
        [ 0.9287,  0.8635,  0.7975,  ..., -0.4948, -0.7357, -1.0230],
        [-0.6592, -1.0666, -0.6278,  ..., -0.1378, -0.4844,  0.7546],
        [ 0.4503,  0.8958,  0.7647,  ...,  2.0210,  0.6281,  0.9190]],
       dtype=torch.float64)

In [82]:
y_train_tensor.shape

torch.Size([455])

## Defining the model 

In [83]:
class MySimpleNN:
    def __init__(self, X):
        self.weights = torch.randn(X.shape[1], 1, dtype=torch.float64, requires_grad=True)
        self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

    def forward(self, X):  # Ensure proper indentation
        z = torch.matmul(X, self.weights) + self.bias
        y_pred = torch.sigmoid(z)
        return y_pred
    
    def loss_function(self,y_pred,y):
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred,epsilon,1.0-epsilon)
        # calculate loss
        loss = -(y_train_tensor * torch.log(y_pred) + (1 - y_train_tensor) * torch.log(1 - y_pred)).mean()
        return loss
        

## Important parameters

In [84]:
learning_rate = 0.1
epochs = 25

In [85]:
# Traning Pipeline

model = MySimpleNN(x_train_tensor)
model.weights

tensor([[ 1.2278],
        [ 0.2414],
        [-0.5711],
        [-1.2045],
        [ 0.3933],
        [ 0.4774],
        [ 0.3011],
        [ 0.8594],
        [ 0.0420],
        [ 1.3707],
        [-0.7223],
        [-0.0155],
        [ 0.7790],
        [-0.2677],
        [ 1.1473],
        [ 0.4587],
        [-1.1269],
        [-0.4391],
        [-1.1020],
        [-0.3537],
        [-0.6506],
        [-0.1406],
        [-1.1914],
        [ 1.1251],
        [-0.4946],
        [ 0.3721],
        [-0.0780],
        [-0.5214],
        [ 0.8728],
        [ 0.1619]], dtype=torch.float64, requires_grad=True)

In [86]:
model.bias

tensor([0.], dtype=torch.float64, requires_grad=True)

In [87]:
for epoch in range(epochs):
    # Forward pass
    y_pred = model.forward(x_train_tensor)
    print(y_pred)

tensor([[5.4445e-01],
        [7.1035e-01],
        [9.5955e-01],
        [9.4150e-01],
        [1.5409e-01],
        [5.9473e-01],
        [9.9258e-01],
        [3.8478e-01],
        [2.1182e-02],
        [7.1703e-01],
        [8.8395e-01],
        [3.8408e-01],
        [3.3863e-01],
        [1.0454e-01],
        [9.4182e-01],
        [7.7774e-01],
        [2.7005e-01],
        [1.3684e-01],
        [7.9647e-01],
        [1.8648e-01],
        [2.0879e-01],
        [7.1866e-01],
        [5.5461e-01],
        [2.7165e-02],
        [3.7660e-01],
        [1.2989e-01],
        [7.4374e-01],
        [9.2149e-01],
        [7.4839e-01],
        [9.6550e-01],
        [8.3078e-02],
        [4.7769e-01],
        [9.1798e-03],
        [3.2357e-01],
        [3.7230e-01],
        [1.4910e-01],
        [4.5183e-02],
        [1.5008e-01],
        [9.3865e-01],
        [1.4025e-01],
        [5.9786e-01],
        [1.9920e-01],
        [9.4892e-01],
        [9.5591e-03],
        [4.1140e-01],
        [4

In [88]:
# loss calculate

loss = model.loss_function(y_pred, y_train_tensor)
print(f'epoch : {epoch + 1} , loss : {loss}')

epoch : 25 , loss : 1.2563877441280682


In [89]:
# backward pass 

loss.backward()

In [90]:
# parameters update
with torch.no_grad():
    model.weights -= learning_rate * model.weights.grad
    model.bias -= learning_rate * model.bias.grad
    
# zero the gradients
model.weights.grad.zero_()
model.bias.grad.zero_()
print(f'epoch : {epoch + 1} , loss : {loss.item()}')

epoch : 25 , loss : 1.2563877441280682


In [91]:
# create model
model = MySimpleNN(x_train_tensor)

# define loop
for epoch in range(epochs):

  # forward pass
  y_pred = model.forward(x_train_tensor)

  # loss calculate
  loss = model.loss_function(y_pred, y_train_tensor)

  # backward pass
  loss.backward()

  # parameters update
  with torch.no_grad():
    model.weights -= learning_rate * model.weights.grad
    model.bias -= learning_rate * model.bias.grad

  # zero gradients
  model.weights.grad.zero_()
  model.bias.grad.zero_()

  # print loss in each epoch
  print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 2.454073770944596
Epoch: 2, Loss: 2.3058049529450098
Epoch: 3, Loss: 2.160192476882291
Epoch: 4, Loss: 2.0197821830342044
Epoch: 5, Loss: 1.8857010520033695
Epoch: 6, Loss: 1.7617307228460541
Epoch: 7, Loss: 1.6471992178770412
Epoch: 8, Loss: 1.5425844387731007
Epoch: 9, Loss: 1.449873075552998
Epoch: 10, Loss: 1.3690250051741708
Epoch: 11, Loss: 1.2995452066539397
Epoch: 12, Loss: 1.2406668303100734
Epoch: 13, Loss: 1.1915972354914446
Epoch: 14, Loss: 1.151555769440974
Epoch: 15, Loss: 1.1195768832743944
Epoch: 16, Loss: 1.0943647516735482
Epoch: 17, Loss: 1.0744776360497874
Epoch: 18, Loss: 1.0586171066524144
Epoch: 19, Loss: 1.0457401580505306
Epoch: 20, Loss: 1.0350424765019264
Epoch: 21, Loss: 1.025918593635575
Epoch: 22, Loss: 1.017927109617982
Epoch: 23, Loss: 1.0107570323960506
Epoch: 24, Loss: 1.0041947161215052
Epoch: 25, Loss: 0.9980951730959627


In [95]:
loss_function = nn.BCELoss()    # Binary Cross Entropy Loss

## Doing after lect. 4 , using nn modules to re write all the stuffs here 

In [None]:
# defining the model 

class MySimpleNN(nn.Module):
    def __init__(self,num_features):
        super().__init__()
        self.linear = nn.Linear(num_features,1)
        self.sigmoid = nn.Sigmoid()
    def forward(self,X):
        z = self.linear(X)
        y_pred = self.sigmoid(z)
        return y_pred
   

In [94]:
# Training pipeline

model = MySimpleNN(x_train_tensor.shape[1]).double()

for epoch in range(epochs):
    # Forward pass
    y_pred = model.forward(x_train_tensor)

    # loss calculate
    loss = model.loss_function(y_pred, y_train_tensor)

    # backward pass
    loss.backward()

    # parameters update
    with torch.no_grad():
        model.linear.weight -= learning_rate * model.linear.weight.grad
        model.linear.bias -= learning_rate * model.linear.bias.grad

    # zero gradients
    model.linear.weight.grad.zero_()
    model.linear.bias.grad.zero_()

    # print loss in each epoch
    print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')

Epoch: 1, Loss: 0.7644059399097296
Epoch: 2, Loss: 0.7504812387969692
Epoch: 3, Loss: 0.7406937442504118
Epoch: 4, Loss: 0.7333706585491913
Epoch: 5, Loss: 0.7275345216059573
Epoch: 6, Loss: 0.7226358095524383
Epoch: 7, Loss: 0.7183692966001091
Epoch: 8, Loss: 0.714563413920103
Epoch: 9, Loss: 0.7111180843232398
Epoch: 10, Loss: 0.7079711890980852
Epoch: 11, Loss: 0.7050809338754496
Epoch: 12, Loss: 0.7024167197189191
Epoch: 13, Loss: 0.6999544508216737
Epoch: 14, Loss: 0.6976741143171091
Epoch: 15, Loss: 0.695558508001151
Epoch: 16, Loss: 0.6935925428995534
Epoch: 17, Loss: 0.6917628330691034
Epoch: 18, Loss: 0.6900574302061301
Epoch: 19, Loss: 0.6884656333282809
Epoch: 20, Loss: 0.6869778396724638
Epoch: 21, Loss: 0.6855854204127764
Epoch: 22, Loss: 0.6842806131791103
Epoch: 23, Loss: 0.6830564273148627
Epoch: 24, Loss: 0.6819065596522147
Epoch: 25, Loss: 0.6808253194265529
