## Binary classification using PyTorch manual implementation

In [116]:
import numpy as np
import pandas as pd

In [None]:
df = pd.read_csv("../../datasets/breast_canc_data.csv")

In [118]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 33 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       569 non-null    int64  
 1   diagnosis                569 non-null    object 
 2   radius_mean              569 non-null    float64
 3   texture_mean             569 non-null    float64
 4   perimeter_mean           569 non-null    float64
 5   area_mean                569 non-null    float64
 6   smoothness_mean          569 non-null    float64
 7   compactness_mean         569 non-null    float64
 8   concavity_mean           569 non-null    float64
 9   concave points_mean      569 non-null    float64
 10  symmetry_mean            569 non-null    float64
 11  fractal_dimension_mean   569 non-null    float64
 12  radius_se                569 non-null    float64
 13  texture_se               569 non-null    float64
 14  perimeter_se             5

In [119]:
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [120]:
df.columns

Index(['id', 'diagnosis', 'radius_mean', 'texture_mean', 'perimeter_mean',
       'area_mean', 'smoothness_mean', 'compactness_mean', 'concavity_mean',
       'concave points_mean', 'symmetry_mean', 'fractal_dimension_mean',
       'radius_se', 'texture_se', 'perimeter_se', 'area_se', 'smoothness_se',
       'compactness_se', 'concavity_se', 'concave points_se', 'symmetry_se',
       'fractal_dimension_se', 'radius_worst', 'texture_worst',
       'perimeter_worst', 'area_worst', 'smoothness_worst',
       'compactness_worst', 'concavity_worst', 'concave points_worst',
       'symmetry_worst', 'fractal_dimension_worst', 'Unnamed: 32'],
      dtype='object')

In [121]:
df.drop(columns={"id", "Unnamed: 32"}, inplace=True)

---


In [122]:
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(
    df.drop(columns={"diagnosis"}), df["diagnosis"], test_size=0.2, random_state=42
)

In [123]:
xtrain.shape, xtest.shape, ytrain.shape, ytest.shape

((455, 30), (114, 30), (455,), (114,))

In [124]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

xtrain = scaler.fit_transform(xtrain)
xtest = scaler.transform(xtest)

In [125]:
xtest

array([[-0.46649743, -0.13728933, -0.44421138, ..., -0.19435087,
         0.17275669,  0.20372995],
       [ 1.36536344,  0.49866473,  1.30551088, ...,  0.99177862,
        -0.561211  , -1.00838949],
       [ 0.38006578,  0.06921974,  0.40410139, ...,  0.57035018,
        -0.10783139, -0.20629287],
       ...,
       [-0.73547237, -0.99852603, -0.74138839, ..., -0.27741059,
        -0.3820785 , -0.32408328],
       [ 0.02898271,  2.0334026 ,  0.0274851 , ..., -0.49027026,
        -1.60905688, -0.33137507],
       [ 1.87216885,  2.80077153,  1.80354992, ...,  0.7925579 ,
        -0.05868885, -0.09467243]], shape=(114, 30))

In [126]:
from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()

ytrain = enc.fit_transform(ytrain)
ytest = enc.transform(ytest)

In [127]:
ytrain

array([0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,

---


Numpy array to pytorch tensor conversion


In [128]:
import torch

In [129]:
xtrain_tensor = torch.from_numpy(xtrain)
xtest_tensor = torch.from_numpy(xtest)
ytrain_tensor = torch.from_numpy(ytrain)
ytest_tensor = torch.from_numpy(ytest)

In [130]:
xtrain

array([[-1.44075296, -0.43531947, -1.36208497, ...,  0.9320124 ,
         2.09724217,  1.88645014],
       [ 1.97409619,  1.73302577,  2.09167167, ...,  2.6989469 ,
         1.89116053,  2.49783848],
       [-1.39998202, -1.24962228, -1.34520926, ..., -0.97023893,
         0.59760192,  0.0578942 ],
       ...,
       [ 0.04880192, -0.55500086, -0.06512547, ..., -1.23903365,
        -0.70863864, -1.27145475],
       [-0.03896885,  0.10207345, -0.03137406, ...,  1.05001236,
         0.43432185,  1.21336207],
       [-0.54860557,  0.31327591, -0.60350155, ..., -0.61102866,
        -0.3345212 , -0.84628745]], shape=(455, 30))

In [131]:
xtrain_tensor

tensor([[-1.4408, -0.4353, -1.3621,  ...,  0.9320,  2.0972,  1.8865],
        [ 1.9741,  1.7330,  2.0917,  ...,  2.6989,  1.8912,  2.4978],
        [-1.4000, -1.2496, -1.3452,  ..., -0.9702,  0.5976,  0.0579],
        ...,
        [ 0.0488, -0.5550, -0.0651,  ..., -1.2390, -0.7086, -1.2715],
        [-0.0390,  0.1021, -0.0314,  ...,  1.0500,  0.4343,  1.2134],
        [-0.5486,  0.3133, -0.6035,  ..., -0.6110, -0.3345, -0.8463]],
       dtype=torch.float64)

In [132]:
ytest_tensor.shape

torch.Size([114])

---


In [133]:
xtrain_tensor.shape

torch.Size([455, 30])

In [134]:
# 30 ip feats

In [135]:
class SimpleNN:
    def __init__(self, X):

        # random weight and bias init
        self.weights = torch.rand(
            X.shape[1], 1, dtype=torch.float64, requires_grad=True
        )  # x.shape1->input feat dim,
        self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

    #  calculate ypred ..forward pass

    def forward_pass(self, X):
        z = torch.matmul(X, self.weights) + self.bias
        y_pred = torch.sigmoid(z)
        return y_pred

    # use binary cross entropy

    def loss_function(self, y_pred, y):
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
        # Calculate loss
        loss = -(
            ytrain_tensor * torch.log(y_pred)
            + (1 - ytrain_tensor) * torch.log(1 - y_pred)
        ).mean()
        return loss

In [136]:
# imp parameters

learning_rate = 0.1
epochs = 30

In [137]:
xtrain_tensor.shape

torch.Size([455, 30])

In [138]:
model1 = SimpleNN(xtrain_tensor)
model1.weights

tensor([[0.0584],
        [0.9248],
        [0.7726],
        [0.8478],
        [0.0250],
        [0.1282],
        [0.9108],
        [0.0161],
        [0.0552],
        [0.5445],
        [0.7770],
        [0.7250],
        [0.0671],
        [0.0363],
        [0.0665],
        [0.5716],
        [0.9791],
        [0.4100],
        [0.6103],
        [0.6356],
        [0.2799],
        [0.9517],
        [0.0253],
        [0.9637],
        [0.2585],
        [0.0933],
        [0.4623],
        [0.9510],
        [0.1441],
        [0.7289]], dtype=torch.float64, requires_grad=True)

In [139]:
len(model1.weights.ravel())

30

In [140]:
model1.bias

tensor([0.], dtype=torch.float64, requires_grad=True)

---


```py
class SimpleNN:
    def __init__(self, X):

        # random weight and bias init
        self.weights = torch.rand(
            X.shape[1], 1, dtype=torch.float64, requires_grad=True
        )  # x.shape1->input feat dim,
        self.bias = torch.rand(1, dtype=torch.float64, requires_grad=True)

    #  calculate ypred ..forward pass

    def forward_pass(self, X):
        z = torch.matmul(X, self.weights) + self.bias
        y_pred = torch.sigmoid(z)
        return y_pred

    # use binary cross entropy

    def loss_function(self, y_pred, y):
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)
        # Calculate loss
        loss = -(
            ytrain_tensor * torch.log(y_pred)
            + (1 - ytrain_tensor) * torch.log(1 - y_pred)
        ).mean()
        return loss
```
------



In [141]:
# Training pipeline

# create model
model = SimpleNN(xtrain_tensor)

# define loop


# forward pass
for epoch in range(epochs):

    ypred = model.forward_pass(xtrain_tensor)

    # loss calc

    loss = model.loss_function(ypred, ytrain_tensor)

    # backward pass

    loss.backward()

    # wnew=wold-lr*(Del(L)/delw)

    # parameter update
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad

    # solve gradient accumulate problem
    model.weights.grad.zero_()
    model.bias.grad.zero_()

    print("Ephocs", epoch + 1, "Loss: ", loss.item())

Ephocs 1 Loss:  3.528149846675087
Ephocs 2 Loss:  3.396875833663181
Ephocs 3 Loss:  3.2586190495031557
Ephocs 4 Loss:  3.113620677989027
Ephocs 5 Loss:  2.9664115252750145
Ephocs 6 Loss:  2.812668950989177
Ephocs 7 Loss:  2.6549872958156393
Ephocs 8 Loss:  2.4926062266720246
Ephocs 9 Loss:  2.3279856293099392
Ephocs 10 Loss:  2.1589047345486003
Ephocs 11 Loss:  1.9955115545113102
Ephocs 12 Loss:  1.8376762882035802
Ephocs 13 Loss:  1.682800364987335
Ephocs 14 Loss:  1.5340755757155526
Ephocs 15 Loss:  1.393514170479711
Ephocs 16 Loss:  1.2642348957743668
Ephocs 17 Loss:  1.1518528961347858
Ephocs 18 Loss:  1.0573645751418377
Ephocs 19 Loss:  0.9808612426741937
Ephocs 20 Loss:  0.9212338675194659
Ephocs 21 Loss:  0.8762414976356363
Ephocs 22 Loss:  0.8429985659379408
Ephocs 23 Loss:  0.8185756372545414
Ephocs 24 Loss:  0.8004320163005526
Ephocs 25 Loss:  0.7865995070236501
Ephocs 26 Loss:  0.7756724745694931
Ephocs 27 Loss:  0.7667039150979487
Ephocs 28 Loss:  0.7590824958333158
Ephocs 

In [142]:
model.bias

tensor([-0.1754], dtype=torch.float64, requires_grad=True)

Evaluation 

In [143]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward_pass(xtest_tensor)
  y_pred = (y_pred > 0.9).float()
  accuracy = (y_pred == ytest_tensor).float().mean()
  print(f'Accuracy: {accuracy.item()}')


Accuracy: 0.6228070259094238
