In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
df.drop(columns=['id', 'Unnamed: 32'], inplace= True)

## train-test-split manually.

In [4]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:], df.iloc[:, 0], test_size=0.2)

## Scaling

In [5]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)

## Numpy arrays to PyTorch tensors

In [7]:
X_train_tensor = torch.from_numpy(X_train.astype(np.float32))
X_test_tensor = torch.from_numpy(X_test.astype(np.float32))
y_train_tensor = torch.from_numpy(y_train.astype(np.float32))
y_test_tensor = torch.from_numpy(y_test.astype(np.float32))
print(y_train_tensor.dtype)

torch.float32


# Define Model with the help of nn and sequential etc
# NOTE: Customize the neural network to check accuracies.
# Just for fun

In [8]:
class MyModel(nn.Module):

    def __init__(self, features):

        super().__init__()

        self.network = nn.Sequential(
            nn.Linear(features, 20),
            nn.ReLU(),
            nn.Linear(20, 10),
            nn.ReLU(),
            nn.Linear(10, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.network(x)

## Important Parameters

In [9]:
learning_rate = 0.1
epochs = 25

# Training Pipeline

In [10]:
# define loss function
loss_function = nn.BCELoss()

In [11]:
model = MyModel(X_train_tensor.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(epochs):

    # forward pass
    y_pred = model(X_train_tensor)

    # loss calculate
    loss = loss_function(y_pred, y_train_tensor.view(-1, 1))

    # clear grads for efficient compute gradients
    optimizer.zero_grad()

    # backward pass
    loss.backward()

    # calculating optimization and update parameters
    optimizer.step()

    #print each loss
    print("Epochs:", {epoch+1}, "Loss:{loss:.4f}".format(loss=loss.item()))



Epochs: {1} Loss:0.6450
Epochs: {2} Loss:0.2545
Epochs: {3} Loss:0.1284
Epochs: {4} Loss:0.0757
Epochs: {5} Loss:0.0735
Epochs: {6} Loss:0.0871
Epochs: {7} Loss:0.0645
Epochs: {8} Loss:0.0469
Epochs: {9} Loss:0.0385
Epochs: {10} Loss:0.0319
Epochs: {11} Loss:0.0270
Epochs: {12} Loss:0.0244
Epochs: {13} Loss:0.0223
Epochs: {14} Loss:0.0208
Epochs: {15} Loss:0.0197
Epochs: {16} Loss:0.0186
Epochs: {17} Loss:0.0172
Epochs: {18} Loss:0.0151
Epochs: {19} Loss:0.0129
Epochs: {20} Loss:0.0109
Epochs: {21} Loss:0.0095
Epochs: {22} Loss:0.0080
Epochs: {23} Loss:0.0066
Epochs: {24} Loss:0.0058
Epochs: {25} Loss:0.0045


# Check Accuracy

In [12]:
# model evaluation
with torch.no_grad():
  y_pred = model.forward(X_test_tensor)
  y_pred = (y_pred > 0.5).float()
  accuracy = (y_pred == y_test_tensor).float().mean()
  print(f'Accuracy: {accuracy.item()}')


Accuracy: 0.5677131414413452


In [18]:
from torchinfo import summary

summary(model, input_size=X_train_tensor.shape)

Layer (type:depth-idx)                   Output Shape              Param #
MyModel                                  [455, 1]                  --
├─Sequential: 1-1                        [455, 1]                  --
│    └─Linear: 2-1                       [455, 20]                 620
│    └─ReLU: 2-2                         [455, 20]                 --
│    └─Linear: 2-3                       [455, 10]                 210
│    └─ReLU: 2-4                         [455, 10]                 --
│    └─Linear: 2-5                       [455, 1]                  11
│    └─Sigmoid: 2-6                      [455, 1]                  --
Total params: 841
Trainable params: 841
Non-trainable params: 0
Total mult-adds (M): 0.38
Input size (MB): 0.05
Forward/backward pass size (MB): 0.11
Params size (MB): 0.00
Estimated Total Size (MB): 0.17