In [69]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [70]:
df = pd.read_csv(
    'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df = df.drop(columns=['Unnamed: 32', 'id'])
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [71]:
df.shape

(569, 31)

In [72]:
# train-test split
X = df.iloc[:, 1:]
y = df.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(455, 30) (114, 30) (455,) (114,)


In [73]:
# scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [74]:
X_train

array([[-1.44075296, -0.43531947, -1.36208497, ...,  0.9320124 ,
         2.09724217,  1.88645014],
       [ 1.97409619,  1.73302577,  2.09167167, ...,  2.6989469 ,
         1.89116053,  2.49783848],
       [-1.39998202, -1.24962228, -1.34520926, ..., -0.97023893,
         0.59760192,  0.0578942 ],
       ...,
       [ 0.04880192, -0.55500086, -0.06512547, ..., -1.23903365,
        -0.70863864, -1.27145475],
       [-0.03896885,  0.10207345, -0.03137406, ...,  1.05001236,
         0.43432185,  1.21336207],
       [-0.54860557,  0.31327591, -0.60350155, ..., -0.61102866,
        -0.3345212 , -0.84628745]])

In [75]:
y_train

68     B
181    M
63     B
248    B
60     B
      ..
71     B
106    B
270    B
435    M
102    B
Name: diagnosis, Length: 455, dtype: object

In [76]:
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)
# y_train

### numpy arrays to tensors

In [77]:
X_train_tensor = torch.from_numpy(X_train)
X_test_tensor = torch.from_numpy(X_test)
y_train_tensor = torch.from_numpy(y_train)
y_test_tensor = torch.from_numpy(y_test)

In [78]:
X_train_tensor.shape

torch.Size([455, 30])

### Defining the model

In [79]:
class SimpleNN():
    """
    A simple neural network class for binary classification using PyTorch.
    This class implements a single-layer neural network with sigmoid activation.
    """

    def __init__(self, X):
        """
        Initializes the neural network with random weights and a bias.

        Parameters:
        X (torch.Tensor): The input data tensor. The number of features in X determines the size of the weight matrix.
        """
        # Initialize weights with random values and set them to require gradients for backpropagation
        self.weights = torch.randn(
            size=(X.shape[1], 1), dtype=torch.float64, requires_grad=True)
        # Initialize bias with zeros and set it to require gradients for backpropagation
        self.bias = torch.zeros(1, dtype=torch.float64, requires_grad=True)

    # forward pass
    def forward(self, X):
        """
        Performs the forward pass of the neural network.

        Parameters:
        X (torch.Tensor): The input data tensor.

        Returns:
        torch.Tensor: The predicted output after applying the sigmoid activation function.
        """
        # Compute the linear combination of inputs and weights, then add the bias
        z = torch.matmul(X, self.weights) + self.bias
        # Apply the sigmoid activation function to produce the output
        y_pred = torch.sigmoid(z)
        return y_pred

    def loss_function(self, y_pred, y_true):
        """
        Computes the binary cross-entropy loss between predicted and true labels.

        Parameters:
        y_pred (torch.Tensor): The predicted output tensor.
        y_true (torch.Tensor): The true labels tensor.

        Returns:
        torch.Tensor: The computed loss value.
        """
        # Clamp the predicted values to avoid log(0) issues
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, min=epsilon, max=1-epsilon)
        # Compute the binary cross-entropy loss
        loss = -(y_true * torch.log(y_pred) +
                 (1 - y_true) * torch.log(1 - y_pred))
        return loss.mean()

#### Important Parameters

In [80]:
learning_rate = 0.1
epochs = 10

### Training Pipeline

In [81]:
# create model
model = SimpleNN(X_train_tensor)
# 30 weights and 1 bias
model.weights.shape, model.bias.shape

(torch.Size([30, 1]), torch.Size([1]))

In [82]:
y_pred = model.forward(X_train_tensor)
y_pred.shape

torch.Size([455, 1])

In [92]:
# define loop
for epoch in range(epochs):

    # forward pass
    y_pred = model.forward(X_train_tensor)
    # print(y_pred)

    # compute loss using the cross-entropy loss function
    loss = model.loss_function(y_pred, y_train_tensor)

    # backward pass
    loss.backward()

    # update weights and bias
    # no_grad is used to avoid tracking the gradient for this operation
    # this is important because we don't want to compute gradients for the update step
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad

    # zero gradients -- because they keep accumulating over iterations
    model.weights.grad.zero_()
    model.bias.grad.zero_()
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")
    

Epoch 1/10, Loss: 1.513478754526905
Epoch 2/10, Loss: 1.4828636304513347
Epoch 3/10, Loss: 1.4547858470052482
Epoch 4/10, Loss: 1.429609377467109
Epoch 5/10, Loss: 1.4069237697192651
Epoch 6/10, Loss: 1.3863022300479482
Epoch 7/10, Loss: 1.3672660589117585
Epoch 8/10, Loss: 1.3498590361263074
Epoch 9/10, Loss: 1.3338153442135947
Epoch 10/10, Loss: 1.3189104442446593


In [95]:

# evaluate model
with torch.no_grad():
    y_pred_test = model.forward(X_test_tensor)
    # print(torch.round(y_pred_test, decimals=2))
    y_pred_test = (y_pred_test > 0.5).float()
    y_pred_test
    accuracy = (y_pred_test == y_test_tensor).float().mean()
    print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")
    


Test Accuracy: 51.29%
