In [1]:
import pandas as pd
import numpy as np

# Dataset Loading

In [2]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')

# Splitting, Column removing and Encoding

In [3]:
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import LabelEncoder, StandardScaler
X = df.drop(['diagnosis','Unnamed: 32'],axis=1)
y = df['diagnosis']

train_x, test_x, train_y, test_y = split(X,y,test_size=0.2, random_state = 42)

scaler = StandardScaler()
train_x = scaler.fit_transform(train_x)
test_x = scaler.fit_transform(test_x)

encoder = LabelEncoder()
train_y = encoder.fit_transform(train_y)
test_y = encoder.fit_transform(test_y)

# Converting into Tensor

In [4]:
import torch

train_x = torch.from_numpy(train_x.astype(np.float32))
train_y = torch.from_numpy(train_y.astype(np.float32))

test_x = torch.from_numpy(test_x.astype(np.float32))
test_y = torch.from_numpy(test_y.astype(np.float32))

# Use of Custom Dataset and Dataloader

In [8]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features
        self.labels = labels
    def __len__(self):
      return len(self.features)

    def __getitem__(self, idx):
      return self.features[idx], self.labels[idx]

In [9]:
train_x_dataset = CustomDataset(train_x, train_y)
test_x_dataset = CustomDataset(test_x, test_y)

In [10]:
train_x_dataloader = DataLoader(train_x_dataset, batch_size=4, shuffle=True)
test_x_dataloader = DataLoader(test_x_dataset, batch_size=4, shuffle=True)

# Defining the model

In [12]:
import torch.nn as nn

class MyModel(nn.Module):
  def __init__(self, num_features):
    super(MyModel, self).__init__()

    self.network = nn.Sequential(
        nn.Linear(num_features, 16),
        nn.ReLU(),
        nn.Linear(16, 8),
        nn.ReLU(),
        nn.Linear(8, 1),
        nn.Sigmoid()
    )

  def forward(self, features):
    return self.network(features)


# Important Parameter

In [13]:
learning_rate = 0.1
epochs = 25

In [14]:
# Create the model
model = MyModel(train_x.shape[1])

# define Optimiser
optimiser  = torch.optim.SGD(model.parameters(), lr=learning_rate)

# define loss function
loss_fn = nn.BCELoss()

# Model Pipelie

In [17]:
for epoch in range(epochs):
  for batch_features, batch_labels in train_x_dataloader:

    # foreword pass
    y_pred = model(batch_features)

    # loss calculation
    loss = loss_fn(y_pred, batch_labels.view(-1, 1))

    # clear Gradient
    optimiser.zero_grad()

    # backward pass
    loss.backward()

    # update the weights
    optimiser.step()

  # print output
  print(f"Epochs: {epoch+1}, Loss: {loss.item()}")

Epochs: 1, Loss: 0.004176496993750334
Epochs: 2, Loss: 0.07810018211603165
Epochs: 3, Loss: 0.012529191561043262
Epochs: 4, Loss: 0.04047079756855965
Epochs: 5, Loss: 1.66096031665802
Epochs: 6, Loss: 3.964467396144755e-05
Epochs: 7, Loss: 0.002223167335614562
Epochs: 8, Loss: 0.00037291055195964873
Epochs: 9, Loss: 0.02236497402191162
Epochs: 10, Loss: 0.00840203370898962
Epochs: 11, Loss: 0.0014376160688698292
Epochs: 12, Loss: 0.00018280558288097382
Epochs: 13, Loss: 0.0004011767159681767
Epochs: 14, Loss: 3.4142280469495745e-07
Epochs: 15, Loss: 0.007614761125296354
Epochs: 16, Loss: 0.00030025787418708205
Epochs: 17, Loss: 0.02665439434349537
Epochs: 18, Loss: 3.1625947940483456e-06
Epochs: 19, Loss: 0.00011792717123171315
Epochs: 20, Loss: 0.0025922127533704042
Epochs: 21, Loss: 0.0019700052216649055
Epochs: 22, Loss: 0.011568107642233372
Epochs: 23, Loss: 5.715872566924851e-11
Epochs: 24, Loss: 1.4128873772278894e-05
Epochs: 25, Loss: 1.9018048078578431e-06


# Model Evaluation

In [18]:
# Model evaluation using test_loader
model.eval()  # Set the model to evaluation mode
accuracy_list = []

with torch.no_grad():
    for batch_features, batch_labels in test_x_dataloader:
        # Forward pass
        y_pred = model(batch_features)
        y_pred = (y_pred > 0.8).float()  # Convert probabilities to binary predictions

        # Calculate accuracy for the current batch
        batch_accuracy = (y_pred.view(-1) == batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

# Calculate overall accuracy
overall_accuracy = sum(accuracy_list) / len(accuracy_list)
print(f'Accuracy: {overall_accuracy:.4f}')

Accuracy: 0.9828
