## I have implemented the binary classification pipeline using a Neural Network (Multi-Layer Perceptron) in PyTorch, utilizing the breast-cancer.csv

## Problem Description: Breast Cancer Classification
The objective is to classify whether a breast tumor is malignant (1) or benign (0)

In [5]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

## 1. PREPROCESSing

### The CSV is loaded using pandas. The id column is dropped, and the diagnosis target is mapped to binary values. The data is then split into Training, Validation, and Test sets and scaled using StandardScaler

In [6]:
data = pd.read_csv('breast-cancer.csv')

In [7]:
data = data.drop(columns=['id'])

In [8]:
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0})

In [9]:
X = data.drop(columns=['diagnosis'])
y = data['diagnosis']

### Split: 70% Train, 15% Validation, 15% Test

In [None]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

## Scaling

In [None]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

### Convert to PyTorch Tensors

In [None]:
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train.values).view(-1, 1)
X_val = torch.FloatTensor(X_val)
y_val = torch.FloatTensor(y_val.values).view(-1, 1)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test.values).view(-1, 1)

## 2. DEFINE MODEL

### A neural network with input, hidden, and output layers is defined, using ReLU activation for hidden layers and Sigmoid for the final output to produce a probability

In [None]:
class BreastCancerNN(nn.Module):
    def __init__(self, input_dim):
        super(BreastCancerNN, self).__init__()
        self.layer1 = nn.Linear(input_dim, 16)
        self.layer2 = nn.Linear(16, 8)
        self.output = nn.Linear(8, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.layer1(x))
        x = self.relu(self.layer2(x))
        x = self.sigmoid(self.output(x))
        return x

In [14]:
model = BreastCancerNN(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

## 3. TRAIN & VALIDATION

### The model is trained using the Adam optimizer and Binary Cross Entropy Loss, it is validated after every 10 epochs

In [15]:
epochs = 50
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predictions = model(X_train)
    loss = criterion(predictions, y_train)
    loss.backward()
    optimizer.step()
    
    model.eval()
    with torch.no_grad():
        val_preds = model(X_val)
        val_loss = criterion(val_preds, y_val)
    
    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs} | Train Loss: {loss.item():.4f} | Val Loss: {val_loss.item():.4f}")

Epoch 10/50 | Train Loss: 0.3299 | Val Loss: 0.2868
Epoch 20/50 | Train Loss: 0.1150 | Val Loss: 0.0983
Epoch 30/50 | Train Loss: 0.0709 | Val Loss: 0.0661
Epoch 40/50 | Train Loss: 0.0527 | Val Loss: 0.0662
Epoch 50/50 | Train Loss: 0.0410 | Val Loss: 0.0818


## 4. TEST (Final Evaluation)

### The final model is evaluated

### Convert probabilities to binary predictions

In [None]:
model.eval()
with torch.no_grad():
    test_preds = model(X_test)
    final_preds = (test_preds > 0.5).float()

In [17]:
print("\nFinal Test Accuracy:", accuracy_score(y_test, final_preds))
print("\nClassification Report:\n", classification_report(y_test, final_preds))


Final Test Accuracy: 0.9883720930232558

Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      0.98      0.99        60
         1.0       0.96      1.00      0.98        26

    accuracy                           0.99        86
   macro avg       0.98      0.99      0.99        86
weighted avg       0.99      0.99      0.99        86

