In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

## Understanding the Dataset

In [32]:
data = load_breast_cancer()
print(data.DESCR)

.. _breast_cancer_dataset:

Breast cancer Wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

:Number of Instances: 569

:Number of Attributes: 30 numeric, predictive attributes and the class

:Attribute Information:
    - radius (mean of distances from center to points on the perimeter)
    - texture (standard deviation of gray-scale values)
    - perimeter
    - area
    - smoothness (local variation in radius lengths)
    - compactness (perimeter^2 / area - 1.0)
    - concavity (severity of concave portions of the contour)
    - concave points (number of concave portions of the contour)
    - symmetry
    - fractal dimension ("coastline approximation" - 1)

    The mean, standard error, and "worst" or largest (mean of the three
    worst/largest values) of these features were computed for each image,
    resulting in 30 features.  For instance, field 0 is Mean Radius, field
    10 is Radius SE, field 20 is Worst Radius.

    - 

In [33]:
print(data.feature_names)
print(data.target_names)

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
['malignant' 'benign']


In [34]:
print(data.data.shape)
print(data.target.shape)

(569, 30)
(569,)


In [35]:
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target
print(df.tail())

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mean symmetry  \
564           0.11590         0.24390              0.13890         0.1726   
565           0.10340         0.14400              0.09791         0.1752   
566           0.10230         0.09251              0.05302         0.1590   
567           0.27700         0.35140              0.15200         0.2397   
568           0.04362         0.00000              0.00000         0.1587   

     mean fractal dimension  ...  worst texture  worst perimeter  wo

In [36]:
print(df.describe())


       mean radius  mean texture  mean perimeter    mean area  \
count   569.000000    569.000000      569.000000   569.000000   
mean     14.127292     19.289649       91.969033   654.889104   
std       3.524049      4.301036       24.298981   351.914129   
min       6.981000      9.710000       43.790000   143.500000   
25%      11.700000     16.170000       75.170000   420.300000   
50%      13.370000     18.840000       86.240000   551.100000   
75%      15.780000     21.800000      104.100000   782.700000   
max      28.110000     39.280000      188.500000  2501.000000   

       mean smoothness  mean compactness  mean concavity  mean concave points  \
count       569.000000        569.000000      569.000000           569.000000   
mean          0.096360          0.104341        0.088799             0.048919   
std           0.014064          0.052813        0.079720             0.038803   
min           0.052630          0.019380        0.000000             0.000000   
25%      

In [37]:
print(df['target'].value_counts())

target
1    357
0    212
Name: count, dtype: int64


## Loading and preprocessing the Data


In [38]:
# Load the data
X = data.data
y = data.target

# Normalizing the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state=42)

# Converting into tensors
X_train_tensor = torch.tensor(X_train, dtype = torch.float32)
X_test_tensor = torch.tensor(X_test, dtype = torch.float32)
y_train_tensor = torch.tensor(y_train, dtype = torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test, dtype = torch.float32).unsqueeze(1)

## Creating DataLoader

In [39]:
# creating Datasets
train_dataset = TensorDataset(X_train_tensor,y_train_tensor)
test_dataset = TensorDataset(X_test_tensor,y_test_tensor)

# Create Dataloaders

train_loader = DataLoader(dataset=train_dataset,shuffle=True,batch_size=32)
test_loader = DataLoader(dataset=test_dataset,batch_size=32)

## Building the neural nerwork

In [40]:
class BreastCancerNet(nn.Module):
    def __init__(self):
        super(BreastCancerNet,self).__init__()
        self.fc1 = nn.Linear(30,16)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(16,8)
        self.output = nn.Linear(8,1)
        self.sigmoid = nn.Sigmoid()

    def forward(self,x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.output(x))
        return x
    
model = BreastCancerNet()

## Defining Loss and Optimizer

In [41]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(),lr=0.001)

In [42]:
epochs = 50

for epoch in range(epochs):
    model.train()
    runningLoss = 0.0
    for inputs,labels in train_loader:

        # forward pass
        outputs = model(inputs)
        loss = criterion(outputs,labels)

        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        runningLoss+=loss.item()
    print(f"Epoch {epoch+1}/{epochs} | Loss: {runningLoss / len(train_loader):.4f}")



Epoch 1/50 | Loss: 3.4276
Epoch 2/50 | Loss: 0.7858
Epoch 3/50 | Loss: 0.6140
Epoch 4/50 | Loss: 0.4760
Epoch 5/50 | Loss: 0.4454
Epoch 6/50 | Loss: 0.4091
Epoch 7/50 | Loss: 0.4032
Epoch 8/50 | Loss: 0.3835
Epoch 9/50 | Loss: 0.3869
Epoch 10/50 | Loss: 0.3626
Epoch 11/50 | Loss: 0.3637
Epoch 12/50 | Loss: 0.3406
Epoch 13/50 | Loss: 0.3261
Epoch 14/50 | Loss: 0.3192
Epoch 15/50 | Loss: 0.2935
Epoch 16/50 | Loss: 0.2942
Epoch 17/50 | Loss: 0.2999
Epoch 18/50 | Loss: 0.2805
Epoch 19/50 | Loss: 0.2640
Epoch 20/50 | Loss: 0.2772
Epoch 21/50 | Loss: 0.2857
Epoch 22/50 | Loss: 0.2666
Epoch 23/50 | Loss: 0.2698
Epoch 24/50 | Loss: 0.2525
Epoch 25/50 | Loss: 0.2500
Epoch 26/50 | Loss: 0.2502
Epoch 27/50 | Loss: 0.2280
Epoch 28/50 | Loss: 0.2415
Epoch 29/50 | Loss: 0.2361
Epoch 30/50 | Loss: 0.2423
Epoch 31/50 | Loss: 0.2476
Epoch 32/50 | Loss: 0.2419
Epoch 33/50 | Loss: 0.2173
Epoch 34/50 | Loss: 0.2278
Epoch 35/50 | Loss: 0.2311
Epoch 36/50 | Loss: 0.2242
Epoch 37/50 | Loss: 0.2198
Epoch 38/5

## Evaluating the model

In [43]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = (outputs > 0.5).float()
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Test Accuracy: 95.61%
