In [1]:
from sklearn.datasets import make_classification
import torch

In [2]:
X,y=make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42
)

In [3]:
X = torch.tensor(X,dtype=torch.float32)
y= torch.tensor(y,dtype=torch.float32)

In [4]:
from torch.utils.data import Dataset,DataLoader

### We can add any kind of transformation in the `__getitem__` method

In [5]:
class CustomDataset(Dataset):

    def __init__(self,features,labels):
        super().__init__()

        self.features = features
        self.labels = labels
    
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self, index):
        return self.features[index],self.labels[index]


In [6]:
dataset = CustomDataset(X,y)

In [7]:
len(dataset)

100

In [8]:
dataset[0]

(tensor([0.5594, 2.3887]), tensor(0.))

In [9]:
dataloader=DataLoader(dataset=dataset,batch_size=5,shuffle=True)

In [10]:
for batch_features, batch_labels in dataloader:
    print(batch_features, batch_labels)
    print("\n---------------------------------\n")

tensor([[ 0.9642,  0.5560],
        [-0.8080,  1.1966],
        [ 1.8399,  2.3045],
        [ 0.6274, -1.3293],
        [ 1.1733,  0.7364]]) tensor([1., 0., 1., 1., 1.])

---------------------------------

tensor([[ 0.3413,  0.5730],
        [ 0.8258,  0.5348],
        [-1.4074, -1.5683],
        [-0.9428,  1.1001],
        [-0.8168, -0.6796]]) tensor([1., 1., 0., 0., 0.])

---------------------------------

tensor([[ 1.5607, -0.4280],
        [-1.2172, -1.3672],
        [-1.6930, -1.6145],
        [-2.0035, -2.3995],
        [-1.2857,  0.7302]]) tensor([1., 0., 0., 0., 0.])

---------------------------------

tensor([[-1.5485, -1.4353],
        [ 1.3727,  1.0795],
        [-1.1776, -1.2059],
        [-1.0983, -0.8602],
        [-0.7874,  1.1874]]) tensor([0., 1., 0., 0., 0.])

---------------------------------

tensor([[-0.3490,  1.5601],
        [-1.4348, -1.4270],
        [ 1.6588, -0.4313],
        [ 1.2801,  1.2894],
        [ 1.1520, -0.7135]]) tensor([0., 0., 1., 1., 1.])

-----

In [11]:
## Actual Application

import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [12]:
df = pd.read_csv('https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv')
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [13]:
df.drop(['id','Unnamed: 32'],axis=1,inplace=True)

In [14]:
X_train,X_test,y_train,y_test=train_test_split(df.drop('diagnosis',axis=1),df['diagnosis'],test_size=0.2)

In [15]:
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.fit_transform(X_test)

encoder=LabelEncoder()
y_train=encoder.fit_transform(y_train)
y_test=encoder.transform(y_test)

X_train=torch.from_numpy(X_train.astype(np.float32))
X_test=torch.from_numpy(X_test.astype(np.float32))
y_train=torch.from_numpy(y_train.astype(np.float32))
y_test=torch.from_numpy(y_test.astype(np.float32))

In [16]:
train_dataset=CustomDataset(X_train,y_train)
test_dataset=CustomDataset(X_test,y_test)

In [17]:
train_loader=DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader=DataLoader(test_dataset,batch_size=32,shuffle=True)

In [18]:
import torch.nn as nn


class Neural_Network(nn.Module):
    def __init__(self, num_features):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(num_features, 60),
            nn.ReLU(),
            nn.Linear(60, 1),
            nn.Sigmoid()
        )

    def forward(self, features):
       
        return self.network(features)

In [21]:
model=Neural_Network(train_dataset.features.shape[1])

optmizer=torch.optim.SGD(model.parameters(),lr=0.1)

loss_function=nn.BCELoss()

In [22]:
for epoch in range(1000):

    for batch_features,batch_labels in train_loader:

        y_pred=model(batch_features)

        loss=loss_function(y_pred,batch_labels.view(-1,1))

        optmizer.zero_grad()

        loss.backward()

        optmizer.step()

    print(f"Epoch: {epoch+1}, Loss: {loss.item()}")

Epoch: 1, Loss: 0.27582046389579773
Epoch: 2, Loss: 0.1187482699751854
Epoch: 3, Loss: 0.08845497667789459
Epoch: 4, Loss: 0.08253645896911621
Epoch: 5, Loss: 0.1685347706079483
Epoch: 6, Loss: 0.11194176971912384
Epoch: 7, Loss: 0.05975604057312012
Epoch: 8, Loss: 0.18624863028526306
Epoch: 9, Loss: 0.0039685992524027824
Epoch: 10, Loss: 0.1037030816078186
Epoch: 11, Loss: 0.05797841027379036
Epoch: 12, Loss: 0.007559332065284252
Epoch: 13, Loss: 0.16593050956726074
Epoch: 14, Loss: 0.05393781140446663
Epoch: 15, Loss: 0.025781231001019478
Epoch: 16, Loss: 0.1124039962887764
Epoch: 17, Loss: 0.08470319211483002
Epoch: 18, Loss: 0.004033155273646116
Epoch: 19, Loss: 0.03984353318810463
Epoch: 20, Loss: 0.08223722875118256
Epoch: 21, Loss: 0.01962788589298725
Epoch: 22, Loss: 0.1121383085846901
Epoch: 23, Loss: 0.7132829427719116
Epoch: 24, Loss: 0.027779703959822655
Epoch: 25, Loss: 0.01402166485786438
Epoch: 26, Loss: 0.03690887242555618
Epoch: 27, Loss: 0.014540466479957104
Epoch: 28

In [25]:
model.eval()   # Set the model to evaluation mode 
accuracy_list=[]

with torch.no_grad():

    for batch_features,batch_labels in test_loader:

        y_pred=model(batch_features)

        y_pred=(y_pred>0.6).float()

        batch_accuracy=(y_pred.view(-1)==batch_labels).float().mean().item()
        accuracy_list.append(batch_accuracy)

overall_accuracy=sum(accuracy_list)/len(accuracy_list)
print(overall_accuracy*100)

97.04861044883728
