In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder


In [2]:
# Location of the breast-cancer CSV; it is fetched over HTTP, so this cell
# needs network access when it runs.
DATA_URL = 'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'

# Parse the remote CSV into the working DataFrame used by the following cells.
df = pd.read_csv(filepath_or_buffer=DATA_URL)

In [3]:
# Remove the "id" and "Unnamed: 32" columns so they are not treated as
# features by the positional slicing done in the next cell.
# Reassigning (instead of inplace=True) and passing errors="ignore" makes the
# cell idempotent: running it a second time no longer raises KeyError because
# the columns have already been removed.
df = df.drop(columns=["id", "Unnamed: 32"], errors="ignore")

In [4]:
# Column 0 of `df` is the target; every remaining column is a feature.
features = df.iloc[:, 1:]
target = df.iloc[:, 0]

# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so a fresh "Restart & Run All" reproduces the
# same split (and therefore comparable accuracy); stratify keeps the class
# proportions of `target` the same in the train and test partitions.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
    stratify=target,
)

In [5]:
# Standardise the features. The scaling statistics are learned from the
# training split only and then applied to both splits, so nothing about the
# test rows influences the transform.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
# Turn the target labels into integer class ids. The label -> id mapping is
# learned from the training labels and reused unchanged for the test labels.
encoder = LabelEncoder()
encoder.fit(y_train)
y_train = encoder.transform(y_train)
y_test = encoder.transform(y_test)

In [7]:
# Convert the splits to float32 tensors (the labels are float as well because
# nn.BCELoss requires floating-point targets).
# torch.as_tensor accepts NumPy arrays *and* tensors, so re-running this cell
# is harmless; the previous `.astype(...)` form raised AttributeError on a
# second run because the names were already bound to tensors.
x_train = torch.as_tensor(x_train, dtype=torch.float32)
x_test = torch.as_tensor(x_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32)
y_test = torch.as_tensor(y_test, dtype=torch.float32)


In [8]:
from torch.utils.data import Dataset,DataLoader

In [9]:
class CustomDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Args:
        features: Indexable container exposing ``.shape`` whose first
            dimension is the number of samples.
        labels: Indexable container with one entry per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` hold a different number of
            samples.
    """

    def __init__(self,features,labels):
        super().__init__()
        # Fail fast on misaligned inputs; otherwise the mismatch would only
        # surface later as an IndexError (or silently ignored labels) while
        # a DataLoader is iterating.
        if features.shape[0] != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {features.shape[0]} and {len(labels)}"
            )
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples (rows of ``features``)."""
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index],self.labels[index]

In [10]:
# Wrap each split's tensors in a CustomDataset so a DataLoader can batch them.
train_dataset = CustomDataset(features=x_train, labels=y_train)
test_dataset = CustomDataset(features=x_test, labels=y_test)

In [11]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
# Evaluation data is served in its stored order: shuffling gives no benefit at
# test time and would make per-sample outputs impossible to line up with the
# original rows.
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [12]:
class SimpleNN(nn.Module):
    """Single-hidden-layer binary classifier.

    Architecture: Linear(num_features, hidden_units) -> ReLU ->
    Linear(hidden_units, 1) -> Sigmoid, so each input row is mapped to one
    value in (0, 1).

    Args:
        num_features: Number of input features per sample.
        hidden_units: Width of the hidden layer. Defaults to 8, the value that
            was previously hard-coded.
    """

    def __init__(self,num_features,hidden_units=8):
        super().__init__()
        # Layer order inside the Sequential is kept as-is so parameter names
        # (network.0.*, network.2.*) stay stable for saved state_dicts.
        self.network = nn.Sequential(
            nn.Linear(num_features,hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units,1),
            nn.Sigmoid()
        )

    def forward(self,features):
        """Run ``features`` through the network.

        Args:
            features: Tensor whose last dimension equals ``num_features``.

        Returns:
            Tensor with a trailing dimension of size 1 holding sigmoid outputs.
        """
        return self.network(features)

In [13]:
# Seed torch's global RNG before the model is built so the initial weights
# (and any later draws from the same generator) are identical on every fresh
# run; without it the results differ from run to run.
torch.manual_seed(42)

# Binary cross-entropy on probabilities, matching the Sigmoid that ends SimpleNN.
loss_fn = nn.BCELoss()
# Input width is taken from the number of feature columns in the training data.
model = SimpleNN(x_train.shape[1])

In [14]:
# Training hyperparameters consumed by the optimizer and the training loop.
epochs = 100         # number of full passes over the training loader
learning_rate = 0.1  # step size handed to the optimizer

In [15]:
# Plain stochastic gradient descent over every trainable parameter of `model`.
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [16]:
# Put the model in training mode explicitly: if this cell is re-run after the
# evaluation cell (which calls model.eval()), training would otherwise proceed
# in eval mode.
model.train()

for epoch in range(epochs):
    running_loss = 0.0
    num_batches = 0

    for batch_features, batch_labels in train_loader:
        # Forward pass; labels are reshaped to (batch, 1) to match the
        # model's output shape as required by the loss.
        y_pred = model(batch_features)
        loss = loss_fn(y_pred, batch_labels.reshape(-1, 1))

        # Clear gradients left over from the previous step, backpropagate,
        # then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    # Report progress every 10 epochs so convergence can be checked without
    # flooding the cell output.
    if (epoch + 1) % 10 == 0 and num_batches > 0:
        print(f"Epoch {epoch + 1}/{epochs} - mean batch loss: {running_loss / num_batches:.4f}")

In [17]:
# Evaluate on the held-out split: eval mode plus no_grad so no autograd graph
# is built during inference.
model.eval()
with torch.no_grad():
    # Call the module itself rather than .forward() so nn.Module.__call__
    # (and any registered hooks) runs.
    y_pred = model(x_test)
    # Threshold the sigmoid outputs at 0.5 to obtain hard 0/1 predictions.
    y_pred = (y_pred > 0.5).float()

    # Flatten both sides explicitly; a bare squeeze() would also collapse the
    # batch dimension when the test set holds a single sample.
    overall_accuracy = (y_pred.reshape(-1) == y_test.reshape(-1)).float().mean()

print(f"Accuracy :{overall_accuracy:.4f}")

Accuracy :0.9825
