In [1]:
import torch as t
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


In [2]:
class CustomDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Both containers are stored as given (no copy) and are indexed with the
    same position, so they are expected to be aligned row-for-row.
    """

    def __init__(self,feature,label):
        """Keep references to the feature container and the label container."""
        self.features, self.labels = feature, label

    def __len__(self):
        """Return the number of samples, i.e. the size of the features' first axis."""
        n_samples = self.features.shape[0]
        return n_samples

    def __getitem__(self,index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.features[index]
        target = self.labels[index]
        return sample, target

In [3]:
# Download the breast-cancer CSV and keep only its first 10 rows.
# NOTE(review): every later cell therefore trains on a 10-row sample; the
# truncation looks like a quick-iteration aid -- confirm before drawing
# conclusions from the results.
DATA_URL = 'https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv'
df = pd.read_csv(DATA_URL).head(10)

In [4]:
# Remove the two columns that are not used as features or target.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell idempotent: the in-place version raised KeyError when the cell
# was run a second time because the columns were already gone.
# Trade-off: a misspelled column name is now skipped silently instead of raising.
df = df.drop(columns=['id', 'Unnamed: 32'], errors='ignore')

In [5]:
# Hold out 20% of the rows for testing. Column 0 of `df` is used as the
# target and every remaining column as a feature.
# random_state pins the shuffle so the split (and everything fitted on it
# downstream) is the same on every Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, 1:],
    df.iloc[:, 0],
    test_size=0.2,
    random_state=42,
)

In [6]:
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
x_train, x_test = scaler.transform(X_train), scaler.transform(X_test)

In [7]:
# Turn the label values into integer class ids. The encoder learns its
# classes from the training labels and is reused for the test labels.
encoder = LabelEncoder().fit(y_train)
y_train, y_test = encoder.transform(y_train), encoder.transform(y_test)

In [8]:
# Convert every NumPy array to a float32 torch tensor. The labels are cast
# to float as well because they are later passed to nn.BCELoss as targets.
x_train_tensor, x_test_tensor, y_train_tensor, y_test_tensor = (
    t.from_numpy(array).to(t.float32)
    for array in (x_train, x_test, y_train, y_test)
)

In [9]:
# defining the model
class MySimpleNN(nn.Module):
    """Single linear layer followed by a sigmoid (logistic-regression style model).

    Maps ``num_feature`` inputs to one value in (0, 1) per sample.
    """

    def __init__(self,num_feature):
        """Create the ``num_feature -> 1`` linear layer and the sigmoid activation."""
        super().__init__()
        self.linear = nn.Linear(in_features=num_feature, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self,X):
        """Return the sigmoid of the linear projection of ``X``."""
        return self.sigmoid(self.linear(X))
   

In [10]:
# Training configuration: loss function, hyper-parameters, model and optimizer.

# Seed torch's global RNG so the model's initial weights are repeatable.
t.manual_seed(42)

# Binary cross-entropy on probabilities; the model's forward pass already
# ends in a sigmoid.
loss = nn.BCELoss()

# parameter
# learning_rate used to be 0.1 but was never read: the optimizer hard-coded
# lr=0.01. The variable now holds the value that was actually in effect and
# is the single place to tune it.
learning_rate = 0.01
epochs = 10

# One input per feature column of the training tensor.
model = MySimpleNN(x_train_tensor.shape[1])

# Plain SGD over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)


In [11]:
# Wrap the training tensors so each index yields a (features, label) pair.
dataset = CustomDataset(feature=x_train_tensor, label=y_train_tensor)


In [12]:
# Serve the training dataset in mini-batches of 8, reshuffled on each pass.
dataloader = DataLoader(
    dataset=dataset,
    batch_size=8,
    shuffle=True,
)


In [14]:
# Sanity check: fetch a single batch from the loader and print it.
first_batch = next(iter(dataloader), None)
if first_batch is not None:
    print(first_batch)

[tensor([[ 0.8368,  1.0205,  0.8352,  0.8164, -0.1432, -0.1759,  0.3397,  1.1157,
         -0.1037, -0.7413,  0.5424, -0.6546,  0.1339,  0.5619, -0.5589,  0.2751,
         -0.0146,  1.4820, -0.1203, -0.3080,  0.6731,  0.4613,  0.4567,  0.6146,
         -0.5463, -0.0576, -0.0528,  0.9316, -0.1289, -0.7090],
        [-0.8924,  0.9037, -0.9466, -0.9465,  0.3876, -0.1093, -1.1343, -1.2775,
          0.3524,  0.3381, -0.1879,  2.1328, -0.2622, -0.5320,  0.6521, -0.2424,
         -0.9266, -0.2806, -0.6528,  0.1307, -0.9379,  1.0929, -0.9897, -0.9206,
          0.2798, -0.3100, -1.1650, -1.1922, -0.4581,  0.2211],
        [ 1.0103, -0.9003,  1.0635,  1.0815, -0.6740, -0.5686,  0.3482,  0.2857,
         -1.0374, -0.8276,  0.5946, -0.6810,  0.5974,  0.5724,  1.8768, -0.5433,
          1.2449,  0.9821, -0.4646, -0.0243,  0.4182, -1.6827,  0.4463,  0.3613,
         -0.8217, -1.0418, -0.3598, -1.0245, -1.1150, -1.0740],
        [-1.5546,  0.7787, -1.5116, -1.4874,  1.7346,  1.6208,  0.9648,  0.317

In [77]:
# Training loop: for every epoch, take one optimizer step per mini-batch and
# print that batch's loss.
for epoch in range(epochs):
    for x, y in dataloader:
        # Call the module itself instead of .forward(): nn.Module.__call__
        # runs any registered hooks, which a direct forward() call skips.
        y_pred = model(x)

        # Reshape the labels to a column so they match the (batch, 1)
        # predictions produced by the model's single output unit.
        los = loss(y_pred, y.reshape(-1, 1))

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()

        los.backward()

        optimizer.step()

        print(los)

tensor(0.7193, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7157, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7122, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7087, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7053, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7019, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6986, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6953, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6920, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6888, grad_fn=<BinaryCrossEntropyBackward0>)
