In [21]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from torchsummary import summary
import numpy as np

In [22]:
df = pd.read_csv('diabetes.csv')

In [23]:
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [24]:
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [25]:
X = df.drop('Outcome', axis=1).values
y = df['Outcome'].values

In [27]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [28]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((614, 8), (154, 8), (614,), (154,))

In [29]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.int64)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.int64)

In [30]:
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = TensorDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [31]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

(torch.Size([614, 8]),
 torch.Size([154, 8]),
 torch.Size([614]),
 torch.Size([154]))

In [41]:
class DiabetesDense(nn.Module):
    def __init__(self):
        super(DiabetesDense, self).__init__()
        self.fc1 = nn.Linear(8, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)  # 2 outcomes in the dataset
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model, loss function, and optimizer
model = DiabetesDense()

In [42]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [46]:
# Variables to store loss and accuracy
train_losses = []
test_accuracies = []

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_dataloader:
        optimizer.zero_grad()
        
        # BCEWithLogitsLoss expects float labels
        labels = labels.float().unsqueeze(1)  # shape: (batch_size, 1)

        outputs = model(inputs)              # shape: (batch_size, 1)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    train_losses.append(running_loss / len(train_dataloader))

    # Evaluate on test data
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_dataloader:
            outputs = model(inputs)  # shape: (batch_size, 1)

            # Sigmoid activation to get probabilities
            probs = torch.sigmoid(outputs)
            predicted = (probs > 0.5).int().squeeze(1)  # binary prediction (0 or 1)
            labels = labels.int()  # make sure labels are int for comparison

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    test_accuracies.append(accuracy)

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {train_losses[-1]:.4f}, Accuracy: {accuracy:.2f}%")

print("Training complete.")

Epoch 1/100, Loss: 0.2397, Accuracy: 77.27%
Epoch 2/100, Loss: 0.2406, Accuracy: 77.92%
Epoch 3/100, Loss: 0.2304, Accuracy: 78.57%
Epoch 4/100, Loss: 0.2389, Accuracy: 77.27%
Epoch 5/100, Loss: 0.2470, Accuracy: 77.92%
Epoch 6/100, Loss: 0.2323, Accuracy: 78.57%
Epoch 7/100, Loss: 0.2293, Accuracy: 77.27%
Epoch 8/100, Loss: 0.2282, Accuracy: 77.27%
Epoch 9/100, Loss: 0.2258, Accuracy: 77.27%
Epoch 10/100, Loss: 0.2313, Accuracy: 77.92%
Epoch 11/100, Loss: 0.2266, Accuracy: 77.27%
Epoch 12/100, Loss: 0.2253, Accuracy: 77.92%
Epoch 13/100, Loss: 0.2200, Accuracy: 76.62%
Epoch 14/100, Loss: 0.2507, Accuracy: 77.27%
Epoch 15/100, Loss: 0.2156, Accuracy: 77.92%
Epoch 16/100, Loss: 0.2119, Accuracy: 77.92%
Epoch 17/100, Loss: 0.2101, Accuracy: 77.27%
Epoch 18/100, Loss: 0.2098, Accuracy: 77.92%
Epoch 19/100, Loss: 0.2123, Accuracy: 77.27%
Epoch 20/100, Loss: 0.2151, Accuracy: 77.27%
Epoch 21/100, Loss: 0.2218, Accuracy: 77.92%
Epoch 22/100, Loss: 0.2071, Accuracy: 77.27%
Epoch 23/100, Loss: