In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

sns.set(style="whitegrid")
print("GPU Available:", torch.cuda.is_available())


ModuleNotFoundError: No module named 'pandas'

In [38]:
df = pd.read_csv("/kaggle/input/diabetes-prediction-dataset/diabetes_prediction_dataset.csv")

df["gender"] = LabelEncoder().fit_transform(df["gender"])
df["smoking_history"] = LabelEncoder().fit_transform(df["smoking_history"])

X = df.drop("diabetes", axis=1)
y = df["diabetes"]


In [39]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor  = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1,1)
y_test_tensor  = torch.tensor(y_test.values, dtype=torch.float32).view(-1,1)


In [40]:
class_counts = y_train.value_counts().to_dict()
weight_0 = 1 / class_counts[0]
weight_1 = 1 / class_counts[1]

sample_weights = [weight_1 if t == 1 else weight_0 for t in y_train_tensor.numpy().flatten()]
sample_weights = torch.tensor(sample_weights, dtype=torch.float32)

sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)

train_ds = TensorDataset(X_train_tensor, y_train_tensor)
test_ds  = TensorDataset(X_test_tensor, y_test_tensor)

train_dl = DataLoader(train_ds, batch_size=512, sampler=sampler)
test_dl  = DataLoader(test_ds, batch_size=512, shuffle=False)


In [41]:
class DiabetesNN(nn.Module):
    def __init__(self):
        super(DiabetesNN, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(8, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),

            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.4),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

model = DiabetesNN().cuda()


In [42]:
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0008, weight_decay=1e-5)

best_loss = float("inf")
patience = 8
wait = 0


In [43]:
epochs = 60
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for Xb, yb in train_dl:
        Xb, yb = Xb.cuda(), yb.cuda()

        optimizer.zero_grad()
        preds = model(Xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_dl)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

    if avg_loss < best_loss:
        best_loss = avg_loss
        wait = 0
        torch.save(model.state_dict(), "best_model.pth")
    else:
        wait += 1
        if wait >= patience:
            print("EARLY STOPPING TRIGGERED")
            break


Epoch 1/60 - Loss: 0.3030
Epoch 2/60 - Loss: 0.2516
Epoch 3/60 - Loss: 0.2349
Epoch 4/60 - Loss: 0.2220
Epoch 5/60 - Loss: 0.2161
Epoch 6/60 - Loss: 0.2158
Epoch 7/60 - Loss: 0.2099
Epoch 8/60 - Loss: 0.2084
Epoch 9/60 - Loss: 0.2059
Epoch 10/60 - Loss: 0.2062
Epoch 11/60 - Loss: 0.2022
Epoch 12/60 - Loss: 0.2022
Epoch 13/60 - Loss: 0.2013
Epoch 14/60 - Loss: 0.1974
Epoch 15/60 - Loss: 0.2024
Epoch 16/60 - Loss: 0.2004
Epoch 17/60 - Loss: 0.2015
Epoch 18/60 - Loss: 0.1999
Epoch 19/60 - Loss: 0.1969
Epoch 20/60 - Loss: 0.1985
Epoch 21/60 - Loss: 0.1986
Epoch 22/60 - Loss: 0.1980
Epoch 23/60 - Loss: 0.1967
Epoch 24/60 - Loss: 0.1973
Epoch 25/60 - Loss: 0.1928
Epoch 26/60 - Loss: 0.1949
Epoch 27/60 - Loss: 0.1973
Epoch 28/60 - Loss: 0.1977
Epoch 29/60 - Loss: 0.1963
Epoch 30/60 - Loss: 0.1937
Epoch 31/60 - Loss: 0.1926
Epoch 32/60 - Loss: 0.1945
Epoch 33/60 - Loss: 0.1947
Epoch 34/60 - Loss: 0.1940
Epoch 35/60 - Loss: 0.1923
Epoch 36/60 - Loss: 0.1939
Epoch 37/60 - Loss: 0.1935
Epoch 38/6

In [44]:
model.load_state_dict(torch.load("best_model.pth"))
model.eval()

with torch.no_grad():
    preds = model(X_test_tensor.cuda()).cpu().numpy()
    preds = (preds > 0.5).astype(int)

acc = accuracy_score(y_test_tensor, preds)
print("FINAL TEST ACCURACY:", acc)
print("\nClassification Report:\n", classification_report(y_test_tensor, preds))


FINAL TEST ACCURACY: 0.89745

Classification Report:
               precision    recall  f1-score   support

         0.0       0.99      0.90      0.94     18300
         1.0       0.45      0.92      0.60      1700

    accuracy                           0.90     20000
   macro avg       0.72      0.91      0.77     20000
weighted avg       0.95      0.90      0.91     20000



In [None]:


threshold = 0.65 

with torch.no_grad():
    probs = model(X_test_tensor.cuda()).cpu().numpy()
    preds_thresh = (probs > threshold).astype(int)

acc2 = accuracy_score(y_test_tensor, preds_thresh)
print(f"\nAccuracy (threshold={threshold}):", acc2)
print("\nClassification Report:\n", classification_report(y_test_tensor, preds_thresh))



Accuracy (threshold=0.65): 0.9378

Classification Report:
               precision    recall  f1-score   support

         0.0       0.99      0.95      0.97     18300
         1.0       0.59      0.85      0.70      1700

    accuracy                           0.94     20000
   macro avg       0.79      0.90      0.83     20000
weighted avg       0.95      0.94      0.94     20000

