In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader, random_split
import torch.optim as optim

In [2]:
from ucimlrepo import fetch_ucirepo

In [3]:
def get_data():

    _data = fetch_ucirepo(id=2)
    
    raw = _data.data.features
    targets = _data.data.targets
    
    num_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
    cat_features = ['workclass', 'education', 'marital-status', 
                    'occupation', 'relationship', 'race', 'sex', 'native-country']

    df1 = pd.get_dummies(raw[cat_features], dtype=float)
    df2 = raw[num_features]
    df = pd.concat([df1, df2], axis=1)

    _d = {
        "<=50K" : 0,
        "<=50K." : 0,
        ">50K" : 1,
        ">50K." : 1
    }        
    targets = targets["income"].map(_d)

    return df, targets

In [4]:
class DiabetesDataset(Dataset):

    def __init__(self, uci_id):
        
        _X, _y = get_data()
        self.X = torch.tensor(_X.values, dtype=torch.float32)
        self.y = torch.tensor(_y.values, dtype=torch.float32).reshape(-1, 1)
        

    def __len__(self):

        return len(self.X)

    
    def __getitem__(self, idx):

        return self.X[idx], self.y[idx]


dataset = DiabetesDataset(uci_id=2)
len_data = len(dataset)
train_len = int(0.8 * len_data)
test_size = len_data - train_len

train_data, test_data = random_split(dataset, [train_len, test_size])

In [5]:
batch_size = 32


train_dl, test_dl = DataLoader(
    train_data,
    shuffle=True,
    batch_size=batch_size
), DataLoader(
    test_data,
    shuffle=True,
    batch_size=batch_size
)

In [6]:
# Model, optimizer and loss function

feature_size = dataset.X.shape[1]
hidden_size1 = 150
output_size = 1

class DBModel(nn.Module):

    def __init__(self):
        super().__init__()

        self.l1 = nn.Linear(feature_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_size1, output_size)
        self.op = nn.Sigmoid()

    
    def forward(self, x):
        return self.op(
            self.l2(
                self.relu1(
                    self.l1(
                        x
                    )
                )
            )
        )

model = DBModel()
optimizer = optim.Adam(
    model.parameters(),
    lr = 0.01
)
loss_fn = nn.BCELoss()

In [7]:
# Training Loop
epochs = 100

for epoch in range(epochs):

    model.train()
    epoch_loss = 0.0
    batch_accuracy = []

    for inputs, targets in train_dl:

        inputs = inputs.float()  # Ensure inputs are of type torch.float
        targets = targets.float()  # Ensure targets are of type torch.float

        outputs = model(inputs)  # Raw probabilities from the sigmoid

        # Compute loss
        loss = loss_fn(outputs, targets)

        optimizer.zero_grad()  # Zero out the gradients
        loss.backward()  # Compute gradients
        optimizer.step()  # Update model parameters

        epoch_loss += loss.item()
    #     with torch.no_grad():
            
    #         outputs = model(inputs)
    #         pred = (outputs > .7).float().view(-1)

    #         correct_predictions = (pred == targets.view(-1)).sum().item()
    #         accuracy = correct_predictions / targets.size(0)
    #         batch_accuracy.append(accuracy)
            
    # print(f"Train Accuracy: {np.mean(batch_accuracy)}")

    if epoch % 10 == 0:
        model.eval()
        with torch.no_grad():
            batch_test_acc = []
    
            for inputs, targets in test_dl:
    
                outputs = model(inputs)
                pred = (outputs > .7).float().view(-1)
    
                correct_predictions = (pred == targets.view(-1)).sum().item()
                accuracy = correct_predictions / targets.size(0)
    
                batch_test_acc.append(accuracy)
    
            print(f"Test Accuracy: {np.mean(batch_test_acc)}\n")
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

Test Accuracy: 0.8122390159767611

Epoch 1/100, Loss: 7105.6457
Test Accuracy: 0.7838484931009441

Epoch 11/100, Loss: 9379.1051
Test Accuracy: 0.7794004175744371

Epoch 21/100, Loss: 9378.5229
Test Accuracy: 0.7905319535221496

Epoch 31/100, Loss: 9377.9963
Test Accuracy: 0.7861406136528686

Epoch 41/100, Loss: 9377.3745
Test Accuracy: 0.7571259985475671

Epoch 51/100, Loss: 9375.0616
Test Accuracy: 0.7807734204793028

Epoch 61/100, Loss: 9375.7867
Test Accuracy: 0.7962055192447349

Epoch 71/100, Loss: 9376.3740
Test Accuracy: 0.7819421750181553

Epoch 81/100, Loss: 7087.9571
Test Accuracy: 0.8049314633260711

Epoch 91/100, Loss: 7086.5743


In [None]:
# Initial Max test acc: 80.49%
# 