In [2]:
import pandas as pd
import numpy as np

from ucimlrepo import fetch_ucirepo

In [3]:
def get_data(fetch=None):
    """Download UCI dataset id=2 and return model-ready features and labels.

    The column names and income labels used here match the UCI "Adult"
    census-income data (presumably what id=2 resolves to -- confirm against
    the repository if that matters).

    Parameters
    ----------
    fetch : callable, optional
        Loader called as ``fetch(id=2)``; it must return an object exposing
        ``.data.features`` and ``.data.targets`` as DataFrames. Defaults to
        ``ucimlrepo.fetch_ucirepo``. Passing a different loader lets the
        notebook run from a cached/offline copy.

    Returns
    -------
    df : pandas.DataFrame
        One-hot (float) columns for the categorical features, followed by the
        four numeric columns, in that order.
    targets : pandas.Series
        Income label encoded as 0 (``<=50K``) or 1 (``>50K``).

    Raises
    ------
    ValueError
        If the ``income`` column contains a value that is not one of the four
        known label spellings. Without this check such rows would silently
        become NaN labels.
    """
    loader = fetch_ucirepo if fetch is None else fetch
    _data = loader(id=2)

    raw = _data.data.features
    income = _data.data.targets["income"]

    num_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
    cat_features = ['workclass', 'education', 'marital-status',
                    'occupation', 'relationship', 'race', 'sex', 'native-country']

    # Categorical columns become float indicator columns; numeric columns are
    # passed through unchanged and appended on the right.
    df1 = pd.get_dummies(raw[cat_features], dtype=float)
    df2 = raw[num_features]
    df = pd.concat([df1, df2], axis=1)

    # The label column carries two spellings per class (with and without a
    # trailing period); both spellings map to the same integer.
    _d = {
        "<=50K": 0,
        "<=50K.": 0,
        ">50K": 1,
        ">50K.": 1,
    }
    targets = income.map(_d)

    # Series.map yields NaN for values missing from the dict; fail loudly
    # instead of handing NaN labels to the loss function.
    unmapped = targets.isna()
    if unmapped.any():
        unknown = sorted(set(map(str, income[unmapped])))
        raise ValueError(f"Unrecognised income labels: {unknown}")

    return df, targets

In [4]:
# Fetch the dataset once: `df` holds the encoded feature columns and
# `targets` the 0/1 income labels, both as returned by get_data().
df, targets = get_data()

In [5]:
# Positional hold-out split: the first 80% of rows (in their existing order,
# no shuffling) form the training set and the remaining rows the test set.
train_len = int(0.8 * len(df))
test_len = len(df) - train_len

train_data = df.iloc[:train_len]
test_data = df.iloc[train_len:]

train_targets = targets.iloc[:train_len]
test_targets = targets.iloc[train_len:]

In [7]:
from sklearn.preprocessing import StandardScaler

In [8]:
# Standardise every column. The scaler is fitted on the training split only
# and then re-used on the test split, so no test statistics leak into training.
scaler = StandardScaler()

# scaler.(fit_)transform returns a bare NumPy array; rebuild the DataFrames
# with BOTH the original columns and the original index. Keeping the index
# means test_data stays label-aligned with test_targets (which still carries
# the original row labels) for any later pandas operation.
train_data = pd.DataFrame(
    scaler.fit_transform(train_data),
    columns=train_data.columns,
    index=train_data.index,
)
test_data = pd.DataFrame(
    scaler.transform(test_data),
    columns=test_data.columns,
    index=test_data.index,
)

In [10]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

In [11]:
class DiabetesDataset(Dataset):
    """Map-style dataset over a tabular feature frame and its labels.

    Both inputs are converted once, at construction time, into float32
    tensors: ``X`` holds ``data.values`` as-is and ``y`` holds
    ``targets.values`` reshaped to a single column.
    """

    def __init__(self, data, targets):
        feature_array = data.values
        label_array = targets.values

        self.X = torch.tensor(feature_array, dtype=torch.float32)
        # Labels are stored as a column (-1, 1) rather than a flat vector.
        label_tensor = torch.tensor(label_array, dtype=torch.float32)
        self.y = label_tensor.reshape(-1, 1)

    def __len__(self):
        """Number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        features = self.X[idx]
        label = self.y[idx]
        return features, label

In [12]:
# Mini-batch size shared by both loaders.
batch_size = 32

# Wrap each split as a tensor-backed dataset.
train_dataset = DiabetesDataset(train_data, train_targets)
test_dataset = DiabetesDataset(test_data, test_targets)

# Training batches are reshuffled on every pass; test batches keep row order.
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dl = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
import torch.optim as optim

In [15]:
# Network dimensions: the input width is read from the training tensor's
# second axis (one entry per feature column); the network has a single hidden
# layer of 150 units and one output unit.
feature_size = train_dataset.X.size(1)
hidden_size1 = 150
output_size = 1

class DBModel(nn.Module):
    """One-hidden-layer binary classifier: Linear -> ReLU -> Linear -> Sigmoid.

    Parameters
    ----------
    in_features : int, optional
        Width of the input vectors. Defaults to the module-level
        ``feature_size``.
    hidden_features : int, optional
        Width of the hidden layer. Defaults to the module-level
        ``hidden_size1``.
    out_features : int, optional
        Width of the output. Defaults to the module-level ``output_size``.

    Calling ``DBModel()`` with no arguments behaves exactly as before; the
    module-level names are only looked up for arguments left as ``None``.
    The forward pass returns sigmoid outputs (values in (0, 1)), which is the
    input format ``nn.BCELoss`` expects.
    """

    def __init__(self, in_features=None, hidden_features=None, out_features=None):
        super().__init__()
        if in_features is None:
            in_features = feature_size
        if hidden_features is None:
            hidden_features = hidden_size1
        if out_features is None:
            out_features = output_size

        # Attribute names are kept stable so existing state_dict keys still match.
        self.l1 = nn.Linear(in_features, hidden_features)
        self.relu1 = nn.ReLU()
        self.l2 = nn.Linear(hidden_features, out_features)
        self.op = nn.Sigmoid()

    def forward(self, x):
        """Map a batch ``x`` to per-sample sigmoid outputs."""
        hidden = self.relu1(self.l1(x))
        return self.op(self.l2(hidden))

# Seed PyTorch's global random generator before the model is created so the
# initial weights (and later draws from that generator) are the same on every
# Restart & Run All.
torch.manual_seed(42)

model = DBModel()
# Adam over all model parameters with a learning rate of 0.01.
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Binary cross-entropy on probabilities; the model already applies a sigmoid.
loss_fn = nn.BCELoss()

In [16]:
# Train for a fixed number of epochs, printing the summed batch loss and the
# training accuracy after each epoch, and the test accuracy periodically.
epochs = 50
for epoch in range(epochs):
    epoch_loss = 0.0      # running sum of the per-batch loss values
    train_correct = 0     # correctly classified training samples this epoch
    train_seen = 0        # training samples processed this epoch

    model.train()
    # The batch variables are deliberately NOT called `inputs`/`targets`:
    # `targets` is the notebook-level label Series, and rebinding it here
    # would break any earlier cell that is re-run after training.
    for batch_inputs, batch_targets in train_dl:
        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = loss_fn(outputs, batch_targets)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

        with torch.no_grad():
            # Threshold the sigmoid output at 0.5 to get hard 0/1 predictions.
            pred = (outputs > 0.5).float()
            train_correct += (pred == batch_targets).sum().item()
            train_seen += batch_targets.size(0)

    # Accuracy over samples, not the mean of per-batch accuracies: the last
    # batch may be smaller and would otherwise be over-weighted.
    train_accuracy = train_correct / max(train_seen, 1)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")

    # Evaluate on the test data on the 1st, 11th, 21st, ... epoch and also
    # after the final epoch, so the finished model always gets a test score.
    if epoch % 10 == 0 or epoch == epochs - 1:
        model.eval()
        test_correct = 0
        test_seen = 0
        with torch.no_grad():
            for batch_inputs, batch_targets in test_dl:
                outputs = model(batch_inputs)
                pred = (outputs > 0.5).float()
                test_correct += (pred == batch_targets).sum().item()
                test_seen += batch_targets.size(0)

        test_accuracy = test_correct / max(test_seen, 1)
        print(f"Test Accuracy: {test_accuracy:.4f}\n")

Epoch 1/50, Loss: 459.8313, Train Accuracy: 0.8405
Test Accuracy: 0.8486

Epoch 2/50, Loss: 440.5209, Train Accuracy: 0.8485
Epoch 3/50, Loss: 427.0266, Train Accuracy: 0.8495
Epoch 4/50, Loss: 446.1654, Train Accuracy: 0.8512
Epoch 5/50, Loss: 461.4412, Train Accuracy: 0.8525
Epoch 6/50, Loss: 438.7478, Train Accuracy: 0.8537
Epoch 7/50, Loss: 422.4259, Train Accuracy: 0.8551
Epoch 8/50, Loss: 427.2044, Train Accuracy: 0.8564
Epoch 9/50, Loss: 454.2737, Train Accuracy: 0.8553
Epoch 10/50, Loss: 420.7595, Train Accuracy: 0.8581
Epoch 11/50, Loss: 435.1024, Train Accuracy: 0.8571
Test Accuracy: 0.8522

Epoch 12/50, Loss: 477.1595, Train Accuracy: 0.8575
Epoch 13/50, Loss: 457.3715, Train Accuracy: 0.8571
Epoch 14/50, Loss: 535.7233, Train Accuracy: 0.8589
Epoch 15/50, Loss: 445.8385, Train Accuracy: 0.8575
Epoch 16/50, Loss: 423.9448, Train Accuracy: 0.8604
Epoch 17/50, Loss: 424.9641, Train Accuracy: 0.8608
Epoch 18/50, Loss: 434.2736, Train Accuracy: 0.8588
Epoch 19/50, Loss: 445.3595