In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# 1. Fetch the Adult dataset from OpenML (this cell needs network access).
from sklearn.datasets import fetch_openml
adult = fetch_openml(name='adult', version=2, as_frame=True)
# as_frame=True was requested, so features and target are taken as pandas objects.
X, y = adult.data, adult.target

In [3]:
# 2. Preprocessing: encode categorical features and the target.
categorical_cols = X.select_dtypes(include='category').columns.tolist()
numerical_cols = X.select_dtypes(include=['int', 'float']).columns.tolist()

# One-hot encode categorical features. dummy_na=True adds a NaN-indicator
# column for each categorical feature. dtype=np.float32 keeps the indicators
# numeric; pandas' default indicator dtype (bool in recent versions) would make
# the concatenated frame mixed-dtype and route `.values` through an object array.
X_cat = pd.get_dummies(X[categorical_cols], dummy_na=True, dtype=np.float32)

# Standardize numerical features.
# NOTE(review): the scaler is fit on every row, before the train/test split, so
# statistics from rows that end up in the test set influence the scaling.
# Consider fitting on the training rows only.
scaler = StandardScaler()
X_num = pd.DataFrame(
    scaler.fit_transform(X[numerical_cols]),
    columns=numerical_cols,
    # Reuse X's row labels: pd.concat(axis=1) aligns on the index, so a fresh
    # RangeIndex here would misalign rows whenever X's index is not 0..n-1.
    index=X.index,
)

X_prepared = pd.concat([X_num, X_cat], axis=1).values.astype(np.float32)

# Encode target (binary: <=50K, >50K) as float32 class codes.
le = LabelEncoder()
y_prepared = le.fit_transform(y).astype(np.float32)


In [4]:
# 3. Train/test split: hold out 20% of the rows; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X_prepared,
    y_prepared,
    test_size=0.2,
    random_state=42,
)

In [5]:
# 4. PyTorch Dataset
class AdultDataset(Dataset):
    """In-memory dataset that pairs each feature row with its label.

    Args:
        X: NumPy array of features, indexed along its first axis.
        y: NumPy array of labels with one entry per row of ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same number of rows.
    """

    def __init__(self, X, y):
        # Fail early: a length mismatch would otherwise surface only as an
        # IndexError (or silently ignored labels) during iteration.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of rows, got {len(X)} and {len(y)}"
            )
        # torch.from_numpy shares memory with the source arrays (no copy).
        self.X = torch.from_numpy(X)
        # Insert an axis at position 1, e.g. shape (N,) becomes (N, 1).
        self.y = torch.from_numpy(y).unsqueeze(1)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

In [6]:
# Wrap each split's arrays in the tensor-backed dataset.
train_ds = AdultDataset(X=X_train, y=y_train)
test_ds = AdultDataset(X=X_test, y=y_test)

In [7]:
# Mini-batches of 128 samples; only the training loader shuffles.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=128,
    shuffle=True,
)
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=128,
)

In [8]:
# 5. Standard MLP Model
class MLP(nn.Module):
    """Baseline fully connected binary classifier.

    Layer widths are ``input_dim -> 8 -> 4 -> 1`` with ReLU after the first
    two linear layers and a sigmoid on the final one.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        layers = [
            nn.Linear(input_dim, 8),
            nn.ReLU(),
            nn.Linear(8, 4),
            nn.ReLU(),
            nn.Linear(4, 1),
            nn.Sigmoid(),  # For binary classification
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid output."""
        out = self.model(x)
        return out


In [9]:
from dpn_3.dpn import DPN
class MLP_DPN(nn.Module):
    """Binary classifier made of a single DPN block followed by a sigmoid.

    The block is built as ``DPN(input_dim, 13, 1)``.
    NOTE(review): the meaning of the ``13`` and ``1`` arguments is defined by
    ``dpn_3.dpn.DPN`` and is not visible here — confirm against that module.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        dpn_block = DPN(input_dim, 13, 1)
        squash = nn.Sigmoid()  # For binary classification
        self.model = nn.Sequential(dpn_block, squash)

    def forward(self, x):
        """Apply the DPN block and the sigmoid to ``x``."""
        out = self.model(x)
        return out

In [10]:
from dpn_3.dpn import DPN
class MLP_DPN_MAX(nn.Module):
    """Variant of the DPN classifier that passes ``False`` as DPN's fourth argument.

    The block is built as ``DPN(input_dim, 13, 1, False)``.
    NOTE(review): what the fourth positional flag switches is defined by
    ``dpn_3.dpn.DPN`` and is not visible here — confirm against that module.

    Args:
        input_dim: Number of features in each input row.
    """

    def __init__(self, input_dim):
        super().__init__()
        dpn_block = DPN(input_dim, 13, 1, False)
        squash = nn.Sigmoid()  # For binary classification
        self.model = nn.Sequential(dpn_block, squash)

    def forward(self, x):
        """Apply the DPN block and the sigmoid to ``x``."""
        out = self.model(x)
        return out

In [11]:

# Initialize model
# Seed the global torch RNG first so weight initialisation (and later draws
# from that RNG, such as DataLoader shuffling) are repeatable across runs.
torch.manual_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MLP(X_train.shape[1]).to(device)

In [12]:
# 6. Training setup: binary cross-entropy loss, Adam over the current model's parameters.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [13]:
from utils import train

# NOTE(review): test_loader is passed twice, presumably as both the validation
# and the test loader, and 20 is presumably the epoch count — confirm against
# utils.train.
train_metrics, eval_metrics, test_metrics = train(
    model,
    train_loader,
    test_loader,
    test_loader,
    20,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 0.7061 Average_Time_per_batch: 0.0023 Train_Accuracy: 0.7973 Train_Loss: 0.4773 Validation_Accuracy: 0.8448 Validation_Loss: 0.3343
Epoch: 2 Total_Time: 0.4566 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.8477 Train_Loss: 0.3279 Validation_Accuracy: 0.8537 Validation_Loss: 0.3157
Epoch: 3 Total_Time: 0.4797 Average_Time_per_batch: 0.0016 Train_Accuracy: 0.8516 Train_Loss: 0.3187 Validation_Accuracy: 0.8570 Validation_Loss: 0.3075
Epoch: 4 Total_Time: 0.4671 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.8535 Train_Loss: 0.3146 Validation_Accuracy: 0.8617 Validation_Loss: 0.3056
Epoch: 5 Total_Time: 0.4588 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.8551 Train_Loss: 0.3125 Validation_Accuracy: 0.8599 Validation_Loss: 0.3042
Epoch: 6 Total_Time: 0.4573 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.8548 Train_Loss: 0.3108 Validation_Accuracy: 0.8607 Validation_Loss: 0.3037
Epoch: 7 Total_Time: 0.4706 Average_Time_per_batch: 0.0015 Train_Accuracy: 0.8554

In [14]:
# Re-seed before building the DPN model so its initialisation (and later draws
# from the global torch RNG) do not depend on which cells ran before this one.
torch.manual_seed(42)
model = MLP_DPN(X_train.shape[1]).to(device)

In [15]:
# 6. Training setup: fresh loss and Adam optimizer bound to the current model's parameters.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [16]:
# NOTE(review): test_loader is passed twice, presumably as both the validation
# and the test loader, and 20 is presumably the epoch count — confirm against
# utils.train.
train_metrics, eval_metrics, test_metrics = train(
    model,
    train_loader,
    test_loader,
    test_loader,
    20,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 0.3831 Average_Time_per_batch: 0.0013 Train_Accuracy: 0.8145 Train_Loss: 0.4122 Validation_Accuracy: 0.8467 Validation_Loss: 0.3265
Epoch: 2 Total_Time: 0.3739 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.8477 Train_Loss: 0.3270 Validation_Accuracy: 0.8546 Validation_Loss: 0.3140
Epoch: 3 Total_Time: 0.3709 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.8512 Train_Loss: 0.3204 Validation_Accuracy: 0.8581 Validation_Loss: 0.3097
Epoch: 4 Total_Time: 0.3699 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.8518 Train_Loss: 0.3172 Validation_Accuracy: 0.8594 Validation_Loss: 0.3074
Epoch: 5 Total_Time: 0.3795 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.8529 Train_Loss: 0.3153 Validation_Accuracy: 0.8589 Validation_Loss: 0.3057
Epoch: 6 Total_Time: 0.3660 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.8535 Train_Loss: 0.3133 Validation_Accuracy: 0.8620 Validation_Loss: 0.3045
Epoch: 7 Total_Time: 0.3704 Average_Time_per_batch: 0.0012 Train_Accuracy: 0.8544

In [17]:
# Re-seed before building the DPN_MAX model so its initialisation (and later
# draws from the global torch RNG) do not depend on which cells ran before.
torch.manual_seed(42)
model = MLP_DPN_MAX(X_train.shape[1]).to(device)

In [18]:
# 6. Training setup: fresh loss and Adam optimizer bound to the current model's parameters.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [19]:
# NOTE(review): test_loader is passed twice, presumably as both the validation
# and the test loader, and 20 is presumably the epoch count — confirm against
# utils.train.
train_metrics, eval_metrics, test_metrics = train(
    model,
    train_loader,
    test_loader,
    test_loader,
    20,
    optimizer,
    criterion,
    device=device,
)


Epoch: 1 Total_Time: 2.5601 Average_Time_per_batch: 0.0084 Train_Accuracy: 0.8199 Train_Loss: 0.3982 Validation_Accuracy: 0.8527 Validation_Loss: 0.3167
Epoch: 2 Total_Time: 2.6895 Average_Time_per_batch: 0.0088 Train_Accuracy: 0.8507 Train_Loss: 0.3207 Validation_Accuracy: 0.8559 Validation_Loss: 0.3089
Epoch: 3 Total_Time: 2.6126 Average_Time_per_batch: 0.0085 Train_Accuracy: 0.8512 Train_Loss: 0.3174 Validation_Accuracy: 0.8576 Validation_Loss: 0.3077
Epoch: 4 Total_Time: 2.5481 Average_Time_per_batch: 0.0083 Train_Accuracy: 0.8526 Train_Loss: 0.3153 Validation_Accuracy: 0.8583 Validation_Loss: 0.3065
Epoch: 5 Total_Time: 2.5494 Average_Time_per_batch: 0.0083 Train_Accuracy: 0.8522 Train_Loss: 0.3141 Validation_Accuracy: 0.8591 Validation_Loss: 0.3054
Epoch: 6 Total_Time: 2.4231 Average_Time_per_batch: 0.0079 Train_Accuracy: 0.8535 Train_Loss: 0.3126 Validation_Accuracy: 0.8600 Validation_Loss: 0.3042
Epoch: 7 Total_Time: 2.5989 Average_Time_per_batch: 0.0085 Train_Accuracy: 0.8541