In [1]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
import time

# Adult Dataset

In [2]:
# Read the Adult dataset CSV into a DataFrame and preview its first three rows
data = pd.read_csv(filepath_or_buffer="datasets/adult.csv")
data.head(n=3)

Unnamed: 0,age,workclass,fnlwgt,education,educational-num,marital-status,occupation,relationship,race,gender,capital-gain,capital-loss,hours-per-week,native-country,income
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K


In [3]:
# Preprocessing the data
# NOTE: this cell overwrites columns of `data` in place, so it is meant to run once
# per load of the dataset. Re-running it on already-encoded data is rejected by the
# income check below instead of silently producing NaN labels.

# Encode categorical variables as integer codes, keeping one fitted encoder per
# column in `label_encoders` so the fitted classes stay available afterwards.
label_encoders = {}
categorical_cols = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'gender', 'native-country']
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    data[col] = label_encoders[col].fit_transform(data[col])

# Map income column to binary values
income_mapping = {'<=50K': 0, '>50K': 1}
income_encoded = data['income'].map(income_mapping)

# Series.map turns every value that is missing from the mapping into NaN. Fail
# loudly here rather than letting NaN targets reach the tensor conversion and loss.
unmapped_mask = income_encoded.isna()
if unmapped_mask.any():
    unexpected_labels = data.loc[unmapped_mask, 'income'].unique().tolist()
    raise ValueError(
        f"Unexpected values in 'income' column: {unexpected_labels!r}; "
        f"expected only {list(income_mapping)!r}. "
        "Reload the dataset before re-running this cell."
    )

# No NaN remains at this point, so the labels can be stored as plain integers.
data['income'] = income_encoded.astype('int64')

In [4]:
# Separate the income target from the remaining feature columns
y = data['income']
X = data.drop(columns='income')

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features with statistics fitted on the training split only,
# then apply that same fitted scaler to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Wrap the arrays as PyTorch tensors: float32 features and long (int64) class labels
X_train_tensor, X_test_tensor = (
    torch.tensor(scaled_features, dtype=torch.float32)
    for scaled_features in (X_train_scaled, X_test_scaled)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels.values, dtype=torch.long)
    for labels in (y_train, y_test)
)

In [5]:
# Shared training settings: the classification loss and the number of input features.
# (Models and optimizers themselves are constructed later, once per experiment.)
criterion = nn.CrossEntropyLoss()

input_dim = len(X_train.columns)

# Single training step over the given batch
def train(model, optimizer, criterion, X_train, y_train):
    """Run one forward pass on a batch and, if an optimizer is given, one update.

    Args:
        model: module called as ``model(X_train)`` to produce the outputs.
        optimizer: optimizer to step. Any falsy value (e.g. ``None``) skips
            gradient clearing, backpropagation and the parameter update, so
            only the loss is computed.
        criterion: callable invoked as ``criterion(outputs, y_train)``.
        X_train: inputs passed to the model.
        y_train: targets passed to the criterion.

    Returns:
        The loss of this batch as a Python number (``loss.item()``).
    """
    model.train()
    update_weights = bool(optimizer)

    # Clear gradients left over from the previous step before the new backward pass
    if update_weights:
        optimizer.zero_grad()

    batch_loss = criterion(model(X_train), y_train)

    if update_weights:
        batch_loss.backward()
        optimizer.step()

    return batch_loss.item()

# Accuracy of the model's predictions on a labelled set
def evaluate(model, X_test, y_test):
    """Return the accuracy score of the model's predictions for ``X_test``.

    The model is switched to eval mode and run without gradient tracking. The
    predicted class of each row is the index of its largest output along
    dimension 1, and the predictions are scored against ``y_test`` with
    ``accuracy_score``.
    """
    model.eval()
    with torch.no_grad():
        predicted = torch.max(model(X_test), dim=1).indices
    return accuracy_score(y_test, predicted)

In [6]:
from models.SentimentClassifier import AdultComplexNN, AdultSimpleNN
from utils import SignSGD

# Grid search over model architecture, optimizer, learning rate and epoch count.
# Each combination builds a new model and a new optimizer, trains for NUM_EPOCH
# steps on the full training tensors, evaluates on the test tensors after every
# step, and appends the results to `output_str`, which is written to
# measurements.txt at the end.

# Constructors looked up by name. Models are built as factory(input_dim) and
# optimizers as factory(model.parameters(), lr=LEARNING_RATE).
model_factories = {
    'simple': AdultSimpleNN,
    'complex': AdultComplexNN,
}
optimizer_factories = {
    'adam': optim.Adam,
    'adagrad': optim.Adagrad,
    'signsgd': SignSGD,
    'sgd': optim.SGD,
}

output_str = []
# run all combinations
for m in ['simple', 'complex']:
    # The loop variable holds only the optimizer's *name*. It must stay distinct
    # from the `optimizer` object built below: sharing one name for both meant that
    # after the first combination no name comparison matched any more, so a stale
    # optimizer tied to an earlier model's parameters was reused and the new model
    # was never updated.
    for optimizer_name in ['adam', 'adagrad', 'signsgd', 'sgd']:
        for LEARNING_RATE in [0.01, 0.02]:
            for NUM_EPOCH in [10, 20]:
                output_str += [f"model:{m} optimizer:{optimizer_name} epochs:{NUM_EPOCH} learning_rate:{LEARNING_RATE}"]

                # Fresh model and optimizer for every combination so runs are independent
                model = model_factories[m](input_dim)
                optimizer = optimizer_factories[optimizer_name](model.parameters(), lr=LEARNING_RATE)

                # Train and evaluate model
                # (the measured time covers both the training steps and the per-epoch evaluation)
                start = time.time()
                for epoch in range(NUM_EPOCH):
                    loss = train(model, optimizer, criterion, X_train_tensor, y_train_tensor)
                    accuracy = evaluate(model, X_test_tensor, y_test_tensor)
                    output_str += [f"Epoch {epoch+1}: Loss={loss:.4f}, Accuracy={accuracy:.4f}"]

                output_str += [f"Time needed: {time.time()-start:.4f}s\n\n"]

# Persist the collected log lines, one entry per line
with open('measurements.txt','w') as f:
    f.write('\n'.join(output_str))

  from .autonotebook import tqdm as notebook_tqdm
