In [148]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import math
import matplotlib.pyplot as plt
import lightgbm as lgb
import xgboost as xgb

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
import sklearn.metrics as sk_metrics

import torch
import torch.nn as nn
import torch.autograd as autograd
from torch.utils.data import TensorDataset, DataLoader

from ucimlrepo import fetch_ucirepo 

In [75]:
# fetch dataset 
adult = fetch_ucirepo(id=2) 
  
# data (as pandas dataframes) 
X = adult.data.features 
X = X.drop(["education"], axis=1)   # similar columns exist in the dataset, which is education-num
X = X.dropna()

y = adult.data.targets 
y = y.iloc[X.index]

# metadata 
print(adult.metadata) 
  
# variable information 
print(adult.variables) 

{'uci_id': 2, 'name': 'Adult', 'repository_url': 'https://archive.ics.uci.edu/dataset/2/adult', 'data_url': 'https://archive.ics.uci.edu/static/public/2/data.csv', 'abstract': 'Predict whether income exceeds $50K/yr based on census data. Also known as "Census Income" dataset. ', 'area': 'Social Science', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 48842, 'num_features': 14, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Age', 'Income', 'Education Level', 'Other', 'Race', 'Sex'], 'target_col': ['income'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1996, 'last_updated': 'Mon Aug 07 2023', 'dataset_doi': '10.24432/C5XW20', 'creators': ['Barry Becker', 'Ronny Kohavi'], 'intro_paper': None, 'additional_info': {'summary': 'Extraction was done by Barry Becker from the 1994 Census database.  A set of reasonably clean records was extracted using the following conditions: ((AAG

#### Data Preprocessing

In [126]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25)

cat_var = ['workclass', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
X_train = pd.get_dummies(data=X_train, columns=cat_var, drop_first=True)
X_test = pd.get_dummies(data=X_test, columns=cat_var, drop_first=True)
X_test = X_test.reindex(columns=X_train.columns, fill_value=False)

num_var = ['age', 'fnlwgt', 'capital-gain', 'capital-loss', 'hours-per-week']
scaler = StandardScaler()
scaler_fit = scaler.fit(X_train[num_var])
X_train_ = pd.DataFrame(scaler_fit.transform(X_train[num_var]))
X_test_ = pd.DataFrame(scaler_fit.transform(X_test[num_var]))
X_train = X_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

X_train_.columns = num_var
X_test_.columns = num_var
X_train[num_var] = X_train_[num_var]
X_test[num_var] = X_test_[num_var]

y_train["income"] = pd.Categorical(y_train["income"]).codes
y_test["income"] = pd.Categorical(y_test["income"]).codes

X_train, X_test, y_train, y_test = X_train.to_numpy().astype(float), X_test.to_numpy().astype(float), y_train.to_numpy().reshape(-1), y_test.to_numpy().reshape(-1)

label, num_class = np.unique(y_train, return_counts=True)
rewards = 1 / num_class**(1/2)
rewards_lab = np.round(rewards / np.linalg.norm(rewards), 4)
print("\nReward for each class label.")
for idx, reward in enumerate(rewards_lab):
    print("\t- Class {} : {:.4f}".format(idx, reward))
    
minority = np.argmin(num_class)


Reward for each class label.
	- Class 0 : 0.2770
	- Class 1 : 0.4070
	- Class 2 : 0.4937
	- Class 3 : 0.7169


In [108]:
for i in range(len(num_class)):
    print(f"class {i}, proportion {num_class[i]/np.sum(num_class)}")

class 0, proportion 0.5188996220075599
class 1, proportion 0.24029519409611808
class 2, proportion 0.1633467330653387
class 3, proportion 0.07745845083098338


#### Helper function

In [187]:
_, num_class = np.unique(y_train, return_counts=True)
rewards = 1 / num_class**(1/2)
rewards_lab = np.round(rewards / np.linalg.norm(rewards), 4)

def get_reward(actual, pred):
    r = rewards_lab[actual].item()
    return r if pred == actual else -r

def get_reward_batch(actual, pred):
    rewards_table = torch.tensor(rewards_lab, device=actual.device)
    reward = rewards_table.gather(0, actual.to(torch.int64))
    return torch.where(actual == pred, reward, -reward)

def macro_avg_precision(y_pred, y_true):
    '''
    Calculate precision for each class and take the average
    '''
    eps = 1e-10
    assert type(y_pred) == type(y_true)
    if type(y_pred) != torch.Tensor:
        y_pred = torch.tensor(y_pred)
        y_true = torch.tensor(y_true)
        
    num_classes = len(torch.unique(y_true))
    precision = 0
    
    for c in torch.unique(y_true):
        temp_tp = torch.where((y_pred == c) & (y_true == c), 1, 0).sum()
        temp_fp = torch.where((y_pred == c) & (y_true != c), 1, 0).sum()
        
        temp_precision = temp_tp / (temp_tp + temp_fp + eps)
        
        precision += temp_precision
    
    return precision / num_classes

def micro_avg_precision(y_pred, y_true):
    '''
    Calculate class wise TP and FP, then use it to calculate overall precision
    '''
    eps = 1e-10
    assert type(y_pred) == type(y_true)
    if type(y_pred) != torch.Tensor:
        y_pred = torch.tensor(y_pred)
        y_true = torch.tensor(y_true)
    
    num_classes = len(torch.unique(y_true))
    
    tp = 0
    fp = 0
    
    for c in torch.unique(y_true):
        tp += torch.where((y_pred == c) & (y_true == c), 1, 0).sum()
        fp += torch.where((y_pred == c) & (y_true != c), 1, 0).sum()
    
    precision = tp / (tp+fp+eps)
    return precision

def pred_minority_summary(y_truth, y_pred):
    assert type(y_pred) == type(y_truth)
    if type(y_pred) != torch.Tensor:
        y_pred = torch.tensor(y_pred)
        y_truth = torch.tensor(y_truth)
        
    actual_minority = torch.bincount(y_truth.reshape(1,-1)[0])[1:].sum().item()
    pred_count_dist = torch.bincount(y_pred.reshape(1,-1)[0])

    if len(pred_count_dist) > 1:
        pred_minority = pred_count_dist[1:].sum().item()
    else:
        # uniq_element = pred_count_dist.unique().item()
        pred_minority = 0
        uniq_element = y_pred.unique().item()
        print(f"Only class {uniq_element} is predicted")

    pred_correct = torch.where((y_pred == y_truth) & (y_truth!=0), 1, 0).sum().item()
    
    return (actual_minority, pred_minority, pred_correct)

#### Neural Bandits

In [115]:
class NeuralEPSG(nn.Module):
    def __init__(self, num_arms, context_dim, hidden_dim) -> None:
        super().__init__()
        self.num_arms = num_arms
        self.context_dim = context_dim
        self.hidden_dim = hidden_dim
        self.training_step = 0
        
        self.model = nn.Sequential(
            nn.Linear(context_dim + num_arms - 1, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )
        
        self.optimizer = torch.optim.Adam(self.model.parameters())
        
    def forward(self, context):
        return self.model(context)
    
    def cal_reward(self, x):
        device = next(self.model.parameters()).device
        dtype = next(self.model.parameters()).dtype
        
        if len(x.shape) == 1:
            x = x.unsqueeze(dim=0)
        bs = x.shape[0]
        est_reward = []
        
        for a in range(self.num_arms):
            context = torch.zeros(bs, self.num_arms, device=device)
            context = torch.cat([context[:,:a].reshape(bs,-1), x, context[:,a+1:].reshape(bs,-1)], dim=1).to(dtype)
            pred = self.model(context)
            est_reward.append(pred)
            
        est_reward = torch.cat(est_reward, dim=1)
        return est_reward

    def select_arm(self, x):
        device = next(self.model.parameters()).device
        if len(x.shape) == 1:
            x = x.unsqueeze(dim=0)
        bs = x.shape[0]
        
        epsilon = torch.clamp(
            torch.tensor([(0.01 - 1) / 10000 * self.training_step + 1]),
            min=0.001,
            max=1
        )
        
        self.training_step += bs
        
        if torch.rand(1) < epsilon:
            # action = torch.randint(0, self.num_arms+1, (bs,))
            return torch.randint(0, self.num_arms, (bs,), device=device)
        else:
            est_reward = self.cal_reward(x)
            return torch.argmax(est_reward, dim=1).detach()
        
    def update(self, x, chosen_arm, reward):
        device = next(self.model.parameters()).device
        if len(x.shape) == 1:
            x = x.unsqueeze(0)
        bs = x.shape[0]
        zeros = torch.zeros(bs, self.context_dim + self.num_arms - 1, device=device)
        chosen_arm = chosen_arm.unsqueeze(1).repeat(1, self.context_dim) + torch.arange(self.context_dim).unsqueeze(0).to(device)
        context = zeros.scatter(1, chosen_arm, x)
        
        # pred_reward = self.forward(context)[chosen_arm]
        pred_reward = self.model(context)
        
        if type(reward) != torch.Tensor:
            device = next(self.model.parameters()).device
            dtype = next(self.model.parameters()).dtype
            reward = torch.tensor(reward, dtype=dtype, device=device).reshape(-1,1)
        if len(reward.shape) == 1:
            reward = reward.reshape(-1,1)
        loss = nn.MSELoss()(pred_reward, reward.to(torch.float))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

In [198]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

n_features = X_train.shape[1]
hidden_dim = 2048
n_actions = len(np.unique(y_train))

epsg = NeuralEPSG(num_arms=n_actions, context_dim=n_features, hidden_dim=hidden_dim).to(device)

features_tensor = torch.tensor(X_train, dtype=torch.float, device=device)
labels_tensor = torch.tensor(y_train, dtype=torch.float, device=device)
dataset = TensorDataset(features_tensor, labels_tensor)
dataloader = DataLoader(dataset, batch_size=1024, shuffle=False)


recall_list, precision_list, f_score_list, g_mean_list, epr_list = [], [], [], [], []
recall_list_val, precision_list_val, f_score_list_val, g_mean_list_val, epr_list_val = [], [], [], [], []
best_epr = 0

for epoch in tqdm(range(5)):
    epsg.training_step = 0
    for batch_idx, (features, labels) in enumerate(dataloader):
        action = epsg.select_arm(features)
        reward = get_reward_batch(labels, action)
        epsg.update(features, action, reward)

  0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 5/5 [00:17<00:00,  3.42s/it]


In [199]:
model = epsg
model.eval()

x = X_train
y_truth = torch.tensor(y_train)

n_sep = 1000
sep = math.ceil(x.shape[0] / n_sep)
y_pred = torch.tensor([], dtype=torch.int)
# y_proba = torch.tensor([])

for i in tqdm(range(math.ceil(x.shape[0]/sep))):
    features = torch.tensor(x[i*sep:(i+1)*sep], dtype=torch.float, device=device)
    cur_y_pred  = model.select_arm(features).cpu()
    # cur_y_proba = nn.Softmax(dim=1)(model.cal_reward(features)).detach()[:,1].cpu()
    
    y_pred = torch.cat([y_pred, cur_y_pred], dim=0)
    # y_proba = torch.cat([y_proba, cur_y_proba], dim=0)
    
    del features
    torch.cuda.empty_cache()

minority_summary = pred_minority_summary(y_truth, y_pred)
macro_avg_pre = macro_avg_precision(y_pred, y_truth)
micro_avg_pre = micro_avg_precision(y_pred, y_truth)
print("Training Data")
print(f"Actual minority: {minority_summary[0]}; Predicted minority: {minority_summary[1]}; Correctly predicted: {minority_summary[2]}")
print(f"Macro Average Precision: {macro_avg_pre}; Micro Average Precision: {micro_avg_pre}")

100%|██████████| 981/981 [00:02<00:00, 390.68it/s]

Training Data
Actual minority: 16037; Predicted minority: 8109; Correctly predicted: 3732
Macro Average Precision: 0.3935583531856537; Micro Average Precision: 0.5781184434890747





In [200]:
model = epsg
model.eval()

x = X_test
y_truth = torch.tensor(y_test)

n_sep = 1000
sep = math.ceil(x.shape[0] / n_sep)
y_pred = torch.tensor([], dtype=torch.int)
# y_proba = torch.tensor([])

for i in tqdm(range(math.ceil(x.shape[0]/sep))):
    features = torch.tensor(x[i*sep:(i+1)*sep], dtype=torch.float, device=device)
    cur_y_pred  = model.select_arm(features).cpu()
    # cur_y_proba = nn.Softmax(dim=1)(model.cal_reward(features)).detach()[:,1].cpu()
    
    y_pred = torch.cat([y_pred, cur_y_pred], dim=0)
    # y_proba = torch.cat([y_proba, cur_y_proba], dim=0)
    
    del features
    torch.cuda.empty_cache()

minority_summary = pred_minority_summary(y_truth, y_pred)
macro_avg_pre = macro_avg_precision(y_pred, y_truth)
micro_avg_pre = micro_avg_precision(y_pred, y_truth)
print("Test Data")
print(f"Actual minority: {minority_summary[0]}; Predicted minority: {minority_summary[1]}; Correctly predicted: {minority_summary[2]}")
print(f"Macro Average Precision: {macro_avg_pre}; Micro Average Precision: {micro_avg_pre}")

100%|██████████| 953/953 [00:01<00:00, 660.63it/s]

Test Data
Actual minority: 6864; Predicted minority: 3553; Correctly predicted: 1636
Macro Average Precision: 0.33944010734558105; Micro Average Precision: 0.5798978209495544





#### XGBoost

In [139]:
clf = xgb.XGBClassifier(tree_method="hist", enable_categorical=True)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_train)
y_truth = y_train

minority_summary = pred_minority_summary(y_truth, y_pred)
macro_avg_pre = macro_avg_precision(y_pred, y_truth)
micro_avg_pre = micro_avg_precision(y_pred, y_truth)
print("Training Data")
print(f"Actual minority: {minority_summary[0]}; Predicted minority: {minority_summary[1]}; Correctly predicted: {minority_summary[2]}")
print(f"Macro Average Precision: {macro_avg_pre}; Micro Average Precision: {micro_avg_pre}")

y_pred = clf.predict(X_test)
y_truth = y_test

minority_summary = pred_minority_summary(y_truth, y_pred)
macro_avg_pre = macro_avg_precision(y_pred, y_truth)
micro_avg_pre = micro_avg_precision(y_pred, y_truth)
print("Test Data")
print(f"Actual minority: {minority_summary[0]}; Predicted minority: {minority_summary[1]}; Correctly predicted: {minority_summary[2]}")
print(f"Macro Average Precision: {macro_avg_pre}; Micro Average Precision: {micro_avg_pre}")

Training Data
Actual minority: 16037; Predicted minority: 7704; Correctly predicted: 5190
Macro Average Precision: 0.7827357053756714; Micro Average Precision: 0.6513170003890991
Test Data
Actual minority: 6864; Predicted minority: 3166; Correctly predicted: 1607
Macro Average Precision: 0.4344041049480438; Micro Average Precision: 0.5924266576766968


#### LightGBM

In [188]:
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

params = {
    'objective': 'multiclass',
    'metric': 'multi_logloss',
    'boosting_type': 'gbdt',  # Gradient Boosting Decision Tree
    'learning_rate': 0.1,
    'num_class': len(np.unique(y_train)),
    'num_leaves': 31,
    'max_depth': -1,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

num_round = 10
bst = lgb.train(params, train_data, num_round, valid_sets=[train_data])

y_pred = np.argmax(bst.predict(X_train), axis=1)
y_truth = y_train

minority_summary = pred_minority_summary(y_truth, y_pred)
macro_avg_pre = macro_avg_precision(y_pred, y_truth)
micro_avg_pre = micro_avg_precision(y_pred, y_truth)
print("Training Data")
print(f"Actual minority: {minority_summary[0]}; Predicted minority: {minority_summary[1]}; Correctly predicted: {minority_summary[2]}")
print(f"Macro Average Precision: {macro_avg_pre}; Micro Average Precision: {micro_avg_pre}")

y_pred = np.argmax(bst.predict(X_test), axis=1)
y_truth = y_test

minority_summary = pred_minority_summary(y_truth, y_pred)
macro_avg_pre = macro_avg_precision(y_pred, y_truth)
micro_avg_pre = micro_avg_precision(y_pred, y_truth)
print("Training Data")
print(f"Actual minority: {minority_summary[0]}; Predicted minority: {minority_summary[1]}; Correctly predicted: {minority_summary[2]}")
print(f"Macro Average Precision: {macro_avg_pre}; Micro Average Precision: {micro_avg_pre}")

Training Data
Actual minority: 16037; Predicted minority: 4412; Correctly predicted: 2601
Macro Average Precision: 0.29342150688171387; Micro Average Precision: 0.584868311882019
Training Data
Actual minority: 6864; Predicted minority: 1911; Correctly predicted: 1128
Macro Average Precision: 0.5440467000007629; Micro Average Precision: 0.5866872072219849
