This notebook runs some additional baselines used in the paper, namely:
1. PI-EW (plugin)
2. FW-val (`FW_val`)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import csv
import numpy as np
import tensorflow as tf
from sklearn import metrics 
import pandas as pd

# Relative path appended to sys.path below — presumably the repository root, so that
# the project's `utils` package can be imported; TODO confirm the folder layout.
BASE_DIR = '../../../'
import sys
sys.path.append(BASE_DIR)

# custom code (must be imported after the sys.path tweak above)
import utils.utils
# Experiment configuration; later cells read 'random_seed' and 'experiment_configs' from it.
CONFIG = utils.utils.load_config("../../config.json")

import utils.metrics
import utils.record

Using TensorFlow backend.


In [3]:
# DATASET is the basename of the kernel's current working directory, so the notebook
# has to be started from inside its dataset folder for the config lookup below to work.
DATASET = os.path.basename(os.getcwd()) # name of folder this file is in
RANDOM_SEED = CONFIG['random_seed']
# NOTE(review): BATCH_SIZE is not referenced by any other cell visible in this notebook.
BATCH_SIZE = CONFIG["experiment_configs"][DATASET]["batch_size"]

print(RANDOM_SEED)

# NOTE(review): the paths below hard-code 'adult' while the config lookup and the results
# file use DATASET. They agree only when this notebook lives in a folder named 'adult' —
# confirm before copying this notebook to another dataset folder.
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/adult/rs={RANDOM_SEED}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/adult/rs={RANDOM_SEED}')
RESULTS_DIR = os.path.join(BASE_DIR, "results")

# PROCESSED_SAVEPATH is not used by the cells visible here; BASE_MODEL_SAVEPATH is the
# weights file loaded into the base model.
PROCESSED_SAVEPATH = utils.utils.get_savepath(PROCESSED_DIR, "adult", ".pkl")
BASE_MODEL_SAVEPATH = utils.utils.get_savepath(MODELS_DIR, "adult", ".h5", mt="base") # mt = model_type
# Make sure the results directory exists before any recorder writes into it.
os.makedirs(RESULTS_DIR, exist_ok=True)

15


In [4]:
# Load the four pre-processed splits from PROCESSED_DIR, in a fixed order:
# hyper_train, val, hyper_val, test.
hyper_train_df, val_df, hyper_val_df, test_df = (
    pd.read_csv(os.path.join(PROCESSED_DIR, csv_name))
    for csv_name in ("hyper_train.csv", "val.csv", "hyper_val.csv", "test.csv")
)

In [5]:
# Split each frame into a feature matrix (every column except 'label')
# and a label vector (the 'label' column), both as NumPy arrays.
x_hyper_train, y_hyper_train = (
    hyper_train_df.drop(columns=['label']).values,
    hyper_train_df['label'].values,
)

x_val, y_val = (
    val_df.drop(columns=['label']).values,
    val_df['label'].values,
)

x_hyper_val, y_hyper_val = (
    hyper_val_df.drop(columns=['label']).values,
    hyper_val_df['label'].values,
)

x_test, y_test = (
    test_df.drop(columns=['label']).values,
    test_df['label'].values,
)

In [6]:
# Rebuild the base model's architecture (a single softmax layer over the input features)
# so that the saved weights can be loaded into it.
model = tf.keras.models.Sequential([
    # `shape` is documented as a tuple that excludes the batch dimension. A bare int is
    # only tolerated by some tf.keras versions and rejected by others, so pass a 1-tuple.
    tf.keras.Input(shape=(x_hyper_train.shape[1],)),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])
# Weights come from the file at BASE_MODEL_SAVEPATH; nothing is trained in this notebook.
model.load_weights(BASE_MODEL_SAVEPATH)

In [7]:
# Score the val, hyper_val and test feature matrices (in that order) with the loaded model.
preds_val, preds_hyper_val, preds_test = (
    model.predict(split_features)
    for split_features in (x_val, x_hyper_val, x_test)
)

In [8]:
# Stack val and hyper_val row-wise into one combined validation set. Predictions and
# labels use the same order (val first, then hyper_val) so their rows stay aligned.
preds_val_full = np.concatenate((preds_val, preds_hyper_val), axis=0)
y_val_full = np.concatenate((y_val, y_hyper_val), axis=0)

In [9]:
# Metric name handed to plugin, FW_val and the results recorder.
METRIC = 'G-mean'
# Number of classes handed to plugin and FW_val.
CLASSES = 2

# for FW_val
# Passed as FW_val's first argument — presumably the number of iterations; TODO confirm.
NUM_ITERS = 100

In [10]:
# Base-model predictions keyed by split name; this mapping is what both
# baselines (plugin and FW_val) receive as their prediction input.
pred_eta_model = dict(
    val_full=preds_val_full,
    val=preds_val,
    hyper_val=preds_hyper_val,
    test=preds_test,
)

In [11]:
from utils.papers_piew import plugin

In [12]:
# Run the PI-EW plugin baseline on the combined validation labels (val + hyper_val),
# the test labels and the per-split base-model predictions.
# NOTE(review): final_weights is not used by the cells visible in this notebook.
mval_val, mval_test, final_weights = plugin(METRIC, y_val_full, y_test, pred_eta_model, CLASSES)

100%|██████████| 192/192 [00:00<00:00, 1243.77it/s]


In [13]:
# Record the plugin baseline's scores in the per-dataset results CSV.
os.makedirs(RESULTS_DIR, exist_ok=True)
savepath = os.path.join(RESULTS_DIR, f"results_{DATASET}.csv")
saver = utils.record.Results_Recorder(savepath, DATASET)
try:
    # mval_val is passed twice on purpose in the original code — presumably because the
    # plugin is scored on the combined val_full split, so there is no separate hyper_val
    # score; TODO confirm against Results_Recorder.save's column meaning.
    saver.save(RANDOM_SEED, METRIC, "plugin", mval_val, mval_val, mval_test, None)
finally:
    # Close the recorder even if save() raises, so nothing it holds is left open
    # in the long-lived notebook kernel.
    saver.close()

Results file exists, appending to it...


In [14]:
from utils.papers_fw_val import FW_val

In [15]:
# Run the FW-val baseline. It gets the val and hyper_val labels separately (unlike the
# plugin call, which gets them concatenated), plus the test labels, the per-split
# base-model predictions, the class count and the random seed.
# It returns four sequences; the first three are indexed with a common best index later
# (presumably one entry per iteration — TODO confirm in utils.papers_fw_val).
# NOTE(review): grad_norm_list is not used by the cells visible in this notebook.
mval_val_list, mval_hyper_val_list, mval_test_list, grad_norm_list = FW_val(
    NUM_ITERS,
    METRIC,
    y_val,
    y_hyper_val,
    y_test, 
    pred_eta_model,
    CLASSES,
    RANDOM_SEED
)

In [16]:
# Model selection: pick the index with the highest hyper_val score, then report the
# val / hyper_val / test scores at that same index.
best_idx = np.argmax(mval_hyper_val_list)
mval_val, mval_hyper_val, mval_test = (
    score_list[best_idx]
    for score_list in (mval_val_list, mval_hyper_val_list, mval_test_list)
)

In [17]:
# Display the selected FW-val scores as a (val, hyper_val, test) tuple.
mval_val, mval_hyper_val, mval_test

(0.7151104408240753, 0.6738252028830652, 0.6625389229257546)

In [18]:
# Record the FW-val baseline's selected scores in the per-dataset results CSV.
os.makedirs(RESULTS_DIR, exist_ok=True)
savepath = os.path.join(RESULTS_DIR, f"results_{DATASET}.csv")
saver = utils.record.Results_Recorder(savepath, DATASET)
try:
    saver.save(RANDOM_SEED, METRIC, "fw_val", mval_val, mval_hyper_val, mval_test, None)
finally:
    # Close the recorder even if save() raises, so nothing it holds is left open
    # in the long-lived notebook kernel.
    saver.close()

Results file exists, appending to it...
