In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the project's `utils` package) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import csv
import pickle
import os
import tensorflow as tf
import numpy as np
from sklearn import model_selection
from tqdm.notebook import tqdm

# Base directory three levels above this notebook; the processed-data, model
# and results paths used later are all built from it.
BASE_DIR = '../../../'
import sys
# Extend the import path *before* the `utils` imports below; they are resolved
# relative to BASE_DIR, so the order of these statements matters.
sys.path.append(BASE_DIR)

# custom code
import utils.utils
# Shared experiment configuration (random seed, per-experiment group settings).
CONFIG = utils.utils.load_config("../../config.json")
import utils.papers
import utils.bbox
import utils.metrics

Using TensorFlow backend.


In [6]:
# --- Experiment settings, read from the shared config -----------------------
RANDOM_SEED = CONFIG['random_seed']
GROUPS = CONFIG['experiment_configs']['adult_bb']['groups']
EVAL_GROUPS = CONFIG['experiment_configs']['adult_bb']['eval_groups']
BATCH_SIZE = 32  # batch size passed to compute_preds when scoring the models

print(RANDOM_SEED)
print(f"EVAL_GROUPS: {EVAL_GROUPS}")

# --- Per-seed input locations ------------------------------------------------
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/adult_bb/rs={RANDOM_SEED}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/adult_bb/rs={RANDOM_SEED}')

PROCESSED_SAVEPATH = utils.utils.get_savepath(PROCESSED_DIR, "adult_bb", ".pkl", g=GROUPS, eg=EVAL_GROUPS)
BASE_MODEL_SAVEPATH = utils.utils.get_savepath(MODELS_DIR, "adult_bb", ".h5", mt="base") # mt = model_type

# --- Output location (shared across seeds) -----------------------------------
RESULTS_DIR = os.path.join(BASE_DIR, 'results')

# This notebook only loads previously saved weights, so a missing base model
# means training has not been run for this seed. The original message claimed
# the opposite ("model has been done") while firing on the not-exists branch.
# NOTE(review): the evaluation loop builds its weight paths with the dataset
# tag "adult", not "adult_bb" as used here -- confirm which tag the saved
# files actually carry, otherwise this check inspects a different file.
if not os.path.exists(BASE_MODEL_SAVEPATH):
    print(f"warning: base model not found at {BASE_MODEL_SAVEPATH} for rs={RANDOM_SEED}; train it before evaluating")
    

55
EVAL_GROUPS: ['gender_Male', 'gender_Female']


In [7]:
# Create the results directory (and any missing parents) up front so the CSV
# can be opened later; exist_ok=True makes re-running this cell harmless.
os.makedirs(RESULTS_DIR, exist_ok=True)

In [8]:
# Read the pickled, pre-processed dataset for this seed / group configuration.
# NOTE: pickle.load can execute arbitrary code -- only open files produced by
# this project's own preprocessing step.
dat = None
with open(PROCESSED_SAVEPATH, 'rb') as f:
    dat = pickle.load(f)

# Group-membership arrays: training/eval-group views for train, val and test.
(z_train, z_eval_train,
 z_val, z_eval_val,
 z_test, z_eval_test) = (
    dat[key]
    for key in (
        'z_train_full', 'z_eval_train_full',
        'z_val', 'z_eval_val',
        'z_test', 'z_eval_test',
    )
)

# Features and labels are only needed for the held-out splits.
x_val, y_val = dat['x_val'], dat['y_val']
x_test, y_test = dat['x_test'], dat['y_test']

In [6]:
# Model variants to evaluate; each name is the `mt` tag used to locate the
# saved weights file for that variant.
model_types = ["base", "ft", "fc", "lrw", "kmm", "jv"]

# Metric names handed to utils.metrics.eval_metric_bb.
metric_list = ['Accuracy', 'G-mean']

In [7]:
# Header row of the results CSV; every row written later follows this order.
columns = [
    "dataset",
    "subset",
    "seed",
    "model_type",
    "metric",
    "score",
]

In [8]:
# Open the shared baselines results CSV for appending and build the writer the
# evaluation loop uses. The handle `fp` intentionally stays open across cells
# and is closed by the final cell of the notebook.
save_path = os.path.join(RESULTS_DIR, "results_baselines.csv")

results_file_exists = os.path.exists(save_path)
# A header is needed for a brand-new file, and also for a zero-byte file left
# behind by a run that created it but never got any rows onto disk.
needs_header = (not results_file_exists) or os.path.getsize(save_path) == 0

if results_file_exists:
    print("Results file exists, appending to it...")
else:
    print("Results file does not exist, creating it...")

# Append mode creates the file when it is missing, so one open() covers both
# cases. newline='' is required by the csv module: without it the writer's
# own "\r\n" terminators get translated again on Windows, producing blank rows.
fp = open(save_path, mode='a', newline='')
writer = csv.writer(fp)
if needs_header:
    writer.writerow(columns)
    # Push the header to disk right away so the file is never observed as
    # "exists but empty" while this handle is still open.
    fp.flush()

Results file does not exist, creating it...


In [9]:
# Build the basis-function and evaluation-group inputs for every split from
# the group-membership arrays loaded above.
basis_fn_outputs = utils.bbox.get_basis_fns(
    GROUPS, EVAL_GROUPS,
    z_train, z_eval_train,
    z_val, z_eval_val,
    z_test, z_eval_test,
)

# The helper returns eight values: basis/eval pairs for train, val and test,
# followed by the two group-id arrays.
(
    basis_train, eval_train,
    basis_val, eval_val,
    basis_test, eval_test,
    grp_id_arr, eval_grp_id_arr,
) = basis_fn_outputs

Basis functions are  ['All']
Evaluation groups are  ['gender_Male', 'gender_Female']


In [10]:
# create model architecture
# A single dense softmax layer with two outputs over the input features. The
# network is not trained here: the evaluation loop loads saved weights into it
# for each model type.
model = tf.keras.models.Sequential([
    # `shape` excludes the batch dimension and is documented as a tuple; a
    # bare int is not accepted by every Keras version, so pass a 1-tuple.
    tf.keras.Input(shape=(x_val.shape[1],)),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])

In [11]:
# Evaluate every saved model variant on the validation and test splits and
# append one CSV row per (model type, metric, split).
classes = 2


def _predict_one_hot(features):
    """Score `features` with the currently loaded weights.

    Returns a 3-tuple:
      * the raw output of utils.utils.compute_preds,
      * the arg-max class index per row,
      * a (n_rows, classes) array with a 1 in each row's predicted class.
    """
    scores = utils.utils.compute_preds(
        model,
        features,
        batch_size=BATCH_SIZE,
    )
    labels = np.argmax(scores, axis=1)
    one_hot = np.zeros((labels.size, classes))
    one_hot[np.arange(labels.size), labels] = 1
    return scores, labels, one_hot


# Value of the "dataset" column; identical for every row written below.
dataset_tag = f"adult_bb_g={GROUPS}"

for mt in model_types:
    print(f"Model Type: {mt}")
    # NOTE(review): weights are looked up with the dataset tag "adult", while
    # BASE_MODEL_SAVEPATH in the config cell is built with "adult_bb" --
    # confirm which tag the saved files really use.
    modelpath = utils.utils.get_savepath(MODELS_DIR, "adult", ".h5", mt=mt)
    # The same architecture is reused; only the weights change per model type.
    model.load_weights(modelpath)

    preds_valid, preds_v, pred_val_one_hot = _predict_one_hot(x_val)
    preds_test, preds_t, pred_test_one_hot = _predict_one_hot(x_test)

    # Only the middle element of the returned triple is used here
    # (presumably the evaluation-group confusion matrices -- TODO confirm).
    _, conf_val, _ = utils.bbox.get_confs_frm_scr(
        y_val,
        basis_val,
        eval_val,
        pred_val_one_hot,
        classes,
    )
    _, conf_test, _ = utils.bbox.get_confs_frm_scr(
        y_test,
        basis_test,
        eval_test,
        pred_test_one_hot,
        classes,
    )

    for metric in metric_list:
        valid_score = utils.metrics.eval_metric_bb(conf_val, metric)
        test_score = utils.metrics.eval_metric_bb(conf_test, metric)

        print(f"METRIC: {metric}, val: {valid_score}, test: {test_score}")

        # NOTE(review): the recorded output shows each score as a
        # (value, group_name) tuple; it is written to the "score" column
        # as-is -- confirm downstream readers expect that format.
        writer.writerow([dataset_tag, "val", RANDOM_SEED, mt, metric, valid_score])
        writer.writerow([dataset_tag, "test", RANDOM_SEED, mt, metric, test_score])

    # Persist this model type's rows immediately so they survive a failure
    # (e.g. a missing weights file) on a later model type.
    fp.flush()
        

Model Type: base
METRIC: Accuracy, val: (0.8093922651933702, 'gender_Male'), test: (0.8219766728054021, 'gender_Male')
METRIC: G-mean, val: (0.7330878355969046, 'race_White'), test: (0.7331294391980918, 'race_White')
Model Type: lrw
METRIC: Accuracy, val: (0.8204419889502762, 'gender_Male'), test: (0.8207489257213014, 'gender_Male')
METRIC: G-mean, val: (0.751267101761173, 'race_White'), test: (0.735274486451107, 'race_White')
Model Type: kmm
METRIC: Accuracy, val: (0.8093922651933703, 'gender_Male'), test: (0.8220789850624105, 'gender_Male')
METRIC: G-mean, val: (0.7338176544655202, 'race_White'), test: (0.7342915429553992, 'race_White')


In [12]:
# Close the results CSV handle opened earlier; this flushes any rows still
# buffered by the writer to disk.
fp.close()