In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# --- standard library ---
import functools
import os
import pickle
import sys
import time

# --- third party ---
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.gaussian_process.kernels import RBF
from sklearn import model_selection
from tqdm.notebook import tqdm
import umap

# BASE_DIR is appended to sys.path so that the project-local `utils` package
# (presumably located at the repository root) can be imported below.
BASE_DIR = '../../../'
sys.path.append(BASE_DIR)

# custom code
import utils.utils
CONFIG = utils.utils.load_config("../../config.json")

import utils.fweg
# Imported explicitly because utils.record.Results_Recorder is used further
# down in this notebook; relying on another module (or an earlier kernel
# session) to have imported it as a side effect breaks on a fresh kernel.
import utils.record

Using TensorFlow backend.


In [3]:
# The dataset name is the name of the folder this notebook is run from.
DATASET = os.path.basename(os.getcwd())
RANDOM_SEED = CONFIG['random_seed']

# All per-dataset settings are read from one sub-dictionary of the config.
exp_cfg = CONFIG["experiment_configs"][DATASET]
EPOCHS = exp_cfg["epochs"]
BATCH_SIZE = exp_cfg["batch_size"]
IMAGE_X_SIZE = exp_cfg["image_x_size"]
IMAGE_Y_SIZE = exp_cfg["image_y_size"]
IMAGE_SIZE = (IMAGE_Y_SIZE, IMAGE_X_SIZE)
VAL_FULL_SPLIT = exp_cfg['val_full_split']
HYPER_VAL_SPLIT = exp_cfg['hyper_val_split']

print(DATASET, RANDOM_SEED, VAL_FULL_SPLIT)

# Folders for raw data, processed splits, trained models and results.
# Processed data and models are keyed by random seed (rs) and validation split (vs).
DATA_F = os.path.join(BASE_DIR, f"data/{DATASET}/")
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/{DATASET}/rs={RANDOM_SEED}/vs={VAL_FULL_SPLIT}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/{DATASET}/rs={RANDOM_SEED}/vs={VAL_FULL_SPLIT}')
RESULTS_DIR = os.path.join(BASE_DIR, 'results')

# Where the base model's weights are expected (mt = model_type).
BASE_MODEL_SAVEPATH = utils.utils.get_savepath(MODELS_DIR, DATASET, ".h5", mt="base")

# Only a warning is printed here; nothing stops execution if the file is absent.
base_model_missing = not os.path.exists(BASE_MODEL_SAVEPATH)
if base_model_missing:
    print(f"warning: no model has been run for rs={RANDOM_SEED}")
    

adience_ablation 55 0.5


In [4]:
def _load_image_split(split_name):
    """Load the processed split `split_name` as a batched image dataset.

    Labels are one-hot ('categorical') and shuffling is disabled, so the
    dataset yields files in a fixed order.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory=os.path.join(PROCESSED_DIR, split_name),
        batch_size=BATCH_SIZE,
        image_size=IMAGE_SIZE,
        label_mode='categorical',
        follow_links=True,
        shuffle=False,
    )


# Loaded in the same order as before so the "Found N files" log lines keep their order.
train_ds = _load_image_split("train")
hyper_train_ds = _load_image_split("hyper_train")
val_ds = _load_image_split("val")
hyper_val_ds = _load_image_split("hyper_val")
test_ds = _load_image_split("test")

Found 9795 files belonging to 2 classes.
Found 2449 files belonging to 2 classes.
Found 996 files belonging to 2 classes.
Found 996 files belonging to 2 classes.
Found 1992 files belonging to 2 classes.


In [5]:
def preprocess(imgs, labels):
    """Standardize the pixel data of a batch; labels pass through unchanged.

    Pixels are first divided by 255 (turning <0..255> into <0..1>) and then
    normalized per channel with a mean of 0.5 and a standard deviation of 0.5.

    Args:
        imgs: image batch whose last axis holds the 3 colour channels.
        labels: labels belonging to `imgs`; returned as-is.

    Returns:
        (standardized imgs, labels)
    """
    channel_means = np.array([0.5, 0.5, 0.5])
    channel_stds = np.array([0.5, 0.5, 0.5])
    scaled = imgs / 255.0
    return (scaled - channel_means) / channel_stds, labels

In [6]:
# Attach the pixel standardization to every split; it is applied on the fly as
# batches are read (noted by the original author as 3x faster than
# `flow_from_directory`).
# NOTE: this cell is not idempotent - it rebinds the dataset names and deletes
# two of them below, so re-run the loading cell before re-running this one.
train_ds, hyper_train_ds, val_ds, hyper_val_ds, test_ds = (
    split_ds.map(preprocess)
    for split_ds in (train_ds, hyper_train_ds, val_ds, hyper_val_ds, test_ds)
)

# The full training set is the train split followed by the hyper_train split.
train_full_ds = train_ds.concatenate(hyper_train_ds)
del train_ds, hyper_train_ds

In [7]:
# String -> integer codes for the metadata columns.
gender_dict = {'f': 0, 'm': 1}
age_dict = {
    '(0, 2)': 0,
    '(4, 6)': 1,
    '(8, 12)': 2,
    '(15, 20)': 3,
    '(25, 32)': 4,
    '(38, 43)': 5,
    '(48, 53)': 6,
    '(60, 100)': 7,
}

# Load the validation metadata and encode age first, then gender.
val_df = (
    utils.utils.load_sorted_df(PROCESSED_DIR, "val")
    .replace({"age": age_dict})
    .replace({"gender": gender_dict})
)

# Integer age-group code of every validation sample.
age_val = val_df.age.values

In [8]:
# One .npy path per split, all under PROCESSED_DIR.
(
    X_TRAIN_FULL_EMB_SAVEPATH,
    X_VAL_EMB_SAVEPATH,
    X_HYPER_VAL_EMB_SAVEPATH,
    X_TEST_EMB_SAVEPATH,
) = (
    utils.utils.get_savepath(PROCESSED_DIR, emb_name, ".npy")
    for emb_name in ("x_train_full_emb", "x_val_emb", "x_hyper_val_emb", "x_test_emb")
)

In [9]:
# All four embedding files are loaded by this notebook, so check every one of
# them up front and report exactly which files are missing.
missing_emb_paths = [
    emb_path
    for emb_path in (
        X_TRAIN_FULL_EMB_SAVEPATH,
        X_VAL_EMB_SAVEPATH,
        X_HYPER_VAL_EMB_SAVEPATH,
        X_TEST_EMB_SAVEPATH,
    )
    if not os.path.exists(emb_path)
]
assert not missing_emb_paths, f"missing embedding files: {missing_emb_paths}"

In [10]:
# Read the saved embedding array of each split from disk.
x_train_full_emb = np.load(X_TRAIN_FULL_EMB_SAVEPATH)
x_val_emb = np.load(X_VAL_EMB_SAVEPATH)
x_hyper_val_emb = np.load(X_HYPER_VAL_EMB_SAVEPATH)
x_test_emb = np.load(X_TEST_EMB_SAVEPATH)

In [11]:
# Rebuild the base network for 3-channel images of IMAGE_SIZE, then restore
# the weights saved at BASE_MODEL_SAVEPATH.
# (nc=2 is presumably the number of output classes - confirm in utils.utils.)
model = utils.utils.make_resnet(
    depth=2,
    random_state=RANDOM_SEED,
    input_shape=IMAGE_SIZE + (3,),
    nc=2,
)
model.load_weights(BASE_MODEL_SAVEPATH)

In [12]:
# Run the base model over each split; every call yields a (predictions, labels) pair.
# The splits are processed in this order: train_full, val, hyper_val, test.
preds_train_full, y_train_full = utils.utils.compute_preds(model, train_full_ds)
preds_val, y_val = utils.utils.compute_preds(model, val_ds)
preds_hyper_val, y_hyper_val = utils.utils.compute_preds(model, hyper_val_ds)
preds_test, y_test = utils.utils.compute_preds(model, test_ds)

100%|██████████| 384/384 [01:27<00:00,  4.40it/s]
100%|██████████| 32/32 [00:07<00:00,  4.56it/s]
100%|██████████| 32/32 [00:06<00:00,  4.69it/s]
100%|██████████| 63/63 [00:13<00:00,  4.59it/s]


In [13]:
# RBF kernel with length scale 2
kernel = RBF(2)


def _clipped_kernel_to_val(x_emb):
    """Kernel of every row of `x_emb` against every row of `x_val_emb`, clipped to [0.01, 0.99].

    Example: if `x_emb` has shape (50, 10) and `x_val_emb` has shape (20, 10),
    the result has shape (50, 20) and entry [i, j] is the kernel value between
    x_emb[i] and x_val_emb[j].
    """
    return np.clip(kernel(x_emb, Y=x_val_emb), 0.01, 0.99)


# Every split is compared against the validation embeddings.
kernel_train_full = _clipped_kernel_to_val(x_train_full_emb)
kernel_val = _clipped_kernel_to_val(x_val_emb)
kernel_hyper_val = _clipped_kernel_to_val(x_hyper_val_emb)
kernel_test = _clipped_kernel_to_val(x_test_emb)


In [14]:
def get_basis_fns(
    groups,
    kernel_train,
    kernel_val,
    kernel_hyper_val,
    kernel_test,
    y_val,
    age_val,
    add_all,
    ):
    """
    Creates basis functions whose rows denote the membership value of a sample for each group.

    For a group, the membership of a sample is the mean of its kernel values to
    ALL validation samples that belong to that group (no random sub-sampling).

    Args:
        groups: number of groups to make. Must be one of:
            1 - a single "All" group in which every sample has membership 1,
            2 - one group per class label (cc),
            3 - one group per age code 5, 6, 7 (ag),
            6 - one group per (age code, class label) pair.
        kernel_train: kernel matrix (train samples x val samples), see kernel computation comment
        kernel_val: kernel matrix (val samples x val samples)
        kernel_hyper_val: kernel matrix (hyper-val samples x val samples)
        kernel_test: kernel matrix (test samples x val samples)
        y_val: 1-D array of labels on the val set that will be used for training FWEG.
        age_val: 1-D array of integer age-group codes for the val set
        add_all: True iff an extra "All" column (membership 1.0 for every
            sample) should be appended. Must be False when groups == 1.

    Returns:
        basis_train: membership of each train sample to each group
        basis_val: membership of each val sample to each group
        basis_hyper_val: membership of each hyper-val sample to each group
        basis_test: membership of each test sample to each group
        (each is a pandas DataFrame with one column per group)

    Raises:
        AssertionError: if `groups` is unsupported, or groups == 1 with add_all set.
        ValueError: if a requested group has no validation samples; its
            membership column would otherwise consist only of NaN.
    """
    assert groups in [1, 2, 3, 6]
    if groups == 1:
        assert add_all is False
    # Kept as-is: seeds NumPy's global RNG as a side effect. Nothing in this
    # function draws random numbers, but later code may rely on the seeded state.
    np.random.seed(RANDOM_SEED)

    kernels = (kernel_train, kernel_val, kernel_hyper_val, kernel_test)

    if groups == 1:
        bases = [pd.DataFrame(np.ones(len(k)), columns=["All"]) for k in kernels]
    else:
        # age codes used on val/test: 5, 6 and 7 (upper bound is exclusive)
        age_codes = range(5, 8)

        # Boolean mask over the val samples for every group, in column order.
        if groups == 2:
            # use the class label as the basis
            members = {f"cc={cc}": (y_val == cc) for cc in range(CLASSES)}
        elif groups == 3:
            # use age as the basis
            members = {f"ag={ag}": (age_val == ag) for ag in age_codes}
        else:
            # use age and class label as the basis
            members = {
                f"ag={ag}_cc={cc}": (y_val == cc) & (age_val == ag)
                for ag in age_codes
                for cc in range(CLASSES)
            }

        columns = [{} for _ in kernels]
        for name, mask in members.items():
            choices = np.where(mask)[0]
            if len(choices) == 0:
                # mean over zero columns would silently produce an all-NaN basis
                raise ValueError(f"no validation samples belong to group '{name}'")
            for cols, k in zip(columns, kernels):
                cols[name] = k[:, choices].mean(axis=1)
        bases = [pd.DataFrame(cols) for cols in columns]

    if add_all:
        for basis in bases:
            basis['All'] = 1.0

    basis_train, basis_val, basis_hyper_val, basis_test = bases
    return basis_train, basis_val, basis_hyper_val, basis_test


In [15]:
# number of label classes
CLASSES = 2

# number of groups to make (must be a value get_basis_fns accepts: 1, 2, 3 or 6)
NUM_GROUPS = 6

# have a group with all samples in it
ADD_ALL = True

In [16]:
# Build the group-membership (basis) tables for every split.
# Each column corresponds to one group; a sample's value in that column is the
# mean kernel value between the sample and all validation samples in the group.
# With NUM_GROUPS = 6 there is one group per (age, class) pair.
basis_train_full, basis_val, basis_hyper_val, basis_test = get_basis_fns(
    groups=NUM_GROUPS,
    kernel_train=kernel_train_full,
    kernel_val=kernel_val,
    kernel_hyper_val=kernel_hyper_val,
    kernel_test=kernel_test,
    y_val=y_val,
    age_val=age_val,
    add_all=ADD_ALL,
)


In [19]:
# hyperparameters for a single FWEG run (passed to utils.fweg.FWEG below)

# epsilon perturbation
EPSILON = 1e-4

# metric to optimize; the supported names are listed in the trailing comment
METRIC = "Accuracy" # ['Accuracy', 'F-measure', 'G-mean']

# flag forwarded to FWEG; its exact effect is defined in utils.fweg (not visible here)
USE_LINEAR_VAL_METRIC = True

# number of FWEG iterations
NUM_ITERS = 50

In [20]:
# Construct FWEG from the hyperparameters above. The arguments are positional,
# so their order must match the FWEG constructor in utils.fweg.
fweg = utils.fweg.FWEG(
    METRIC,
    NUM_ITERS,
    EPSILON,
    CLASSES,
    USE_LINEAR_VAL_METRIC,
    RANDOM_SEED
)

# Fit using the full-train predictions/labels/basis together with the val
# predictions/labels/basis. Three lists are returned; judging by their names
# they track the validation metric, gradient norm and a condition value
# (presumably one entry per iteration - confirm in utils.fweg).
val_train_list, grad_norm_list, cond_list = fweg.fit(
    preds_train_full,
    y_train_full,
    basis_train_full,
    preds_val,
    y_val,
    basis_val,
)

Initialization complete!


Val Accuracy: 0.556:   4%|▍         | 2/50 [00:00<00:04,  9.82it/s]

UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier


Val Accuracy: 0.503: 100%|██████████| 50/50 [00:03<00:00, 13.59it/s]


In [21]:
# Apply the fitted FWEG to the hyper val set. `predict` returns a list of
# predictions and a list of metric values (presumably one entry per fitted
# iterate - confirm in utils.fweg).
preds_hyper_val_list, mval_hyper_val_list = fweg.predict(
    preds_hyper_val,
    y_hyper_val,
    basis_hyper_val,
    deterministic=False,
)

# Index of the entry with the highest hyper-val metric; the same index is
# used afterwards to report the test metric.
best_idx = np.argmax(mval_hyper_val_list)
print(f"Hyper Val: {mval_hyper_val_list[best_idx]}")

Hyper Val: 0.714859437751004


In [22]:
# Apply the fitted FWEG to the test set.
preds_test_list, mval_test_list = fweg.predict(
    preds_test,
    y_test,
    basis_test,
    deterministic=False,
)
# Report the test metric at best_idx, the index selected on the hyper val set
# (so the test set itself is not used for selection).
print(f"Test: {mval_test_list[best_idx]}")

Test: 0.7073293172690762


Run the cells below to search over many hyperparameter combinations.

In [23]:
# Make sure the results folder exists, then open the recorder for this
# validation split's CSV file (one file per VAL_FULL_SPLIT value).
os.makedirs(RESULTS_DIR, exist_ok=True)
savepath = os.path.join(RESULTS_DIR, f"results_ablation_vs={VAL_FULL_SPLIT}.csv")
saver = utils.record.Results_Recorder(savepath, DATASET)

Results file exists, appending to it...


In [25]:
# Fixed settings for the hyperparameter search.
CLASSES = 2
NUM_ITERS = 50
METRIC = "Accuracy"

# Group counts to try, each paired with a human-readable description.
group_descriptions = {
    1: "no groups",
    2: "gender groups",
    3: "age groups",
    6: "gender and age groups",
}
groups_list = list(group_descriptions.keys())
groups_descr_list = list(group_descriptions.values())

# Remaining grid axes.
add_all_list = [False, True]
epsilon_list = [0.0001, 0.001, 1.0]
use_linear_val_metric_list = [False, True]

# Pre-bind everything the basis function creator needs except `groups` and
# `add_all`. FWEG_Hyperparameter_Search receives basis_fn_generator and
# supplies those two as it iterates over the hyperparameters.
basis_fn_generator = functools.partial(
    get_basis_fns,
    kernel_train=kernel_train_full,
    kernel_val=kernel_val,
    kernel_hyper_val=kernel_hyper_val,
    kernel_test=kernel_test,
    y_val=y_val,
    age_val=age_val,
)

In [26]:
# Set up the search over the grid defined above. The arguments are positional,
# so their order must match FWEG_Hyperparameter_Search in utils.fweg.
fweg_hp_s = utils.fweg.FWEG_Hyperparameter_Search(
    saver,
    CLASSES,
    NUM_ITERS,
    METRIC,
    basis_fn_generator,
    groups_list,
    groups_descr_list,
    add_all_list,
    epsilon_list,
    use_linear_val_metric_list,
    RANDOM_SEED,
)

# Run the search; it returns the best value found for each of the four
# searched hyperparameters.
best_groups, best_add_all, best_epsilon, best_use_linear_val_metric = fweg_hp_s.search(
    preds_train_full,
    y_train_full,
    preds_val,
    y_val,
    preds_hyper_val,
    y_hyper_val,
    preds_test,
    y_test,
)

best hyper val: 0.8353, test: 0.8193:  60%|██████    | 29/48 [00:09<00:09,  2.00it/s]

UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier


best hyper val: 0.8353, test: 0.8193:  85%|████████▌ | 41/48 [00:22<00:07,  1.14s/it]

UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier
UNEXPECTED - already had a trivial classifier


best hyper val: 0.8353, test: 0.8193: 100%|██████████| 48/48 [00:32<00:00,  1.46it/s]


In [27]:
# Close the results recorder now that the search has finished.
saver.close()