In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn.gaussian_process.kernels import RBF
from sklearn import metrics
from tqdm.notebook import tqdm
import umap
import time
import functools

BASE_DIR = '../../../'
import sys
sys.path.append(BASE_DIR)

# custom code
import utils.utils
CONFIG = utils.utils.load_config("../../config.json")
import utils.fweg
import utils.metrics
import utils.record

Using TensorFlow backend.


In [3]:
# The dataset name is the name of the folder this notebook lives in.
DATASET = os.path.basename(os.getcwd())
RANDOM_SEED = CONFIG['random_seed']

# Every per-dataset setting below is read from this one config entry.
EXPERIMENT_CONFIG = CONFIG["experiment_configs"][DATASET]

# type of label noise
#   asym: classes flip to a single other class
#   sym: classes flip uniformly to any other class
TYPE = EXPERIMENT_CONFIG["type"]
# chance of a flip
NOISE_P = EXPERIMENT_CONFIG["noise_p"]
HYPER_VAL_SPLIT = EXPERIMENT_CONFIG["hyper_val_split"]

EPOCHS = EXPERIMENT_CONFIG["epochs"]
BATCH_SIZE = EXPERIMENT_CONFIG["batch_size"]
IMAGE_X = EXPERIMENT_CONFIG["image_x_size"]
IMAGE_Y = EXPERIMENT_CONFIG["image_y_size"]
# stored as (height, width)
IMAGE_SIZE = (IMAGE_Y, IMAGE_X)

print(RANDOM_SEED, TYPE, NOISE_P)

# folders for processed data, models and results
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/{DATASET}/rs={RANDOM_SEED}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/{DATASET}/rs={RANDOM_SEED}')
RESULTS_DIR = os.path.join(BASE_DIR, 'results')

# files written by earlier steps: the processed splits and the base model weights
PROCESSED_SAVEPATH = utils.utils.get_savepath(
    PROCESSED_DIR, DATASET, ".npz", t=TYPE, np=NOISE_P
)
BASE_MODEL_SAVEPATH = utils.utils.get_savepath(
    MODELS_DIR, DATASET, ".h5", mt="base", t=TYPE, np=NOISE_P
)

# only a warning here; the weights file is needed later when the model is loaded
if not os.path.exists(BASE_MODEL_SAVEPATH):
    print(f"warning: model has not been run for rs={RANDOM_SEED}_t={TYPE}_np={NOISE_P}")

15 asym 0.6


In [4]:
# Read every split from the processed archive. Each split is stored as a pair
# of arrays keyed 'x_<split>' and 'y_<split>'.
with open(PROCESSED_SAVEPATH, 'rb') as f:
    dat = np.load(f)

    x_train, y_train = dat['x_train'], dat['y_train']
    x_hyper_train, y_hyper_train = dat['x_hyper_train'], dat['y_hyper_train']
    x_val, y_val = dat['x_val'], dat['y_val']
    x_hyper_val, y_hyper_val = dat['x_hyper_val'], dat['y_hyper_val']
    x_test, y_test = dat['x_test'], dat['y_test']

In [5]:
# Stack the train and hyper-train splits (inputs and labels in the same order)
# into one full training set.
x_train_full = np.concatenate((x_train, x_hyper_train), axis=0)
y_train_full = np.concatenate((y_train, y_hyper_train), axis=0)

In [6]:
# Flatten each sample to a single row so every split is a 2-D (samples, features) array.
x_train_full_flat, x_val_flat, x_hyper_val_flat, x_test_flat = (
    images.reshape(len(images), -1)
    for images in (x_train_full, x_val, x_hyper_val, x_test)
)

In [7]:
# sanity check: shapes of the flattened splits
tuple(arr.shape for arr in (x_train_full_flat, x_val_flat, x_hyper_val_flat, x_test_flat))

((49000, 3072), (500, 3072), (500, 3072), (10000, 3072))

In [8]:
# All embedding files share the same folder, extension and noise settings;
# only the base name differs per split.
_emb_savepath = functools.partial(
    utils.utils.get_savepath, PROCESSED_DIR, t=TYPE, np=NOISE_P
)

X_TRAIN_FULL_EMB_SAVEPATH = _emb_savepath("x_train_full_emb_V2", ".npy")
X_VAL_EMB_SAVEPATH = _emb_savepath("x_val_emb_V2", ".npy")
X_HYPER_VAL_EMB_SAVEPATH = _emb_savepath("x_hyper_val_emb_V2", ".npy")
X_TEST_EMB_SAVEPATH = _emb_savepath("x_test_emb_V2", ".npy")

In [9]:
# Embed every split with UMAP, caching the result on disk because fitting is slow.
#
# The cache is only trusted when ALL four embedding files exist. Checking just
# the train file would crash in np.load if an earlier run was interrupted
# after writing only some of the files.
emb_savepaths = (
    X_TRAIN_FULL_EMB_SAVEPATH,
    X_VAL_EMB_SAVEPATH,
    X_HYPER_VAL_EMB_SAVEPATH,
    X_TEST_EMB_SAVEPATH,
)

if all(os.path.exists(path) for path in emb_savepaths):
    print('embedding already made, loading saved...')
    x_train_full_emb = np.load(X_TRAIN_FULL_EMB_SAVEPATH)
    x_val_emb = np.load(X_VAL_EMB_SAVEPATH)
    x_hyper_val_emb = np.load(X_HYPER_VAL_EMB_SAVEPATH)
    x_test_emb = np.load(X_TEST_EMB_SAVEPATH)

else:
    print("Making embeddings with UMAP")

    n_neighbors = 10
    # dimensionality of the embedding
    dim = 50

    start = time.time()

    umap_emb = umap.UMAP(
        n_neighbors=n_neighbors,
        min_dist=0.5,
        n_components=dim,
        metric='euclidean',
        random_state=RANDOM_SEED,
    )

    # The embedding is fit on the full training set only; every split
    # (including the training set itself) is then mapped through transform().
    umap_emb.fit(x_train_full_flat)

    x_train_full_emb = umap_emb.transform(x_train_full_flat)
    x_val_emb = umap_emb.transform(x_val_flat)
    x_hyper_val_emb = umap_emb.transform(x_hyper_val_flat)
    x_test_emb = umap_emb.transform(x_test_flat)

    end = time.time()

    # 220 seconds on our machine, a GCP n1-highmem-2 (2 vCPUs, 13 GB memory)
    print(f"Took {end - start} sec")

    # save these to avoid running above again
    np.save(X_TRAIN_FULL_EMB_SAVEPATH, x_train_full_emb)
    np.save(X_VAL_EMB_SAVEPATH, x_val_emb)
    np.save(X_HYPER_VAL_EMB_SAVEPATH, x_hyper_val_emb)
    np.save(X_TEST_EMB_SAVEPATH, x_test_emb)

    # delete references to reclaim memory
    del umap_emb, x_train_full_flat, x_val_flat, x_hyper_val_flat, x_test_flat

embedding already made, loading saved...


In [10]:
# sanity check: shapes of the embedded splits
tuple(emb.shape for emb in (x_train_full_emb, x_val_emb, x_hyper_val_emb, x_test_emb))

((49000, 50), (500, 50), (500, 50), (10000, 50))

In [11]:
# Rebuild the base model's architecture, then restore its saved weights.
model = utils.utils.make_resnet(
    depth=2,
    random_state=RANDOM_SEED,
    # IMAGE_SIZE plus a trailing channel dimension of 3
    input_shape=IMAGE_SIZE + (3,),
    nc=10,
)
model.load_weights(BASE_MODEL_SAVEPATH)

In [12]:
# Base-model predictions for every split, computed in the same order as before
# (train, val, hyper val, test).
preds_train_full, preds_val, preds_hyper_val, preds_test = (
    utils.utils.compute_preds(model, split, batch_size=BATCH_SIZE)
    for split in (x_train_full, x_val, x_hyper_val, x_test)
)

In [13]:
# length scale (width) shared by all RBF kernel functions
KER_WIDTH = 2
kernel = RBF(length_scale=KER_WIDTH)

In [14]:
# Kernel value between every sample of a split (rows) and every sample of
# x_val_emb (columns), clipped into [0.01, 0.99].
kernel_train_full = np.clip(kernel(x_train_full_emb, x_val_emb), 0.01, 0.99)
kernel_val = np.clip(kernel(x_val_emb, x_val_emb), 0.01, 0.99)
kernel_hyper_val = np.clip(kernel(x_hyper_val_emb, x_val_emb), 0.01, 0.99)
kernel_test = np.clip(kernel(x_test_emb, x_val_emb), 0.01, 0.99)

In [15]:
def _basis_frame(kernel, class_members, add_all):
    """
    Builds the basis-function table for a single data split.

    Args:
        kernel: 2-D array; rows are the split's samples, columns are val samples
        class_members: None for the single "All" group, otherwise a dict mapping
            column name -> indices of the val samples (kernel columns) in that group
        add_all: True if a constant "All" column should be appended after the
            per-class columns

    Returns:
        DataFrame with one row per kernel row and one column per basis function
    """
    if class_members is None:
        # a single group that every sample fully belongs to
        return pd.DataFrame(np.ones(len(kernel)), columns=["All"])

    # membership to a class group = mean kernel value to that class' val samples
    frame = pd.DataFrame(
        {
            name: kernel[:, members].mean(axis=1)
            for name, members in class_members.items()
        }
    )
    if add_all:
        frame['All'] = 1.0
    return frame


def get_basis_fns(
    groups,
    y_val,
    kernel_train,
    kernel_val,
    kernel_hyper_val,
    kernel_test,
    add_all,
):
    """
    Creates a separate dataset per split whose columns are basis functions
    (one per class, a single "All" group, or both) and whose rows hold the
    membership value of each sample for each basis function.

    Reads the module-level constants RANDOM_SEED and CLASSES, so both must be
    defined before this function is called.

    Args:
        groups: number of groups to make. Must be 1 or 10.
        y_val: the labels on the val set; compared against the class ids
            0..CLASSES-1 to find the val samples of each class
        kernel_train: kernel values between each train sample and each val sample
        kernel_val: kernel values between each val sample and each val sample
        kernel_hyper_val: kernel values between each hyper val sample and each val sample
        kernel_test: kernel values between each test sample and each val sample
        add_all: True if a group should be made that holds all samples.
            Must be False when groups == 1 (that case already is the "All" group).

    Returns:
        basis_train: membership of each train sample to each group
        basis_val: membership of each val sample to each group
        basis_hyper_val: membership of each hyper val sample to each group
        basis_test: membership of each test sample to each group
    """
    assert groups in (1, 10)
    if groups == 1:
        assert add_all is False
    # Re-seeds NumPy's global RNG. Nothing in this function draws random
    # numbers; the call is kept so the global RNG state after this function
    # is the same as it has always been.
    np.random.seed(RANDOM_SEED)

    if groups == 1:
        class_members = None
    else:
        # 1 group per class: the val samples labelled with that class
        class_members = {
            f"cc={cc}": np.where(y_val == cc)[0] for cc in range(CLASSES)
        }

    basis_train, basis_val, basis_hyper_val, basis_test = (
        _basis_frame(kernel, class_members, add_all)
        for kernel in (kernel_train, kernel_val, kernel_hyper_val, kernel_test)
    )

    return basis_train, basis_val, basis_hyper_val, basis_test

In [16]:
# number of label classes
CLASSES = 10

# number of groups to make (passed to get_basis_fns as `groups`)
NUM_GROUPS = 10

# whether to also add a group that holds all samples
ADD_ALL = True

In [17]:
# Basis functions for every split, built from the kernels to the val set.
basis_train_full, basis_val, basis_hyper_val, basis_test = get_basis_fns(
    groups=NUM_GROUPS,
    y_val=y_val,
    kernel_train=kernel_train_full,
    kernel_val=kernel_val,
    kernel_hyper_val=kernel_hyper_val,
    kernel_test=kernel_test,
    add_all=ADD_ALL,
)

In [18]:
# Hyperparameters for a single FWEG run.
# See FWEG's docstrings for an explanation of each.

# choose from ['Accuracy', 'F-measure', 'G-mean']
METRIC = "Accuracy"

NUM_ITERS = 5

EPSILON = 0.1

USE_LINEAR_VAL_METRIC = False

In [19]:
# Single FWEG run with the hyperparameters chosen above.
# Arguments are positional; see utils.fweg.FWEG for their meaning.
fweg = utils.fweg.FWEG(
    METRIC,
    NUM_ITERS,
    EPSILON,
    CLASSES,
    USE_LINEAR_VAL_METRIC,
    RANDOM_SEED,
)

# Fit on the full training set and the val set, passing the base-model
# predictions, labels and basis functions of each.
# NOTE(review): the three returned lists presumably hold one entry per
# iteration (val metric, gradient norm, condition number) -- confirm against
# FWEG.fit's docstring.
val_train_list, grad_norm_list, cond_list = fweg.fit(
    preds_train_full,
    y_train_full,
    basis_train_full,
    preds_val,
    y_val,
    basis_val,
)

Initialization complete!


Val Accuracy: 0.816: 100%|██████████| 5/5 [00:26<00:00,  5.25s/it]


In [20]:
# apply to hyper val set
preds_hyper_val_list, mval_hyper_val_list = fweg.predict(
    preds_hyper_val,
    y_hyper_val,
    basis_hyper_val,
    deterministic=False,
)

# index of the highest hyper-val metric value; reused below to pick the
# matching entry of the test results
best_idx = np.argmax(mval_hyper_val_list)
print(f"Hyper Val: {mval_hyper_val_list[best_idx]}")

Hyper Val: 0.7999999999999999


In [21]:
# apply to test set
preds_test_list, mval_test_list = fweg.predict(
    preds_test,
    y_test,
    basis_test,
    deterministic=False,
)
# report the test metric at the index selected on the hyper val set (best_idx),
# not the best test value, so the test set is not used for selection
print(f"Test: {mval_test_list[best_idx]}")

Test: 0.8039000000000001


Run the cells below to search over many hyperparameter settings.

In [22]:
# Results of every run for this dataset go into one CSV file in RESULTS_DIR;
# the folder is created if it does not exist yet.
os.makedirs(RESULTS_DIR, exist_ok=True)
savepath = os.path.join(RESULTS_DIR, f"results_{DATASET}.csv")
saver = utils.record.Results_Recorder(savepath, DATASET)

Results file exists, appending to it...


In [23]:
# Settings shared by every run of the hyperparameter search
# (these re-assign the constants defined in earlier cells).
CLASSES = 10
NUM_ITERS = 5
METRIC = "Accuracy"

# Hyperparameter grid. groups_list and groups_descr_list are parallel lists:
# the i-th description describes the i-th group count.
groups_list = [1, 10]
groups_descr_list = ["no groups", "group for each class"]
add_all_list = [False, True]
epsilon_list = [0.0001, 0.001, 0.1, 1.0]
# linearized metric for accuracy is the same as accuracy
use_linear_val_metric_list = [False]

# this fills in most of the arguments for our basis function creator
# it is missing the `groups` arg and `add_all`. FWEG_Hyperparameter_Search
# is given basis_fn_generator and will fill these in as it iterates over
# the hyperparameters.
basis_fn_generator = functools.partial(
    get_basis_fns,
    y_val = y_val,
    kernel_train = kernel_train_full,
    kernel_val = kernel_val,
    kernel_hyper_val = kernel_hyper_val,
    kernel_test = kernel_test,
)



In [24]:
# Search over the hyperparameter grid defined above. The recorder `saver` is
# handed to the search object.
# NOTE(review): presumably every run is written to the results file through
# `saver` -- confirm in FWEG_Hyperparameter_Search.
fweg_hp_s = utils.fweg.FWEG_Hyperparameter_Search(
    saver,
    CLASSES,
    NUM_ITERS,
    METRIC,
    basis_fn_generator,
    groups_list,
    groups_descr_list,
    add_all_list,
    epsilon_list,
    use_linear_val_metric_list,
    RANDOM_SEED,
)

# Returns the best-found value of each searched hyperparameter. The basis
# functions are not passed here; the search builds them via basis_fn_generator.
(best_groups, best_add_all, best_epsilon, best_use_linear_val_metric) = fweg_hp_s.search(
        preds_train_full,
        y_train_full,
        preds_val,
        y_val,
        preds_hyper_val,
        y_hyper_val,
        preds_test,
        y_test,
)

best hyper val: 0.8, test: 0.8039: 100%|██████████| 16/16 [03:18<00:00, 12.43s/it]  


In [25]:
# Look up the description that belongs to best_groups: the first entry of
# groups_list equal to best_groups decides, None if there is no such entry.
best_groups_descr = next(
    (
        descr
        for candidate, descr in zip(groups_list, groups_descr_list)
        if candidate == best_groups
    ),
    None,
)
assert best_groups_descr is not None

In [26]:
# get the basis functions associated with the best hyperparameters
# (this overwrites the basis_* variables built earlier in the notebook)
basis_train_full, basis_val, basis_hyper_val, basis_test = basis_fn_generator(
    groups=best_groups,
    add_all=best_add_all,
)

In [27]:
# Re-run FWEG with the best-found hyperparameters for more iterations.
# we do this because running 60 iterations would increase how long the hyperparameter search
# takes. more optimized code could do early stopping and parallelize the FWEG algorithm
# but for now this extra code is simplest.

# number of iterations for the best-found hyperparams
BEST_NUM_ITERS = 60

fweg = utils.fweg.FWEG(
    METRIC,
    BEST_NUM_ITERS,
    best_epsilon,
    CLASSES,
    best_use_linear_val_metric,
    RANDOM_SEED,
)
# only the first returned list (val metric values) is needed here
val_train_list, _, _ = fweg.fit(
        preds_train_full,
        y_train_full,
        basis_train_full,
        preds_val,
        y_val,
        basis_val,
        verbose=True,
)

# apply to hyper val set
preds_hyper_val_list, mval_hyper_val_list = fweg.predict(
    preds_hyper_val,
    y_hyper_val,
    basis_hyper_val,
    deterministic=False,
)

# Select the entry with the best hyper-val metric and report val / hyper-val /
# test scores at that same index.
# NOTE(review): this assumes val_train_list and the lists returned by predict
# are aligned index-by-index -- confirm against FWEG.fit / FWEG.predict.
best_idx = np.argmax(mval_hyper_val_list)
val_score = val_train_list[best_idx]
hyper_val_score = mval_hyper_val_list[best_idx]

# apply to test set
preds_test_list, mval_test_list = fweg.predict(
    preds_test,
    y_test,
    basis_test,
    deterministic=False,
)
test_score = mval_test_list[best_idx]

# string describing the hyperparameters of this run, stored alongside the scores
fweg_params = utils.record.format_fweg_extra(
    BEST_NUM_ITERS,
    best_groups,
    best_groups_descr,
    best_add_all,
    best_epsilon,
    best_use_linear_val_metric,
)
print(f"best params: {fweg_params}. hyper val: {hyper_val_score}, test: {test_score}")

# prefix fweg_params with BEST: for clear parsing
saver.save(
    RANDOM_SEED,
    METRIC,
    "fweg",
    val_score,
    hyper_val_score,
    test_score,
    "BEST:" + fweg_params,
)

Initialization complete!


Val Accuracy: 0.816: 100%|██████████| 60/60 [05:14<00:00,  5.25s/it]


best params: {"groups": 10, "groups_descr": "group for each class", "add_all": true, "epsilon": 0.1, "FW_val_flag": false}. hyper val: 0.7999999999999999, test: 0.8039000000000001


In [28]:
# all results have been saved; close the results recorder
saver.close()