In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell executes, so edits to the project helper code
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import numpy as np
import tensorflow as tf
import keras.backend as K
import matplotlib.pyplot as plt
from sklearn import model_selection
from tqdm.notebook import tqdm
import umap
import time

# Relative path appended to sys.path below — presumably the repository root
# that contains the project-local `utils` package (confirm against repo layout).
BASE_DIR = '../../../'
import sys
sys.path.append(BASE_DIR)

# custom code
# NOTE: these imports must stay after the sys.path.append above, otherwise
# `utils` may not be importable from this notebook's working directory.
import utils.utils
# Shared configuration; later cells index it by 'random_seed' and
# 'experiment_configs'.
CONFIG = utils.utils.load_config("../../config.json")
import utils.papers
import utils.custom_tf

Using TensorFlow backend.


In [4]:
# The dataset name is derived from the folder this notebook is executed in.
DATASET = os.path.basename(os.getcwd())
RANDOM_SEED = CONFIG['random_seed']

# Per-dataset experiment settings, looked up once instead of on every line.
_experiment_cfg = CONFIG["experiment_configs"][DATASET]
EPOCHS = _experiment_cfg["epochs"]
BATCH_SIZE = _experiment_cfg["batch_size"]
IMAGE_X_SIZE = _experiment_cfg["image_x_size"]
IMAGE_Y_SIZE = _experiment_cfg["image_y_size"]
IMAGE_SIZE = (IMAGE_Y_SIZE, IMAGE_X_SIZE)
VAL_FULL_SPLIT = _experiment_cfg['val_full_split']
HYPER_VAL_SPLIT = _experiment_cfg['hyper_val_split']

print(DATASET, RANDOM_SEED, VAL_FULL_SPLIT)

# Locations of the raw data, the processed splits and the saved models;
# processed/model folders are keyed by random seed (rs) and val split (vs).
DATA_F = os.path.join(BASE_DIR, f"data/{DATASET}/")
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/{DATASET}/rs={RANDOM_SEED}/vs={VAL_FULL_SPLIT}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/{DATASET}/rs={RANDOM_SEED}/vs={VAL_FULL_SPLIT}')

# Weights file of the base model ("mt" stands for model type).
BASE_MODEL_SAVEPATH = utils.utils.get_savepath(MODELS_DIR, DATASET, ".h5", mt="base")

# Later cells load these weights, so flag a missing file early.
base_model_exists = os.path.exists(BASE_MODEL_SAVEPATH)
if not base_model_exists:
    print(f"warning: no model has been run for rs={RANDOM_SEED}")
    

adience_ablation 15 0.5


In [6]:
def load_split(split_name):
    """Build the batched image dataset for one processed split.

    Args:
        split_name: sub-folder of PROCESSED_DIR to read ("val", "hyper_val"
            or "test" in this notebook).

    Returns:
        The dataset produced by
        tf.keras.preprocessing.image_dataset_from_directory, batched with
        BATCH_SIZE, images resized to IMAGE_SIZE and one-hot
        ('categorical') labels.
    """
    # NOTE(review): `shuffle` is left at its default, so the evaluation splits
    # are shuffled too (seeded with RANDOM_SEED). Confirm that
    # utils.utils.compute_preds reads predictions and labels from the same
    # batches, otherwise they would be misaligned.
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory=os.path.join(PROCESSED_DIR, split_name),
        batch_size=BATCH_SIZE,
        image_size=IMAGE_SIZE,
        label_mode='categorical',
        follow_links=True,
        seed=RANDOM_SEED,
    )


# create a val for training extra
val_ds = load_split("val")

# create a hyper_val for knowing when to stop
hyper_val_ds = load_split("hyper_val")

# held-out split used for the reported test accuracy
test_ds = load_split("test")

Found 1992 files belonging to 2 classes.
Found 1992 files belonging to 2 classes.
Using 996 files for training.
Found 1992 files belonging to 2 classes.
Using 996 files for validation.
Found 1992 files belonging to 2 classes.


In [7]:
def preprocess(imgs, labels):
    """Standardize the pixel data of an image batch.

    Pixel values are first scaled from the 0..255 range to 0..1 and then
    normalized with a mean of 0.5 and a standard deviation of 0.5 for each of
    the three channels, which maps 0..255 onto -1..1. Labels are passed
    through unchanged.

    Args:
        imgs: image data with the three channels on the last axis.
        labels: labels belonging to `imgs`; returned as-is.

    Returns:
        A `(standardized_imgs, labels)` tuple.
    """
    # turn from <0..255> to <0..1>
    imgs = imgs / 255.0
    # per-channel statistics, broadcast over the trailing channel axis
    means = np.array([0.5, 0.5, 0.5])
    stds = np.array([0.5, 0.5, 0.5])
    imgs = (imgs - means) / stds
    return imgs, labels

In [8]:
# Apply the pixel standardization to every split.
# NOTE: the dataset names are rebound in place, so re-running only this cell
# would standardize the images a second time; re-create the datasets first.
val_ds, hyper_val_ds, test_ds = [
    split.map(preprocess) for split in (val_ds, hyper_val_ds, test_ds)
]

In [9]:
# Rebuild the network architecture, then restore the saved base-model weights.
# NOTE(review): `nc` is presumably the number of classes (the datasets report
# 2 classes) — confirm in utils.utils.make_resnet.
resnet_kwargs = dict(
    depth=2,
    random_state=RANDOM_SEED,
    input_shape=IMAGE_SIZE + (3,),
    nc=2,
)
model = utils.utils.make_resnet(**resnet_kwargs)

model.load_weights(BASE_MODEL_SAVEPATH)

In [10]:
# Accuracy of the base model on the hyper-validation split:
# fraction of samples whose highest-scoring class equals the label.
preds, labels = utils.utils.compute_preds(model, hyper_val_ds)
predicted_classes = np.argmax(preds, axis=1)
(predicted_classes == labels).mean()

100%|██████████| 32/32 [00:08<00:00,  3.78it/s]


0.7811244979919679

In [11]:
# Accuracy of the base model on the test split:
# fraction of samples whose highest-scoring class equals the label.
preds, labels = utils.utils.compute_preds(model, test_ds)
predicted_classes = np.argmax(preds, axis=1)
(predicted_classes == labels).mean()

100%|██████████| 63/63 [00:13<00:00,  4.69it/s]


0.7745983935742972

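# Baseline 1: Fine-Tuning

Continue training the base model on the `val` split with a small learning rate, keeping the checkpoint with the lowest loss on `hyper_val`.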

In [12]:
# Reset the model to the saved base weights so fine-tuning always starts from
# the same point, even when this section is re-run after a previous fit.
model.load_weights(BASE_MODEL_SAVEPATH)

In [13]:
# SGD with momentum and a very small step size for fine-tuning.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras optimizers.
optimizer = tf.keras.optimizers.SGD(learning_rate=5e-6, momentum=0.9)

In [14]:
# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
# Weights file for the fine-tuned model ("mt" = model type). Built from
# DATASET, like BASE_MODEL_SAVEPATH, instead of a hard-coded dataset name so
# the notebook stays correct when copied into another dataset folder.
FT_MODEL_SAVEPATH = utils.utils.get_savepath(MODELS_DIR, DATASET, ".h5", mt="ft")

# Write the weights to FT_MODEL_SAVEPATH whenever the validation loss reaches
# a new minimum, so the file always holds the best epoch seen so far.
save_best = tf.keras.callbacks.ModelCheckpoint(
    filepath=FT_MODEL_SAVEPATH,
    monitor="val_loss",
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
)


In [16]:
# Callbacks handed to model.fit; only the best-checkpoint saver is used.
callbacks = [save_best]

In [17]:
# Fine-tune on the `val` split; `hyper_val` provides the val_loss that the
# checkpoint callback monitors. The returned History is kept so the per-epoch
# metrics can be inspected later instead of being discarded (this also keeps
# the bare History repr out of the cell output).
ft_history = model.fit(
    x=val_ds,
    epochs=EPOCHS,
    validation_data=hyper_val_ds,
    callbacks=callbacks,
)

Epoch 1/25
Epoch 00001: val_loss improved from inf to 0.60150, saving model to ../../../models/adience_ablation/rs=15/vs=0.5/adience_ablation_mt=ft.h5
Epoch 2/25
Epoch 00002: val_loss improved from 0.60150 to 0.59297, saving model to ../../../models/adience_ablation/rs=15/vs=0.5/adience_ablation_mt=ft.h5
Epoch 3/25
Epoch 00003: val_loss improved from 0.59297 to 0.58409, saving model to ../../../models/adience_ablation/rs=15/vs=0.5/adience_ablation_mt=ft.h5
Epoch 4/25
Epoch 00004: val_loss improved from 0.58409 to 0.58344, saving model to ../../../models/adience_ablation/rs=15/vs=0.5/adience_ablation_mt=ft.h5
Epoch 5/25
Epoch 00005: val_loss improved from 0.58344 to 0.57443, saving model to ../../../models/adience_ablation/rs=15/vs=0.5/adience_ablation_mt=ft.h5
Epoch 6/25
Epoch 00006: val_loss improved from 0.57443 to 0.57174, saving model to ../../../models/adience_ablation/rs=15/vs=0.5/adience_ablation_mt=ft.h5
Epoch 7/25
Epoch 00007: val_loss improved from 0.57174 to 0.56682, saving 

<tensorflow.python.keras.callbacks.History at 0x7fe718db5110>

In [18]:
# Restore the checkpoint with the lowest val_loss; the weights left in memory
# after the final epoch are not necessarily the best ones.
model.load_weights(FT_MODEL_SAVEPATH)

In [19]:
# Accuracy of the fine-tuned model on the hyper-validation split:
# fraction of samples whose highest-scoring class equals the label.
preds, labels = utils.utils.compute_preds(model, hyper_val_ds)
predicted_classes = np.argmax(preds, axis=1)
(predicted_classes == labels).mean()

100%|██████████| 32/32 [00:07<00:00,  4.51it/s]


0.8162650602409639

In [20]:
# Accuracy of the fine-tuned model on the test split:
# fraction of samples whose highest-scoring class equals the label.
preds, labels = utils.utils.compute_preds(model, test_ds)
predicted_classes = np.argmax(preds, axis=1)
(predicted_classes == labels).mean()

100%|██████████| 63/63 [00:12<00:00,  4.92it/s]


0.8147590361445783