In [1]:
# Reload imported project modules automatically before each cell executes, so
# edits to the local .py helpers take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import time
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
import tqdm.notebook as notebook

import tensorflow as tf


# Path (relative to the notebook's working directory) that is appended to
# sys.path so the project-local imports below resolve -- presumably the
# repository root that contains the `utils` package; confirm if the notebook moves.
BASE_DIR = '../../../'
import sys
sys.path.append(BASE_DIR)

# custom code
# `utils.utils.load_config` reads the shared experiment configuration from
# "../../config.json"; CONFIG is indexed like a dict in the cells that follow.
import utils.utils
CONFIG = utils.utils.load_config("../../config.json")
import utils.custom_tf

Using TensorFlow backend.


In [3]:
# The dataset name is taken from the directory the notebook is executed in,
# so this notebook must be run from inside its own dataset folder.
DATASET = os.path.basename(os.getcwd())
RANDOM_SEED = CONFIG['random_seed']

# Per-dataset settings live under CONFIG["experiment_configs"][<dataset name>].
_experiment_cfg = CONFIG["experiment_configs"][DATASET]
EPOCHS = _experiment_cfg["epochs"]
BATCH_SIZE = _experiment_cfg["batch_size"]

# Images are resized to this resolution on load (required by the ImageNet model).
IMAGE_SIZE = (224, 224)

print(DATASET, RANDOM_SEED)

# Directories for raw data, processed artefacts and trained models; the
# processed/models folders are additionally keyed by the random seed.
DATA_F = os.path.join(BASE_DIR, f"data/{DATASET}/")
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/{DATASET}/rs={RANDOM_SEED}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/{DATASET}/rs={RANDOM_SEED}')


adience 45


In [4]:
def _load_split(split):
    """Load one image split from ``PROCESSED_DIR/<split>`` as a batched dataset.

    Every split is read with exactly the same settings, so they are kept in
    one place instead of being repeated per split.

    Args:
        split: Name of the sub-directory of ``PROCESSED_DIR`` to read
            (e.g. ``"train"``). Class labels are inferred from its
            sub-folders by ``image_dataset_from_directory``.

    Returns:
        The dataset returned by
        ``tf.keras.preprocessing.image_dataset_from_directory``: batches of
        ``BATCH_SIZE`` images resized to ``IMAGE_SIZE`` with one-hot
        ("categorical") labels.
    """
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory=os.path.join(PROCESSED_DIR, split),
        batch_size=BATCH_SIZE,
        image_size=IMAGE_SIZE,
        label_mode='categorical',
        # the split folders may contain symlinks to the image files
        follow_links=True,
        # no shuffling: files are yielded in a fixed order -- presumably so the
        # rows of the extracted features line up with the files on disk
        shuffle=False,
    )


# Load order is kept as train, hyper_train, val, hyper_val, test so the
# "Found N files ..." log lines appear in the same sequence as before.
train_ds = _load_split("train")
hyper_train_ds = _load_split("hyper_train")
val_ds = _load_split("val")
hyper_val_ds = _load_split("hyper_val")
test_ds = _load_split("test")

Found 9795 files belonging to 2 classes.
Found 2449 files belonging to 2 classes.
Found 398 files belonging to 2 classes.
Found 399 files belonging to 2 classes.
Found 3187 files belonging to 2 classes.


In [5]:
def preprocess(imgs, labels):
    '''
    Apply the ResNet50 input preprocessing to a batch of images.

    The features are extracted with an ImageNet-pretrained ResNet50, so the
    inputs must go through the same transform the network was trained with.
    Per the Keras documentation, ``resnet50.preprocess_input`` converts the
    images from RGB to BGR and zero-centers each colour channel with respect
    to the ImageNet dataset, without scaling.

    Args:
        imgs: batch of images as yielded by the dataset (assumed to be RGB
            pixel values in [0, 255] -- confirm against the loader).
        labels: labels belonging to ``imgs``; passed through unchanged.

    Returns:
        Tuple ``(preprocessed_imgs, labels)``, suitable for ``Dataset.map``.
    '''
    # we are using a pretrained resnet for feature extraction, so using this is needed
    imgs = tf.keras.applications.resnet50.preprocess_input(imgs)
    return imgs, labels

In [6]:
# Attach the preprocessing step to every split; it is applied lazily, batch by
# batch, while the dataset is iterated (the original author reports this to be
# about 3x faster than `flow_from_directory`).
# NOTE: the names are rebound in place, so re-running this cell without
# re-running the loading cell would apply `preprocess` a second time.
train_ds, hyper_train_ds, val_ds, hyper_val_ds, test_ds = [
    split_ds.map(preprocess)
    for split_ds in (train_ds, hyper_train_ds, val_ds, hyper_val_ds, test_ds)
]

In [7]:
# ImageNet-pretrained ResNet50, built with its classification head included.
resnet50 = tf.keras.applications.ResNet50(weights='imagenet', include_top=True)

In [8]:
# Print the layer-by-layer architecture (layer names, output shapes, parameter counts).
resnet50.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [9]:
# Feature extractor: same input as ResNet50, but the output is taken from the
# second-to-last layer instead of the final one (presumably the pooled
# features right before the ImageNet classifier -- check the summary).
_feature_layer = resnet50.layers[-2]
model = tf.keras.Model(inputs=resnet50.input, outputs=_feature_layer.output)

In [10]:
def predict_and_norm(model, ds):
    """Run ``model`` over ``ds`` and L2-normalise each output row.

    Args:
        model: object exposing ``predict(ds, verbose=...)`` that returns a 2-D
            array-like of shape ``(n_samples, n_features)``.
        ds: input handed to ``model.predict`` unchanged.

    Returns:
        Array with the same shape as the predictions in which every row has
        unit Euclidean norm. Rows that are entirely zero are returned as
        zeros instead of NaN.
    """
    preds = model.predict(ds, verbose=1)
    # keepdims=True yields shape (n_samples, 1), which broadcasts row-wise.
    norms = np.linalg.norm(preds, axis=1, keepdims=True)
    # An all-zero row has norm 0; dividing would give 0/0 = NaN. Dividing by 1
    # instead leaves such a row as zeros.
    norms[norms == 0] = 1
    return preds / norms

In [11]:
# Extract L2-normalised features for every split, in the same order as before
# (train, hyper_train, val, hyper_val, test).
preds_train, preds_hyper_train, preds_val, preds_hyper_val, preds_test = [
    predict_and_norm(model, split_ds)
    for split_ds in (train_ds, hyper_train_ds, val_ds, hyper_val_ds, test_ds)
]

In [12]:
def _save_preds(name, preds):
    """Save one feature array as ``.npy`` under ``PROCESSED_DIR``.

    Args:
        name: File name (without extension) passed to
            ``utils.utils.get_savepath``.
        preds: Array to write with ``np.save``.

    Returns:
        The path returned by ``utils.utils.get_savepath``, i.e. where the
        array was written.
    """
    path = utils.utils.get_savepath(PROCESSED_DIR, name, ".npy")
    np.save(file=path, arr=preds)
    return path


# One file per split; the path constants keep their original names so that
# any later code referring to them keeps working.
TRAIN_IMGNET_PREDS = _save_preds("adience_imgnet_preds_train", preds_train)
HYPER_TRAIN_IMGNET_PREDS = _save_preds("adience_imgnet_preds_hyper_train", preds_hyper_train)
VAL_IMGNET_PREDS = _save_preds("adience_imgnet_preds_val", preds_val)
HYPER_VAL_IMGNET_PREDS = _save_preds("adience_imgnet_preds_hyper_val", preds_hyper_val)
TEST_IMGNET_PREDS = _save_preds("adience_imgnet_preds_test", preds_test)