In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import tensorflow as tf
import numpy as np
import pandas as pd

# BASE_DIR is appended to the import path before the project import below;
# presumably this is what makes the `utils` package resolvable (confirm against
# the repository layout). Later cells also build data/model paths from BASE_DIR.
BASE_DIR = '../../../'
import sys
sys.path.append(BASE_DIR)

# custom code
import utils.utils
# Experiment configuration; later cells read the random seed and the
# per-dataset epochs / batch size from it.
CONFIG = utils.utils.load_config("../../config.json")

Using TensorFlow backend.


In [3]:
# The dataset name is taken from the current working directory, so the kernel
# must be running from the folder this notebook lives in.
DATASET = os.path.basename(os.getcwd()) # name of folder this file is in
RANDOM_SEED = CONFIG['random_seed']
EPOCHS = CONFIG["experiment_configs"][DATASET]["epochs"]
BATCH_SIZE = CONFIG["experiment_configs"][DATASET]["batch_size"]

print(RANDOM_SEED)

# Seed NumPy and TensorFlow before any model is built. Without this the seed
# only selects the input/output folders, while weight initialisation and batch
# shuffling stay different on every run.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Inputs and outputs are namespaced per dataset and per random seed.
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/{DATASET}/rs={RANDOM_SEED}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/{DATASET}/rs={RANDOM_SEED}')

BASE_MODEL_SAVEPATH = utils.utils.get_savepath(MODELS_DIR, DATASET, ".h5", mt="base") # mt = model_type

# The checkpoint callback defined later writes to this same path, so training
# again will overwrite an existing file; this is only a heads-up, not a guard.
if os.path.exists(BASE_MODEL_SAVEPATH):
    print(f"warning: model has been done for rs={RANDOM_SEED}")

55


In [4]:
def _read_split(filename):
    """Read one CSV split from PROCESSED_DIR into a DataFrame."""
    return pd.read_csv(os.path.join(PROCESSED_DIR, filename))


train_df = _read_split("train.csv")
hyper_train_df = _read_split("hyper_train.csv")
val_df = _read_split("val.csv")
hyper_val_df = _read_split("hyper_val.csv")
test_df = _read_split("test.csv")

# Both validation splits are stacked (val rows first) and evaluated together.
val_full_df = pd.concat([val_df, hyper_val_df])

In [5]:
def _features_and_labels(frame):
    """Return (features, labels) as arrays: all columns except 'label', and 'label' itself."""
    return frame.drop('label', axis=1).values, frame['label'].values


x_train, y_train = _features_and_labels(train_df)
x_hyper_train, y_hyper_train = _features_and_labels(hyper_train_df)
x_val_full, y_val_full = _features_and_labels(val_full_df)
x_test, y_test = _features_and_labels(test_df)

In [6]:
# One-hot encode the labels for the categorical cross-entropy loss.
#
# The encoding is derived from the DataFrames rather than from the current
# value of y_*, so re-running this cell gives the same result instead of
# encoding labels that are already one-hot.
#
# num_classes is fixed so every split gets the same width even if a class
# happens to be absent from it; it must match the 2-unit softmax output layer.
NUM_CLASSES = 2

y_train = tf.keras.utils.to_categorical(train_df['label'].values, num_classes=NUM_CLASSES)
y_hyper_train = tf.keras.utils.to_categorical(hyper_train_df['label'].values, num_classes=NUM_CLASSES)
y_val_full = tf.keras.utils.to_categorical(val_full_df['label'].values, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(test_df['label'].values, num_classes=NUM_CLASSES)

In [7]:
# Base model: a single dense softmax layer over the input features
# (a linear classifier with two output units, one per class).
model = tf.keras.models.Sequential([
    # `shape` is documented as a tuple that excludes the batch dimension;
    # a bare int only works where Keras silently coerces it.
    tf.keras.Input(shape=(x_train.shape[1],)),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])

In [8]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output;
# the optimizer is selected by name, so it runs with Keras' default SGD settings.
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

In [9]:
# Checkpoint callback: whenever the monitored validation loss reaches a new
# minimum, write the weights (weights only, not the full model) to
# BASE_MODEL_SAVEPATH, replacing the previous best.
save_best = tf.keras.callbacks.ModelCheckpoint(
    BASE_MODEL_SAVEPATH,
    monitor="val_loss",
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

callbacks = [save_best]

In [10]:
# Train on the train split. The hyper_train split is passed as validation
# data, so the `val_loss` monitored by the checkpoint callback is computed on it.
model.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_hyper_train, y_hyper_train),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.55548, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.55548 to 0.52252, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 3/100
Epoch 00003: val_loss improved from 0.52252 to 0.51156, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.51156 to 0.50599, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 5/100
Epoch 00005: val_loss improved from 0.50599 to 0.50321, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 6/100
Epoch 00006: val_loss improved from 0.50321 to 0.50096, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 7/100
Epoch 00007: val_loss improved from 0.50096 to 0.50023, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 8/100
Epoch 00008: val_loss improved from 0.50023 to 0.49910, saving model to ../../../models/a

Epoch 00025: val_loss did not improve from 0.49493
Epoch 26/100
Epoch 00026: val_loss improved from 0.49493 to 0.49473, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 27/100
Epoch 00027: val_loss improved from 0.49473 to 0.49439, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 28/100
Epoch 00028: val_loss did not improve from 0.49439
Epoch 29/100
Epoch 00029: val_loss improved from 0.49439 to 0.49431, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 30/100
Epoch 00030: val_loss improved from 0.49431 to 0.49426, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 31/100
Epoch 00031: val_loss improved from 0.49426 to 0.49420, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 32/100
Epoch 00032: val_loss did not improve from 0.49420
Epoch 33/100
Epoch 00033: val_loss improved from 0.49420 to 0.49418, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 34/100
Epoch 00034: val_loss did no

Epoch 52/100
Epoch 00052: val_loss did not improve from 0.49371
Epoch 53/100
Epoch 00053: val_loss improved from 0.49371 to 0.49363, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 54/100
Epoch 00054: val_loss did not improve from 0.49363
Epoch 55/100
Epoch 00055: val_loss did not improve from 0.49363
Epoch 56/100
Epoch 00056: val_loss improved from 0.49363 to 0.49355, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 57/100
Epoch 00057: val_loss improved from 0.49355 to 0.49354, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 58/100
Epoch 00058: val_loss improved from 0.49354 to 0.49352, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 59/100
Epoch 00059: val_loss did not improve from 0.49352
Epoch 60/100
Epoch 00060: val_loss improved from 0.49352 to 0.49351, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 61/100
Epoch 00061: val_loss did not improve from 0.49351
Epoch 62/100
Epoch 00062: val_

Epoch 80/100
Epoch 00080: val_loss did not improve from 0.49331
Epoch 81/100
Epoch 00081: val_loss did not improve from 0.49331
Epoch 82/100
Epoch 00082: val_loss improved from 0.49331 to 0.49329, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 83/100
Epoch 00083: val_loss did not improve from 0.49329
Epoch 84/100
Epoch 00084: val_loss improved from 0.49329 to 0.49324, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 85/100
Epoch 00085: val_loss did not improve from 0.49324
Epoch 86/100
Epoch 00086: val_loss improved from 0.49324 to 0.49317, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 87/100
Epoch 00087: val_loss did not improve from 0.49317
Epoch 88/100
Epoch 00088: val_loss improved from 0.49317 to 0.49313, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 89/100
Epoch 00089: val_loss improved from 0.49313 to 0.49311, saving model to ../../../models/adult/rs=55/adult_mt=base.h5
Epoch 90/100
Epoch 00090: val_

<tensorflow.python.keras.callbacks.History at 0x7fa813b55650>

In [11]:
# Restore the checkpointed weights (the epoch with the lowest val_loss) so the
# evaluations below use the best saved model rather than the final-epoch one.
model.load_weights(BASE_MODEL_SAVEPATH)

In [12]:
# train acc
# compute_preds is a project helper; presumably it returns one row of class
# scores per sample (verify against utils.utils).
preds_train = utils.utils.compute_preds(
    model,
    x_train,
    batch_size=BATCH_SIZE,
)
# Decode both predictions and one-hot labels with argmax: this always yields
# exactly one class index per row, whereas argwhere only lines up when every
# label row has exactly one non-zero entry.
(np.argmax(preds_train, axis=1) == np.argmax(y_train, axis=1)).mean()

0.7602108640843457

In [13]:
# hyper train acc
# compute_preds is a project helper; presumably it returns one row of class
# scores per sample (verify against utils.utils).
preds_hyper_train = utils.utils.compute_preds(
    model,
    x_hyper_train,
    batch_size=BATCH_SIZE,
)
# Decode both predictions and one-hot labels with argmax: this always yields
# exactly one class index per row, whereas argwhere only lines up when every
# label row has exactly one non-zero entry.
(np.argmax(preds_hyper_train, axis=1) == np.argmax(y_hyper_train, axis=1)).mean()

0.758607854085241

In [14]:
# val acc (on the concatenated val + hyper_val splits)
# compute_preds is a project helper; presumably it returns one row of class
# scores per sample (verify against utils.utils).
preds_val_full = utils.utils.compute_preds(
    model,
    x_val_full,
    batch_size=BATCH_SIZE,
)
# Decode both predictions and one-hot labels with argmax: this always yields
# exactly one class index per row, whereas argwhere only lines up when every
# label row has exactly one non-zero entry.
(np.argmax(preds_val_full, axis=1) == np.argmax(y_val_full, axis=1)).mean()

0.6319018404907976

In [15]:
# test acc
# compute_preds is a project helper; presumably it returns one row of class
# scores per sample (verify against utils.utils).
preds_test = utils.utils.compute_preds(
    model,
    x_test,
    batch_size=BATCH_SIZE,
)
# Decode both predictions and one-hot labels with argmax: this always yields
# exactly one class index per row, whereas argwhere only lines up when every
# label row has exactly one non-zero entry.
(np.argmax(preds_test, axis=1) == np.argmax(y_test, axis=1)).mean()

0.6192455639657526