In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before running each cell so edits to imported .py files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import os
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn import metrics


# Base directory, relative to this notebook's working directory; later cells
# build the processed-data and model paths from it.
BASE_DIR = '../../../'
import sys
# Put BASE_DIR on the import path (presumably where the `utils` package
# imported below lives -- confirm against the repository layout).
sys.path.append(BASE_DIR)

# custom code
import utils.utils
# Shared experiment configuration; later cells read the random seed and the
# per-dataset settings from it.
CONFIG = utils.utils.load_config("../../config.json")
import utils.metrics

Using TensorFlow backend.


In [3]:
# The dataset name is the name of the folder this notebook is run from; it
# selects the per-dataset section of the shared config.
DATASET = os.path.basename(os.getcwd()) # name of folder this file is in
EVAL_GROUPS = CONFIG['experiment_configs'][DATASET]['eval_groups']
RANDOM_SEED = CONFIG['random_seed']
EPOCHS = CONFIG["experiment_configs"][DATASET]["epochs"]
BATCH_SIZE = CONFIG["experiment_configs"][DATASET]["batch_size"]

# Seed the global NumPy and TensorFlow generators so that weight initialisation
# and training are repeatable for a given RANDOM_SEED. Artifacts are stored
# under rs=<seed>, so a run should be reproducible from that seed.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

print(RANDOM_SEED)

# Inputs and outputs are namespaced by dataset and random seed.
PROCESSED_DIR = os.path.join(BASE_DIR, f'processed/{DATASET}/rs={RANDOM_SEED}')
MODELS_DIR = os.path.join(BASE_DIR, f'models/{DATASET}/rs={RANDOM_SEED}')

BASE_MODEL_SAVEPATH = utils.utils.get_savepath(MODELS_DIR, DATASET, ".h5", mt="base") # mt = model_type

# The checkpoint callback writes to this path, so flag an existing file up
# front: a previous run for this seed is about to be overwritten.
if os.path.exists(BASE_MODEL_SAVEPATH):
    print(f"warning: model has been done for rs={RANDOM_SEED}")

15


In [4]:
# Read the five processed splits from PROCESSED_DIR, in the same order as the
# names on the left-hand side.
train_df, hyper_train_df, val_df, hyper_val_df, test_df = (
    pd.read_csv(os.path.join(PROCESSED_DIR, csv_name))
    for csv_name in (
        "train.csv",
        "hyper_train.csv",
        "val.csv",
        "hyper_val.csv",
        "test.csv",
    )
)

# Full validation set: val rows followed by hyper_val rows (each frame keeps
# its own row index; nothing is re-indexed here).
val_full_df = pd.concat([val_df, hyper_val_df])

In [5]:
def _split_features_labels(split_df):
    """Return ``(features, labels)`` as NumPy arrays for one split.

    Features are every column except the EVAL_GROUPS columns and 'label';
    labels are the 'label' column.
    """
    features = split_df.drop(columns=[*EVAL_GROUPS, 'label']).values
    labels = split_df['label'].values
    return features, labels


x_train, y_train = _split_features_labels(train_df)
x_hyper_train, y_hyper_train = _split_features_labels(hyper_train_df)
x_val_full, y_val_full = _split_features_labels(val_full_df)
x_test, y_test = _split_features_labels(test_df)

In [6]:
# One-hot encode the labels for the softmax / categorical cross-entropy model.
#
# The encodings are built from the DataFrames' 'label' columns rather than from
# the existing y_* arrays, so re-running this cell gives the same result
# instead of one-hot encoding labels that are already encoded.
#
# num_classes is pinned so every split gets the same width even if a split
# happens not to contain the highest class (otherwise to_categorical infers
# the width from the largest label it sees in that split).
# Assumes labels are integer-coded 0..NUM_CLASSES-1.
NUM_CLASSES = 2  # must match the number of units in the model's output layer

y_train = tf.keras.utils.to_categorical(train_df['label'].values, num_classes=NUM_CLASSES)
y_hyper_train = tf.keras.utils.to_categorical(hyper_train_df['label'].values, num_classes=NUM_CLASSES)
y_val_full = tf.keras.utils.to_categorical(val_full_df['label'].values, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(test_df['label'].values, num_classes=NUM_CLASSES)

In [7]:
# Softmax regression: a single dense layer mapping the input features directly
# to two class probabilities.
model = tf.keras.models.Sequential([
    # `shape` is documented as a tuple excluding the batch dimension; a bare
    # int is only accepted by some TensorFlow versions, so pass a 1-tuple.
    tf.keras.Input(shape=(x_train.shape[1],)),
    tf.keras.layers.Dense(2, activation=tf.nn.softmax),
])

In [8]:
# Configure training: SGD optimiser, categorical cross-entropy loss on the
# one-hot labels, with accuracy reported alongside the loss.
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

In [9]:
# Checkpoint callback: each time the monitored validation loss reaches a new
# minimum, write the model's weights (weights only, not the full model) to
# BASE_MODEL_SAVEPATH, logging each decision.
save_best = tf.keras.callbacks.ModelCheckpoint(
    filepath=BASE_MODEL_SAVEPATH,
    save_best_only=True,
    save_weights_only=True,
    monitor="val_loss",
    mode='min',
    verbose=1,
)

# Callback list handed to model.fit.
callbacks = [save_best]

In [10]:
# Train on the training split. The hyper_train split is passed as validation
# data, so `val_loss` (which the checkpoint callback monitors) is computed on it.
model.fit(
    x=x_train,
    y=y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    validation_data=(x_hyper_train, y_hyper_train),
)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.41096, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 2/100
Epoch 00002: val_loss improved from 0.41096 to 0.37396, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 3/100
Epoch 00003: val_loss improved from 0.37396 to 0.35864, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 4/100
Epoch 00004: val_loss improved from 0.35864 to 0.34978, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 5/100
Epoch 00005: val_loss improved from 0.34978 to 0.34385, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 6/100
Epoch 00006: val_loss improved from 0.34385 to 0.33953, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 7/100
Epoch 00007: val_loss improved from 0.33953 to 0.33637, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 8/100
Epoch 00008: val_loss improved from 0.33637 to 

Epoch 25/100
Epoch 00025: val_loss improved from 0.31901 to 0.31887, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 26/100
Epoch 00026: val_loss improved from 0.31887 to 0.31831, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 27/100
Epoch 00027: val_loss improved from 0.31831 to 0.31805, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 28/100
Epoch 00028: val_loss improved from 0.31805 to 0.31790, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 29/100
Epoch 00029: val_loss improved from 0.31790 to 0.31749, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 30/100
Epoch 00030: val_loss improved from 0.31749 to 0.31729, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 31/100
Epoch 00031: val_loss improved from 0.31729 to 0.31706, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 32/100
Epoch 00032: val_loss improved from

Epoch 00049: val_loss improved from 0.31510 to 0.31508, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 50/100
Epoch 00050: val_loss improved from 0.31508 to 0.31500, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 51/100
Epoch 00051: val_loss improved from 0.31500 to 0.31476, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 52/100
Epoch 00052: val_loss improved from 0.31476 to 0.31473, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 53/100
Epoch 00053: val_loss improved from 0.31473 to 0.31466, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 54/100
Epoch 00054: val_loss improved from 0.31466 to 0.31463, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 55/100
Epoch 00055: val_loss improved from 0.31463 to 0.31459, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 56/100
Epoch 00056: val_loss improved from 0.31459 to 0

Epoch 74/100
Epoch 00074: val_loss did not improve from 0.31402
Epoch 75/100
Epoch 00075: val_loss improved from 0.31402 to 0.31392, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 76/100
Epoch 00076: val_loss did not improve from 0.31392
Epoch 77/100
Epoch 00077: val_loss did not improve from 0.31392
Epoch 78/100
Epoch 00078: val_loss did not improve from 0.31392
Epoch 79/100
Epoch 00079: val_loss improved from 0.31392 to 0.31387, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 80/100
Epoch 00080: val_loss did not improve from 0.31387
Epoch 81/100
Epoch 00081: val_loss improved from 0.31387 to 0.31386, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 82/100
Epoch 00082: val_loss improved from 0.31386 to 0.31385, saving model to ../../../models/adult_bb/rs=15/adult_bb_mt=base.h5
Epoch 83/100
Epoch 00083: val_loss did not improve from 0.31385
Epoch 84/100
Epoch 00084: val_loss improved from 0.31385 to 0.31383, sav

Epoch 00100: val_loss did not improve from 0.31369


<tensorflow.python.keras.callbacks.History at 0x7f1490554d90>