In [1]:
import tensorflow as tf
from model import *
import datetime
from utils import *
from preprocessing import *
import random
import glob
from os.path import join, getctime, basename
from load_data import load_data
from save_model import replace_model_if_better
from keras.models import load_model
from shutil import rmtree

Using TensorFlow backend.


In [2]:
data_dict = load_data('original')
aug_data = augment_ops(data_dict, trim=True, average=0, subsample=5, noise=True, maxpool=False)
total_X_test = aug_data['total_X_test']
total_y_test = aug_data['total_y_test']
total_X_train = aug_data['total_X_train']
total_y_train = aug_data['total_y_train']
total_X_val = aug_data['total_X_val']
total_y_val = aug_data['total_y_val']
save_path = join(get_save_path(), 'AvgPoolCNN')
new_path = join(save_path, 'best_val')
time = str(datetime.datetime.now()).replace(' ', '_')
workpath = join(save_path, time)
print(total_X_train.shape)
print(total_y_train.shape)
print(total_X_test.shape)
print(total_y_test.shape)
print(total_X_val.shape)
print(total_y_val.shape)

# Create result folders 
save_path = join(get_save_path(), 'AvgPoolCNN')
new_path = join(save_path, 'best_val')
time = str(datetime.datetime.now()).replace(' ', '_')
workpath = join(save_path, time)
ensure_dir(new_path)
ensure_dir(workpath)
ensure_dir(save_path)

# Load preprocessed data
aug_data = load_data_pickle(get_save_path())
total_X_test = aug_data['total_X_test']
total_y_test = aug_data['total_y_test']
total_X_train = aug_data['total_X_train']
total_y_train = aug_data['total_y_train']
total_X_val = aug_data['total_X_val']
total_y_val = aug_data['total_y_val']
# total_X_train = np.transpose(total_X_train, (0, 2, 1))
# total_X_val = np.transpose(total_X_val, (0, 2, 1))
# total_X_test = np.transpose(total_X_test, (0, 2, 1))
print(total_X_train.shape)
print(total_y_train.shape)
print(total_X_test.shape)
print(total_y_test.shape)
print(total_X_val.shape)
print(total_y_val.shape)

config = {
    # Network
    'num_inputs': total_X_train.shape[0],
    'input_shape': (total_X_train.shape[1],total_X_train.shape[2],1),
    'epochs': 50,
    'dropout': 0.5,
    'batch_size': 640,
    'l2': 0.05,
    'LSTM': True,
    'lr': 0.001
}

(8460, 22, 100)
(8460, 4)
(2215, 22, 100)
(2215, 4)
(2115, 22, 100)
(2115, 4)
Loading data pickle...
Data pickle loaded.
(11844, 22, 100)
(11844, 4)
(3101, 22, 100)
(3101, 4)
(2961, 22, 100)
(2961, 4)


In [3]:
AvgPoolCNN = AvgPoolCNN()
AvgPoolCNN.build_model(config)
AvgPoolCNN.train(total_X_train, total_y_train, total_X_val, total_y_val, config, workpath)

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 22, 100)           0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 22, 100, 1)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 22, 91, 48)        528       
_________________________________________________________________
batch_normalization_1 (Batch (None, 22, 91, 48)        192       
_________________________________________________________________
dropout_1 (Dropout)          (None, 22, 91, 48)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 1, 91, 40)         42280     
_________________________________________________________________
batch_normalization_2 (Batch (None, 1, 91, 40)         160 

Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x7fc0f4780208>

In [4]:
preds = [AvgPoolCNN.predict(total_X_test[i::total_X_test.shape[0]], 0) for i in range(total_X_test.shape[0])]
majority_pred = []
for pred in preds:
    arg_maxes = [np.argmax(p) for p in pred]
    votes = np.bincount(arg_maxes)
    out = np.ndarray(shape=(4,), buffer=np.zeros(4), dtype=int)
    out[votes.argmax()] = 1
    majority_pred.append(out)
result = [(a == b).all() for a, b in zip(majority_pred, total_y_test[0:total_X_test.shape[0]])]
raw = AvgPoolCNN.evaluate(total_X_test, total_y_test)
print("Raw Acc result: {}".format(raw[1]))
print("Majority Vote result: {}".format(np.mean(result)))
filepath = join(save_path, 'AvgPoolCNN.pickle')
replace_model_if_better(filepath, np.mean(result), AvgPoolCNN, config)

Raw Acc result: 0.6094807982444763
Majority Vote result: 0.6094808126410836
Old model exists. Comparing performance.
New model is worse than the old one. Will not update the old model


False

In [5]:
# Load the checkpointed model with highest val acc and perform majority voting
model_path = join(workpath, '*.hdf5')
list_of_files = glob.glob(model_path)
latest_file = max(list_of_files, key=getctime)
print(latest_file)
model_val = load_model(latest_file)
print(model_val)
preds = [model_val.predict(total_X_test[i::443], verbose=0) 
         for i in range(443)]
majority_pred = []
for pred in preds:
    arg_maxes = [np.argmax(p) for p in pred]
    votes = np.bincount(arg_maxes)
    out = np.ndarray(shape=(4,), buffer=np.zeros(4), dtype=int)
    out[votes.argmax()] = 1
    majority_pred.append(out)
result = [(a == b).all() for a, b in zip(majority_pred, total_y_test[0:443])]
raw = model_val.evaluate(total_X_test, total_y_test)
print("Raw Acc result: {}".format(raw[1]))
print("Majority Vote result: {}".format(np.mean(result)))
# filepath = join(save_path, 'best_val', 'AvgPoolCNN.pickle')
# replaced = replace_model_if_better(filepath, np.mean(result), model_val, config)
rmtree(workpath)

/home/alexhw/projects/ee247proj/save/AvgPoolCNN/2020-03-16_14:14:02.945234/AvgPoolCNN_best_val.hdf5
<keras.engine.training.Model object at 0x7fc0f40eb6a0>
Raw Acc result: 0.558851957321167
Majority Vote result: 0.5733634311512416
