# Multi Model Building (Batch 1)

In [1]:
# Identifier of this fitting batch. It is interpolated into the
# ./model_checkpoints_<N>/ paths used below and passed to the model helper functions.
FITTING_BATCH_NUM = 1

In [2]:
# Silence TensorFlow's native log output. This cell runs before the cell that
# imports tensorflow.
# NOTE(review): the variable is presumably only honoured when set before that import — confirm.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [3]:
import pandas as pd
import tensorflow as tf
import keras
from keras import layers
from model_helper_functions import create_fit_and_save_model, send_ifttt_notification, update_top_values
from data_pipeline import prep_data, get_train_val_test_data
from itertools import product
import re
import pathlib
import shutil

# Import correct optimizer
import platform
if platform.system() == "Darwin" and platform.processor() == "arm":
    from keras.optimizers.legacy import RMSprop, Adam
else:
    from keras.optimizers import RMSprop, Adam

In [4]:
# Raise the TensorFlow Python logger threshold to ERROR so that lower-severity
# messages (warnings, info) are not shown in the notebook output.
tf.get_logger().setLevel('ERROR')

In [5]:
# Fix the random seeds (same value for both calls) so repeated runs are comparable.
# NOTE(review): keras.utils.set_random_seed is documented as seeding Python, NumPy and
# the backend as well, which would make the tf.random.set_seed call redundant — confirm
# against the installed Keras version before removing either call.
tf.random.set_seed(15)
keras.utils.set_random_seed(15)

In [6]:
# Build the image dataframe from data_dir, then split it into train / validation / test frames.
# The split-percentage line in this cell's output comes from one of these project helpers
# (there is no print call in the cell itself).
data_dir = '../bears'
df = prep_data(data_dir)
train_df, val_df, test_df = get_train_val_test_data(df)

Training set: 70%, Validation set: 22.5%, Test set: 7.5%


In [7]:
# Sanity check: count every path exactly one directory level below data_dir
# and compare that count with the number of rows in the dataframe.
image_count = sum(1 for _entry in pathlib.Path(data_dir).glob('*/*'))
print('Total image count:', image_count)
print('Image count equal to dataframe length?', len(df) == image_count)

Total image count: 288
Image count equal to dataframe length? True


In [8]:
# Row counts of the three splits, computed in one pass and reported one per line.
num_train_samples, num_val_samples, num_test_samples = map(len, (train_df, val_df, test_df))
print('Number of training samples:', num_train_samples)
print('Number of validation samples:', num_val_samples)
print('Number of test samples:', num_test_samples)

Number of training samples: 201
Number of validation samples: 65
Number of test samples: 22


---

## Hyperparameter Grid

In [9]:
# Each g_* list is one axis of the hyperparameter grid; the next cell builds the
# Cartesian product of all axes (2*1*2*2*2*2*1*1*2*1 = 64 combinations).
g_batch_size = [4, 8]
g_epochs = [20]
g_augmentation_params = [
    # Each entry is either None or a tuple (flip, rotate_factor, zoom_factor, random_flip_str).
    # NOTE(review): None presumably disables augmentation — confirm in create_fit_and_save_model.
    None,
    (True, 0.25, 0.25, 'horizontal'),
]
g_cnn_params = [
    # Each entry is [cnn_units, cnn_filters, cnn_strides]; the three sequences in an entry
    # have matching lengths (presumably one element per convolutional layer — confirm).
    [(32,), [(3,3)], [(1,1)]],
    [(32, 64), [(5,5), (3,3)], [(2,2), (1,1)]],
]
g_dropout = [0, 0.5]
# NOTE(review): the trailing 3 in each tuple is presumably the output layer size
# (number of classes) — confirm against the model builder.
g_dense_units = [
    (32, 3),
    (64, 3),
]
g_activation = ['relu']
# Optimizer classes (not instances); Adam comes from the platform-dependent import above.
g_optimizer = [Adam]
# NOTE(review): presumably the patience values for early stopping and for
# learning-rate reduction callbacks — confirm in create_fit_and_save_model.
g_earlystop_patience = [5, 10]
g_reducel_patience = [1]

In [10]:
# Expand the grid: one tuple per combination, in the same axis order that the
# progress-table columns and the fit loop rely on.
all_combinations = [*product(
    g_batch_size,
    g_epochs,
    g_augmentation_params,
    g_cnn_params,
    g_dropout,
    g_dense_units,
    g_activation,
    g_optimizer,
    g_earlystop_patience,
    g_reducel_patience,
)]
print(f'There are {len(all_combinations)} parameter combinations to run.')

There are 64 parameter combinations to run.


In [11]:
# Keep track of model fitting in order to resume at a later time if needed.
# `progress` holds one row per entry of `all_combinations` (same order) plus a
# `finished` flag that the fit loop reads and updates by row position.
progress_file = f'./model_checkpoints_{FITTING_BATCH_NUM}/model_building_progress.csv'
os.makedirs(f'./model_checkpoints_{FITTING_BATCH_NUM}/', exist_ok=True)
if os.path.exists(progress_file):
    print('Using existing progress file.')
    progress = pd.read_csv(progress_file)
    # The fit loop looks rows up by their position in `all_combinations`, so a file
    # written for a different grid would either raise KeyError there or mark the
    # wrong models as already fitted. Fail early with a clear message instead.
    if len(progress) != len(all_combinations):
        raise ValueError(
            f'{progress_file} has {len(progress)} rows but the current grid has '
            f'{len(all_combinations)} combinations; delete the progress file or '
            f'restore the grid it was created with.'
        )
else:
    print('Creating new progress file.')
    progress = pd.DataFrame.from_records(all_combinations, columns=['batch_size', 'epochs', 'augmentation_params', 'cnn_params',
                                                                    'dropout', 'dense_units', 'activation', 'optimizer',
                                                                    'earlystop_patience', 'reducel_patience'])
    progress.insert(0, 'model', range(len(all_combinations)))
    progress['finished'] = False
    # Store the optimizer as its bare class name ("Adam" / "RMSprop") rather than
    # the full "<class '...'>" repr.
    class_names = '(Adam|RMSprop)'
    progress['optimizer'] = progress['optimizer'].apply(lambda x: re.sub(f'.*{class_names}.*','\\1',str(x)))

    # Write and immediately re-read so that a fresh table has the same
    # CSV-round-tripped form as one loaded from an existing file.
    progress.to_csv(progress_file, index=False)
    progress = pd.read_csv(progress_file)

Creating new progress file.


In [12]:
# Show the progress table: one row per grid combination with its `finished` flag.
display(progress)

Unnamed: 0,model,batch_size,epochs,augmentation_params,cnn_params,dropout,dense_units,activation,optimizer,earlystop_patience,reducel_patience,finished
0,0,4,20,,"[(32,), [(3, 3)], [(1, 1)]]",0.0,"(32, 3)",relu,Adam,5,1,False
1,1,4,20,,"[(32,), [(3, 3)], [(1, 1)]]",0.0,"(32, 3)",relu,Adam,10,1,False
2,2,4,20,,"[(32,), [(3, 3)], [(1, 1)]]",0.0,"(64, 3)",relu,Adam,5,1,False
3,3,4,20,,"[(32,), [(3, 3)], [(1, 1)]]",0.0,"(64, 3)",relu,Adam,10,1,False
4,4,4,20,,"[(32,), [(3, 3)], [(1, 1)]]",0.5,"(32, 3)",relu,Adam,5,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...
59,59,8,20,"(True, 0.25, 0.25, 'horizontal')","[(32, 64), [(5, 5), (3, 3)], [(2, 2), (1, 1)]]",0.0,"(64, 3)",relu,Adam,10,1,False
60,60,8,20,"(True, 0.25, 0.25, 'horizontal')","[(32, 64), [(5, 5), (3, 3)], [(2, 2), (1, 1)]]",0.5,"(32, 3)",relu,Adam,5,1,False
61,61,8,20,"(True, 0.25, 0.25, 'horizontal')","[(32, 64), [(5, 5), (3, 3)], [(2, 2), (1, 1)]]",0.5,"(32, 3)",relu,Adam,10,1,False
62,62,8,20,"(True, 0.25, 0.25, 'horizontal')","[(32, 64), [(5, 5), (3, 3)], [(2, 2), (1, 1)]]",0.5,"(64, 3)",relu,Adam,5,1,False


---

## Fit Models

In [13]:
# Fit every grid combination that is not yet marked finished. Progress is written
# to disk after each successful fit so an interrupted run can be resumed later.
os.makedirs(f'./model_checkpoints_{FITTING_BATCH_NUM}/val_metrics', exist_ok=True)
os.makedirs(f'./model_checkpoints_{FITTING_BATCH_NUM}/model_histories', exist_ok=True)
top_val_acc = dict()
# Not used within this cell; kept in case later cells reference it.
models = []
for i, params in enumerate(all_combinations):
    if progress.at[i, 'finished']:
        print(f'Model {i} has already been fitted.')
    else:
        try:
            val_metrics = create_fit_and_save_model(f'model{i}', train_df, val_df, test_df, params, FITTING_BATCH_NUM)
            progress.at[i, 'finished'] = True
            progress.to_csv(progress_file, index=False)
            print(f"""Model {i}: validation accuracy= {val_metrics['accuracy']:.4f}, ran epochs= {val_metrics['ran_epochs']}, best epoch= {val_metrics['best_epoch']}, time={val_metrics['total_time']/60:.2f}min.""")

            update_top_values(top_val_acc, val_metrics['accuracy'], i, FITTING_BATCH_NUM)

        except Exception as exc:
            # Best effort: one failing combination must not abort the whole grid.
            # Catch Exception rather than using a bare except so that a kernel
            # interrupt (KeyboardInterrupt) or SystemExit still stops the loop,
            # and report what went wrong instead of discarding it.
            print(f'ERROR fitting model {i}: {type(exc).__name__}: {exc}')

Model 0: validation accuracy= 0.7231, ran epochs= 18, best epoch= 13, time=1.34min.


Model 1: validation accuracy= 0.7231, ran epochs= 20, best epoch= 13, time=1.46min.


Model 2: validation accuracy= 0.8000, ran epochs= 16, best epoch= 11, time=1.38min.


Model 3: validation accuracy= 0.8000, ran epochs= 20, best epoch= 11, time=1.69min.


Model 4: validation accuracy= 0.6000, ran epochs= 12, best epoch= 7, time=0.87min.


Model 5: validation accuracy= 0.6000, ran epochs= 17, best epoch= 7, time=1.21min.


Model 6: validation accuracy= 0.6000, ran epochs= 8, best epoch= 3, time=0.70min.


Model 7: validation accuracy= 0.6000, ran epochs= 13, best epoch= 3, time=1.09min.


Model 8: validation accuracy= 0.6923, ran epochs= 20, best epoch= 17, time=0.38min.


Model 9: validation accuracy= 0.6923, ran epochs= 20, best epoch= 17, time=0.38min.


Model 10: validation accuracy= 0.6923, ran epochs= 18, best epoch= 13, time=0.35min.


Model 11: validation accuracy= 0.6923, ran epochs= 20, best epoch= 13, time=0.37min.


Model 12: validation accuracy= 0.3538, ran epochs= 6, best epoch= 1, time=0.11min.


Model 13: validation accuracy= 0.5538, ran epochs= 20, best epoch= 14, time=0.35min.


Model 14: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.11min.


Model 15: validation accuracy= 0.5231, ran epochs= 20, best epoch= 12, time=0.36min.


Model 16: validation accuracy= 0.7077, ran epochs= 16, best epoch= 11, time=1.43min.


Model 17: validation accuracy= 0.7385, ran epochs= 20, best epoch= 16, time=1.77min.


Model 18: validation accuracy= 0.7538, ran epochs= 20, best epoch= 17, time=2.04min.


Model 19: validation accuracy= 0.7538, ran epochs= 20, best epoch= 17, time=2.04min.


Model 20: validation accuracy= 0.5231, ran epochs= 6, best epoch= 1, time=0.53min.


Model 21: validation accuracy= 0.5231, ran epochs= 11, best epoch= 1, time=0.94min.


Model 22: validation accuracy= 0.4308, ran epochs= 6, best epoch= 1, time=0.59min.


Model 23: validation accuracy= 0.5231, ran epochs= 20, best epoch= 15, time=1.96min.


Model 24: validation accuracy= 0.3538, ran epochs= 6, best epoch= 1, time=0.22min.


Model 25: validation accuracy= 0.6000, ran epochs= 20, best epoch= 14, time=0.75min.


Model 26: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.22min.


Model 27: validation accuracy= 0.5385, ran epochs= 20, best epoch= 15, time=0.72min.


Model 28: validation accuracy= 0.3385, ran epochs= 7, best epoch= 2, time=0.27min.


Model 29: validation accuracy= 0.5385, ran epochs= 20, best epoch= 13, time=0.75min.


Model 30: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.23min.


Model 31: validation accuracy= 0.4154, ran epochs= 20, best epoch= 15, time=0.69min.


Model 32: validation accuracy= 0.5231, ran epochs= 6, best epoch= 1, time=0.30min.


Model 33: validation accuracy= 0.5231, ran epochs= 11, best epoch= 1, time=0.52min.


Model 34: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.34min.


Model 35: validation accuracy= 0.7846, ran epochs= 20, best epoch= 20, time=1.24min.


Model 36: validation accuracy= 0.5385, ran epochs= 6, best epoch= 1, time=0.31min.


Model 37: validation accuracy= 0.5385, ran epochs= 11, best epoch= 1, time=0.53min.


Model 38: validation accuracy= 0.4308, ran epochs= 6, best epoch= 1, time=0.34min.


Model 39: validation accuracy= 0.4308, ran epochs= 11, best epoch= 1, time=0.59min.


Model 40: validation accuracy= 0.3231, ran epochs= 8, best epoch= 3, time=0.13min.


Model 41: validation accuracy= 0.6615, ran epochs= 20, best epoch= 20, time=0.32min.


Model 42: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.09min.


Model 43: validation accuracy= 0.5538, ran epochs= 20, best epoch= 20, time=0.33min.


Model 44: validation accuracy= 0.3538, ran epochs= 6, best epoch= 1, time=0.10min.


Model 45: validation accuracy= 0.3538, ran epochs= 11, best epoch= 1, time=0.15min.


Model 46: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.10min.


Model 47: validation accuracy= 0.3231, ran epochs= 11, best epoch= 1, time=0.15min.


Model 48: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.38min.


Model 49: validation accuracy= 0.6000, ran epochs= 20, best epoch= 11, time=1.25min.


Model 50: validation accuracy= 0.4462, ran epochs= 6, best epoch= 1, time=0.41min.


Model 51: validation accuracy= 0.4462, ran epochs= 11, best epoch= 1, time=0.71min.


Model 52: validation accuracy= 0.5846, ran epochs= 8, best epoch= 3, time=0.54min.


Model 53: validation accuracy= 0.5846, ran epochs= 13, best epoch= 3, time=0.81min.


Model 54: validation accuracy= 0.4923, ran epochs= 6, best epoch= 1, time=0.42min.


Model 55: validation accuracy= 0.4923, ran epochs= 11, best epoch= 1, time=0.72min.


Model 56: validation accuracy= 0.3385, ran epochs= 6, best epoch= 1, time=0.18min.


Model 57: validation accuracy= 0.3385, ran epochs= 11, best epoch= 1, time=0.28min.


Model 58: validation accuracy= 0.3231, ran epochs= 6, best epoch= 1, time=0.17min.


Model 59: validation accuracy= 0.3231, ran epochs= 11, best epoch= 1, time=0.28min.


Model 60: validation accuracy= 0.3385, ran epochs= 6, best epoch= 1, time=0.18min.


Model 61: validation accuracy= 0.3385, ran epochs= 11, best epoch= 1, time=0.29min.


Model 62: validation accuracy= 0.3231, ran epochs= 7, best epoch= 2, time=0.22min.


Model 63: validation accuracy= 0.3385, ran epochs= 20, best epoch= 12, time=0.51min.


In [14]:
# Send an IFTTT notification now that the fitting loop has completed.
# NOTE(review): the argument presumably selects the notification message — confirm in model_helper_functions.
send_ifttt_notification('finished')

Notification sent


---