# Version IX - Ensemble Massive Data Augmentation (10 models, Color operations + Geometric operations Data Augmentation)

### Imports and initial setup

In [1]:
import import_ipynb

from InitialSetup import dataset, BATCH_SIZE, tf, prepare_callbacks, test_dataset_length, AUTOTUNE, train_models, load_weights_read, evaluate_models, get_labels_logits_and_preds, get_class_preds, get_class_from_sum_of_logits, np, BatchNormalization, LeakyReLU, process_hue, process_contrast, process_brightness, process_saturation, process_rotate, process_shear, process_translate, process_crop

NUM_MODELS = 10

importing Jupyter notebook from InitialSetup.ipynb
Num GPUs Available:  1
Total images in validatation dataset:  12630
(32, 32, 3) (43,)
Total images in dataset:  39209


### Prepare dataset

In [2]:
BATCH_SIZE = 64

dataSolo = dataset
# color ops
dataSolo = dataSolo.map(process_brightness)
dataSolo = dataSolo.concatenate(dataset.map(process_contrast))
dataSolo = dataSolo.concatenate(dataset.map(process_hue))
dataSolo = dataSolo.concatenate(dataset.map(process_saturation))

#geometry ops
dataSolo = dataSolo.concatenate(dataset.map(process_rotate))
dataSolo = dataSolo.concatenate(dataset.map(process_shear))
dataSolo = dataSolo.concatenate(dataset.map(process_translate))
dataSolo = dataSolo.concatenate(dataset.map(process_crop))

dataSolo_size = tf.data.experimental.cardinality(dataSolo).numpy()

dataSolo = dataSolo.cache()
dataSolo = dataSolo.shuffle(buffer_size = dataSolo_size)
dataSolo = dataSolo.batch(batch_size = BATCH_SIZE)
dataSolo = dataSolo.prefetch(buffer_size = AUTOTUNE)

train_size = int(0.8* dataSolo_size)
val_size = int(0.2* dataSolo_size)

train_dataset = dataSolo.take(train_size)
val_dataset = dataSolo.skip(train_size)

file_path_prefix = './Networks/ensembles/V2/V2'    

### Setup models and save folder

In [3]:
models_V2, histories_V2 = train_models(dataSolo, train_size/BATCH_SIZE, val_dataset, val_size/BATCH_SIZE, file_path_prefix, NUM_MODELS)

Epoch 1/20

Epoch 00001: val_accuracy improved from -inf to 0.95768, saving model to ./Networks/ensembles/V2/V2_07\cp.ckpt
Epoch 2/20

Epoch 00002: val_accuracy improved from 0.95768 to 0.98141, saving model to ./Networks/ensembles/V2/V2_07\cp.ckpt
Epoch 3/20

Epoch 00003: val_accuracy improved from 0.98141 to 0.99025, saving model to ./Networks/ensembles/V2/V2_07\cp.ckpt
Epoch 4/20

Epoch 00004: val_accuracy improved from 0.99025 to 0.99162, saving model to ./Networks/ensembles/V2/V2_07\cp.ckpt
Epoch 5/20

Epoch 00005: val_accuracy improved from 0.99162 to 0.99372, saving model to ./Networks/ensembles/V2/V2_07\cp.ckpt
Epoch 6/20

Epoch 00006: val_accuracy improved from 0.99372 to 0.99543, saving model to ./Networks/ensembles/V2/V2_07\cp.ckpt
Epoch 7/20

Epoch 00007: val_accuracy improved from 0.99543 to 0.99653, saving model to ./Networks/ensembles/V2/V2_07\cp.ckpt
Epoch 8/20

Epoch 00008: val_accuracy did not improve from 0.99653
Epoch 9/20

Epoch 00009: val_accuracy did not improve 

### Load Weights and evaluate models

In [3]:
models_V0 = load_weights_read(file_path_prefix, NUM_MODELS)
evaluate_models(models_V0, NUM_MODELS)

198/198 - 10s - loss: 0.0337 - accuracy: 0.9910
198/198 - 3s - loss: 0.0217 - accuracy: 0.9935
198/198 - 3s - loss: 0.0228 - accuracy: 0.9941
198/198 - 3s - loss: 0.0208 - accuracy: 0.9945
198/198 - 3s - loss: 0.0222 - accuracy: 0.9940
198/198 - 3s - loss: 0.0287 - accuracy: 0.9927
198/198 - 3s - loss: 0.0200 - accuracy: 0.9949
198/198 - 3s - loss: 0.0215 - accuracy: 0.9950
198/198 - 3s - loss: 0.0190 - accuracy: 0.9948
198/198 - 3s - loss: 0.0225 - accuracy: 0.9940
average accuracy: 99.384


In [4]:
labels_V0, logits_V0, preds_V0 = get_labels_logits_and_preds(models_V0, NUM_MODELS)
class_preds_V0 = get_class_preds(preds_V0, NUM_MODELS)
class_logits_V0 = get_class_from_sum_of_logits(logits_V0, NUM_MODELS)



### Build list of inputs and labels for test class

In [5]:
test_logits_preds = []

for i in range(test_dataset_length):
    
    aux = []   
    for m in range(NUM_MODELS):        
        aux.extend(logits_V0[m][i])
        
    test_logits_preds.append(aux)

### Compute logit predictions and training labels

In [6]:
dataSolo_len = len(dataSolo.take(-1))
j=0

logits_train = [[] for _ in range(NUM_MODELS)]
labels_aux = []
for images, labs in dataSolo.take(dataSolo_len):

    #Printing progress
    print('\r' + f'{j}/{dataSolo_len}',end="")
    j+=1
    
    labels_aux.extend(labs.numpy())
    for i in range(NUM_MODELS):
        
        logits_train[i].extend(models_V0[i][1].predict(images))
        
labels_train = [np.argmax(i) for i in labels_aux]  

4901/4902

### Build list of train inputs

In [7]:
train_logits_preds = []

for i in range(dataSolo_size):
    
    aux = []
    
    for m in range(NUM_MODELS):
        
        aux.extend(logits_train[m][i])
        
    train_logits_preds.append(aux)

In [8]:
print(len(labels_train), len(train_logits_preds), len(train_logits_preds[0]))

313672 313672 430


In [11]:

stack_model  = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(len(train_logits_preds[0]),)),

  tf.keras.layers.Dense(256),    
  BatchNormalization(),LeakyReLU(alpha=0.01),
  tf.keras.layers.Dropout(0.4),
  tf.keras.layers.Dense(128),    
  BatchNormalization(),LeakyReLU(alpha=0.01),
  tf.keras.layers.Dropout(0.4),
  tf.keras.layers.Dense(64),    
  BatchNormalization(),LeakyReLU(alpha=0.01),
  tf.keras.layers.Dropout(0.4),

  tf.keras.layers.Dense(43, activation='softmax')
])

In [12]:
stack_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy',metrics=['accuracy'])

file_pathV7 = './Networks/ensembles/V2/stacked_V2/StackedMassiveDataAug.ckpt'

callbacksV7 = prepare_callbacks(file_pathV7)

In [13]:
print(len(labels_train), len(train_logits_preds), len(logits_train[0]))

313672 313672 313672


In [14]:
stack_model.fit(np.asarray(train_logits_preds),np.asarray(labels_train),epochs=20, batch_size=BATCH_SIZE,
                        validation_data = (np.asarray(test_logits_preds), np.asarray(labels_V0)), callbacks = callbacksV7)

Epoch 1/20

Epoch 00001: val_accuracy improved from -inf to 0.99184, saving model to ./Networks/ensembles/V2/stacked_V2\StackedMassiveDataAug.ckpt
Epoch 2/20

Epoch 00002: val_accuracy improved from 0.99184 to 0.99462, saving model to ./Networks/ensembles/V2/stacked_V2\StackedMassiveDataAug.ckpt
Epoch 3/20

Epoch 00003: val_accuracy did not improve from 0.99462
Epoch 4/20

Epoch 00004: val_accuracy did not improve from 0.99462
Epoch 5/20

Epoch 00005: val_accuracy did not improve from 0.99462
Epoch 6/20

Epoch 00006: val_accuracy did not improve from 0.99462
Epoch 7/20

Epoch 00007: val_accuracy did not improve from 0.99462
Epoch 8/20

Epoch 00008: val_accuracy did not improve from 0.99462
Epoch 9/20

Epoch 00009: val_accuracy did not improve from 0.99462
Epoch 10/20

Epoch 00010: val_accuracy improved from 0.99462 to 0.99493, saving model to ./Networks/ensembles/V2/stacked_V2\StackedMassiveDataAug.ckpt
Epoch 11/20

Epoch 00011: val_accuracy did not improve from 0.99493
Epoch 12/20

Ep

<tensorflow.python.keras.callbacks.History at 0x25a951e2280>

In [15]:
stack_model.load_weights(file_pathV7)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x25a94ec5220>

In [16]:
pred = stack_model.predict(np.asarray(test_logits_preds))

correct = 0

for i in range(test_dataset_length):
    if np.argmax(pred[i]) == labels_V0[i] :
        correct += 1
        
print(correct, correct/test_dataset_length)

12566 0.9949326999208234
