## Training the AI Model

This Jupyter notebook trains the AI model. It is provided as an .ipynb file so that it can be run in Google Colab and make use of Colab's GPUs. To enable a GPU, select the following in the Colab menu:

Runtime --> Change runtime type --> *select GPU*

Throughout the code, the developer can configure how the AI model is trained. For example, the model's hyperparameters can be changed by setting the corresponding variables, and the developer can choose whether to train a VGG model or a custom CNN by (un)commenting lines of code.

### Mount Drive and Imports

In [1]:
# Mount Google Drive when running in Google Colab; fall back to the current
# working directory when the notebook is executed somewhere else.
try:
    from google.colab import drive
except ImportError:
    # google.colab is not importable here, so there is no drive to mount.
    # Use paths relative to the working directory; set PATH by hand if the
    # data lives elsewhere.
    PATH = ''
else:
    # mount drive and set directory
    drive.mount('/content/drive', force_remount=True)

    # navigate to your working FOLDER
    #FOLDER = '/content/drive/MyDrive/GDrive_Hestia/data/'
    #%cd $FOLDER

    # root folder on the mounted drive; data and model paths are built from it
    PATH = '/content/drive/MyDrive/GDrive_Hestia/data/'

Mounted at /content/drive


In [4]:
# For Execution in local Jupyter Notebook

# PATH = ""

In [2]:
# CNN architecture
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.layers.convolutional import *
from keras.applications.vgg16 import VGG16
from keras.models import Model

# prediction
from tensorflow.keras.models import load_model
from keras.preprocessing import image

# image pre-processing
from keras.preprocessing.image import ImageDataGenerator

# visualization
# from keras.callbacks import TensorBoard 
# from keras.callbacks import Callback # default callback -> test scores
# from keras.callbacks import ModelCheckpoint # checkpointing model
from keras.callbacks import *

# optimizers
from keras.optimizers import SGD
from keras.optimizers import RMSprop
from keras.optimizers import Adam

# system and standard
import sys
from PIL import Image
sys.modules['Image'] = Image 
from datetime import datetime
import numpy as np
import os

### Program parameter settings

In [3]:
# define data directories
data_src = 'AIModel_data2'  # directory with train, dev, test set
# os.path.join only inserts a separator where one is missing, so PATH works
# with or without a trailing slash (and when it is an empty string)
train_data_dir = os.path.join(PATH, data_src, 'train')  # contains two classes: no_tackle and tackle - one subdirectory per class
dev_data_dir = os.path.join(PATH, data_src, 'dev')
test_data_dir = os.path.join(PATH, data_src, 'test')

# define name of nb aka name of classifier model (used in the export file names)
nbname = 'CNN_ep5'

# define batch sizes
train_batch_size = 50
dev_batch_size = 17
test_batch_size = 15

# define image size, ideally the actual size of the images
# for other CNN architectures (i.e. Transfer Learning) this can deviate
img_width, img_height = 1152, 648
# VGG16 input size
# NOTE(review): Keras documents 224 x 224 as the VGG16 default input size;
# the original comment used 244 - confirm which one was intended
#img_width, img_height = 224, 224

# define number of training epochs
nb_epoch = 5

# define model parameters
learning_rate = 0.001  # other optimizer params are left at their defaults

# define callbacks
#tensorboard = TensorBoard(log_dir=f'TB_logs/{nbname}')

### Data input and data augmentation

In [4]:
# define datagenerators: pixel values are only rescaled from [0, 255] to [0, 1]
train_datagen = ImageDataGenerator(rescale=1./255)
dev_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# class sub-directories, listed explicitly instead of relying on the default
# alphanumeric ordering
CLASS_NAMES = ['no_tackle', 'tackle']


def _make_generator(datagen, directory, batch_size, shuffle):
    """Return a directory iterator that yields batches of images and binary labels.

    Args:
        datagen: the ImageDataGenerator to read the images with.
        directory: folder containing one sub-directory per class.
        batch_size: number of images per batch.
        shuffle: whether the images are served in random order.
    """
    # NOTE(review): Keras documents target_size as (height, width). Width is
    # passed first here, which matches the input_shape used for the model, so
    # both would have to be swapped together - confirm the intended layout.
    return datagen.flow_from_directory(
        directory=directory,
        target_size=(img_width, img_height),  # the dimensions to which all images found will be resized
        color_mode='rgb',
        classes=CLASS_NAMES,
        class_mode='binary',  # 1D binary labels: 1.0 and 0.0 needed for binary_crossentropy loss; "categorical" would be 2D one-hot encoded labels
        batch_size=batch_size,
        shuffle=shuffle,
    )


# train-batches: shuffled so that every epoch sees the images in a new order
train_generator = _make_generator(train_datagen, train_data_dir, train_batch_size, shuffle=True)

# valid-batches and test-batches: kept in directory order so that predictions
# can be matched against generator.classes / generator.filenames
dev_generator = _make_generator(dev_datagen, dev_data_dir, dev_batch_size, shuffle=False)
test_generator = _make_generator(test_datagen, test_data_dir, test_batch_size, shuffle=False)

Found 400 images belonging to 2 classes.
Found 68 images belonging to 2 classes.
Found 62 images belonging to 2 classes.


### CNN model architecture

##### Sequential model: standard

In [5]:
# keras sequential model, assembled from a table of convolutional stages.
# Each stage is conv -> relu -> conv -> relu -> max-pool (stride 2);
# the tuples are (number of filters, kernel edge length, pooling window edge length).
stage_specs = [
    (16, 7, 6),
    (16, 7, 6),
    (32, 5, 4),
    (32, 5, 4),
    (64, 3, 2),
    (64, 3, 2),
]

cnn_layers = []
for n_filters, kernel_edge, pool_edge in stage_specs:
    for _ in range(2):
        conv_kwargs = dict(filters=n_filters,
                           kernel_size=(kernel_edge, kernel_edge),
                           strides=(1, 1),
                           padding='same')
        if not cnn_layers:
            # only the very first layer declares the input: RGB images
            conv_kwargs['input_shape'] = (img_width, img_height, 3)
        cnn_layers.append(Conv2D(**conv_kwargs))
        cnn_layers.append(Activation('relu'))
    cnn_layers.append(MaxPooling2D(pool_size=(pool_edge, pool_edge), strides=(2, 2), padding='same'))

# classifier head: flatten -> 64 relu units -> dropout -> one sigmoid unit
cnn_layers.append(Flatten())
cnn_layers.append(Dense(units=64))
cnn_layers.append(Activation('relu'))
cnn_layers.append(Dropout(rate=0.5))
cnn_layers.append(Dense(units=1))
cnn_layers.append(Activation('sigmoid'))  # binary classification

model = Sequential(cnn_layers)

##### VGG 16

In [None]:
# Transfer-learning variant: VGG16 base with a newly added classifier head.
# Running this cell replaces `model`.

# base model: VGG16 with ImageNet weights and without its original top layers
base_model = VGG16(weights='imagenet',
                   include_top=False,
                   input_shape=(img_width, img_height, 3))

# new top: flatten the base output, two relu FC layers, dropout, sigmoid output
x = base_model.output
x = Flatten(input_shape=(None, None, 512))(x)  # flattens the base output to a 1D tensor per image
for n_units in (128, 64):
    x = Dense(units=n_units, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(units=1, activation='sigmoid')(x)

# final model instance used for training: base_model input -> new top layers
model = Model(inputs=base_model.input, outputs=predictions)

# set how much of the network should be retrained: layers before the changing
# point are frozen, layers from the changing point onwards are trained
changing_point = 11  # values: 4, 7, 11, 15, 19 and 0; exact layer can be entered as Keras starts with layer 0
frozen_layers = model.layers[:changing_point]
retrained_layers = model.layers[changing_point:]
for layer in frozen_layers:
    layer.trainable = False
for layer in retrained_layers:
    layer.trainable = True

In [6]:
# compilation: configuration of the learning process
# `learning_rate` replaces the deprecated `lr` alias. `epsilon=None` and
# `decay=0.0` are dropped: they only restated the optimizer defaults, and
# newer Keras versions reject `decay`.
model.compile(
    optimizer=Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, amsgrad=False),  # beta values as in the original call
    loss='binary_crossentropy',  # matches the single sigmoid output and the binary labels
    metrics=['accuracy'],  # reported to judge performance; not used for training
)

### Train AI Model

In [None]:
# to use if keras model should not save; comment in respective callbacks in model.fit
# source: https://stackoverflow.com/questions/63074971/keras-model-save-isnt-saving

# filepath= "/content/drive/MyDrive/GDrive_Hestia/data/models/epochs:{epoch:03d}-val_acc:{val_accuracy}.hdf5"
# checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
# callbacks_list = [checkpoint]

In [7]:
start = datetime.now()  # get current time

# fit the model on the batches produced by the generators; the returned
# History object is kept so the per-epoch metrics can be inspected afterwards
history = model.fit(
    train_generator,  # train data
    steps_per_epoch=len(train_generator),  # number of batches per epoch
    epochs=nb_epoch,  # an epoch is one iteration over the entire training data
    verbose=1,  # 0 = silent, 1 = progress bar, 2 = one line per epoch
    #callbacks=[tensorboard],  # callbacks report information while training is taking place
    validation_data=dev_generator,  # evaluate loss and metrics at the end of each epoch
    validation_steps=len(dev_generator),  # number of validation batches
    #callbacks=callbacks_list,
    #validation_freq=None,
)

end = datetime.now()

# total_seconds() covers the whole duration (including days) as one float;
# joining .seconds and an unpadded microsecond fragment misprinted small
# fractions (e.g. 5 ms showed up as ",50")
elapsed_seconds = (end - start).total_seconds()
print(f"Training execution time: {elapsed_seconds:.4f} seconds")

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training execution time: 264,6110 seconds


### Evaluation

In [8]:
# Run the trained model over every batch of the test generator.
# The result holds the loss first, then the accuracy metric.
n_test_batches = len(test_generator)
testeval = model.evaluate(test_generator, steps=n_test_batches)

test_loss = testeval[0]
test_accuracy = testeval[1]
print(f"\nTest Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")


Test Loss: 0.0273622814565897
Test Accuracy: 1.0


In [9]:
# print a per-layer overview of the model (layer type, output shape, parameter count)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 1152, 648, 16)     2368      
_________________________________________________________________
activation (Activation)      (None, 1152, 648, 16)     0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 1152, 648, 16)     12560     
_________________________________________________________________
activation_1 (Activation)    (None, 1152, 648, 16)     0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 576, 324, 16)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 576, 324, 16)      12560     
_________________________________________________________________
activation_2 (Activation)    (None, 576, 324, 16)      0

### Export Model

In [10]:
# Change the working directory to PATH so that the relative 'models/...'
# save paths resolve inside the data folder. An empty PATH means "stay in the
# current directory"; os.chdir('') would raise FileNotFoundError.
if PATH:
    os.chdir(PATH)

In [11]:
# Export the trained model; comment in / out the variant you need for your OS.
export_base = f'models/{nbname}_1'

# Folder-based export. The target folder must not be read-only, which on some
# Windows 10 versions cannot be disabled.
model.save(export_base)

# Single-file .h5 export (.h5 is deprecated).
model.save(f'{export_base}.h5')

#model.save(f'/models/{nbname}.tf')

INFO:tensorflow:Assets written to: models/CNN_ep5_1/assets
