In [36]:
import tensorflow as tf
import numpy as np
import os
import random
import math

import pandas as pd
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.inception_v3 import preprocess_input
from keras.regularizers import l2

import splitfolders

tfk = tf.keras
tfkl = tf.keras.layers
print(tf.__version__)

2.8.2


### Random seed for reproducibility

In [37]:
seed = 42

random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)

### Metadata

In [38]:
labels = ['Species1',       # 0
          'Species2',       # 1
          'Species3',       # 2
          'Species4',       # 3
          'Species2',       # 4
          'Species2',       # 5
          'Species7',       # 6
          'Species8',       # 7
          ]

In [39]:
img_w = 96
img_h = 96
input_shape = (96, 96, 3)
classes = 8

class_weights = {0: 2.389358108108108,
                 1: 0.8320588235294117,
                 2: 0.8583131067961165,
                 3: 0.8667279411764706,
                 4: 0.8340212264150944,
                 5: 1.9978813559322033,
                 6: 0.8243006993006993,
                 7: 0.8709975369458128}

epochs = 30
patience_epochs = 10
batch_size = 24

last_nonTrainable_layer = 207

## Data Loader

In [41]:
from PIL import Image
import os

path = "../Matteo/Dataset"

dirs = os.listdir(path)

print(dirs)



['Species1', 'Species2', 'Species3', 'Species4', 'Species5', 'Species6', 'Species7', 'Species8']


In [42]:
# Splitting the main dataset into train and val
dataset_dir = '../Matteo/datasetNoTest'

if not(os.path.exists('../datasetNoTest')) :
    print('splitting')
    splitfolders.ratio('dataset', output='datasetNoTest', seed=seed, ratio=(0.8, 0.2))

# Setting dataset directories
training_dir = os.path.join(dataset_dir, 'train')
validation_dir = os.path.join(dataset_dir, 'val')

## Image Generators

In [43]:
train_data_gen = ImageDataGenerator(
    # Data Augmentation
    rotation_range=20,
    height_shift_range=0.3,
    width_shift_range=0.4,
    zoom_range=0.4,
    horizontal_flip=True,
    vertical_flip=True,
    brightness_range=[0.3,1.4],
    fill_mode='nearest',
    preprocessing_function=preprocess_input)

# Generator
train_gen = train_data_gen.flow_from_directory(
    directory=training_dir,
    target_size=(96,96),
    color_mode='rgb',
    classes=labels,
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=True,
    seed=seed)


Found 3078 images belonging to 8 classes.


In [44]:
test_data_gen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

# Generator
test_gen = test_data_gen.flow_from_directory(
    directory=validation_dir,
    target_size=(96,96),
    color_mode='rgb',
    classes=labels,
    class_mode='categorical',
    batch_size=batch_size,
    shuffle=False,
    seed=seed)

Found 775 images belonging to 8 classes.


## Transfer Learning Model

In [45]:
supernet = tfk.applications.InceptionV3(
    include_top=False,
    weights="imagenet",
    input_shape=(96,96,3)
)

supernet.trainable = True

for i, layer in enumerate(supernet.layers[:last_nonTrainable_layer]):
    layer.trainable=False

for i, layer in enumerate(supernet.layers):
    print(i, layer.name, layer.trainable)

0 input_8 False
1 conv2d_282 False
2 batch_normalization_282 False
3 activation_282 False
4 conv2d_283 False
5 batch_normalization_283 False
6 activation_283 False
7 conv2d_284 False
8 batch_normalization_284 False
9 activation_284 False
10 max_pooling2d_12 False
11 conv2d_285 False
12 batch_normalization_285 False
13 activation_285 False
14 conv2d_286 False
15 batch_normalization_286 False
16 activation_286 False
17 max_pooling2d_13 False
18 conv2d_290 False
19 batch_normalization_290 False
20 activation_290 False
21 conv2d_288 False
22 conv2d_291 False
23 batch_normalization_288 False
24 batch_normalization_291 False
25 activation_288 False
26 activation_291 False
27 average_pooling2d_27 False
28 conv2d_287 False
29 conv2d_289 False
30 conv2d_292 False
31 conv2d_293 False
32 batch_normalization_287 False
33 batch_normalization_289 False
34 batch_normalization_292 False
35 batch_normalization_293 False
36 activation_287 False
37 activation_289 False
38 activation_292 False
39 activati

## Learning Rate Scheduler

In [46]:
def step_decay(epoch):

    initial_lrate = 0.005
    drop = 0.1
    epochs_drop = 10.0

    lrate = initial_lrate * math.pow(drop, math.floor((1+epoch)/epochs_drop))

    return lrate

## Callbacks

In [47]:
from datetime import datetime

def create_folders_and_callbacks(model_name):

    exps_dir = os.path.join('callbackSaves')
    if not os.path.exists(exps_dir):
        os.makedirs(exps_dir)

    now = datetime.now().strftime('%b%d_%H-%M-%S')

    exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
    if not os.path.exists(exp_dir):
        os.makedirs(exp_dir)

    callbacks = []

    # Model checkpoint
    # ----------------
    ckpt_dir = os.path.join(exp_dir, 'ckpts')
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)

    ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp.ckpt'),
                                                       save_weights_only=False, # True to save only weights
                                                       save_best_only=True) # True to save only the best epoch
    callbacks.append(ckpt_callback)

    # Visualize Learning on Tensorboard
    # ---------------------------------
    tb_dir = os.path.join(exp_dir, 'tb_logs')
    if not os.path.exists(tb_dir):
        os.makedirs(tb_dir)

    # By default shows losses and metrics for both training and validation
    tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                                 profile_batch=0,
                                                 histogram_freq=1)  # if > 0 (epochs) shows weights histograms
    callbacks.append(tb_callback)


    # Early Stopping
    # --------------
    #es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, mode='max', restore_best_weights=True)
    #callbacks.append(es_callback)


    # Learning Rate Scheduler
    # --------------
    LRS_callback = tf.keras.callbacks.LearningRateScheduler(step_decay)
    callbacks.append(LRS_callback)


    return callbacks

## Network Model

In [48]:
inputs = tfk.Input(shape=input_shape)

x = supernet(inputs)

glob_pooling = tfkl.GlobalAveragePooling2D(name='GlobalPooling')(x)

outputs = tfkl.Dense(
    classes,
    activation='softmax',
    kernel_initializer = tfk.initializers.GlorotUniform(seed),
)(x)


# Connect input and output through the Model class
ft_model = tfk.Model(inputs=inputs, outputs=outputs, name='model')

# Compile the model
ft_model.compile(loss=tfk.losses.CategoricalCrossentropy(), optimizer=tfk.optimizers.SGD(momentum=0.9, decay=0.0005, nesterov=False), metrics=['accuracy', tfk.metrics.Precision(), tfk.metrics.Recall()])
ft_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_9 (InputLayer)        [(None, 96, 96, 3)]       0         
                                                                 
 inception_v3 (Functional)   (None, 1, 1, 2048)        21802784  
                                                                 
 dense_5 (Dense)             (None, 1, 1, 8)           16392     
                                                                 
Total params: 21,819,176
Trainable params: 14,156,552
Non-trainable params: 7,662,624
_________________________________________________________________


## Training

In [50]:
callbacks = create_folders_and_callbacks(model_name='InceptionModel')

history = ft_model.fit(
    x = train_gen,
    batch_size = batch_size,
    epochs = epochs,
    validation_data = test_gen,
    class_weight = class_weights,
    callbacks = callbacks
).history

ft_model.save("fineTuningModel")

IndexError: index 7 is out of bounds for axis 1 with size 6

## Plotting

In [None]:
# All the metrics : Accuracy, Precision and Recall
ALPHA = 0.3

plt.figure(figsize=(20,10))

plt.plot(history['accuracy'], label='Accuracy Train', alpha=ALPHA, color='#E64A19')
plt.plot(history['val_accuracy'], label='Accuracy Val', alpha=ALPHA, color='#F57C00')

plt.ylim(0, 1)
plt.title('Metrics')
plt.legend(loc='lower right')
plt.grid(alpha=.3)
plt.show()