In [None]:
#default_exp training

TensorFlow configuration.

Without memory growth enabled, TF 2 runs into problems; see [this issue](https://github.com/tensorflow/tensorflow/issues/24496).

In [None]:
#export
import tensorflow as tf
# Turn on memory growth for each GPU that TensorFlow lists.
# The loop is a no-op when no GPU is found.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

In [None]:
#export
# Floating-point dtype used by this module: it gates the float16 / mixed-precision
# setup further down and is passed as the dtype of the final Dense layer in setup_model.
float_dtype = 'float16'

import numpy as np
import pandas as pd
import os
from deeplearning_image_classification import data_loading

from sklearn import model_selection, metrics

from tensorflow import keras
from tensorflow.keras.mixed_precision import experimental as mixed_precision
import keras_applications

In [None]:
#export

# Dataset root, re-exported from the data_loading module.
DATA_DIR = data_loading.DATA_DIR

# Let long string cells show up to 100 characters when frames are displayed.
pd.options.display.max_colwidth = 100

# Seed NumPy's global random generator.
np.random.seed(0)

In [None]:
# export

# When float16 is requested: set the Keras backend float type to float16, set the
# backend epsilon to 1e-4, and install a global 'mixed_float16' policy.
# NOTE(review): TensorFlow's documentation advises against set_floatx('float16') for
# training and suggests relying on the mixed-precision policy alone — confirm that
# both settings are really wanted together.
if float_dtype == 'float16': 
    tf.keras.backend.set_floatx('float16')
    tf.keras.backend.set_epsilon(1e-4)
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_policy(policy)

In [None]:
# Step up one directory level. The path is relative, so re-running this cell moves up again.
%cd ..

In [None]:
#export
# Both metadata tables sit directly under DATA_DIR.
train_csv_path, test_csv_path = (
    os.path.join(DATA_DIR, csv_name)
    for csv_name in ('train_metadata.csv', 'test_metadata.csv')
)

In [None]:
#export
def get_metadata(train_metadata_path=train_csv_path, test_metadata_path=test_csv_path):
    """Load the train and test metadata tables from CSV.

    Args:
        train_metadata_path: path of the training metadata CSV
            (defaults to the module-level ``train_csv_path``).
        test_metadata_path: path of the test metadata CSV
            (defaults to the module-level ``test_csv_path``).

    Returns:
        A ``(train_metadata_df, test_metadata_df)`` tuple of DataFrames.
    """
    # Read from the arguments, not the module globals, so that callers who pass
    # their own paths actually get those files.
    train_metadata_df = pd.read_csv(train_metadata_path)
    test_metadata_df = pd.read_csv(test_metadata_path)
    return train_metadata_df, test_metadata_df

In [None]:
# Display the resolved training-metadata path.
train_csv_path

In [None]:
# Load the train and test metadata tables from their default CSV locations.
train_metadata_df, test_metadata_df = get_metadata()

In [None]:
sample_size = 11000
sample_val_size = 1000

# Draw a class-stratified subsample of `sample_size` rows: only the "test" side of
# the split is kept, the remainder is discarded.
__, sample_train_val_metadata_df = model_selection.train_test_split(
    train_metadata_df,
    test_size=sample_size,
    random_state=2,
    stratify=train_metadata_df['class'],
)

# Hold out `sample_val_size` rows of that subsample for validation, again stratified by class.
sample_train_metadata_df, sample_val_metadata_df = model_selection.train_test_split(
    sample_train_val_metadata_df,
    test_size=sample_val_size,
    random_state=2,
    stratify=sample_train_val_metadata_df['class'],
)

In [None]:
#export
# Two-element image size: handed to the iterators as `target_size` and reused by
# setup_model to build the network's input shape.
image_size = (224, 224)

# All three splits are batched identically.
train_batch_size = val_batch_size = test_batch_size = 48


def get_train_val_test_iterators(train_metadata_df, val_metadata_df, test_metadata_df, image_size, train_batch_size, val_batch_size, test_batch_size):
    """Build Keras image iterators for the train, validation and test splits.

    Every split goes through MobileNet's ``preprocess_input``. Random horizontal
    flipping is applied to the training split only, so validation and test
    batches are deterministic.

    Args:
        train_metadata_df: metadata DataFrame describing the training images.
        val_metadata_df: metadata DataFrame describing the validation images.
        test_metadata_df: metadata DataFrame describing the test images.
        image_size: target size every image is resized to.
        train_batch_size: batch size of the training iterator.
        val_batch_size: batch size of the validation iterator.
        test_batch_size: batch size of the test iterator.

    Returns:
        A ``(train_image_iterator, val_image_iterator, test_image_iterator)`` tuple.
        Only the training iterator shuffles.
    """
    # NOTE(review): no x_col / y_col are passed, so flow_from_dataframe's default
    # column names are used — presumably `filename` and `class`; confirm against the CSVs.
    preprocess = keras.applications.mobilenet.preprocess_input

    # Augmentation belongs to training only.
    train_image_gen = keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocess,
        horizontal_flip=True
    )
    # Evaluation data gets the same preprocessing but no random flips; otherwise
    # validation/test metrics would be computed on randomly mirrored images.
    eval_image_gen = keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocess
    )

    train_image_iterator = train_image_gen.flow_from_dataframe(
        train_metadata_df, batch_size=train_batch_size, target_size=image_size, shuffle=True)
    val_image_iterator = eval_image_gen.flow_from_dataframe(
        val_metadata_df, batch_size=val_batch_size, target_size=image_size, shuffle=False)
    test_image_iterator = eval_image_gen.flow_from_dataframe(
        test_metadata_df, batch_size=test_batch_size, target_size=image_size, shuffle=False)
    return train_image_iterator, val_image_iterator, test_image_iterator

In [None]:
# Build the three iterators from the sampled train/validation frames and the full test frame.
train_image_iterator, val_image_iterator, test_image_iterator = get_train_val_test_iterators(
    sample_train_metadata_df,
    sample_val_metadata_df,
    test_metadata_df,
    image_size=image_size,
    train_batch_size=train_batch_size,
    val_batch_size=val_batch_size,
    test_batch_size=test_batch_size,
)

# Number of classes known to the training iterator.
n_classes = len(train_image_iterator.class_indices)

# Setting up model

We use a pretrained MobileNet model for feature extraction.

In [None]:
#export
# Architecture switches read by setup_model.
pretrained = False
freeze_pretrained = False
# Not referenced by any other code visible in this notebook.
last_layer_convolutions = 64

# Adam hyper-parameters and the optimizer built from them.
learning_rate = 0.001
beta_1 = 0.9
beta_2 = 0.999
optimizer = keras.optimizers.Adam(
    learning_rate=learning_rate,
    beta_1=beta_1,
    beta_2=beta_2,
)

# Metrics passed to model.compile by default.
default_metrics = [
    'acc',
    keras.metrics.TopKCategoricalAccuracy(k=5),
    keras.metrics.Precision(),
    keras.metrics.Recall(),
]


def setup_model(n_classes, optimizer, metrics):
    """Build and compile the MobileNet-based classifier.

    The network is a MobileNet base followed by Flatten, a Dense layer with
    ``n_classes`` units and a Softmax. It reads the module-level settings
    ``pretrained``, ``freeze_pretrained``, ``image_size`` and ``float_dtype``.

    Args:
        n_classes: number of units of the final Dense layer.
        optimizer: optimizer handed to ``model.compile``.
        metrics: list of metrics handed to ``model.compile``.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Honour the `pretrained` flag: previously the computed value was dropped and
    # `weights=None` was always passed, so ImageNet weights could never be loaded.
    weights = 'imagenet' if pretrained else None
    base_model = keras_applications.mobilenet.MobileNet(
        weights=weights,
        input_shape=(*image_size, 3),
        backend=tf.keras.backend,
        layers=tf.keras.layers,
        models=tf.keras.models,
        utils=tf.keras.utils,
    )
    if freeze_pretrained:
        # Keep the base network's weights fixed during training.
        base_model.trainable = False
    # NOTE(review): the output Dense layer is pinned to `float_dtype` and the Softmax
    # has no explicit dtype; TensorFlow's mixed-precision guide recommends float32
    # model outputs — confirm float16 outputs are intended.
    model = keras.Sequential(
        [
            base_model,
            keras.layers.Flatten(),
            keras.layers.Dense(n_classes, dtype=float_dtype),
            keras.layers.Softmax()
        ])
    model.compile(
        loss=keras.losses.CategoricalCrossentropy(),
        optimizer=optimizer,
        metrics=metrics)
    return model

In [None]:
# Build and compile the classifier with the module-level optimizer and default metrics.
model = setup_model(n_classes, optimizer, default_metrics)

In [None]:
# Train for 10 epochs, validating on the validation iterator; fit is asked to use
# multiprocessing with 5 workers.
model.fit(train_image_iterator, validation_data=val_image_iterator, epochs=10, use_multiprocessing=True, workers=5)

In [None]:
# Evaluate the trained model on the test iterator (the result is shown as the cell output).
model.evaluate(test_image_iterator)

In [None]:
# Predicted class index per test sample: arg-max of the model output along axis 1.
y_test_pred = model.predict(test_image_iterator).argmax(axis=1)

In [None]:
# Map each test row's class name to its integer index via the test iterator's class_indices.
# NOTE(review): comparing these with y_test_pred assumes the iterator yields rows in
# test_metadata_df order and drops none of them — confirm.
y_test = np.array([test_image_iterator.class_indices[c] for c in test_metadata_df['class']])

## Test set accuracy

In [None]:
# Accuracy of the arg-max predictions against the true class indices.
metrics.accuracy_score(y_test, y_test_pred)

## Script mode

In [None]:
#export
# Script-mode settings, in order: size of the stratified subsample drawn from the
# training metadata ('all' skips subsampling), number of rows held out for
# validation, and number of training epochs.
sample_size, val_size, epochs = 11000, 1000, 20


if __name__ == '__main__':
    # Script entry point: load metadata, optionally subsample, split off a validation
    # set, train with checkpoint and TensorBoard callbacks, then save the final model.
    train_val_metadata_df, test_metadata_df = get_metadata()

    if sample_size != 'all':
        # Keep only a class-stratified subsample of `sample_size` rows (the "test"
        # side of the split); the remainder is discarded.
        __, train_val_metadata_df = model_selection.train_test_split(
            train_val_metadata_df,
            test_size=sample_size,
            random_state=2,
            stratify=train_val_metadata_df['class']
        )
    # Hold out `val_size` rows for validation, stratified by class.
    train_metadata_df, val_metadata_df = model_selection.train_test_split(
        train_val_metadata_df,
        test_size=val_size,
        random_state=2,
        stratify=train_val_metadata_df['class']
    )

    # NOTE(review): `save_best_only` is not set, so presumably a checkpoint is written
    # every epoch and `monitor` has no effect — confirm that this is intended.
    model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath='weights.{epoch:02d}-{val_loss:.2f}.hdf5',
        monitor='val_top_k_categorical_accuracy'
    )
    # Each TensorBoard callback gets its own run directory under `logs`. Two
    # writers sharing one directory produce competing event files there.
    tensorboard_training_callback = tf.keras.callbacks.TensorBoard(
        log_dir=os.path.join('logs', 'batch'), histogram_freq=0, write_graph=True, write_images=False,
        update_freq=100, profile_batch=2, embeddings_freq=0,
        embeddings_metadata=None
    )
    # Profiling batch 2 once is enough, so it is disabled on the epoch-level callback.
    tensorboard_epoch_callback = tf.keras.callbacks.TensorBoard(
        log_dir=os.path.join('logs', 'epoch'), histogram_freq=0, write_graph=True, write_images=False,
        update_freq='epoch', profile_batch=0, embeddings_freq=0,
        embeddings_metadata=None
    )
    callbacks = [model_checkpoint_callback, tensorboard_training_callback, tensorboard_epoch_callback]

    train_image_iterator, val_image_iterator, test_image_iterator = get_train_val_test_iterators(
        train_metadata_df, val_metadata_df, test_metadata_df,
        image_size=image_size, train_batch_size=train_batch_size, test_batch_size=test_batch_size, val_batch_size=val_batch_size)
    n_classes = len(train_image_iterator.class_indices)
    model = setup_model(n_classes, optimizer, default_metrics)
    model.fit(train_image_iterator, validation_data=val_image_iterator, epochs=epochs, callbacks=callbacks)
    model.save('data/model.hdf5')