In [None]:
#default_exp training

In [None]:
#export
import numpy as np
import pandas as pd
import os
from deeplearning_image_classification import data_loading

from sklearn import model_selection, metrics
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras import applications


# Reuse the data directory configured by the data_loading module.
DATA_DIR = data_loading.DATA_DIR

# Seed NumPy's global random state so NumPy-driven randomness is repeatable.
np.random.seed(0)

# Show up to 100 characters per column when pandas renders a frame.
pd.set_option('display.max_colwidth', 100)

In [None]:
%cd ..

The next cell enables GPU memory growth as a workaround for [this issue](https://github.com/tensorflow/tensorflow/issues/24496). Try removing it on your PC; you may not encounter the issue at all.

In [None]:
#export
# Enable memory growth on every GPU that TensorFlow can see.
gpu_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device=device, enable=True)

In [None]:
    
#policy = mixed_precision.Policy('mixed_float16')
#mixed_precision.set_policy(policy)

In [None]:
#export
# Locations of the train / test metadata tables inside DATA_DIR.
train_csv_path, test_csv_path = [
    os.path.join(DATA_DIR, csv_name)
    for csv_name in ('train_metadata.csv', 'test_metadata.csv')
]

In [None]:
#export
def get_metadata(train_metadata_path=train_csv_path, test_metadata_path=test_csv_path):
    """Load the train and test metadata tables from CSV files.

    Args:
        train_metadata_path: Path of the training metadata CSV. Defaults to
            the module-level ``train_csv_path``.
        test_metadata_path: Path of the test metadata CSV. Defaults to the
            module-level ``test_csv_path``.

    Returns:
        A tuple ``(train_metadata_df, test_metadata_df)`` of pandas DataFrames.
    """
    # Read from the arguments, not the module-level paths, so callers can
    # actually point the loader at other files.
    train_metadata_df = pd.read_csv(train_metadata_path)
    test_metadata_df = pd.read_csv(test_metadata_path)
    return train_metadata_df, test_metadata_df

In [None]:
# Display the resolved training metadata path as a quick sanity check.
train_csv_path

In [None]:
# Load both metadata tables from their default CSV locations.
train_metadata_df, test_metadata_df = get_metadata()

In [None]:
# Work on a stratified subset of the training metadata: `sample_size` rows in
# total, of which `sample_val_size` are held out for validation.
sample_size = 11000
sample_val_size = 1000

# First split: the "test" side of the split is the sample we keep.
__, sample_train_val_metadata_df = model_selection.train_test_split(
    train_metadata_df,
    test_size=sample_size,
    random_state=2,
    stratify=train_metadata_df['class'],
)

# Second split: carve the validation rows out of the sample.
sample_train_metadata_df, sample_val_metadata_df = model_selection.train_test_split(
    sample_train_val_metadata_df,
    test_size=sample_val_size,
    random_state=2,
    stratify=sample_train_val_metadata_df['class'],
)

In [None]:
#export
image_size = (224, 224)


def get_train_val_test_iterators(train_metadata_df, val_metadata_df, test_metadata_df, image_size):
    """Build Keras image iterators for the train, validation and test sets.

    All three iterators apply the same preprocessing function and resize
    images to ``image_size``. Random horizontal flipping is applied to the
    training iterator only, so validation and test images are always fed to
    the model unaugmented.

    Column names and class mode are left at ``flow_from_dataframe``'s
    defaults (presumably a ``filename`` and a ``class`` column; confirm
    against the metadata CSVs).

    Args:
        train_metadata_df: Metadata frame describing the training images.
        val_metadata_df: Metadata frame describing the validation images.
        test_metadata_df: Metadata frame describing the test images.
        image_size: ``(height, width)`` passed as ``target_size``.

    Returns:
        A tuple ``(train_image_iterator, val_image_iterator,
        test_image_iterator)``. The training iterator uses batches of 32 and
        is shuffled; validation (batch 32) and test (batch 64) keep the row
        order of their frames.
    """
    preprocess = keras.applications.mobilenet_v2.preprocess_input

    # Augmentation belongs to training only.
    train_image_gen = keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocess,
        horizontal_flip=True
    )
    # Evaluation data must not be randomly flipped, otherwise validation
    # metrics and test predictions vary from run to run.
    eval_image_gen = keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=preprocess
    )

    # Shuffle training batches so each epoch sees a different batch
    # composition; a fixed order would replay identical batches every epoch.
    train_image_iterator = train_image_gen.flow_from_dataframe(
        train_metadata_df, batch_size=32, target_size=image_size, shuffle=True)
    # Keep validation/test order fixed so predictions line up with the rows
    # of the metadata frames.
    val_image_iterator = eval_image_gen.flow_from_dataframe(
        val_metadata_df, batch_size=32, target_size=image_size, shuffle=False)
    test_image_iterator = eval_image_gen.flow_from_dataframe(
        test_metadata_df, batch_size=64, target_size=image_size, shuffle=False)
    return train_image_iterator, val_image_iterator, test_image_iterator

In [None]:
# Build the three iterators from the sampled train/val frames and the full test frame.
train_image_iterator, val_image_iterator, test_image_iterator = get_train_val_test_iterators(
    train_metadata_df=sample_train_metadata_df,
    val_metadata_df=sample_val_metadata_df,
    test_metadata_df=test_metadata_df,
    image_size=image_size,
)
# Number of distinct classes known to the training iterator.
n_classes = len(train_image_iterator.class_indices)

# Setting up model

We use a pretrained MobileNet model as a frozen feature extractor and train only the small classification head stacked on top of it.

In [None]:
#export
# Adam hyper-parameters used to build the default optimizer.
learning_rate, beta_1, beta_2 = 0.001, 0.9, 0.999
optimizer = keras.optimizers.Adam(
    learning_rate=learning_rate,
    beta_1=beta_1,
    beta_2=beta_2,
)

# Metrics reported during training: accuracy, precision and recall.
default_metrics = [
    'acc',
    keras.metrics.Precision(),
    keras.metrics.Recall(),
]


def setup_model(n_classes, optimizer, metrics, input_shape=(224, 224, 3)):
    """Build and compile a classifier on top of a frozen MobileNet base.

    The network is: MobileNet (without its top, not trainable) ->
    2x2 average pooling -> 2x2 convolution with 64 filters -> flatten ->
    dense layer with ``n_classes`` units -> softmax. It is compiled with a
    categorical cross-entropy loss.

    Args:
        n_classes: Number of output classes (units of the final dense layer).
        optimizer: Keras optimizer passed to ``model.compile``.
        metrics: List of metrics passed to ``model.compile``. Inside this
            function the name shadows any module-level ``metrics`` import.
        input_shape: ``(height, width, channels)`` of the input images.
            Defaults to ``(224, 224, 3)``, the value previously hard-coded.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    base_model = keras.applications.MobileNet(include_top=False, input_shape=input_shape)
    # Freeze the base so only the head defined below is trained.
    base_model.trainable = False
    model = keras.Sequential(
        [
            base_model,
            keras.layers.AveragePooling2D((2, 2)),
            # NOTE(review): no activation is set on this convolution, so the
            # head is linear up to the softmax; confirm this is intended.
            keras.layers.Convolution2D(64, (2, 2)),
            keras.layers.Flatten(),
            keras.layers.Dense(n_classes),
            keras.layers.Softmax()
        ])
    model.compile(
        loss=keras.losses.CategoricalCrossentropy(),
        optimizer=optimizer,
        metrics=metrics)
    return model

In [None]:
# Build and compile the classifier with the default optimizer and metrics.
model = setup_model(n_classes, optimizer, default_metrics)

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Train for 10 epochs, evaluating on the validation iterator during training.
model.fit(train_image_iterator, validation_data=val_image_iterator, epochs=10)

In [None]:
# Predicted class index for each test image: the position of the largest
# value in every row of the model's output.
y_test_pred = np.argmax(model.predict(test_image_iterator), axis=1)

In [None]:
# Encode the true test labels with the TRAINING iterator's class -> index
# mapping. The model was fitted on that mapping, so its predicted indices are
# only comparable to labels encoded the same way; the test iterator's own
# mapping can differ if the test set does not contain every training class.
y_test = np.array([train_image_iterator.class_indices[c] for c in test_metadata_df['class']])

## Test set accuracy

In [None]:
# Share of test images whose predicted class index equals the true one.
metrics.accuracy_score(y_test, y_test_pred)

## Script mode

In [None]:
#export
# Settings for script mode: size of the stratified training sample, how many
# of those rows are held out for validation, and the number of epochs.
sample_size = 11000
sample_val_size = 1000
epochs = 10


if __name__ == '__main__':
    print(DATA_DIR)
    train_metadata_df, test_metadata_df = get_metadata()

    # Draw a stratified sample of `sample_size` rows, then split
    # `sample_val_size` of them off as the validation set.
    __, sample_train_val_metadata_df = model_selection.train_test_split(
        train_metadata_df,
        test_size=sample_size,
        random_state=2,
        stratify=train_metadata_df['class'])
    sample_train_metadata_df, sample_val_metadata_df = model_selection.train_test_split(
        sample_train_val_metadata_df,
        test_size=sample_val_size,
        random_state=2,
        stratify=sample_train_val_metadata_df['class'])

    train_image_iterator, val_image_iterator, test_image_iterator = get_train_val_test_iterators(
        sample_train_metadata_df, sample_val_metadata_df, test_metadata_df, image_size=image_size)
    n_classes = len(train_image_iterator.class_indices)
    model = setup_model(n_classes, optimizer, default_metrics)
    # Use the `epochs` setting defined above instead of a second hard-coded
    # value, so changing the setting actually changes the training length.
    model.fit(train_image_iterator, validation_data=val_image_iterator, epochs=epochs)