# Creating a Baseline Tensorflow Model to Predict Pet Popularity

V2 Updates:
* made CNN deeper
* tweaked parameters
* added some image augmentation to the model
* added some documentation

Sources:
* https://www.kaggle.com/ekaterinadranitsyna/pretrained-feature-model-keras 
    * used it to load data and convert it to TF Datasets
* removed the Transfer Learning part for simplicity
* converted Sequential API -> Functional API

In [None]:
# Imports

import os
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras

In [None]:
# Setting the path

PATH = "../input/petfinder-pawpularity-score/"

In [None]:
# Loading the competition tables (train, test, sample submission)

train, test, submission = (
    pd.read_csv(f"{PATH}{table}.csv")
    for table in ("train", "test", "sample_submission")
)

In [None]:
# Viewing the first few rows

train.head()

In [None]:
# Viewing the shape

train.shape

In [None]:
# Viewing the info of the data

train.info()

In [None]:
# Setting the file path of each image

# The directories are derived from PATH rather than repeating the literal, so
# the image locations always follow the dataset root used for the CSV files.
train["path"] = train["Id"].apply(lambda x: PATH + "train/" + x + ".jpg")
test["path"] = test["Id"].apply(lambda x: PATH + "test/" + x + ".jpg")

In [None]:
# Functions reading and converting data into Tensorflow datasets
# source: https://www.kaggle.com/ekaterinadranitsyna/pretrained-feature-model-keras

AUTOTUNE = tf.data.experimental.AUTOTUNE  # used for num_parallel_calls and prefetch buffer_size in get_dataset
BATCH_SIZE = 64  # batch size applied by get_dataset
IMG_SIZE = 224  # target height and width passed to resize_with_pad in get_image
target = 'Pawpularity'  # name of the label column selected from the train DataFrame
seed = 0  # default value of set_seed's seed parameter

def set_seed(seed=seed):
    """Utility function to use for reproducibility.

    Seeds Python's ``random`` module, NumPy and TensorFlow, and sets the
    ``PYTHONHASHSEED`` and ``TF_DETERMINISTIC_OPS`` environment variables.

    :param seed: Random seed
    :return: None
    """
    # Imported here because the notebook's import cell does not import
    # ``random``; without it the ``random.seed`` call raised NameError.
    import random

    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'


def set_display():
    """Apply the notebook's display defaults for matplotlib charts and
    pandas DataFrames.
    """
    # Charts: style sheet first, then figure size and font size
    plt.style.use('fivethirtyeight')
    plt.rcParams.update({'figure.figsize': (12, 8), 'font.size': 14})

    # DataFrames: no limit on displayed columns/rows, floats with 4 decimals
    for option in ('display.max_columns', 'display.max_rows'):
        pd.set_option(option, None)
    pd.set_option('display.float_format', '{:.4f}'.format)


def id_to_path(img_id: str, dir: str):
    """Build the path of the JPEG file that belongs to an image Id.
    :param img_id: Image Id (the file name without extension)
    :param dir: Path to the directory with images
    :return: Image file path, i.e. ``<dir>/<img_id>.jpg``
    """
    file_name = '{}.jpg'.format(img_id)
    return os.path.join(dir, file_name)


@tf.function
def get_image(path: str) -> tf.Tensor:
    """Function loads image from a file and preprocesses it.

    The file is decoded as a 3-channel JPEG, resized with padding to
    IMG_SIZE x IMG_SIZE and cast to int32.

    :param path: Path to image file
    :return: Tensor with preprocessed image
    """
    # Debug trace. The original interpolated the builtin ``str`` type instead
    # of the ``path`` argument. This is a Python-level print, so under
    # @tf.function it executes while the function is traced, not per image.
    print(f"IMAGE PROCESSING {path}")

    ## Decoding the image
    raw_bytes = tf.io.read_file(path)
    image = tf.image.decode_jpeg(raw_bytes, channels=3)

    ## Resizing image (aspect ratio kept, remainder padded), then integer cast
    image = tf.image.resize_with_pad(image, IMG_SIZE, IMG_SIZE)
    return tf.cast(image, dtype=tf.int32)


@tf.function
def process_dataset(path: str, label: int) -> tuple:
    """Load the image stored at ``path`` and pair it with its label.
    :param path: Path to image file
    :param label: Label belonging to the image; passed through unchanged
    :return: (preprocessed image from get_image, label)
    """
    image = get_image(path)
    return image, label


@tf.function
def get_dataset(x, y=None) -> tf.data.Dataset:
    """Build a batched, prefetched dataset from image file paths and,
    optionally, their targets.
    :param x: Input data for the model (array of file paths)
    :param y: Target values for the model; when None the dataset yields
        images only
    :return TensorFlow Dataset object
    """
    has_labels = y is not None

    # Labelled input yields (image, label) pairs, unlabelled input images only
    slices = (x, y) if has_labels else x
    loader = process_dataset if has_labels else get_image

    pipeline = tf.data.Dataset.from_tensor_slices(slices)
    pipeline = pipeline.map(loader, num_parallel_calls=AUTOTUNE)
    pipeline = pipeline.batch(BATCH_SIZE)
    return pipeline.prefetch(buffer_size=AUTOTUNE)

def plot_history(hist):
    """Draw training vs. validation curves for the loss and the RMSE metric
    on two stacked charts that share the epoch axis.
    :param hist: Tensorflow history object from model.fit()
    """
    records = hist.history

    # (chart title, training series, validation series) per panel
    panels = (
        ('MSE Loss', records['loss'], records['val_loss']),
        ('Root Mean Squared Error',
         records['root_mean_squared_error'],
         records['val_root_mean_squared_error']),
    )

    # Epochs are numbered from 1 along the x axis
    epochs = range(1, len(panels[0][1]) + 1)

    _, axes = plt.subplots(nrows=2, ncols=1, sharex=True)

    for axis, (title, train_values, valid_values) in zip(axes, panels):
        axis.plot(epochs, train_values, 'bo', label='Training')
        axis.plot(epochs, valid_values, 'ro', label='Validation', alpha=0.3)
        axis.set_title(title)
        axis.legend()

    # Only the bottom chart carries the x-axis label
    axes[-1].set_xlabel('Epochs')

    plt.tight_layout()
    plt.show()

In [None]:
# Splitting train into train and validation sets

# 80/20 shuffled hold-out split; the fixed random_state keeps it repeatable
train_subset, valid_subset = train_test_split(
    train[['path', target]],
    test_size=0.2,
    random_state=0,
    shuffle=True,
)

In [None]:
# Creating TensorFlow datasets

# Labelled datasets (image, target) for fitting and validation
train_ds, valid_ds = (
    get_dataset(x=subset['path'], y=subset[target])
    for subset in (train_subset, valid_subset)
)
# Unlabelled dataset (images only) for inference
test_ds = get_dataset(x=test['path'])

In [None]:
# Creating the model

def get_model():
    """Build the baseline CNN that regresses one value per image.

    Architecture, in order: rescaling to [0, 1], random flip / rotation /
    translation augmentation, three strided Conv2D stages (96, 128 and 256
    filters, each followed by ReLU and 2x2 max pooling; the second and third
    also use batch normalisation), flatten, a 128-unit dense layer with
    dropout, and a single linear output unit.

    :return: Uncompiled keras.Model
    """
    preprocessing = keras.layers.experimental.preprocessing

    ## Setting the Inputs
    # Shape follows IMG_SIZE so the model always matches the images produced
    # by get_image instead of relying on a second hard-coded 224.
    inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = inputs

    ## Preprocessing Layers

    ### Rescaling
    x = preprocessing.Rescaling(1./255)(x)

    ## Data Augmentation
    x = preprocessing.RandomFlip("horizontal_and_vertical")(x)
    x = preprocessing.RandomRotation(0.2)(x)
    x = preprocessing.RandomTranslation(0.2, 0.2)(x)

    ## Convolutional Layers
    # (filters, use batch norm) per stage; the first stage has no batch
    # normalisation, exactly as in the hand-written version of this model.
    conv_stages = ((96, False), (128, True), (256, True))
    for filters, use_batch_norm in conv_stages:
        x = keras.layers.Conv2D(
            filters=filters, kernel_size=3, strides=2, padding='same',
            kernel_initializer=tf.keras.initializers.HeNormal()
        )(x)
        if use_batch_norm:
            x = keras.layers.BatchNormalization()(x)
        x = keras.layers.Activation('relu')(x)
        x = keras.layers.MaxPool2D(2)(x)

    ## Flattening the layer
    x = keras.layers.Flatten()(x)

    ## Fully Connected (Dense) Layers

    ### First Fully Connected layer w/ Dropout
    x = keras.layers.Dense(
        128, activation='relu',
        kernel_initializer=tf.keras.initializers.HeNormal()
    )(x)
    x = keras.layers.Dropout(0.2)(x)

    ## Output layer (single linear unit for regression)
    output = keras.layers.Dense(1)(x)

    ## Returning the model
    return keras.Model(inputs=inputs, outputs=output)

In [None]:
# Fitting the model

def compile_and_fit(model, train_data=None, valid_data=None, epochs=50):
    """Compile ``model`` for MSE regression and fit it with early stopping.

    The optimizer is Adam with a staircase exponential learning-rate decay;
    RMSE is tracked as a metric. Training stops once ``val_loss`` has not
    improved for 5 epochs and the best weights are restored.

    :param model: Uncompiled keras.Model to train
    :param train_data: Training dataset; defaults to the global ``train_ds``
    :param valid_data: Validation dataset; defaults to the global ``valid_ds``
    :param epochs: Maximum number of epochs to train for
    :return: Tuple of (fitted model, History object returned by model.fit)
    """
    # Fall back to the notebook-level datasets so existing
    # ``compile_and_fit(model)`` calls behave as before.
    if train_data is None:
        train_data = train_ds
    if valid_data is None:
        valid_data = valid_ds

    # Creating an exponential decay for learning rate

    LEARNING_RATE = 1e-2
    DECAY_STEPS = 100
    DECAY_RATE = 0.99

    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=LEARNING_RATE,
        decay_steps=DECAY_STEPS, decay_rate=DECAY_RATE,
        staircase=True
    )

    # Creating an early stopper

    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=5, restore_best_weights=True
    )

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

    # ``use_multiprocessing`` / ``workers`` are intentionally not passed:
    # Keras documents them for generator / Sequence input only, and the
    # data here comes from tf.data pipelines.
    history = model.fit(
        train_data,
        validation_data=valid_data,
        epochs=epochs,
        callbacks=[early_stop]
    )

    return model, history

In [None]:
# # Applying K-Fold

# from sklearn.model_selection import KFold
# from sklearn.metrics import mean_squared_error

# kf = KFold(5)

# scores = []

# for train_index, valid_index in kf.split(train_subset):
#     print("TRAIN:", train_index, "TEST:", valid_index)
    
#     X_train, X_valid = train_subset.iloc[train_index], train_subset.iloc[valid_index]
    
#     train_ds = get_dataset(x=X_train['path'], y=X_train[target])
#     valid_ds = get_dataset(x=X_valid['path'], y=X_valid[target])
    
#     model = get_model()
    
#     model, history = compile_and_fit(model)
    
#     predictions = model.predict(valid_ds, use_multiprocessing=True, workers=os.cpu_count())
    
#     rmse = mean_squared_error(X_valid[target], predictions, squared=False)
#     print(rmse)
    
#     scores.append(rmse)

# # Printing the results of K-Fold

# print(f"Mean: {np.mean(scores)}, Std: {np.std(scores)}")

In [None]:
# Getting the model

# Clear the Keras session first, so the model below is built after the reset
keras.backend.clear_session()

# Build the CNN and print its layer-by-layer summary
model = get_model()
model.summary()

In [None]:
# Fitting the model

# compile_and_fit returns the fitted model and the History object from model.fit
model, history = compile_and_fit(model)
# predictions = model.predict(valid_ds, use_multiprocessing=True, workers=os.cpu_count())

In [None]:
# Plotting the training and validation MSE loss and RMSE of the model

plot_history(history)

## Inference

In [None]:
# Using the model to predict on the test data

# The model ends in Dense(1), so predict returns one single-element row per
# image; ravel() flattens that to the 1-D vector a DataFrame column expects.
# ``use_multiprocessing`` / ``workers`` are not passed: Keras documents them
# for generator / Sequence input only, and test_ds is a tf.data pipeline.
test[target] = model.predict(test_ds).ravel()

In [None]:
# Saving the submission file

# Write the Id and predicted target columns; index=False omits the DataFrame index
test[['Id', target]].to_csv('submission.csv', index=False)
# Preview the first rows of what was written
test[['Id', target]].head()

To-Dos:
* save the model
* remove duplicate images
* augment the data more
* add Transfer Learning
* add metadata