This notebook is for anyone who wants to know how to train a multi-input Keras model using a TF dataset (`tf.data.Dataset`). It covers the following:
* TF Dataset creation for a multi-input model
* Defining layers for a multi-input model (Concatenate)

The advantage of a TF dataset is that the input pipeline is fast, so the model trains quickly. As you know, the TPU queue on Kaggle is long nowadays and not everyone can get their hands on a TPU, so relying on a TF dataset is a good idea in that case. And if you are one of the lucky ones who do get to train on a TPU, you're in for a treat: with a TF dataset combined with a TPU you can train your model for even 500 epochs without waiting for hours, unlike on GPUs.

**NOTE:** 
* I've used images which were centre cropped.
* During inference, add a placeholder (dummy) 'Pawpularity' column when loading test.csv and build the TF dataset in the same way as train_ds or val_ds in this notebook. Otherwise prediction fails with 'ValueError: Layer model expects 2 input(s), but it received 1 input tensors.'

In [None]:
# Whether to rescale the Pawpularity scores into the 0-1 range (the CSV-loading
# cell divides them by 100); the model then ends in a sigmoid output unit.
sigmoid = True

# Fraction of the image files held out for validation (passed as `test_size`
# to train_test_split).
split_size = 0.1

# Target [height, width] that every decoded image is resized to.
IMAGE_SIZE = [224, 224]

## Import

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Concatenate, Dense
from tensorflow.keras.layers import Flatten, GlobalAveragePooling2D, BatchNormalization
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint

import numpy as np
import pandas as pd
import random
import os
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split

from matplotlib import pyplot as plt
from kaggle_datasets import KaggleDatasets
# Sentinel that lets tf.data tune parallelism / prefetch buffer sizes at runtime.
AUTO = tf.data.experimental.AUTOTUNE
print("Tensorflow version " + tf.__version__)
# Evaluates to whether eager execution is enabled; the value is only shown as
# the notebook cell's output and is not stored.
tf.executing_eagerly()

## Distribution Strategy

In [None]:
# Detect the available accelerator. `gpus` is initialised up front so that it is
# always bound, whichever path the detection below takes.
gpus = []
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
except ValueError:
    # No TPU could be resolved: fall back to the visible logical GPUs (if any).
    tpu = None
    gpus = tf.config.experimental.list_logical_devices("GPU")

# Select appropriate distribution strategy
if tpu:
    # The TPU system must be connected to and initialised before a
    # TPUStrategy can be created for it.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
    print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
elif len(gpus) > 1:
    # Mirror the model across every logical GPU.
    gpu_names = [gpu.name for gpu in gpus]
    strategy = tf.distribute.MirroredStrategy(gpu_names)
    print('Running on multiple GPUs ', gpu_names)
elif len(gpus) == 1:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on single GPU ', gpus[0].name)
else:
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
    print('Running on CPU')
print("Number of accelerators: ", strategy.num_replicas_in_sync)

## Credentials

In [None]:
# Fetch the notebook user's Google Cloud credential through Kaggle's secrets
# client and hand it to TensorFlow.
# NOTE(review): presumably needed so TensorFlow can read GCS-hosted data when
# running on TPU -- confirm against the Kaggle documentation.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)

## Load CSV

In [None]:
# Load the training table (image Id, metadata columns and the Pawpularity target).
data = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')

# With the `sigmoid` flag on, rescale the target by 1/100 so it can be fitted
# by a sigmoid output unit.
if sigmoid:
    data['Pawpularity'] = data['Pawpularity'].div(100.0)

## TF dataset Creation

In [None]:
# List every preprocessed training image, then carve out a reproducible
# (fixed random_state) train / validation split of the file paths.
filenames = tf.io.gfile.glob('../input/preprocessing-pawfinder/train/*')
train_filenames, val_filenames = train_test_split(
    filenames,
    test_size=split_size,
    random_state=123,
)

# Column order of the training CSV, reused when assembling per-split metadata.
columns = data.columns.tolist()

def create_metadata(filenames):
    """Return the rows of the global `data` table for the given image files.

    The image Id of a file is its base name with '.jpg' removed. Rows are
    returned in the order of `filenames`, with the columns in the order of the
    global `columns` list.

    Args:
        filenames: iterable of image file paths.

    Returns:
        A DataFrame with a fresh 0..n-1 index. A file whose Id does not occur
        in `data` contributes no row, so the result can be shorter than
        `filenames`; callers pairing rows with files should check the lengths.
    """
    ids = [os.path.basename(f).replace('.jpg', '') for f in filenames]
    if not ids:
        return pd.DataFrame(columns=columns)

    # One inner merge replaces the per-file mask lookup + pd.concat loop, which
    # re-copied the growing frame on every iteration (quadratic in the number
    # of files). An inner merge keeps the order of the left-hand keys, i.e. the
    # order of `filenames`, and keeps the original column dtypes.
    wanted = pd.DataFrame({'Id': ids})
    matched = wanted.merge(data, on='Id', how='inner')
    return matched[columns].reset_index(drop=True)

# Build one metadata table per split, derived from that split's file list.
train_meta_data = create_metadata(train_filenames)
val_meta_data = create_metadata(val_filenames)

In [None]:
def decode_img(img):
    """Decode JPEG bytes into a float32 image resized to IMAGE_SIZE.

    The pixels are only cast to float32, not rescaled, so they stay on the
    decoder's 0-255 scale.
    """
    # Compressed string -> 3-channel uint8 tensor.
    decoded = tf.image.decode_jpeg(img, channels=3)
    # Plain dtype cast; no normalisation is applied here.
    as_float = tf.cast(decoded, tf.float32)
    # Resize to the configured [height, width].
    return tf.image.resize(as_float, IMAGE_SIZE)

def process_path(file_path):
    """Read the file at `file_path` and return it as a decoded, resized image."""
    raw_bytes = tf.io.read_file(file_path)
    return decode_img(raw_bytes)

def process_dataset(meta_data, filenames):
    """Build a tf.data.Dataset yielding ((image, metadata), label) examples.

    Args:
        meta_data: DataFrame with an 'Id' column, a 'Pawpularity' column and
            the metadata feature columns; row i must describe filenames[i].
        filenames: sequence of image paths, aligned with `meta_data`.

    Returns:
        An unbatched dataset whose elements are ((image, metadata), label).
        Labels are float32 when the global `sigmoid` flag is set, else int32;
        metadata features are int32.

    Raises:
        ValueError: if `meta_data` and `filenames` differ in length.
    """
    # Dataset.zip stops at its shortest input, so a length mismatch would not
    # fail -- it would silently pair images with the wrong rows. Fail loudly.
    if len(meta_data) != len(filenames):
        raise ValueError(
            f"meta_data has {len(meta_data)} rows but {len(filenames)} "
            "filenames were given; they must be aligned one-to-one"
        )

    label_dtype = 'float32' if sigmoid else 'int32'
    labels = meta_data['Pawpularity'].to_numpy().astype(label_dtype)

    # Everything except the identifier and the target is a model input feature.
    features = meta_data.drop(['Id', 'Pawpularity'], axis=1).to_numpy().astype('int32')

    label_dataset = tf.data.Dataset.from_tensor_slices(labels)
    meta_dataset = tf.data.Dataset.from_tensor_slices(features)

    # Images are decoded lazily and in parallel from their paths.
    image_dataset = tf.data.Dataset.from_tensor_slices(filenames).map(
        process_path, num_parallel_calls=AUTO
    )

    # Nest as ((image, metadata), label): the shape Keras expects for a
    # two-input model trained with `fit`.
    inputs_dataset = tf.data.Dataset.zip((image_dataset, meta_dataset))
    return tf.data.Dataset.zip((inputs_dataset, label_dataset))

train_ds = process_dataset(train_meta_data, train_filenames)
val_ds = process_dataset(val_meta_data, val_filenames)

In [None]:
def prepare_for_training(ds, cache=True, shuffle_buffer_size=1024, batch_size=128):
    """Cache, shuffle, batch, repeat and prefetch a dataset for `model.fit`.

    Args:
        ds: unbatched tf.data.Dataset of examples.
        cache: falsy to disable caching, True to cache in memory, or a string
            file path to cache on disk (for data that does not fit in memory).
        shuffle_buffer_size: number of examples held in the shuffle buffer.
        batch_size: number of examples per batch.

    Returns:
        An endlessly repeating, batched and prefetched dataset.
    """
    # Cache the decoded examples so the images are read and decoded only once.
    if cache:
        if isinstance(cache, str):
            ds = ds.cache(cache)
        else:
            ds = ds.cache()

    # Shuffle individual examples *before* batching. Shuffling after batching
    # only reorders whole batches, so every epoch would see the same batch
    # compositions.
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    ds = ds.batch(batch_size)

    # Repeat forever; the caller bounds each epoch with steps_per_epoch /
    # validation_steps.
    ds = ds.repeat()

    # `prefetch` lets the dataset fetch batches in the background while the
    # model is training.
    ds = ds.prefetch(buffer_size=AUTO)

    return ds

# Both splits go through the same pipeline preparation with default settings.
train_ds = prepare_for_training(train_ds)
val_ds = prepare_for_training(val_ds)

In [None]:
# Show the dataset's repr as the notebook cell output.
train_ds

In [None]:
# Split sizes, taken from the file lists.
num_train_images = len(train_filenames)
num_val_images = len(val_filenames)

# 128 is the batch size used when the datasets are batched. Training counts
# only whole batches (floor); validation rounds up (ceiling) so that every
# validation image is covered.
training_steps_per_epoch = num_train_images // 128
validation_steps_per_epoch = (num_val_images + 127) // 128
print("No. of Training images: ", num_train_images, ", Steps per epoch: ", training_steps_per_epoch)
print("No. of Validation Images: ", num_val_images, ", Steps per epoch: ", validation_steps_per_epoch)

## Model

In [None]:
from tensorflow.keras.applications import EfficientNetB0


def feature_extractor(inputs):
    """Apply an ImageNet-pretrained EfficientNetB0 backbone to `inputs`.

    The backbone is created without its classification top and is configured
    for 224 x 224 RGB input; its output feature map is returned.
    """
    backbone = EfficientNetB0(
        weights='imagenet',
        include_top=False,
        input_shape=(224, 224, 3),
    )
    return backbone(inputs)


def classifier(inputs):
    """Pool the backbone features and pass them through the dense head.

    Returns the 128-unit activation of the last dense layer; the final output
    unit is added by the caller.
    """
    # Layers are instantiated and applied in this exact order.
    head_layers = [
        GlobalAveragePooling2D(),
        Flatten(),
        Dense(1024, activation="relu"),
        BatchNormalization(),
        Dense(512, activation="relu"),
        Dense(128, activation="relu"),
    ]
    features = inputs
    for layer in head_layers:
        features = layer(features)
    return features

def final_model(inputs):
    """Chain the EfficientNet feature extractor and the dense head on `inputs`."""
    return classifier(feature_extractor(inputs))

def define_compile_model(initial_lr, num_meta_features=12):
    """Build and compile the two-input (image + metadata) regression model.

    Args:
        initial_lr: learning rate for the SGD optimizer.
        num_meta_features: length of the metadata vector fed to the second
            input (defaults to 12, the value previously hard-coded).

    Returns:
        A compiled Keras model taking (image, metadata) and producing a single
        value: sigmoid-activated when the global `sigmoid` flag is set, linear
        otherwise. It is trained with Huber loss and reports RMSE.
    """
    input_1 = tf.keras.layers.Input(shape=(224, 224, 3))
    # `shape` must be a tuple: `(12)` is just the int 12, so a real 1-tuple is
    # passed here.
    input_2 = tf.keras.layers.Input(shape=(num_meta_features,))

    # Image branch: pretrained backbone followed by the dense head.
    classification_output = final_model(input_1)

    # Fuse the raw metadata with the learned image representation.
    concat_layer = Concatenate()([input_2, classification_output])
    x = Dense(128, activation='relu')(concat_layer)
    x = Dense(64, activation='relu')(x)

    if sigmoid:
        # Targets were scaled to 0-1, so squash the prediction accordingly.
        output = Dense(1, activation='sigmoid')(x)
    else:
        output = Dense(1)(x)

    model = Model(inputs=(input_1, input_2), outputs=output)

    optimizer = tf.keras.optimizers.SGD(learning_rate=initial_lr)
    model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.Huber(),
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

    return model

## Training

In [None]:
# Write the model to disk only when the validation RMSE improves (lower is better).
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="pawfinder_concat.h5",
    monitor='val_root_mean_squared_error',
    mode='min',
    save_best_only=True,
)

# The model is created and compiled inside the strategy scope selected earlier.
with strategy.scope():
    model = define_compile_model(initial_lr=0.1)

    model.summary()

# Train the custom model. Both datasets repeat endlessly, so the epoch and
# validation lengths are set explicitly through the step counts.
fit_kwargs = dict(
    steps_per_epoch=training_steps_per_epoch,
    epochs=2,
    validation_data=val_ds,
    validation_steps=validation_steps_per_epoch,
    callbacks=[checkpoint_cb],
)
history = model.fit(train_ds, **fit_kwargs)