# TensorFlow multi-input model + EfficientNet + FE

This is a first attempt at building a multi-input model (image + tabular features), based on the following notebooks:

https://www.kaggle.com/yamqwe/tf-efficientnet-multi-input

https://www.kaggle.com/yamqwe/tf-nfnet-vit-efn-tta-infer/notebook

https://www.kaggle.com/awsaf49/tf-petfinder-image-tpu-train


The idea of adding cross-features comes from an excellent notebook by Ekaterina Dranitsyna:

https://www.kaggle.com/ekaterinadranitsyna/xgboost-for-tabular-data


In [None]:
!pip install ../input/keras-applications/Keras_Applications-1.0.8/ -f ./ --no-index
!pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps

In [None]:
import sklearn
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras

import matplotlib.pyplot as plt

import efficientnet.tfkeras as efn

from sklearn.model_selection import KFold

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, RobustScaler

## Import datasets

In [None]:
# Read the competition tables.
train = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
test = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
sample_submission = pd.read_csv("../input/petfinder-pawpularity-score/sample_submission.csv")

## Attach the path of the matching .jpg to every row ("<image dir>" + Id + ".jpg").
for _frame, _image_dir in (
    (train, "../input/petfinder-pawpularity-score/train/"),
    (test, "../input/petfinder-pawpularity-score/test/"),
):
    _frame["file_path"] = _frame["Id"].apply(
        lambda identifier, folder=_image_dir: folder + identifier + ".jpg"
    )

train.head()

## Model

In [None]:
%%time

# Base metadata flags shipped with every image.
FEATURE_COLUMNS = [
    'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
]
# Registry of every column treated as a categorical feature. It starts as a
# copy of the base flags and is extended in place by add_cross_features().
CAT_FEATURES = list(FEATURE_COLUMNS)
TARGET_COLUMN = "Pawpularity"


def add_cross_features(df, features=None):
    """Add pairwise and three-way combination columns to ``df`` in place.

    For every ordered pair of distinct features a column ``"<f1>-<f2>"`` is
    created whose values are the two source values joined with ``"_"``
    (e.g. ``"0_1"``).  For every pair created by this call, a column
    ``"<f1>-<f2>-<f3>"`` is added for each remaining third feature.  Columns
    that already exist in ``df`` are left untouched (and an existing pair
    column also skips its three-way columns).

    Each new column name is recorded in the module-level ``CAT_FEATURES``
    list exactly once, so calling the function for several frames (train,
    test) no longer registers the same name repeatedly.

    Args:
        df: DataFrame containing all columns named in ``features``.
        features: Names of the base columns to combine.  Defaults to the
            module-level ``FEATURE_COLUMNS``.

    Returns:
        The same ``df`` object, with the new columns added.

    Raises:
        KeyError: If a base feature column is missing from ``df``.
    """
    base_features = list(FEATURE_COLUMNS if features is None else features)

    # Convert each base column to text once instead of once per combination.
    as_text = {name: df[name].astype(str) for name in base_features}
    existing_columns = set(df.columns)
    registered = set(CAT_FEATURES)

    def _register(name):
        # Keep CAT_FEATURES free of duplicates across repeated calls.
        if name not in registered:
            registered.add(name)
            CAT_FEATURES.append(name)

    for feature1 in base_features:
        for feature2 in base_features:
            if feature1 == feature2:
                continue
            x2_feature_name = f'{feature1}-{feature2}'
            if x2_feature_name in existing_columns:
                continue
            pair_text = as_text[feature1] + '_' + as_text[feature2]
            df[x2_feature_name] = pair_text
            existing_columns.add(x2_feature_name)
            _register(x2_feature_name)

            for feature3 in base_features:
                if feature3 == feature1 or feature3 == feature2:
                    continue
                x3_feature_name = f'{feature1}-{feature2}-{feature3}'
                if x3_feature_name in existing_columns:
                    continue
                df[x3_feature_name] = pair_text + '_' + as_text[feature3]
                existing_columns.add(x3_feature_name)
                _register(x3_feature_name)
    return df
                
                
# Add the pairwise / three-way combination columns to both frames.
train = add_cross_features(train)
test = add_cross_features(test)

## Encode every column registered in CAT_FEATURES with keras' to_categorical.
# NOTE(review): to_categorical returns a 2-D one-hot matrix, but the result is
# assigned to a single column here -- confirm what pandas actually stores.
# NOTE(review): the combination columns hold strings built as
# "<value>_<value>"; confirm that to_categorical's integer conversion handles
# them as intended.
# NOTE(review): a name that appears more than once in CAT_FEATURES is encoded
# more than once by this loop.
for c in CAT_FEATURES:
    train[c] = keras.utils.to_categorical(train[c])
    test[c] = keras.utils.to_categorical(test[c])

# From here on FEATURE_COLUMNS is rebound to the sorted, de-duplicated list of
# every name in CAT_FEATURES (base flags plus combinations).
FEATURE_COLUMNS = np.unique(CAT_FEATURES).tolist()
print('features len:', len(FEATURE_COLUMNS))
print('train shape:',train.shape)
print('test shape:',test.shape)

In [None]:
# Side length in pixels: images are resized to IMAGE_SIZE x IMAGE_SIZE and the
# model's image input is declared as (IMAGE_SIZE, IMAGE_SIZE, 3).
IMAGE_SIZE=128

## Feature model
def build_feature_model(inputs, width=64, depth=2, activation="relu", dropout=0.0):
    """Build the dense branch that processes the tabular features.

    The branch is a stack of ``depth`` fully connected layers applied to
    ``inputs``.  After every third layer the activations are batch-normalised
    and concatenated with the raw ``inputs`` (a skip connection); with the
    default ``depth=2`` that step is never reached.

    The previous version also created one extra ``Dense`` layer before the
    loop whose output was immediately discarded; that dead layer is gone.

    Args:
        inputs: Keras tensor holding the tabular features.
        width: Number of units in each dense layer.
        depth: Number of dense layers to stack.
        activation: Activation used by every dense layer.
        dropout: Dropout rate applied after each dense layer; ``0.0``
            (the default) adds no dropout layers.

    Returns:
        The output tensor of the last layer in the branch.
    """
    x = inputs
    for i in range(depth):
        x = keras.layers.Dense(width, activation=activation)(x)
        if dropout:
            x = keras.layers.Dropout(dropout)(x)
        if (i + 1) % 3 == 0:
            # Periodic skip connection back to the raw features.
            x = keras.layers.BatchNormalization()(x)
            x = keras.layers.Concatenate()([x, inputs])
    return x

def RMSE(y_true, y_pred):
    """Root-mean-squared error between ``y_true`` and ``y_pred``.

    The squared differences are averaged over all elements before the
    square root is taken.
    """
    residual = tf.subtract(y_true, y_pred)
    mean_squared = tf.math.reduce_mean(tf.math.square(residual))
    return tf.math.sqrt(mean_squared)

def block(x, filters, kernel_size, repetitions, pool_size=2, strides=2):
    """Apply ``repetitions`` same-padded ReLU convolutions, then one max-pool.

    Args:
        x: Input tensor.
        filters: Number of filters of every convolution.
        kernel_size: Kernel size of every convolution.
        repetitions: How many convolutions to stack.
        pool_size: Window of the final max-pooling layer.
        strides: Stride of the final max-pooling layer.

    Returns:
        The pooled output tensor.
    """
    layers = tf.keras.layers
    for _ in range(repetitions):
        conv = layers.Conv2D(filters, kernel_size, activation='relu', padding='same')
        x = conv(x)
    pool = layers.MaxPooling2D(pool_size, strides)
    return pool(x)

# CNN + feature model
def build_full_model():
    """Assemble the two-input regression model (image + tabular features).

    The image branch is an EfficientNetB4 backbone without its top, loaded
    from the local noisy-student weight file and followed by global average
    pooling.  The tabular branch comes from ``build_feature_model``.  Both
    branches are concatenated, batch-normalised and passed through a
    64-unit dense layer and a single-unit output layer.

    Returns:
        A ``tf.keras.Model`` taking ``[image, tabular]`` inputs and producing
        one value per sample.
    """
    layers = tf.keras.layers

    image_inputs = tf.keras.Input((IMAGE_SIZE, IMAGE_SIZE , 3))
    tabular_inputs = tf.keras.Input(len(FEATURE_COLUMNS))

    # Image branch: pretrained backbone -> pooled feature vector.
    backbone = efn.EfficientNetB4(
        include_top=False,
        weights='../input/efficientnet-weights-for-keras/noisy-student/notop/efficientnet-b4_noisy-student_notop.h5',
        pooling=None,
    )
    image_features = backbone(image_inputs)
    image_features = layers.GlobalAveragePooling2D()(image_features)

    # Tabular branch.
    tabular_features = build_feature_model(tabular_inputs)

    # Fusion head.
    merged = layers.Concatenate(axis=1)([image_features, tabular_features])
    merged = layers.BatchNormalization()(merged)
    hidden = layers.Dense(64)(merged)
    output = layers.Dense(1)(hidden)

    return tf.keras.Model(inputs=[image_inputs, tabular_inputs], outputs=[output])

# Build one model instance at notebook level (also used by the shape smoke
# test further down) and render its architecture with tensor shapes.
model = build_full_model()
tf.keras.utils.plot_model(model, show_shapes=True)

## Training

In [None]:
# Maximum number of epochs per fold; also the default horizon of the LR schedule.
EPOCHS = 30
# Seed for the shuffled K-fold split.
RANDOM_SEED=42
# Batch size of the tf.data pipelines; also passed to get_lr_callback.
BATCH_SIZE = 32
# Number of cross-validation folds.
TOTAL_SPLITS = 6

def preprocess(image_url, tabular, target):
    """Turn one (path, features, score) record into a model-ready example.

    The JPEG at ``image_url`` is decoded to 3 channels, scaled to [0, 1],
    centre-cropped with a fraction of 1.0 and resized to
    ``IMAGE_SIZE x IMAGE_SIZE``.

    Returns:
        ``((image, tabular), target)`` with ``target`` cast to float32.
    """
    raw_bytes = tf.io.read_file(image_url)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    pixels = tf.image.central_crop(pixels, 1.0)
    pixels = tf.image.resize(pixels, (IMAGE_SIZE, IMAGE_SIZE))
    label = tf.cast(target, tf.float32)
    return (pixels, tabular), label

def get_dataset(X, train_idx, val_idx):
    """Build the training and validation ``tf.data`` pipelines for one fold.

    Args:
        X: DataFrame with ``file_path``, the ``FEATURE_COLUMNS`` and
            ``TARGET_COLUMN``.
        train_idx: Positional row indices of the training rows (as produced
            by ``KFold.split``).
        val_idx: Positional row indices of the validation rows.

    Returns:
        ``(dataset, val_dataset)``; both yield ``((image, tabular), target)``
        batches of ``BATCH_SIZE``.  Only the training dataset is shuffled.
    """

    def _make_dataset(row_positions, shuffle):
        # KFold yields positions, so select with iloc; this stays correct
        # even if X does not carry a default RangeIndex.
        rows = X.iloc[row_positions]
        dataset = tf.data.Dataset.from_tensor_slices(
            (rows["file_path"], rows[FEATURE_COLUMNS], rows[TARGET_COLUMN])
        )
        # Cache the decoded examples BEFORE shuffling and batching: caching
        # afterwards would freeze the first epoch's order and batches and
        # replay them unchanged in every later epoch.
        dataset = dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE).cache()
        if shuffle:
            dataset = dataset.shuffle(512)
        return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

    dataset = _make_dataset(train_idx, shuffle=True)
    val_dataset = _make_dataset(val_idx, shuffle=False)
    return dataset, val_dataset

# Convert the target to the 0-1 range (predictions are multiplied by 100
# again before submission).
# NOTE: this divides in place, so re-running the cell divides the target again.
train[TARGET_COLUMN] = train[TARGET_COLUMN]/100

# Smoke test: push one random image and one random feature vector through the
# model and print the input and output shapes.
image = np.random.normal(size=(1, IMAGE_SIZE, IMAGE_SIZE, 3))
tabular = np.random.normal(size=(1, len(FEATURE_COLUMNS)))

print(image.shape, tabular.shape)
print(model((image, tabular)).shape)

## Learning-Rate Scheduler

https://www.kaggle.com/awsaf49/tf-petfinder-image-tpu-train#Learning-Rate-Scheduler

In [None]:
def get_lr_callback(batch_size=8, plot=False, scheduler='exp', epochs=EPOCHS):
    """Create a ``LearningRateScheduler`` with linear warm-up and decay.

    The learning rate rises linearly from ``lr_start`` to ``lr_max`` over the
    first ``lr_ramp_ep`` epochs, is held at ``lr_max`` for ``lr_sus_ep``
    epochs, and then decays towards ``lr_min`` either exponentially or along
    a cosine curve.  ``lr_max`` scales linearly with ``batch_size``.

    Args:
        batch_size: Batch size used to scale the peak learning rate.
        plot: If true, plot the schedule over ``epochs`` epochs.
        scheduler: ``'exp'`` for exponential decay or ``'cosine'`` for
            cosine decay.
        epochs: Number of epochs; used by the cosine decay and the plot.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` instance.

    Raises:
        ValueError: If ``scheduler`` is not ``'exp'`` or ``'cosine'``.
    """
    # The cosine branch needs `math`, which the notebook never imports at the
    # top; import it locally so that branch no longer raises NameError.
    import math

    if scheduler not in ('exp', 'cosine'):
        # Previously an unknown name only failed after the warm-up epochs,
        # with an UnboundLocalError in the middle of training.
        raise ValueError(
            f"unknown scheduler {scheduler!r}; expected 'exp' or 'cosine'"
        )

    lr_start   = 0.000005
    lr_max     = 0.00000125 * batch_size
    lr_min     = 0.000001
    lr_ramp_ep = 5
    lr_sus_ep  = 0
    lr_decay   = 0.8

    def lrfn(epoch):
        """Return the learning rate for the given (0-based) epoch."""
        if epoch < lr_ramp_ep:
            # Linear warm-up.
            return (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start

        if epoch < lr_ramp_ep + lr_sus_ep:
            # Sustain phase at the peak rate.
            return lr_max

        decay_epoch_index = epoch - lr_ramp_ep - lr_sus_ep
        if scheduler == 'exp':
            return (lr_max - lr_min) * lr_decay**decay_epoch_index + lr_min

        # scheduler == 'cosine'
        decay_total_epochs = epochs - lr_ramp_ep - lr_sus_ep + 3
        phase = math.pi * decay_epoch_index / decay_total_epochs
        cosine_decay = 0.5 * (1 + math.cos(phase))
        return (lr_max - lr_min) * cosine_decay + lr_min

    if plot:
        plt.figure(figsize=(10,5))
        plt.plot(np.arange(epochs), [lrfn(epoch) for epoch in np.arange(epochs)], marker='o')
        plt.xlabel('epoch'); plt.ylabel('learnig rate')
        plt.title('Learning Rate Scheduler')
        plt.show()

    lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)
    return lr_callback

# Plot the schedule for the training batch size; the returned callback is discarded.
_=get_lr_callback(BATCH_SIZE, plot=True )

In [None]:
%%time

tf.keras.backend.clear_session()

# Defined for reference but deliberately not added to the callbacks: the
# LearningRateScheduler from get_lr_callback sets the rate every epoch.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(factor=0.3,patience=2, min_lr=1e-7)

models = []
historys = []
kfold = KFold(n_splits=TOTAL_SPLITS, shuffle=True, random_state=RANDOM_SEED)
for index, (train_idx, val_idx) in enumerate(kfold.split(train)):

    train_ds, val_ds = get_dataset(train, train_idx, val_idx)

    # Every fold gets its own model and optimizer. Reusing one model would
    # carry weights (and Adam state) trained on earlier folds' data into
    # later folds, leaking their validation rows, and `models` would end up
    # holding the same object TOTAL_SPLITS times.
    model = build_full_model()
    optimizer = tf.keras.optimizers.Adam(0.001)
    rmse = tf.keras.metrics.RootMeanSquaredError(name='rmse')
    model.compile(loss=RMSE, optimizer=optimizer, metrics=[rmse])

    # Fresh callbacks per fold so no early-stopping or checkpoint state is shared.
    checkpoint_path = "model_%d.h5"%(index)
    early_stop = tf.keras.callbacks.EarlyStopping(min_delta=1e-4, patience=10)
    checkpoint = tf.keras.callbacks.ModelCheckpoint(checkpoint_path, save_best_only=True)
    callbacks = [early_stop, checkpoint, get_lr_callback(BATCH_SIZE)]

    history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds, callbacks=callbacks)

    # Restore the best checkpoint of this fold before keeping the model.
    model.load_weights(checkpoint_path)
    historys.append(history)
    models.append(model)

## Display metrics

In [None]:
# Kept for any later cell that may reference it; not used by the plots below.
xx = range(0, EPOCHS)
col_metrics = ["loss", "val_loss", "lr"]

# Left: training / validation loss of every fold. Right: learning rate of every fold.
f, (ax1, ax2) = plt.subplots(nrows = 1, ncols = 2, figsize=(16, 8))
for hist in historys:
    df = pd.DataFrame(hist.history, columns=col_metrics)
    ax1.plot(df[[col_metrics[0], col_metrics[1]]])
    # Plot the learning rate inside the loop: outside it only the last fold
    # was drawn, and an empty `historys` raised NameError on `df`.
    ax2.plot(df[[col_metrics[2]]])

plt.show()

## Submission

In [None]:
def preprocess_test_data(image_url, tabular):
    """Turn one (path, features) test record into a model-ready example.

    The image goes through the same pipeline as the training data by
    delegating to ``preprocess``, so the two can no longer drift apart.

    Returns:
        ``((image, tabular), 0)``.  The constant 0 is a placeholder label:
        it is not used for prediction, but without it the tabular tensor
        would be treated as the label.
    """
    # The leftover debug print was removed: inside Dataset.map it only ran
    # once, while the function was being traced.
    model_inputs, _ = preprocess(image_url, tabular, 0)
    return model_inputs, 0

# Inference pipeline: (path, features) -> preprocessed batches, cached so the
# images are decoded only once across the per-fold predictions.
test_ds = (
    tf.data.Dataset.from_tensor_slices((test["file_path"], test[FEATURE_COLUMNS]))
    .map(preprocess_test_data)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

In [None]:
# Collect one flat prediction vector per fold model.
results = []
for model in models:
    fold_output = model.predict(test_ds)
    results.append(fold_output.reshape(-1))

# Average over the folds and convert back to the 0-100 range.
fold_mean = np.mean(results, axis=0)
predictions = fold_mean.reshape(-1)*100

sample_submission["Pawpularity"] = predictions
sample_submission.to_csv("submission.csv", index=False)

In [None]:
# Show the first 20 rows of the submission table.
sample_submission.head(20)