For my first attempt at this competition, I use only the images to make the prediction and ignore the tabular data.

I mostly relied on the two sources below, together with my own computer vision / deep learning knowledge.

https://www.kaggle.com/genichiroshimizu/keras-multi-imput-image-resnet50-meta-nn

https://github.com/keras-team/keras/issues/8792


In [None]:
# Notebook-wide switch read by every cell below:
#   True  -> build, train and save the model
#   False -> load the saved model, predict on the test set and write the submission
train_mode = False # is it training mode or submission mode

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import load_model
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import load_model

if train_mode:
    import keras
    import tensorflow as tf
    from keras import models
    from keras import layers
    from tensorflow.keras.layers import Dropout
    from tensorflow.keras.optimizers import Adam
    from tensorflow.keras.callbacks import EarlyStopping
    from tensorflow.keras.applications.inception_v3 import InceptionV3

In [None]:
# Root folder of the competition data (the trailing slash is relied on below).
main_folder = '/kaggle/input/petfinder-pawpularity-score/'

# The test metadata is loaded in both modes; 'img_fnm' holds the full path of
# the .jpg file that belongs to each Id.
test_image_folder = ''.join((main_folder, 'test'))
test_meta = pd.read_csv(''.join((main_folder, 'test.csv')))
test_meta['img_fnm'] = test_meta['Id'].map(
    lambda image_id: ''.join((test_image_folder, '/', image_id, '.jpg'))
)

# The training metadata is only needed when the model is (re)trained.
if train_mode:
    train_image_folder = ''.join((main_folder, 'train'))
    train_meta = pd.read_csv(''.join((main_folder, 'train.csv')))
    train_meta['img_fnm'] = train_meta['Id'].map(
        lambda image_id: ''.join((train_image_folder, '/', image_id, '.jpg'))
    )

In [None]:
# Split the data to find the best settings; the final model is trained on the whole training set.
# train_meta, valid_meta = train_test_split(train_meta, test_size = 0.2, random_state=12345)

In [None]:
# Side length (in pixels) of the square images fed to the network; also used
# by the data generators in later cells.
target_size = 299
if train_mode:
    # ImageNet-pretrained InceptionV3 used as the convolutional base, without
    # its original classification top.
    inceptionv3_pretrained = InceptionV3(
        weights='imagenet',
        include_top=False,
        input_shape=(target_size, target_size, 3),
    )

    # Regression head stacked on the base: flatten, then two 64-unit ReLU
    # layers with dropout in between, ending in a single linear output.
    model = models.Sequential([
        inceptionv3_pretrained,
        layers.Flatten(),
        Dropout(0.25),
        layers.Dense(64, activation='relu'),
        Dropout(0.2),
        layers.Dense(64, activation='relu'),
        Dropout(0.2),
        layers.Dense(1),
    ])

    # Mean squared error is both the training loss and the reported metric.
    model.compile(
        optimizer=Adam(learning_rate=2e-5),
        loss='mse',
        metrics=['mse'],
    )
    model.summary()

In [None]:
# Generator that pairs each image batch with its Pawpularity targets
if train_mode:
    def gen_flow_for_two_inputs(datagen, batch, x_train, shuffle=True):
        """Yield ``(images, targets)`` batches forever, for use with ``model.fit``.

        Images are read through ``datagen.flow_from_dataframe``. The ``Id``
        column is passed as the "label" column, so every batch comes back
        together with the ids of its rows; those ids are then used to look up
        the ``Pawpularity`` value of each image.

        Args:
            datagen: data generator object providing ``flow_from_dataframe``
                (an ``ImageDataGenerator`` in this notebook).
            batch (int): batch size.
            x_train: dataframe with the columns 'Id', 'img_fnm' (image path)
                and 'Pawpularity'.
            shuffle (bool): whether the rows are shuffled.

        Yields:
            tuple: the image batch and the array of matching 'Pawpularity'
            values.
        """
        # Index by Id so the ids returned with each batch can be looked up.
        targets_by_id = x_train.set_index('Id')
        image_flow = datagen.flow_from_dataframe(x_train, batch_size=batch, shuffle=shuffle,
                                                 x_col='img_fnm', y_col='Id', class_mode='raw',
                                                 target_size=(target_size, target_size))
        while True:
            # Use the iterator protocol rather than the iterator's .next()
            # method, which is not available in every Keras version.
            batch_image, batch_ids = next(image_flow)
            yield batch_image, targets_by_id.loc[batch_ids, 'Pawpularity'].values

    # Training-time preprocessing: rescale pixel values by 1/255 and apply
    # random rotation, shift, shear, zoom and horizontal flip augmentation.
    train_datagen = ImageDataGenerator(rescale = 1./255.,
                                       rotation_range = 30,
                                       width_shift_range = 0.2,
                                       height_shift_range = 0.2,
                                       shear_range = 0.1,
                                       zoom_range = 0.3,
                                       horizontal_flip = True)

    # val_datagen = ImageDataGenerator(rescale = 1./255.)

    EPOCH = 2
    BATCH = 32

    # No validation data is passed to fit() below, so a 'val_loss' metric is
    # never produced and a callback monitoring it could never act. Monitor the
    # training loss instead; switch back to 'val_loss' if the validation
    # generator is re-enabled.
    early_stopping = EarlyStopping(
        monitor='loss',
        min_delta=1.0,
        patience=50,
    )

    log = model.fit(
        x = gen_flow_for_two_inputs(train_datagen, BATCH, train_meta),
        # The generator never ends, so the epoch length must be given
        # explicitly; Keras documents this argument as an integer.
        steps_per_epoch = int(np.ceil(train_meta.shape[0] / BATCH)),
    #     validation_data = gen_flow_for_two_inputs(val_datagen, BATCH, valid_meta),
    #     validation_steps = int(np.ceil(valid_meta.shape[0] / BATCH)),
        epochs = EPOCH,
        callbacks=[early_stopping]
        )

In [None]:
if train_mode:
    # Second fine-tuning pass: lower the learning rate and train for two more
    # epochs on a fresh training generator.
    model.optimizer.learning_rate = 0.5e-6
    log = model.fit(
        x = gen_flow_for_two_inputs(train_datagen, BATCH, train_meta),
        # The generator never ends, so the epoch length must be given
        # explicitly; Keras documents this argument as an integer.
        steps_per_epoch = int(np.ceil(train_meta.shape[0] / BATCH)),
    #     validation_data = gen_flow_for_two_inputs(val_datagen, BATCH, valid_meta),
    #     validation_steps = int(np.ceil(valid_meta.shape[0] / BATCH)),
        epochs = 2,
        callbacks=[early_stopping]
        )

In [None]:
if train_mode:
    # Write the trained model to an .h5 file in the current working directory.
    model.save('InceptionV3_2_64FC_191121.h5')

In [None]:
if not train_mode:
    # Submission mode: reuse the model file produced by a training run instead
    # of retraining. The path presumably points at an attached dataset holding
    # the saved .h5 file — confirm it matches where the file was uploaded.
    model = load_model('../input/petfinder-inceptionv3-finetuned/InceptionV3_2_64FC_191121.h5')

# Prediction

In [None]:
if not train_mode:
    # Test-time preprocessing: only the 1/255 rescaling, no augmentation.
    test_datagen = ImageDataGenerator(rescale=1/255)

    BATCH = 32

    def gen_flow_for_two_inputs_test(datagen, batch, x_train, shuffle=True):
        """Yield ``(images, placeholder_labels)`` batches forever for prediction.

        Args:
            datagen: data generator object providing ``flow_from_dataframe``
                (an ``ImageDataGenerator`` in this notebook).
            batch (int): batch size.
            x_train: dataframe with the columns 'Id' and 'img_fnm' (image path).
            shuffle (bool): whether the rows are shuffled. Pass ``False`` when
                predicting, otherwise the predictions no longer line up with
                the dataframe rows.

        Yields:
            tuple: the image batch and an all-zero label array with one entry
            per image in the batch (there are no real labels at test time).
        """
        image_flow = datagen.flow_from_dataframe(x_train, batch_size=batch, shuffle=shuffle,
                                                 x_col='img_fnm', y_col='Id', class_mode='raw',
                                                 target_size=(target_size, target_size))
        while True:
            # Use the iterator protocol rather than the iterator's .next()
            # method, which is not available in every Keras version.
            batch_image, _ = next(image_flow)
            # Placeholder labels, sized to match the batch.
            yield batch_image, np.zeros(len(batch_image))

    # Model.predict accepts generators directly; predict_generator is deprecated.
    # The generator never ends, so the (integer) number of batches is explicit.
    pred = model.predict(
        gen_flow_for_two_inputs_test(test_datagen, BATCH, test_meta, shuffle=False),
        steps=int(np.ceil(test_meta.shape[0] / BATCH)),
        verbose=1,
    )

In [None]:
if not train_mode:
    # Save the submission file: one row per test Id with its predicted score.
    # np.ravel flattens the prediction array so it is stored as a plain 1-D
    # column whether the model returned shape (n,) or (n, 1).
    test_meta['Pawpularity'] = np.ravel(pred)
    submission_df = test_meta[['Id', 'Pawpularity']]
    submission_df.to_csv("submission.csv", index=False)
    # A bare expression inside an ``if`` body is never displayed by the
    # notebook, so print a preview explicitly.
    print(submission_df.head())