In [None]:
#import modules
import os

import pandas as pd
import numpy as np
import seaborn as sns
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense,concatenate, Activation,Add, Dropout, MaxPooling2D, Conv2D, Flatten, Input, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras import regularizers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.python.keras.utils.vis_utils import plot_model
from keras.callbacks import EarlyStopping
from tensorflow.keras.applications.resnet50 import ResNet50

In [None]:
#check gpu
# Ask TensorFlow, through its public tf.config API, which GPUs it can see.
# An empty list means no GPU is visible to this kernel.
tf.config.list_physical_devices('GPU')

In [None]:
# Folder that holds the competition files; both CSV paths are derived from it
# so the location is spelled out only once.
comp_path = '../input/petfinder-pawpularity-score'
train_csv_path = comp_path + '/train.csv'
test_csv_path = comp_path + '/test.csv'

In [None]:
#read meta data
# Load the training CSV into a DataFrame and preview its first rows.
# Later cells read the 'Id' column, twelve metadata flag columns and the
# 'Pawpularity' column from this frame.
train_meta = pd.read_csv(train_csv_path)
train_meta.head()

In [None]:
#function for make full path
def make_full_path_train(id):
    """Return the path of the training JPEG whose file stem is `id`.

    The path is built as ``<comp_path>/train/<id>.jpg``.
    """
    file_name = id + '.jpg'
    return os.path.join(comp_path, 'train', file_name)


# Keep each image's file path in a new 'Id2' column, next to its 'Id'.
image_paths = train_meta['Id'].apply(make_full_path_train)
train_meta['Id2'] = image_paths
train_meta.head()

In [None]:
#split the data
# Hold out 20% of the rows as a validation set; random_state=0 makes the
# split repeatable.
# NOTE(review): `train_meta` is rebound to the training portion here, so
# re-running this cell without reloading the CSV splits the already-reduced
# frame a second time.
train_meta, valid_meta = train_test_split(train_meta, test_size = 0.2, random_state=0)

In [None]:
# Generator for 2 input
def gen_flow_for_two_inputs(datagen, batch, x_train, shuffle=True):
    """Yield ``([images, metadata], target)`` batches for the two-input model.

    The image iterator is asked to hand back each row's ``Id`` in place of a
    label (``y_col='Id'`` with ``class_mode='raw'``). Those ids are then used
    to look up the metadata flags and the ``Pawpularity`` target in
    ``x_train``, so the three arrays of a batch always describe the same rows,
    whether or not the iterator shuffles.

    Args:
        datagen(image.ImageDataGenerator): data generator
        batch(int): batch size
        x_train: dataframe with the ``Id`` and ``Id2`` (image path) columns,
            the twelve metadata columns and ``Pawpularity``
        shuffle(bool): bool to shuffle data

    Yields:
        tuple: ``([batch_image, batch_meta], batch_target)``. The generator
        loops forever; the consumer decides how many batches to draw.
    """
    meta_cols = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
                 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']
    # Index by Id so rows can be fetched with the ids the iterator returns.
    rows_by_id = x_train.set_index('Id')
    # Pass index to the 2nd parameter instead of labels
    image_iter = datagen.flow_from_dataframe(x_train, batch_size=batch, shuffle=shuffle,
                                             x_col='Id2', y_col='Id', class_mode='raw',
                                             target_size=(224, 224))
    while True:
        # Use the iterator protocol rather than a `.next()` method, which not
        # every Keras release provides on its iterators.
        batch_image, batch_index = next(image_iter)
        batch_rows = rows_by_id.loc[batch_index]
        yield [batch_image, batch_rows[meta_cols].values], batch_rows['Pawpularity'].values

In [None]:
# Definition of Model
#   Image     : ResNet50 backbone (ImageNet weights, no classification head)
#               followed by global average pooling
#   Meta data : the 12 metadata flags, concatenated with the image features
#               and passed through a small dense regression head
input1 = Input(shape=(224, 224, 3))
input2 = Input(shape=(12,))

# NOTE(review): the data generators in this notebook feed pixels multiplied by
# 1/255, while Keras ships resnet50.preprocess_input as the preprocessing
# function for this backbone -- confirm the 1/255 scaling is intentional.
ResNet = ResNet50(include_top=False, weights='imagenet',input_tensor=input1)
x1 = GlobalAveragePooling2D()(ResNet.output)

# The metadata branch has no layers of its own, so its input tensor is joined
# to the pooled image features directly instead of being wrapped in a Model.
combined = concatenate([x1, input2])

z = Dense(128)(combined)
z = BatchNormalization()(z)
z = Activation('relu')(z)
# Single linear unit: the network regresses one scalar per sample.
z = Dense(1)(z)

model = Model(inputs=[input1, input2], outputs=z)
model.compile(loss='mse', optimizer='adam', metrics=['mse'])
model.summary()

In [None]:
# Draw the layer graph of the two-input model defined above.
plot_model(model)

In [None]:
# Training images: pixel values multiplied by 1/255, plus light random
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = image.ImageDataGenerator(
    rescale=1/255,
    rotation_range = 10,
    width_shift_range = 0.1,
    height_shift_range = 0.1,
    shear_range = 0.1,
    zoom_range = 0.2,
    horizontal_flip = True
    )

# Validation images are only rescaled, never augmented.
val_datagen = image.ImageDataGenerator(
    rescale=1/255,
    )
EPOCH = 2
BATCH = 64

# NOTE(review): patience (20) is larger than EPOCH (2), so with the current
# settings this callback can never end the run early.
early_stopping =  EarlyStopping(
                            monitor='val_loss',
                            min_delta=5.0,
                            patience=20,
)

# The generators never terminate, so Keras must be told how many batches make
# up one pass over each frame. np.ceil returns a float; the step counts are
# cast to int because they are counts of batches.
train_steps = int(np.ceil(train_meta.shape[0] / BATCH))
valid_steps = int(np.ceil(valid_meta.shape[0] / BATCH))

log = model.fit(
    x = gen_flow_for_two_inputs(train_datagen, BATCH, train_meta),
    steps_per_epoch = train_steps,
    validation_data = gen_flow_for_two_inputs(val_datagen, BATCH, valid_meta),
    validation_steps = valid_steps,
    epochs = EPOCH,
    callbacks=[early_stopping]
    )

In [None]:
#save the trained model
# Writes the fitted model to 'resnet_multi_3.h5', relative to the working directory.
model.save('resnet_multi_3.h5')

In [None]:
# show history
# Training and validation loss per epoch, drawn on an explicit Axes object.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(log.history[curve], label=curve)
ax.legend(frameon=False)
ax.set_xlabel("epochs")
ax.set_ylabel("mse")
plt.show()

In [None]:
#prediction
# Test images are only multiplied by 1/255, with no augmentation.
test_datagen = image.ImageDataGenerator(rescale=1/255)

BATCH = 32

test_meta = pd.read_csv(test_csv_path)


def make_full_path_test(id):
    """Return the path of the test JPEG whose file stem is `id`.

    The path is built as ``<comp_path>/test/<id>.jpg``.
    """
    file_name = id + '.jpg'
    return os.path.join(comp_path, 'test', file_name)


# Keep each test image's file path in an 'Id2' column, next to its 'Id'.
test_image_paths = test_meta['Id'].apply(make_full_path_test)
test_meta['Id2'] = test_image_paths

# Generator for 2 input
def gen_flow_for_two_inputs_test(datagen, batch, x_train, shuffle=True):
    """Yield ``([images, metadata], placeholder)`` batches for prediction.

    The image iterator is asked to hand back each row's ``Id`` in place of a
    label (``y_col='Id'`` with ``class_mode='raw'``); those ids are used to
    look up the matching metadata flags in ``x_train``. No target column is
    read: the second element of every yielded pair is a constant
    ``np.zeros(1)`` placeholder.

    Args:
        datagen(image.ImageDataGenerator): data generator
        batch(int): batch size
        x_train: dataframe with the ``Id`` and ``Id2`` (image path) columns
            and the twelve metadata columns
        shuffle(bool): bool to shuffle data. Pass ``False`` when the outputs
            must stay in the row order of ``x_train``.

    Yields:
        tuple: ``([batch_image, batch_meta], np.zeros(1))``. The generator
        loops forever; the consumer decides how many batches to draw.
    """
    meta_cols = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
                 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']
    # Index by Id so rows can be fetched with the ids the iterator returns.
    rows_by_id = x_train.set_index('Id')
    # Pass index to the 2nd parameter instead of labels
    image_iter = datagen.flow_from_dataframe(x_train, batch_size=batch, shuffle=shuffle,
                                             x_col='Id2', y_col='Id', class_mode='raw',
                                             target_size=(224, 224))
    while True:
        # Use the iterator protocol rather than a `.next()` method, which not
        # every Keras release provides on its iterators.
        batch_image, batch_index = next(image_iter)
        batch_meta = rows_by_id.loc[batch_index, meta_cols].values
        yield [batch_image, batch_meta], np.zeros(1)

# Model.predict accepts a generator directly; predict_generator is a
# deprecated alias for it.
# shuffle=False keeps the predictions in the same row order as test_meta, and
# the integer step count tells Keras when to stop drawing from the endless
# generator.
test_steps = int(np.ceil(test_meta.shape[0] / BATCH))
pred = model.predict(
    gen_flow_for_two_inputs_test(test_datagen, BATCH, test_meta, shuffle=False),
    steps = test_steps,
    verbose = 1
    )

In [None]:
#save the submission file
# Flatten the predictions before storing them so the column is assigned a
# plain 1-D vector (presumably `pred` is (n_rows, 1) given the Dense(1) head).
# pandas raises if the length does not match the number of test rows.
test_meta['Pawpularity'] = np.ravel(pred)
submission_df = test_meta[['Id','Pawpularity']]
# index=False: the file holds only the Id and Pawpularity columns.
submission_df.to_csv("submission.csv", index=False)
submission_df.head()