## Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
# import matplotlib.image as Image
import os

import tensorflow as tf
from tensorflow import keras

In [None]:
# Root of the competition dataset as mounted inside a Kaggle notebook.
input_dir = '../input/petfinder-pawpularity-score/'

# Image folders; the trailing slash matters because file names are appended
# with plain string concatenation.
directory_train = input_dir + 'train/'
directory_test = input_dir + 'test/'
# Misspelled legacy name, kept as an alias for code that still refers to it.
direcory_train = directory_train

# Tabular files holding image ids and metadata columns.
csv_train = input_dir + 'train.csv'
csv_test = input_dir + 'test.csv'

In [None]:
# Load the training table (image ids, metadata columns, Pawpularity target).
data = pd.read_csv(csv_train)
# Last expression of the cell: shows the first rows in the notebook.
data.head()

## EDA

In [None]:
# Column dtypes and non-null counts of the training table.
data.info()

In [None]:
# Summary statistics of the training table's columns.
data.describe()

In [None]:
# Histogram of the Pawpularity target over a wide figure.
fig = plt.figure(figsize=(20,5))
# 199 bins; the returned counts/edges/patches are not used further in this cell.
arr, bins, patches = plt.hist(data.Pawpularity, bins = 199)
# One tick per integer score from 1 to 100, rotated so the labels do not overlap.
plt.xticks(range(1,101), rotation=90)
plt.show()

### View some images with low and high pawpularity

In [None]:
# Three slices of the training table used to eyeball images:
# very low scores, a band around 30, and very high scores.
pawpularity_0 = data.loc[data['Pawpularity'].le(5)]
pawpularity_30 = data.loc[data['Pawpularity'].between(25, 35)]  # both ends inclusive
pawpularity_100 = data.loc[data['Pawpularity'].ge(95)]

In [None]:
def show_images(paws):
    """Show up to the first nine training images of ``paws`` in a 3x3 grid.

    paws: DataFrame with an ``Id`` column; each id is resolved to
        ``direcory_train + Id + '.jpg'``.

    Frames with fewer than nine rows simply leave the remaining grid cells
    empty instead of raising an IndexError.
    """
    n_rows, n_cols = 3, 3
    fig = plt.figure(figsize=(10, 10), constrained_layout=True)
    grids = fig.add_gridspec(n_rows, n_cols)

    # Slicing (rather than indexing 0..8) tolerates short frames.
    for k, image_id in enumerate(paws.Id.iloc[:n_rows * n_cols]):
        row, col = divmod(k, n_cols)
        ax = fig.add_subplot(grids[row, col])
        # Context manager closes the file once the image has been drawn.
        with Image.open(direcory_train + image_id + '.jpg') as img:
            ax.imshow(img)

    plt.show()

In [None]:
# Sample of the lowest-scored images.
print('Pawpularity <= 5')
show_images(pawpularity_0)

In [None]:
# Sample of images scored in the 25-35 band.
print('Pawpularity >= 25 and <=35')
show_images(pawpularity_30)

In [None]:
# Sample of the highest-scored images.
print('Pawpularity >= 95')
show_images(pawpularity_100)

## Convert images to nparray

In [None]:
# Placeholder; replaced by the image array returned from make_nparray
# (or by load_data when a pickled copy is used).
data_np = []
# Regression target for training.
target = data.Pawpularity

# Size every image is resized to; also passed to MobileNetV2 as input_shape.
img_shape = (250,250,3)

In [None]:
def make_nparray(data, directory):
    """Load every image listed in ``data.Id`` into a single uint8 array.

    data: DataFrame with an ``Id`` column of image file names (no extension).
    directory: path prefix, including the trailing separator, to which
        ``Id + '.jpg'`` is appended.

    Returns an array of shape
    ``(len(data), img_shape[0], img_shape[1], 3)`` with dtype uint8.
    """
    height, width = img_shape[0], img_shape[1]
    im_array = np.zeros((data.shape[0], height, width, 3), dtype=np.uint8)

    for i, image_id in enumerate(data.Id):
        # Context manager closes the file handle after each image.
        with Image.open(directory + image_id + '.jpg') as img:
            # Force three channels so grayscale/CMYK files still fit the
            # buffer; PIL's resize expects (width, height), the reverse of
            # the (rows, cols) array layout.
            rgb = img.convert('RGB').resize((width, height))
            im_array[i] = np.asarray(rgb, dtype=np.uint8)

    return im_array

In [None]:
# Run this on a Kaggle notebook: reads and resizes every training image
# from the competition input folder into one in-memory array.
data_np = make_nparray(data, direcory_train)

In [None]:
# Optional, for a local machine: instead of rebuilding the array on every run, pickle it once and load it from disk.

def load_data():
    """Load the cached image array from ``data_file.pkl`` into global ``data_np``.

    The cache file is expected to have been written beforehand, e.g.::

        with open('data_file.pkl', 'wb') as data_file:
            pickle.dump(data_np, data_file)

    Raises FileNotFoundError (from ``open``) when the cache does not exist.
    """
    global data_np

    import pickle

    # NOTE(security): unpickling can execute arbitrary code; only load a
    # cache file that you created yourself.
    # The with-block closes the file even if unpickling fails.
    with open('data_file.pkl', 'rb') as data_file:
        data_np = pickle.load(data_file)

# load_data()

## Train

In [None]:
# Learning-rate schedule settings: linear warm-up from start_lr to max_lr,
# an optional plateau, then exponential decay towards min_lr.
start_lr = 6.25e-4
min_lr = 1e-5
max_lr = 1e-3
rampup_epochs = 5
sustain_epochs = 0
exp_decay = 0.8


def lrfn(epoch):
    """Return the learning rate for the given 0-based epoch index."""
    # Warm-up: straight line from start_lr (epoch 0) towards max_lr.
    if epoch < rampup_epochs:
        slope = (max_lr - start_lr) / rampup_epochs
        return slope * epoch + start_lr

    # Plateau: hold max_lr for sustain_epochs epochs (none when it is 0).
    if epoch < rampup_epochs + sustain_epochs:
        return max_lr

    # Decay: shrink the gap above min_lr by exp_decay each further epoch.
    decay_steps = epoch - rampup_epochs - sustain_epochs
    return (max_lr - min_lr) * exp_decay**decay_steps + min_lr
    
# Keras callback that asks lrfn for the learning rate at the start of each epoch.
lr_callback = tf.keras.callbacks.LearningRateScheduler(schedule=lrfn, verbose=True)

# Preview of the schedule for the first 15 epochs.
rang = np.arange(15)
y = list(map(lrfn, rang))
plt.plot(rang, y)
print('Learning rate per epoch:')

In [None]:
def make_model_with_metadata(lr, epoch, meta):
    """Build and compile a MobileNetV2 regressor that also takes tabular metadata.

    lr: learning rate handed to the Adam optimizer.
    epoch: unused; kept so existing calls keep working.
    meta: metadata matrix whose second dimension sets the width of the
        metadata input. When it has no 2-D ``shape`` (e.g. ``None``), the
        previous fixed width of 12 is used.

    Returns a compiled keras Model taking ``[image, metadata]`` and producing
    one value per sample, with mean-squared-error loss and metric.
    """
    # Derive the metadata width from the data instead of hard-coding it.
    meta_shape = getattr(meta, 'shape', None)
    if meta_shape is not None and len(meta_shape) > 1:
        n_meta = int(meta_shape[1])
    else:
        n_meta = 12

    # Backbone without the classification head; weights=None means it is
    # trained from scratch (no pretrained weights are loaded).
    backbone = keras.applications.MobileNetV2(input_shape=img_shape,
                                              include_top=False,
                                              weights=None
                                              )
    meta_in = keras.layers.Input((n_meta,))

    X = keras.layers.GlobalAveragePooling2D()(backbone.output)
    X = keras.layers.BatchNormalization()(X)
    # Already flat after global pooling; kept so the layer stack is unchanged.
    X = keras.layers.Flatten()(X)
    # Join image features with the tabular metadata.
    X = keras.layers.Concatenate()([X, meta_in])
    # NOTE(review): the Dense layers have no activation, so this head is
    # linear in the pooled features - confirm that is intended.
    X = keras.layers.Dense(100)(X)
    X = keras.layers.BatchNormalization()(X)
    X = keras.layers.Dense(1)(X)

    opt = keras.optimizers.Adam(lr)
    model = keras.models.Model(inputs=[backbone.input, meta_in], outputs=X)

    model.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_squared_error'])

    return model

In [None]:
# Metadata columns from 'Subject Focus' through 'Blur' (label slice, both ends included).
meta_data = data.loc[:, 'Subject Focus':'Blur']
# Plain array form, fed to the model's metadata input.
meta_data_np = np.array(meta_data)

In [None]:
# Build the image+metadata model with an initial Adam learning rate of 0.0001.
# The second argument (epoch) is not used by the function body.
# NOTE(review): lr_callback is passed to fit, so presumably the schedule
# replaces this initial rate during training - confirm.
meta_model = make_model_with_metadata(0.0001, 1, meta_data_np)
meta_model.summary()

In [None]:
# Stop training once val_loss has not improved for 3 epochs.
# NOTE(review): restore_best_weights is not set, so the weights presumably
# stay at the last epoch rather than the best one - confirm this is intended.
es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

In [None]:
# Train on the in-memory image and metadata arrays, holding out 15% of the
# samples for validation. The former `workers` argument is dropped: it only
# applies to generator / Sequence input, and newer Keras releases no longer
# accept it in fit().
meta_model.fit(x=[data_np, meta_data_np],
               y=target,
               batch_size=12,
               epochs=20,
               validation_split=0.15,
               callbacks=[lr_callback, es_callback])

# Release the training arrays before the test images are loaded.
del meta_data_np
del data_np

## Load Test

In [None]:
# Load the test table through the shared path constant rather than a
# second hard-coded copy of the same path.
test = pd.read_csv(csv_test)

In [None]:
# Read and resize the test images the same way as the training images.
test_np = make_nparray(test, directory_test)

In [None]:
# Select exactly the metadata columns used for training ('Subject Focus'
# through 'Blur'); an open-ended slice would also pick up any column that
# happens to follow 'Blur' and change the input width.
test_meta = test.loc[:, 'Subject Focus':'Blur']
test_meta = np.array(test_meta)

## Predict

In [None]:
# Raw model output, one row per test image.
predictions = meta_model.predict([test_np, test_meta])

# Build the submission with Id first (presumably the order of the
# competition's sample submission - TODO confirm). Scores are clipped to
# 1-100, the range the EDA above treats as the Pawpularity scale, because
# the linear output layer is unbounded.
submission = pd.DataFrame({
    'Id': test.Id,
    'Pawpularity': np.clip(predictions.ravel(), 1, 100),
})
submission.head()

## Output

In [None]:
# Write the submission file without the DataFrame index column.
submission.to_csv('submission.csv', index=False)

In [None]:
# from tensorflow.python.client import device_lib
# print(device_lib.list_local_devices())