In [None]:
import numpy
import pandas

from matplotlib import pyplot
%matplotlib inline
import seaborn

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import tensorflow
import keras
from tensorflow.keras import layers, models, metrics, callbacks

# Seed handed to train_test_split as random_state so the train/validation splits are repeatable.
RANDOM_STATE = 20

In [None]:
# Base directory of the competition files; every path in the notebook is built from it.
ROOT = '/kaggle/input/petfinder-pawpularity-score/'

def read_csv(file):
    """Load ``ROOT + file + '.csv'`` as a comma-separated DataFrame.

    Args:
        file: CSV name without the ``.csv`` extension (e.g. ``'train'``).

    Returns:
        The ``pandas.DataFrame`` parsed from that file.
    """
    csv_path = ''.join((ROOT, file, '.csv'))
    return pandas.read_csv(csv_path, sep=',')

In [None]:
# Training table read from train.csv under ROOT; later cells use its 'Id' and 'Pawpularity' columns.
df = read_csv('train')

In [None]:
# Metadata columns of the training table that are inspected one by one below.
signs = [
    'Subject Focus',
    'Eyes',
    'Face',
    'Near',
    'Action',
    'Accessory',
    'Group',
    'Collage',
    'Human',
    'Occlusion',
    'Info',
    'Blur',
]

# One Pawpularity histogram per column, restricted to the rows where that column equals 1.
for s in signs:
    # Single .loc[rows, column] lookup instead of chained indexing.
    grid = seaborn.displot(df.loc[df[s] == 1, 'Pawpularity'])
    # Without a title the twelve figures cannot be told apart.
    grid.set(title=s)
    
#seaborn.displot(numpy.log(df['Pawpularity']))
#seaborn.displot(numpy.log(df['Pawpularity']) / numpy.log(100))

#display(df['Pawpularity'].describe())

In [None]:
def sign_neurons():
    """Build and compile the dense regressor for the 12 metadata columns.

    Architecture: two 32-unit sigmoid hidden layers, 10 % dropout, and a
    single sigmoid output unit (so predictions lie in the open interval 0..1).

    Returns:
        A compiled ``Sequential`` model using Adam, MSE loss, and a
        ``RootMeanSquaredError`` metric reported under the name ``'rmse'``.
    """
    stack = [
        layers.Dense(32, input_shape=(12,)),
        layers.Activation('sigmoid'),
        layers.Dense(32),
        layers.Activation('sigmoid'),
        layers.Dropout(0.1),
        layers.Dense(1),
        layers.Activation('sigmoid'),
    ]
    net = models.Sequential(stack)

    rmse_metric = metrics.RootMeanSquaredError(name='rmse')
    net.compile(optimizer='adam', loss='mse', metrics=[rmse_metric])
    return net

# Features: every column of the training table except the identifier and the target.
cf = df.drop(columns=['Id', 'Pawpularity'])
# Target on a log-base-100 scale, matching the sigmoid output of the model.
y = numpy.log(df['Pawpularity']) / numpy.log(100)

train_cf, test_cf, train_y, test_y = train_test_split(
    cf,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

signNeurons = sign_neurons()
# Stop once validation RMSE has not improved for 10 consecutive epochs.
stopper = callbacks.EarlyStopping(monitor='val_rmse', patience=10)
history = signNeurons.fit(
    train_cf,
    train_y,
    validation_data=(test_cf, test_y),
    epochs=200,
    batch_size=256,
    callbacks=[stopper],
)

In [None]:
def image_neurons(image_size=128, base_filters=32):
    """Build and compile the convolutional regressor for the pet photos.

    Architecture: an input rescaling layer, three stages of
    (Conv3x3 + ReLU) x 2 followed by max-pooling with the filter count
    doubling after every stage, then Flatten -> Dense + ReLU ->
    BatchNormalization -> Dropout(0.4) -> a single sigmoid output unit.

    Args:
        image_size: Height and width of the square RGB input. Defaults to
            128, the size the notebook resizes images to.
        base_filters: Filter count of the first stage. It is doubled after
            each stage, and the dense layer uses the value reached after
            the last doubling (256 for the default of 32).

    Returns:
        A compiled ``Sequential`` model using Adam, MSE loss, and a
        ``RootMeanSquaredError`` metric reported under the name ``'rmse'``.
    """
    model = models.Sequential()

    # The notebook feeds decoded JPEG pixels without scaling them, so map
    # 0..255 to 0..1 inside the model; every caller of fit/predict then gets
    # the same normalisation without having to preprocess the images itself.
    model.add(layers.Rescaling(1.0 / 255, input_shape=(image_size, image_size, 3)))

    filters = base_filters
    for _ in range(3):
        # Two unpadded 3x3 convolutions per stage, then halve the resolution.
        for _ in range(2):
            model.add(layers.Conv2D(filters, (3, 3), padding='valid'))
            model.add(layers.Activation('relu'))
        model.add(layers.MaxPooling2D())
        filters *= 2

    model.add(layers.Flatten())
    model.add(layers.Dense(filters))
    model.add(layers.Activation('relu'))

    model.add(layers.BatchNormalization())

    model.add(layers.Dropout(0.4))

    # Sigmoid keeps the prediction in 0..1, the range of the log-scaled target.
    model.add(layers.Dense(1))
    model.add(layers.Activation('sigmoid'))

    model.compile(optimizer='adam', loss='mse', metrics=[metrics.RootMeanSquaredError(name='rmse')])
    return model

# Early-stopping callback watching validation RMSE with a patience of 40 epochs.
# NOTE(review): the image-training cell rebinds `stopper` (patience=5) before calling fit,
# so this instance looks unused — confirm and remove.
stopper = callbacks.EarlyStopping(monitor='val_rmse', patience=40)

In [None]:
def _load_image(image_id):
    """Read ``ROOT/train/<image_id>.jpg``, decode it as RGB and resize it to 128x128."""
    raw_bytes = tensorflow.io.read_file(ROOT + 'train/' + image_id + '.jpg')
    pixels = tensorflow.io.decode_jpeg(raw_bytes, channels=3)
    return tensorflow.image.resize(pixels, [128, 128])


# One resized image per row of the training table, in table order.
imgs = numpy.array([_load_image(image_id) for image_id in df['Id']])
# Matching targets on the log-base-100 scale.
y = numpy.array([numpy.log(score) / numpy.log(100) for score in df['Pawpularity']])

In [None]:
# Hold out 20 % of the images; this rebinds train_y / test_y from the metadata cell.
train_images, test_images, train_y, test_y = train_test_split(
    imgs,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

imageNeurons = image_neurons()

# Stop once validation RMSE has not improved for 5 consecutive epochs.
stopper = callbacks.EarlyStopping(monitor='val_rmse', patience=5)
history = imageNeurons.fit(
    train_images,
    train_y,
    validation_data=(test_images, test_y),
    epochs=20,
    batch_size=256,
    callbacks=[stopper],
)

In [None]:
# Learning curves of the most recent fit: RMSE per epoch on both data splits.
for curve_key, curve_label in (('rmse', 'training'), ('val_rmse', 'validation')):
    pyplot.plot(history.history[curve_key], label=curve_label)
pyplot.xlabel('Epoch')
pyplot.ylabel('RMSE')
pyplot.legend(loc='best')
pyplot.show()

In [None]:
def to_pawpularity(log_scaled):
    """Undo the log-base-100 target transform.

    Both models are trained on ``log(Pawpularity) / log(100)``. This maps such
    values (targets or predictions) back to the original Pawpularity scale and
    flattens them to 1-D so the column-shaped ``predict`` output lines up with
    the target vector.

    Args:
        log_scaled: Array-like of log-base-100 scaled values.

    Returns:
        1-D ``numpy.ndarray`` of values on the original Pawpularity scale.
    """
    return numpy.exp(numpy.asarray(log_scaled, dtype=float).ravel() * numpy.log(100))


# test_y is still log-scaled, so it has to be converted as well: comparing it
# directly against back-transformed predictions mixes two different scales.
true_scores = to_pawpularity(test_y)

# Predict once per model and reuse the result for the blend.
image_scores = to_pawpularity(imageNeurons.predict(test_images))
# NOTE(review): test_cf comes from the metadata split while test_y / test_images come
# from the image split; they are expected to cover the same rows because both splits
# use the same random_state on the same number of rows — confirm.
sign_scores = to_pawpularity(signNeurons.predict(test_cf))
blend_scores = (image_scores + sign_scores) / 2

# RMSE on the original Pawpularity scale: blend, image model, metadata model.
print(numpy.sqrt(mean_squared_error(true_scores, blend_scores)))
print(numpy.sqrt(mean_squared_error(true_scores, image_scores)))
print(numpy.sqrt(mean_squared_error(true_scores, sign_scores)))