In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Root folder of the competition data.
input_path = Path('/kaggle/input/petfinder-pawpularity-score/')
# Materialise the directory listing so it is rendered as the cell output.
[entry for entry in input_path.iterdir()]

In [None]:
# Preview the first rows of the sample submission file.
sample_submission_csv = input_path / 'sample_submission.csv'
pd.read_csv(sample_submission_csv).head()

In [None]:
# Metadata tables for the two splits.
train_csv, test_csv = input_path / 'train.csv', input_path / 'test.csv'
train_df = pd.read_csv(train_csv)
test_df = pd.read_csv(test_csv)

# Folders that hold the image files of each split.
train_imgs, test_imgs = input_path / 'train', input_path / 'test'

# (rows, columns) of both tables, shown as the cell output.
(train_df.shape, test_df.shape)

In [None]:
# Peek at the first five rows of the training table.
train_df.head(n=5)

In [None]:
# Visual sanity check: four randomly drawn training images, each titled
# with its Pawpularity value.
fig, ax = plt.subplots(1, 4, sharex=True, figsize=(14, 6))

random_train_imgs = train_df.sample(4)
for axis, (_, row) in zip(ax, random_train_imgs.iterrows()):
    picture = plt.imread(train_imgs / (row.Id + '.jpg'))
    axis.imshow(picture)
    axis.set_title(row.Pawpularity)
plt.tight_layout()
plt.show()

In [None]:
# Inspect the dimensions of the first training image.
from PIL import Image
first_image_path = train_imgs / (train_df.loc[0, 'Id'] + '.jpg')
img = Image.open(first_image_path)
# The two values are stored as width and height (in that order) further down.
img.size

In [None]:
def _image_size(image_id):
    """Return the ``(width, height)`` of the training image with the given Id.

    The file is opened inside a ``with`` block so its handle is released as
    soon as the size has been read, rather than staying open until the image
    object happens to be garbage collected (one open file per row otherwise).
    """
    with Image.open(train_imgs / (image_id + '.jpg')) as im:
        return im.size


# One (width, height) pair per row, unzipped into two new columns.
train_df['width'], train_df['height'] = zip(*train_df.Id.apply(_image_size))

In [None]:
# Keep the full path of every image (as a POSIX-style string) next to its metadata.
train_df['img_path'] = [
    (train_imgs / (image_id + '.jpg')).as_posix() for image_id in train_df.Id
]
test_df['img_path'] = [
    (test_imgs / (image_id + '.jpg')).as_posix() for image_id in test_df.Id
]
train_df.head()

In [None]:
# Mean image dimensions over the training table.
avg_width = train_df['width'].mean()
avg_height = train_df['height'].mean()
print(f'Avg width: {avg_width}, Avg height: {avg_height}')

## Model building

In [None]:
import tensorflow as tf
# Show the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
from tensorflow import keras
from keras.layers import Dense, Conv2D, MaxPooling2D, Flatten
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Hold out 20 % of the rows with a fixed seed.
# NOTE(review): X_train / X_test / y_train / y_test are not referenced by any
# later cell visible in this notebook — confirm whether this split is still needed.
X_train, X_test, y_train, y_test = train_test_split(
    train_df.drop(columns='Pawpularity'),
    train_df['Pawpularity'],
    test_size=0.2,
    shuffle=True,
    random_state=13,
)
(X_train.shape, X_test.shape)

In [None]:
# Regression target: Pawpularity divided by 100 (predictions are multiplied
# by 100 again when the submission is produced).
train_df['Pawpularity_score'] = train_df['Pawpularity'] / 100
train_df['Pawpularity_score'].head()

In [None]:
# Augmentation pipeline for the training images; every pixel value is
# multiplied by 1/255 before being fed to the network.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    # NOTE(review): validation_split only takes effect when a `subset` is
    # requested from flow_from_dataframe. None is requested below, so all
    # rows of train_df are used for training.
    validation_split=0.2,
    rotation_range=20,
    # Integer shift ranges are interpreted as a number of pixels.
    width_shift_range=20,
    height_shift_range=20,
    # A float z samples the zoom factor from [1 - z, 1 + z]. The previous
    # value of 20 meant the interval [-19, 21], i.e. negative and extreme zoom
    # factors; 0.2 gives the intended +/-20 % zoom.
    zoom_range=0.2,
    horizontal_flip=True,
)

# Batches of (image, Pawpularity_score) pairs read from the paths in train_df.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_imgs,
    x_col='img_path',
    y_col='Pawpularity_score',
    target_size=(128, 128),
    batch_size=32,
    seed=13,
    class_mode='raw',  # pass y_col values through unchanged (regression target)
)

In [None]:
# Convolutional feature extractor: four Conv2D(3x3, relu) + MaxPooling2D(2x2)
# stages; only the first convolution declares the 128x128 RGB input shape.
conv_filters = (64, 64, 64, 128)
network_layers = []
for stage, n_filters in enumerate(conv_filters):
    first_layer_kwargs = {'input_shape': (128, 128, 3)} if stage == 0 else {}
    network_layers.append(Conv2D(n_filters, (3, 3), activation='relu', **first_layer_kwargs))
    network_layers.append(MaxPooling2D((2, 2)))

# Dense regression head; the sigmoid keeps the single output inside (0, 1).
network_layers.extend([
    Flatten(),
    Dense(512, activation='relu'),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model = keras.models.Sequential(network_layers)

# Mean squared error is used both as the loss and as the reported metric.
model.compile(optimizer='RMSProp', loss='mse', metrics=['mse'])

model.summary()

In [None]:
# Train for five epochs on the augmented batches from the generator.
model.fit(x=train_generator, epochs=5)

In [None]:
from keras.preprocessing import image

def predict_score(img_path):
    """Predict the Pawpularity of a single image file.

    Parameters
    ----------
    img_path : str or path-like
        Location of the image to score.

    Returns
    -------
    numpy scalar
        The model output for the image multiplied by 100, undoing the
        division by 100 applied to the training target.
    """
    # Resize exactly as the training generator does (target_size=(128, 128)).
    img = image.load_img(img_path, target_size=(128, 128))
    # Apply the same 1/255 scaling as the training generator; without it the
    # network is fed raw 0-255 pixel values it was never trained on.
    img = image.img_to_array(img) / 255.
    # The model expects a batch dimension: (1, 128, 128, 3).
    img = np.expand_dims(img, axis=0)
    # verbose=0 suppresses the per-call progress bar, which would otherwise be
    # printed once for every image scored.
    return model.predict(img, verbose=0)[0][0] * 100

In [None]:
# Score every test image and store the result in a new Pawpularity column.
test_df['Pawpularity'] = test_df['img_path'].apply(predict_score)
test_df.head()

In [None]:
# Write the Id and Pawpularity columns to submission.csv, without the index.
submission_columns = ['Id', 'Pawpularity']
test_df[submission_columns].to_csv('submission.csv', index=False)