In [None]:
from tensorflow.keras.utils import Sequence, to_categorical
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout, Input, Concatenate, AveragePooling2D, GlobalAveragePooling2D, BatchNormalization
import cv2
import numpy as np
import math
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tqdm import tqdm
import os
from keras.callbacks import EarlyStopping

In [None]:
# --- Image locations -------------------------------------------------------
# Directories the dataset reads "<Id>.jpg" files from.
TRAIN_DIR, TEST_DIR = (
    "../input/petfinder-pawpularity-score/train",
    "../input/petfinder-pawpularity-score/test",
)

# --- Image / batch geometry ------------------------------------------------
# Every image is resized to IMG_HEIGHT x IMG_WIDTH when loaded; the same
# values (plus CHANNELS) define the model's input shape.
IMG_WIDTH = IMG_HEIGHT = 456
CHANNELS, BATCH_SIZE = 3, 32

# --- Tabular metadata ------------------------------------------------------
# Column names selected from the DataFrame as extra model inputs.
DENSE_FEATURES = [
    'Subject Focus', 'Eyes', 'Face', 'Near',
    'Action', 'Accessory', 'Group', 'Collage',
    'Human', 'Occlusion', 'Info', 'Blur',
]
# When False, batches contain images only and DENSE_FEATURES is unused.
INCLUDE_DENSE_FEATURES = False

# --- Training --------------------------------------------------------------
# Patience (in epochs) handed to the EarlyStopping callbacks.
PATIENCE = 3

In [None]:
import pandas as pd

# Read the train and test CSV files into DataFrames; later cells read the
# `Id` column from both and `Pawpularity` from the training frame.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/petfinder-pawpularity-score/train.csv",
        "../input/petfinder-pawpularity-score/test.csv",
    )
)
# Preview the first rows of the training frame.
df_train.head()

In [None]:
import matplotlib.pyplot as plt

def get_image_size_distribution(image_dir=None):
    """Collect the pixel height and width of every readable image in a directory.

    Args:
        image_dir: Directory to scan. Defaults to ``TRAIN_DIR`` when omitted.

    Returns:
        dict with two equally long lists, ``"height"`` and ``"width"``, holding
        one entry per image that could be decoded.
    """
    if image_dir is None:
        image_dir = TRAIN_DIR

    data = {"height": [], "width": []}
    for file in tqdm(os.listdir(image_dir)):
        image = cv2.imread(os.path.join(image_dir, file))
        if image is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None instead of raising; skip it rather than crash on `.shape`.
            continue
        # Only the first two dimensions (rows, columns) are needed.
        height, width = image.shape[:2]
        data["height"].append(height)
        data["width"].append(width)
    return data

# Histogram of training-image heights and widths, used to choose a resize target.
sizes = get_image_size_distribution()
df_distrib = pd.DataFrame(sizes)

# One axes per column: handing a single axes to DataFrame.hist for a
# two-column frame makes pandas clear the figure and emit a UserWarning.
fig, axes = plt.subplots(1, 2, figsize=(10, 10))
df_distrib.hist(ax=axes);

In [None]:
import math
from random import shuffle
import numpy as np

class PawpularDataset(Sequence):
    """Batch generator over pet images for Keras ``fit`` / ``predict``.

    Each item is one batch of up to ``BATCH_SIZE`` samples. Images are read as
    ``<Id>.jpg`` from ``TRAIN_DIR`` (subsets ``"train"`` and ``"validation"``)
    or ``TEST_DIR`` (subset ``"test"``) and resized to
    ``(IMG_HEIGHT, IMG_WIDTH)``. When ``INCLUDE_DENSE_FEATURES`` is true, the
    ``DENSE_FEATURES`` columns of ``df`` are returned alongside the images.
    """

    LABELLED_SUBSETS = ("train", "validation")
    SUBSETS = LABELLED_SUBSETS + ("test",)

    def __init__(self, df, subset, shuffle):
        """Store the sample ids (and labels, when available) of ``df``.

        Args:
            df: DataFrame with an ``Id`` column; must also have a
                ``Pawpularity`` column for the labelled subsets.
            subset: One of ``"train"``, ``"validation"`` or ``"test"``.
            shuffle: If true, samples of the ``"train"`` subset are reshuffled
                at construction time and at the end of every epoch.

        Raises:
            ValueError: If ``subset`` is not one of the supported names.
        """
        if subset not in self.SUBSETS:
            # Fail early: an unknown subset would otherwise leave labels and
            # image paths undefined and surface as an unrelated error later.
            raise ValueError(
                f"subset must be one of {self.SUBSETS}, got {subset!r}")
        super().__init__()
        self.subset = subset
        if subset in self.LABELLED_SUBSETS:
            self.y = df.Pawpularity.tolist()
        else:
            self.y = []
        self.df = df
        self.ids = df.Id.tolist()
        self.batch_size = BATCH_SIZE
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return math.ceil(len(self.ids)/self.batch_size)

    def shuffle_samples(self):
        """Shuffle ids and labels together so each id keeps its own label."""
        samples = list(zip(self.ids, self.y))
        if not samples:
            # Nothing to shuffle; unpacking zip(*[]) below would raise.
            return
        shuffle(samples)
        self.ids, self.y = map(list, zip(*samples))

    def __getitem__(self, index):
        """Return batch number ``index`` in the form produced by ``process_x_and_y``."""
        start = index * self.batch_size
        stop = start + self.batch_size
        return self.process_x_and_y(self.ids[start:stop], self.y[start:stop])

    def load_image(self, path):
        """Load one image file, resized to ``(IMG_HEIGHT, IMG_WIDTH)``, as an array."""
        return img_to_array(load_img(path, target_size=(IMG_HEIGHT, IMG_WIDTH)))

    def load_images(self, ids):
        """Load the images for ``ids`` and stack them into one array."""
        image_dir = TEST_DIR if self.subset == "test" else TRAIN_DIR
        paths = [f"{image_dir}/{id_}.jpg" for id_ in ids]

        return np.asarray([self.load_image(path) for path in paths])

    def load_dense_features(self, ids):
        """Return the ``DENSE_FEATURES`` rows for ``ids`` as a float32 array.

        Rows are selected by label in the order of ``ids``. A boolean ``isin``
        mask would return them in DataFrame order instead, which no longer
        matches the image / label order once the samples have been shuffled.
        """
        features = self.df.set_index("Id").loc[list(ids), DENSE_FEATURES]

        return features.to_numpy(dtype="float32")

    def process_x_and_y(self, X, y):
        """Turn a batch of ids (and labels) into model inputs (and targets).

        Args:
            X: Ids of the samples in the batch.
            y: Labels of the samples in the batch; ignored for ``"test"``.

        Returns:
            ``(inputs, targets)`` for the labelled subsets, ``(inputs,)`` for
            ``"test"``. ``inputs`` is the image array, or
            ``[images, dense_features]`` when ``INCLUDE_DENSE_FEATURES`` is true.
        """
        images = self.load_images(X)
        if INCLUDE_DENSE_FEATURES:
            dense_features = self.load_dense_features(X)
            X = [images, dense_features]
        else:
            X = images
        if self.subset in self.LABELLED_SUBSETS:
            y = tf.convert_to_tensor(y)
            return X, y
        return (X,)

    def on_epoch_end(self):
        """Reshuffle the samples, but only for a shuffling ``"train"`` dataset."""
        if self.shuffle and self.subset == "train":
            self.shuffle_samples()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the labelled rows for validation. A fixed random_state keeps
# the split identical across kernel restarts, so runs are comparable.
df_train_split, df_validation_split = train_test_split(
    df_train, test_size=0.15, random_state=42)

# Only the training dataset is shuffled; validation / test keep DataFrame order.
train = PawpularDataset(shuffle=True, df=df_train_split, subset="train")
validation = PawpularDataset(shuffle=False, df=df_validation_split, subset="validation")
test = PawpularDataset(shuffle=False, df=df_test, subset="test")

In [None]:
from tensorflow.keras.applications import EfficientNetB0, EfficientNetB5, EfficientNetB7

# Keras image tensors are (height, width, channels), the same order that
# PawpularDataset.load_image passes to load_img(target_size=...).
input_picture = Input(shape=(IMG_HEIGHT, IMG_WIDTH, CHANNELS))

# EfficientNetB5 without its classification top, initialised from a local
# weights file.
backbone = EfficientNetB5(
    weights='../input/effnetsb0-b7-notops/efficientnetb5_notop.h5',
    include_top=False,
    input_tensor=input_picture)
# Freeze the backbone so the first training stage only fits the new head.
backbone.trainable = False

# Regression head: pooled backbone features -> single output value.
x = GlobalAveragePooling2D()(backbone.output)
x = BatchNormalization()(x)
x = Dropout(0.1)(x)
# NOTE(review): Dense(64) has no activation, so the two Dense layers compose
# into a single linear map — confirm this is intended.
output = Dense(64)(x)
output = Dense(1)(output)
model = Model(inputs=input_picture, outputs=output)

In [None]:
# First-stage training setup: Adam at learning rate 1e-2, mean squared error
# as the loss, RMSE reported as a metric.
opt = keras.optimizers.Adam(learning_rate=1e-2)
compile_settings = dict(
    optimizer=opt,
    loss=keras.losses.MeanSquaredError(),
    metrics=[keras.metrics.RootMeanSquaredError()],
)
model.compile(**compile_settings)

In [None]:
# Stage 1: fit for at most 15 epochs. Stop once val_loss has not improved for
# PATIENCE epochs and roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=PATIENCE,
    restore_best_weights=True,
)
model.fit(
    train,
    validation_data=validation,
    epochs=15,
    verbose=2,
    callbacks=[early_stopping],
)

In [None]:
def unfreeze_model(model, num_layers=20, learning_rate=1e-4):
    """Mark the last layers of ``model`` trainable and recompile it for fine-tuning.

    Args:
        model: Keras model to modify in place.
        num_layers: How many layers, counted from the end of ``model.layers``,
            to consider for unfreezing. Values <= 0 unfreeze nothing.
        learning_rate: Learning rate of the Adam optimizer used to recompile.

    Returns:
        None. ``model`` is updated and recompiled in place with mean squared
        error as the loss and RMSE as a metric.
    """
    # Guard the zero case explicitly: ``layers[-0:]`` would select *every* layer.
    top_layers = model.layers[-num_layers:] if num_layers > 0 else []
    for layer in top_layers:
        # BatchNormalization layers are skipped, so their trainable flag is
        # left exactly as it was.
        if not isinstance(layer, BatchNormalization):
            layer.trainable = True

    # Recompile after changing the trainable flags, with the fine-tuning
    # learning rate.
    opt = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(
        optimizer=opt,
        loss=keras.losses.MeanSquaredError(),
        metrics=[keras.metrics.RootMeanSquaredError()]
    )

In [None]:
# Stage 2: unfreeze part of the network, then continue training with the same
# schedule (at most 15 epochs, early stopping on val_loss, best weights kept).
unfreeze_model(model)

fine_tune_early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=PATIENCE,
    restore_best_weights=True,
)
model.fit(
    train,
    validation_data=validation,
    epochs=15,
    verbose=2,
    callbacks=[fine_tune_early_stopping],
)

In [None]:
# Predict on the test dataset, which is built with shuffle=False so rows stay
# in df_test order.
predictions = model.predict(test)

# Build the submission as its own frame instead of mutating / overwriting
# df_test, so re-running earlier cells still sees the original test columns.
# ravel() flattens the (n_samples, 1) model output into one value per row.
submission = pd.DataFrame({
    "Id": df_test["Id"],
    "Pawpularity": predictions.ravel(),
})
submission.to_csv("submission.csv", index=False)