In [None]:
import pandas as pd
# Load the training labels, turn each image id into its file name and keep
# only the file name and the Pawpularity target (the metadata flags are dropped).
data = (
    pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv", sep=',')
    .assign(Id=lambda frame: frame['Id'] + '.jpg')
    .rename(columns={'Id': 'filename'})
    .drop(columns=[
        'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
        'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
    ])
)
data

In [None]:
#data['Pawpularity'] = round(data['Pawpularity'] / 2) * 2
#data['Pawpularity'] = [int(x) for x in data['Pawpularity']]
#data

In [None]:
from sklearn.model_selection import train_test_split
# Two-stage split with a fixed seed: 20% of all rows go to validation, then
# 1% of the remaining rows are held out as a small test set.
train, val = train_test_split(data, random_state=1, test_size=0.2)
train, test = train_test_split(train, random_state=1, test_size=0.01)

# Report the size of every split.
for _label, _split in (
    ("shape train: ", train),
    ("shape val: ", val),
    ("shape test: ", test),
):
    print(_label, _split.shape)

In [None]:
# Directory holding the training images referenced by the `filename` column.
dataset_dir = '/kaggle/input/petfinder-pawpularity-score/train'
# Square network input: images are resized to 512x512.
width = height = 512
# Number of images per generator batch.
batch_size = 32

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Training images: rescaled to [0, 1] and randomly augmented (small rotations,
# shifts, shear, zoom-in, darkening and horizontal flips).
# No `validation_split` here: the validation set is carved out explicitly with
# train_test_split, and that option only takes effect when a `subset` is
# requested from flow_from_dataframe, which this notebook never does.
train_generator = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=(0.75, 1),
    shear_range=0.1,
    zoom_range=[0.75, 1],
    horizontal_flip=True
)
# Validation and test images are only rescaled, never augmented.
validation_generator = ImageDataGenerator(
    rescale=1.0 / 255
)
test_generator = ImageDataGenerator(
    rescale=1.0 / 255
)

In [None]:
# Arguments shared by the three iterators. Each one reads the image named in
# `filename` from `dataset_dir` and yields it with its Pawpularity value.
flow_kwargs = dict(
    directory=dataset_dir,
    x_col="filename",
    y_col="Pawpularity",
    class_mode="raw",  # "raw" yields the numeric column as-is (regression target)
    # Keras expects (height, width); this also matches the model's
    # input_shape=(height, width, 3).
    target_size=(height, width),
    batch_size=batch_size
)

# Each name is rebound from its ImageDataGenerator to the iterator built from it.
train_generator = train_generator.flow_from_dataframe(dataframe=train, **flow_kwargs)
validation_generator = validation_generator.flow_from_dataframe(dataframe=val, **flow_kwargs)
test_generator = test_generator.flow_from_dataframe(dataframe=test, **flow_kwargs)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
# Both callbacks watch the validation MAPE and treat lower values as better.
monitor_settings = dict(
    monitor="val_mean_absolute_percentage_error",
    mode="min"
)

# Keep on disk only the weights of the best epoch seen so far.
model_checkpoint_callback = ModelCheckpoint(
    'efficientNet_reg.h5',
    save_best_only=True,
    save_freq="epoch",
    verbose=0,
    **monitor_settings
)

# Stop once the monitored metric has not improved by at least 1 (MAPE is
# expressed in percent) for 10 consecutive epochs, then restore the best weights.
early_stopping_callback = EarlyStopping(
    min_delta=1,
    patience=10,
    restore_best_weights=True,
    verbose=2,
    **monitor_settings
)

callbacks = [early_stopping_callback, model_checkpoint_callback]

In [None]:
import sys
import os
# Extend the module search path with two attached Kaggle input directories so
# that the `efficientnet_v2` package can be imported below.
# NOTE(review): presumably only one of the two directories actually provides
# `efficientnet_v2`; the first is given priority via insert(0, ...) — confirm.
sys.path.insert(0, "/kaggle/input/efnetv2src/efficientnet-v2-keras-main")
sys.path.append('../input/tfkeras-efficientnetsv2/')
from efficientnet_v2 import EfficientNetV2XL

In [None]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization, Dropout
from tensorflow.keras.metrics import MeanAbsoluteError, MeanAbsolutePercentageError
import tensorflow_addons as tfa

# EfficientNetV2-XL backbone without its classification head, initialised from
# the local "notop" weight file.
backbone = EfficientNetV2XL(
    include_top=False,
    weights='../input/tfkeras-efficientnetsv2/21_ft1k_notop/efficientnetv2-xl-21k-ft1k_notop.h5', 
    input_shape=(height, width, 3)
)

# Freeze every backbone layer: only the new head below is trained.
for layer in backbone.layers:
    layer.trainable = False

# Regression head: pooled features -> batch norm -> dropout -> linear output.
x = GlobalAveragePooling2D(name="avg_pool")(backbone.output)
x = BatchNormalization()(x)
x = Dropout(0.2, name="top_dropout")(x)
# The target is a single scalar per image (class_mode="raw" on the Pawpularity
# column) and inference only reads the first output value, so the head needs
# exactly one unit. The previous 50-unit head trained 50 redundant copies of
# the same regressor.
outputs = Dense(1, name="pred")(x)

efficientnet = Model(inputs=backbone.inputs, outputs=outputs)

# Rectified Adam wrapped in Lookahead.
radam = tfa.optimizers.RectifiedAdam(learning_rate=0.001)
optimizer = tfa.optimizers.Lookahead(radam, sync_period=6, slow_step_size=0.5)

# MAE is optimised; MAE and MAPE are tracked (the callbacks monitor val MAPE).
efficientnet.compile(
    optimizer=optimizer,
    loss="mean_absolute_error",
    metrics=[MeanAbsoluteError(), MeanAbsolutePercentageError()]
)

In [None]:
# Upper bound on the number of epochs; early stopping normally ends training sooner.
epochs = 100
# The batch size is fixed when the generators are created (batch_size passed to
# flow_from_dataframe) and fit() takes none for generator input, so it is not
# reassigned here: doing so had no effect on training and silently changed the
# value seen by the generator cell on a re-run.
history_efficientnet = efficientnet.fit(
    train_generator,
    epochs=epochs,
    validation_data=validation_generator,
    callbacks=callbacks,
    workers=6  # guess
)

In [None]:
# Mean Pawpularity of the training split; this value is used further down as
# the constant prediction of the mean baseline.
train["Pawpularity"].mean()

In [None]:
# MAPE obtained on the validation split when always predicting the mean
# Pawpularity of the training split; stored as a plain NumPy scalar.
mean_baseline = MeanAbsolutePercentageError()(
    val["Pawpularity"],
    train["Pawpularity"].mean(),
).numpy()

In [None]:
# matplotlib was used below without ever being imported (NameError on a fresh kernel).
import matplotlib.pyplot as plt
import seaborn as sns

# Long-format frame with one row per (epoch, split). After reset_index() the
# "index" column holds the epoch number within each split.
history = history_efficientnet.history
df = pd.concat(
    [
        pd.DataFrame({
            "MAPE": history["mean_absolute_percentage_error"],
            "type": "training"
        }),
        pd.DataFrame({
            "MAPE": history["val_mean_absolute_percentage_error"],
            "type": "validation"
        }),
    ],
    axis=0
).reset_index()

# One panel per split, MAPE against epoch.
grid = sns.relplot(
    data=df,
    x="index",
    y="MAPE",
    col="type",
    kind="line",
    legend=False
)
grid.set(ylim=(20, 100))
for ax in grid.axes.flat:
    # Dashed reference line: MAPE of the constant mean-prediction baseline.
    ax.axhline(
        y=mean_baseline, color="lightcoral", linestyle="dashed"
    )
    ax.set(xlabel="Epoch")
plt.legend(labels=["efficientNet_reg", "mean_baseline"])
plt.show()

In [None]:
import tensorflow as tf
def preprocess(image):
    """Scale raw 8-bit pixel values to the range the model was trained on.

    The training, validation and test generators all feed the network images
    multiplied by 1/255, so inference on raw images must apply the same
    scaling. The previous (x - 128) / 128 mapping produced values in [-1, 1],
    a range the trained head never saw.

    Args:
        image: array or tensor of raw pixel values in [0, 255].

    Returns:
        A float32 tensor with the same shape, with values in [0, 1].
    """
    return tf.cast(image, dtype=tf.float32) / 255.0

In [None]:
import numpy as np
# matplotlib was used in this cell without ever being imported.
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 12))
batch = next(test_generator)  # one batch: (images, targets)
images, targets = batch[0], batch[1]
n_shown = min(12, len(images))  # the 4x3 grid holds at most 12 images

# The generator already rescales images by 1/255 exactly as during training,
# so they are fed to the model as they are (rescaling them a second time
# would push every pixel to roughly the same value). One predict call covers
# all displayed images.
preds = efficientnet.predict(images[:n_shown])

for i in range(n_shown):
    plt.subplot(4, 3, i + 1)  # 4x3 grid
    image = images[i]  # i-th image
    pawpularity = targets[i]  # i-th true Pawpularity
    prediction = preds[i].flatten()[0]
    print('pawpularity=', pawpularity, '\tprediction=', round(prediction, 2))
    # Absolute error relative to the true value, in percent.
    diff = prediction - pawpularity
    percentDiff = (diff / pawpularity) * 100
    absPercentDiff = np.abs(percentDiff)
    plt.title(
        "Pawpularity: " + str(pawpularity)
        + "\npred: " + str(round(prediction, 2))
        + " (err=" + str(int(absPercentDiff)) + "%)"
    )
    plt.imshow(image)
    plt.axis("off")
plt.show()
plt.close()

In [None]:
import os
from PIL import Image
test_dir = '/kaggle/input/petfinder-pawpularity-score/test'
ids = []
pawpularities = []
for test_image in os.listdir(test_dir):
    image_path = os.path.join(test_dir, test_image)
    # The id is the file name without its extension.
    ids.append(os.path.splitext(test_image)[0])
    # Close the file handle once the pixels are loaded. Force 3 channels so
    # grayscale or RGBA files still give a (height, width, 3) array.
    with Image.open(image_path) as raw_image:
        resized = raw_image.convert("RGB").resize((width, height))  # PIL takes (width, height)
        pixels = np.asarray(resized)  # shape (height, width, 3)
    # Add the batch axis, then apply the inference-time pixel scaling.
    img = preprocess(pixels[np.newaxis, ...])
    preds = efficientnet.predict(img)
    prediction = preds.flatten()[0]
    pawpularities.append(prediction)

# One row per test image, written in the competition's submission format.
submission_dict = {
    'Id': ids,
    'Pawpularity': pawpularities
}
submission_df = pd.DataFrame(submission_dict)
print(submission_df)
submission_df.to_csv('submission.csv', index=False, sep=',')