In [None]:
import pandas as pd
# Tabular metadata flags that are not needed for the image pipeline.
metadata_columns = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory', 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']

# Load the training labels, turn each Id into its image file name and keep
# only the file name and the target score.
data = (
    pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv", sep=',')
    .assign(Id=lambda frame: frame['Id'] + '.jpg')
    .rename(columns={'Id': 'filename'})
    .drop(columns=metadata_columns)
)
data

Copy the dataset files into a directory structure with one sub-directory per class, as required for classification.

In [None]:
# Source images and the settings shared by the rest of the notebook.
dataset_dir = '/kaggle/input/petfinder-pawpularity-score/train'
width, height = 512, 512
batch_size = 32
nb_classes = 5

import os
path = "/kaggle/working/data"
# exist_ok keeps the cell re-runnable without a FileExistsError.
os.makedirs(path, exist_ok=True)

from shutil import copyfile
# Width of one score bin; each bin is the half-open interval (mini, maxi].
bin_width = int(100 / nb_classes)
for i in range(nb_classes):
    mini = i * bin_width
    # The last bin always ends at 100 so that no score is dropped when 100 is
    # not divisible by nb_classes (identical to the plain formula otherwise).
    maxi = 100 if i == nb_classes - 1 else (i + 1) * bin_width
    class_data = data[(data['Pawpularity'] > mini) & (data['Pawpularity'] <= maxi)]
    # The directory name "<mini>-<maxi>" becomes the class label.
    class_dir = os.path.join(path, str(mini) + '-' + str(maxi))
    os.makedirs(class_dir, exist_ok=True)
    for filename in class_data['filename']:
        src = os.path.join(dataset_dir, filename)
        dst = os.path.join(class_dir, filename)
        copyfile(src, dst)

In [None]:
from tensorflow.keras.utils import image_dataset_from_directory
# Options common to both subsets; an identical seed and validation_split are
# passed to both calls so the two subsets come from the same split.
split_options = dict(
    validation_split=0.2,
    seed=99,
    image_size=(height, width),
    batch_size=batch_size,
)
train_ds = image_dataset_from_directory(path, subset="training", **split_options)
val_ds = image_dataset_from_directory(path, subset="validation", **split_options)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
# Accuracy must be maximised, so both callbacks use mode="max". They monitor
# the validation accuracy ("val_accuracy" is logged because fit() receives
# validation_data and the model is compiled with the 'accuracy' metric), so
# stopping and checkpointing follow held-out performance rather than the
# training fit.
early_stopping_callback = EarlyStopping(
    monitor="val_accuracy",
    min_delta=0.01,  # accuracy is in [0, 1]: gains below 1% count as no progress
    patience=10,  # wait at most 10 epochs without progress
    verbose=2,
    mode="max",
    restore_best_weights=True
)
# Keep only the best weights seen so far, checked at the end of every epoch.
model_checkpoint_callback = ModelCheckpoint(
    'efficientNet_reg.h5',
    monitor="val_accuracy",
    verbose=0,
    save_best_only=True,
    mode="max",
    save_freq="epoch"
)
callbacks = [early_stopping_callback, model_checkpoint_callback]

In [None]:
import sys
import os
# Extend the module search path with two input directories before importing
# `efficientnet_v2` (presumably attached Kaggle datasets holding the
# EfficientNetV2 Keras sources - TODO confirm which one provides the module;
# the first entry is inserted at the front and therefore takes precedence).
sys.path.insert(0, "/kaggle/input/efnetv2src/efficientnet-v2-keras-main")
sys.path.append('../input/tfkeras-efficientnetsv2/')
from efficientnet_v2 import EfficientNetV2XL

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom, RandomTranslation, RandomContrast
# Random augmentations, applied in this order to every input image.
augmentation_layers = [
    RandomFlip("horizontal", input_shape=(height, width, 3)),
    RandomRotation(factor=0.4, fill_mode="wrap"),
    RandomZoom(0.2),
    RandomTranslation(height_factor=0.2, width_factor=0.2, fill_mode="wrap"),
    RandomContrast(factor=0.2),
]
data_augmentation = Sequential(augmentation_layers)

In [None]:
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, BatchNormalization, Dropout
from tensorflow.keras.metrics import MeanAbsoluteError, MeanAbsolutePercentageError
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import tensorflow_addons as tfa

# Backbone without its classification head, loaded from local weights and
# frozen so that only the new head is trained.
backbone = EfficientNetV2XL(
    include_top=False,
    weights='../input/tfkeras-efficientnetsv2/21_ft1k_notop/efficientnetv2-xl-21k-ft1k_notop.h5',
    input_shape=(height, width, 3),
    classes=nb_classes,
)
backbone.trainable = False

# Functional graph: augmentation -> frozen backbone -> pooled features -> head.
inputs = Input(shape=(height, width, 3))
augmented = data_augmentation(inputs)
features = backbone(augmented, training=False)  # backbone is called in inference mode
pooled = GlobalAveragePooling2D()(features)
normalized = BatchNormalization()(pooled)
regularized = Dropout(0.2)(normalized)
class1 = Dense(1920, activation='relu')(regularized)
outputs = Dense(nb_classes, activation='softmax')(class1)
efficientnet = Model(inputs, outputs)

# Rectified Adam wrapped in Lookahead.
radam = tfa.optimizers.RectifiedAdam(learning_rate=0.01)
optimizer = tfa.optimizers.Lookahead(radam, sync_period=6, slow_step_size=0.5)

# Labels are integer class indices, hence the sparse categorical loss.
efficientnet.compile(
    optimizer=optimizer,
    loss=SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Upper bound on the number of epochs passed to fit(), together with the
# callbacks list.
epochs = 100
fit_options = dict(
    validation_data=val_ds,
    epochs=epochs,
    callbacks=callbacks,
)
history_efficientnet = efficientnet.fit(train_ds, **fit_options)

In [None]:
# Show the per-epoch values recorded by fit() (a dict keyed by metric name).
history_efficientnet.history

In [None]:
import matplotlib.pyplot as plt

import seaborn as sns

# One frame per curve; both use the "accuracy" column so they can share the
# y axis, and "type" selects the panel.
history = history_efficientnet.history
s1 = pd.DataFrame({
    "accuracy": history["accuracy"],
    "type": "training"
})
s2 = pd.DataFrame({
    # The validation panel must read the validation metric, not the training
    # one again.
    "accuracy": history["val_accuracy"],
    "type": "validation"
})
# After reset_index, the "index" column holds the epoch number of each row.
df = pd.concat([s1, s2], axis=0).reset_index()
grid = sns.relplot(
    data=df,
    x="index",
    y="accuracy",
    col="type",
    kind="line"
)
for ax in grid.axes.flat:
    ax.set(xlabel="Epoch")
plt.show()

In [None]:
import tensorflow as tf
def preprocess(image):
    """Cast `image` to float32 and map it with (value - 128) / 128.

    A value of 0 becomes -1.0 and a value of 128 becomes 0.0.
    """
    pixels = tf.cast(image, dtype=tf.float32)
    centered = pixels - 128.00
    return centered / 128.00

In [None]:
# Class labels of the training dataset; the inference cells index this list
# with the position of the highest predicted probability.
train_ds.class_names

In [None]:
import os
import numpy as np
from PIL import Image
# Classify every test image and record its Id and predicted class label.
test_dir = '/kaggle/input/petfinder-pawpularity-score/test'
ids = []
pawpularities = []
for test_image in os.listdir(test_dir):
    image_path = os.path.join(test_dir, test_image)
    # The Id is the file name without its extension.
    id_image = os.path.splitext(test_image)[0]
    ids.append(id_image)
    # The context manager closes the file; convert('RGB') guarantees three
    # channels even for grayscale, palette or RGBA files.
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert('RGB').resize((width, height))
    # PIL arrays are laid out (height, width, channels); add the batch axis.
    # The pixels are left unscaled on purpose: the model was fitted on
    # train_ds, which feeds unscaled pixel values and the network defined in
    # this notebook adds no rescaling layer, so applying `preprocess` here
    # would give the model inputs it was never trained on.
    batch = np.asarray(rgb_image, dtype=np.float32)[np.newaxis, ...]
    preds = efficientnet.predict(batch)
    top_indice = int(np.argmax(preds[0]))
    result = train_ds.class_names[top_indice]
    pawpularities.append(result)

In [None]:
import time

import numpy as np
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, ParameterGrid

# Tabular baseline: predict the score from the metadata flags only.
data = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv", sep=',')
y = data['Pawpularity']
X = data.drop(columns=['Id', 'Pawpularity'])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=1
)

# Time model construction, training, prediction and the RMSE computation.
start = time.time()

catboost_options = dict(
    depth=5,
    learning_rate=0.01,
    n_estimators=50,
    loss_function='RMSE',
    task_type='CPU',
    verbose=False,
)
cb = CatBoostRegressor(**catboost_options)

cb.fit(X_train, y_train)
y_pred = cb.predict(X_test)

# Root mean squared error on the hold-out split.
cb_rmse = np.sqrt(mse(y_test, y_pred))
print("RMSE for CatBoost: ", np.mean(cb_rmse))

end = time.time()
diff = end - start
print('Execution time for CatBoost (in Seconds):', diff)

print("r2_score : {:.2f}".format(r2_score(y_test, y_pred)))

In [None]:
# Tabular features of the test set (everything except the Id).
sub_df = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/test.csv", sep=',')
X_sub = sub_df.drop(columns=['Id'])

# Image model: predict a score class ("<mini>-<maxi>") for every test image.
test_dir = '/kaggle/input/petfinder-pawpularity-score/test'
ids = []
pawpularities = []
for test_image in os.listdir(test_dir):
    image_path = os.path.join(test_dir, test_image)
    # The Id is the file name without its extension.
    id_image = os.path.splitext(test_image)[0]
    ids.append(id_image)
    # The context manager closes the file; convert('RGB') guarantees three
    # channels even for grayscale, palette or RGBA files.
    with Image.open(image_path) as raw_image:
        rgb_image = raw_image.convert('RGB').resize((width, height))
    # PIL arrays are laid out (height, width, channels); add the batch axis.
    # The pixels are left unscaled on purpose: the model was fitted on
    # train_ds, which feeds unscaled pixel values and the network defined in
    # this notebook adds no rescaling layer, so applying `preprocess` here
    # would give the model inputs it was never trained on.
    batch = np.asarray(rgb_image, dtype=np.float32)[np.newaxis, ...]
    preds = efficientnet.predict(batch)
    top_indice = int(np.argmax(preds[0]))
    result = train_ds.class_names[top_indice]
    pawpularities.append(result)

img_dict = {
    'Id': ids,
    'Pawpularity': pawpularities
}
img_df = pd.DataFrame(img_dict)
print(img_df)

# Tabular model: rounded CatBoost predictions become the initial submission
# scores; the metadata columns are dropped so only Id and Pawpularity remain.
print("shape X_sub: ", X_sub.shape)
y_sub = cb.predict(X_sub)
y_sub = [round(x) for x in y_sub]
sub_df['Pawpularity'] = y_sub
sub_df = sub_df.drop(columns=['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory', 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'])

In [None]:
# Blend both models: when the CatBoost score falls outside the class interval
# predicted by the image model, move it halfway towards the nearest bound.
print(sub_df)
for id_img in sub_df['Id']:
    # Class label predicted for this image, formatted as "<mini>-<maxi>".
    result = img_df.loc[img_df['Id'] == id_img, 'Pawpularity'].values[0]
    mini, maxi = (int(bound) for bound in result.split('-'))
    row_mask = sub_df['Id'] == id_img
    p = sub_df.loc[row_mask, 'Pawpularity'].values[0]
    print(p, mini, maxi)
    if p < mini:
        p = int(round(p + (mini - p) / 2))
    if p > maxi:
        p = int(round(p - (p - maxi) / 2))
    print("=>", p)
    # .loc is the accessor that accepts a boolean row mask; .at only takes a
    # single row label and a single column label.
    sub_df.loc[row_mask, 'Pawpularity'] = p

print(sub_df)
sub_df.to_csv('submission.csv', index=False, sep=',')

In [None]:
import shutil
import os

# Remove the intermediate artifacts so that only the submission file is left.
# Every removal is guarded so the cell also succeeds when an artifact was
# never created (partial run) or was already removed (re-run).
checkpoint_file = 'efficientNet_reg.h5'
if os.path.exists(checkpoint_file):
    os.remove(checkpoint_file)
for leftover_dir in ("/kaggle/working/data", "catboost_info"):
    if os.path.isdir(leftover_dir):
        shutil.rmtree(leftover_dir)