In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from PIL import Image
from tensorflow import keras
from tensorflow.keras import Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError



# Apply seaborn's "whitegrid" style and "pastel" colour palette as the plotting theme.
sns.set_theme(style="whitegrid", palette="pastel")


In [None]:
# Root directory of the competition data, given as a relative path.
# The CSV tables and the train/ and test/ image folders are all read from under it.
path = Path('../input/petfinder-pawpularity-score')

In [None]:
# List the top-level entries of the data directory.
list(path.glob('*'))

In [None]:
# Load the training table, using the 'Id' column as the index.
df = pd.read_csv(path / 'train.csv', index_col='Id')

In [None]:
# Preview the first ten rows of the training table.
df.head(10)

In [None]:

# Report how many training records were loaded, then show the first rows again.
print(f"Record Count: {df.shape[0]}")
df.head()

In [None]:
# Histogram of the regression target over the whole training table.
fig, ax = plt.subplots(figsize=(14, 5))
sns.histplot(data=df, x='Pawpularity', ax=ax)
# Both axis labels are deliberately blanked; the title alone describes the figure.
ax.set(
    xlabel='',
    ylabel='',
    title='Pawpularity Score Distribution',
)
plt.show()

In [None]:
# Draw one record at random (no random_state is passed, so the pick can differ
# between runs) and take its Id from the index.
sampled = df.sample().index[0]
# The image file for a record is looked up as <Id>.jpg inside the train/ folder.
example_path = path / 'train' / (sampled + '.jpg')
im = Image.open(example_path)
# Leaving the image as the last expression makes the notebook render it.
im

In [None]:
# Image-pipeline constants, reused by the model input layer and the test generator.
IMG_SIZE = 224       # images are resized to IMG_SIZE x IMG_SIZE
RESCALE = 1 / 255    # multiplier handed to the generator's `rescale` option

train_path = path / 'train'

# The generator looks files up by name: each record's file is <Id>.jpg.
df.loc[:, 'filename'] = df.index + '.jpg'

# A single generator object serves both subsets, with a 0.2 validation split.
datagen = ImageDataGenerator(rescale=RESCALE, validation_split=0.2)

# Settings shared by the training and validation iterators.
gen_kwargs = {
    'dataframe': df,
    'directory': train_path,
    'x_col': 'filename',
    'y_col': 'Pawpularity',
    'batch_size': 32,
    'seed': 42,
    'shuffle': True,
    'class_mode': 'raw',  # numeric target column is used directly as the label
    'target_size': (IMG_SIZE, IMG_SIZE),
}
train_generator = datagen.flow_from_dataframe(subset="training", **gen_kwargs)
valid_generator = datagen.flow_from_dataframe(subset="validation", **gen_kwargs)

In [None]:
# Small CNN regressor: three stride-2 convolution + max-pool stages with growing
# filter counts, followed by a dense head that ends in a single output unit.
inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

X = inputs
for n_filters in (64, 128, 256):
    X = Conv2D(filters=n_filters, kernel_size=3, strides=2, padding='same', activation='relu')(X)
    X = MaxPool2D(pool_size=(2, 2))(X)

X = Flatten()(X)

X = Dense(512, activation='relu')(X)
X = Dropout(0.3)(X)

# No activation is set on the last layer, so the prediction is an unbounded scalar.
outputs = Dense(1)(X)

model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the layer-by-layer architecture of the model built above.
model.summary()

In [None]:
# Step size for the Adam optimizer; the trailing note keeps the author's other value.
LEARNING_RATE = 1e-4  # 1E-3

# Stop once validation loss has gone 5 epochs without improving, and roll the
# model back to the weights from its best epoch.
early_stopper = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
import tensorflow


In [None]:
# Loss option for model.compile: tensorflow.keras.losses.MeanAbsoluteError (MAE)

In [None]:
# Compile for regression: optimise mean absolute error and report RMSE alongside it.
# The loss class is reached through the `keras` namespace from the notebook's top-level
# imports, so this cell does not rely on a separate `import tensorflow` cell having run.
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss=keras.losses.MeanAbsoluteError(),  # alternative noted by the author: MeanSquaredError()
    metrics=[RootMeanSquaredError()]
)

In [None]:
# Upper bound on training passes; the early-stopping callback may end the run sooner.
epochs = 10

# Train on the training iterator and score on the validation iterator each epoch.
history = model.fit(
    x=train_generator,
    validation_data=valid_generator,
    callbacks=[early_stopper],
    epochs=epochs,
)


In [None]:
# Turn the values recorded during fit() into a table; the plotting cell reads the loss
# and RMSE columns (training and validation) from it.
df_history = pd.DataFrame(history.history)
df_history.head()


In [None]:
# Switches the global matplotlib style for this and any later figures.
plt.style.use('ggplot')

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(20, 10), sharex=True)

# (title, training column, validation column) for each panel, top to bottom.
panels = [
    ('Loss Per Epoch', 'loss', 'val_loss'),
    ('RMSE Per Epoch', 'root_mean_squared_error', 'val_root_mean_squared_error'),
]
for ax, (title, train_col, valid_col) in zip(axes, panels):
    ax.plot(df_history[train_col], label='Training')
    ax.plot(df_history[valid_col], label='Validation')
    ax.set_title(title)

# Label the ticks 1..N instead of the 0-based row positions of the history table.
n_epochs = len(df_history)
plt.xticks(range(n_epochs), range(1, n_epochs + 1))

# One figure-level legend, built from the line handles of the last panel
# (both panels use the same two labels).
handles, labels = ax.get_legend_handles_labels()
fig.legend(handles, labels, bbox_to_anchor=(0.9, 0.9),
           fancybox=True, shadow=True)
plt.show()

In [None]:
# Score the trained model on the validation iterator (loss plus the compiled RMSE metric).
model.evaluate(valid_generator)

In [None]:
# Test-time input pipeline: same rescaling and image size as training, but with
# no label column and shuffling turned off.
df_test = pd.read_csv(path / 'test.csv', index_col='Id')
test_path = path / 'test'

# As with the training table, each record's image file is <Id>.jpg.
df_test.loc[:, 'filename'] = df_test.index + '.jpg'

# Rebinds `datagen` to a generator without a validation split.
datagen = ImageDataGenerator(rescale=RESCALE)

test_gen_kwargs = {
    'dataframe': df_test,
    'directory': test_path,
    'x_col': 'filename',
    'y_col': None,
    'batch_size': 32,
    'seed': 42,
    'shuffle': False,    # iterate in table order rather than shuffling
    'class_mode': None,  # yield images only, no targets
    'target_size': (IMG_SIZE, IMG_SIZE),
}
test_generator = datagen.flow_from_dataframe(**test_gen_kwargs)

In [None]:
# Predict a score for every test image. The test iterator was created with shuffle=False,
# so the outputs are expected to follow the row order of df_test.
yhat_test = model.predict(test_generator, verbose=1)

In [None]:
# Assemble the submission table: one row per test Id paired with its predicted score.
test_ids = df_test.index.values
test_scores = yhat_test.ravel()  # flatten the model output to one value per row

# Ids and predictions are paired purely by position. pd.concat(axis=1) would pad a
# length mismatch with NaN instead of failing, so check explicitly and stop early
# rather than write a corrupt submission.
if len(test_scores) != len(test_ids):
    raise ValueError(
        f"Prediction count ({len(test_scores)}) does not match test Id count "
        f"({len(test_ids)}); refusing to build a misaligned submission."
    )

frames = [
    pd.Series(test_ids, name='Id'),
    pd.Series(test_scores, name='Pawpularity')
]
submission = pd.concat(frames, axis=1)
submission.head()

In [None]:
# Write the submission file; index=False keeps the positional row index out of the CSV.
submission.to_csv('submission.csv', index=False)