In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.layers import AveragePooling2D, Conv2D, MaxPool2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
import numpy as np
from tqdm import tqdm

In [None]:
# Load the training table: an `Id` column, the `Pawpularity` target and the
# remaining columns that are used as tabular features further down.
train = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Turn image ids into file names. The extension is appended only where it is
# still missing, so re-running this cell does not produce 'xxx.jpg.jpg'.
train['Id'] = train['Id'].where(
    train['Id'].str.endswith('.jpg', na=False),
    train['Id'] + '.jpg',
)

# Preprocessing CNN

In [None]:
# Augmenting image generator: pixel values are multiplied by 1/255 and random
# rotations, shifts, zooms and horizontal flips are applied.
# validation_split reserves 20% of the rows for `subset='validation'`.
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
    validation_split=0.2,
)

In [None]:
# Options shared by the training and validation iterators: file names come
# from `Id`, the regression target from `Pawpularity` (class_mode="raw").
flow_kwargs = dict(
    dataframe=train,
    directory='../input/petfinder-pawpularity-score/train',
    x_col="Id",
    y_col="Pawpularity",
    class_mode="raw",
    batch_size=64,
    target_size=(224, 224),
)

# Training batches: augmented and reshuffled every epoch.
train_data = train_datagen.flow_from_dataframe(
    shuffle=True,
    subset='training',
    **flow_kwargs
)

# Validation images must not be randomly augmented, otherwise the validation
# metrics (and the checkpoint chosen from them) are measured on distorted
# images. Use a generator that only rescales. Its validation_split has to
# match train_datagen's (0.2) so the two subsets stay disjoint.
val_datagen = ImageDataGenerator(rescale=1.0/255.0, validation_split=0.2)
val_data = val_datagen.flow_from_dataframe(
    shuffle=False,  # order is irrelevant for evaluation; keep it deterministic
    subset='validation',
    **flow_kwargs
)

# Modelling CNN

In [None]:
# Convolutional base: EfficientNetB7 with ImageNet weights and without its
# classification top, fed with 224x224 RGB images.
# NOTE(review): the Keras EfficientNet models appear to include their own
# rescaling/normalisation and expect pixels in [0, 255], while the generators
# in this notebook multiply by 1/255 - confirm against the Keras docs and, if
# so, drop `rescale` from the train AND test generators together.
image_input = Input(shape=(224,224,3))
bModel = tf.keras.applications.EfficientNetB7(
    include_top=False,
    weights='imagenet',
    input_tensor=image_input,
)

In [None]:
# Regression head stacked on the base output:
# max-pool -> flatten -> dense(512, relu) -> dropout(0.2) -> single linear unit.
head_layers = [
    MaxPool2D(pool_size=(2,2)),
    Flatten(),
    Dense(512,activation='relu'),
    Dropout(0.2),
    Dense(1),
]
hModel = bModel.output
for head_layer in head_layers:
    hModel = head_layer(hModel)

# Full network: base input -> head output.
model = tf.keras.Model(bModel.input,hModel)

# Freeze every layer of the pre-trained base so only the head is trained.
for frozen_layer in bModel.layers:
    frozen_layer.trainable = False

In [None]:
# Training hyper-parameters: 25 epochs, Adam starting at 1e-3, and a `decay`
# value of lrate / epochs handed to the optimizer.
# NOTE(review): newer Keras releases may reject the `decay` argument - confirm
# against the installed TensorFlow version.
epochs = 25
lrate = 0.001
decay = lrate/epochs
opt = tf.keras.optimizers.Adam(
    learning_rate=lrate,
    decay=decay,
)

# Optimise mean squared error and report RMSE as the metric.
model.compile(
    optimizer=opt,
    loss=MeanSquaredError(),
    metrics=[RootMeanSquaredError()],
)

In [None]:
! mkdir cp

In [None]:
# Training callbacks. Early stopping and LR reduction follow the validation
# loss (validation data is passed to fit); watching the training loss would
# keep training while the model merely overfits.
# Stop when val_loss has not improved by at least 1e-4 for 3 epochs.
eraly_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=3, verbose=1, mode='auto')
# Multiply the learning rate by 0.1 (not below 1e-6) after 2 epochs without a 4e-4 improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', min_delta=0.0004, patience=2, factor=0.1, min_lr=1e-6, mode='auto',
                              verbose=1)
# Keep only the model with the best validation RMSE seen so far.
cp = tf.keras.callbacks.ModelCheckpoint(filepath='./cp/model.h5', monitor='val_root_mean_squared_error', verbose=0, save_best_only=True)

# Train CNN

In [None]:
# Number of full batches per epoch (a trailing partial batch is not counted).
TRAIN_STEPS = train_data.n // train_data.batch_size

# Fit the head on the training iterator, validating after every epoch and
# applying the early-stopping, LR-reduction and checkpoint callbacks.
fit_options = {
    'steps_per_epoch': TRAIN_STEPS,
    'validation_data': val_data,
    'epochs': epochs,
    'callbacks': [eraly_stop, reduce_lr, cp],
    'verbose': 1,
}
history = model.fit(train_data, **fit_options)

In [None]:
# Reload the checkpoint file that the ModelCheckpoint callback writes
# (save_best_only on the validation RMSE).
best_model = tf.keras.models.load_model('./cp/model.h5')

# Read Test Data

In [None]:
# Load the test table (ids plus the same feature columns, no target).
df_test = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

In [None]:
# Preview only the first rows instead of rendering the whole test table.
df_test.head()

In [None]:
# File name of each test image; `Id` itself is kept untouched for the submission files.
df_test['IdImg'] = df_test['Id'] + '.jpg'

# Predict Test Data

In [None]:
# Test-time generator: only the 1/255 rescaling, no augmentation.
test_datagen = ImageDataGenerator(rescale=1.0/255.0)

# Unlabelled, unshuffled iterator so predictions come back in df_test row order.
test_flow_options = dict(
    dataframe=df_test,
    directory='../input/petfinder-pawpularity-score/test',
    x_col='IdImg',
    y_col=None,
    class_mode=None,
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
    seed=42,
)
test_generator = test_datagen.flow_from_dataframe(**test_flow_options)

In [None]:
# CNN predictions for the test images, produced by the reloaded checkpoint.
yhat_test = best_model.predict(test_generator, verbose=1)

In [None]:
# CNN submission: test ids next to the flattened predictions, aligned on the row index.
cnn_ids = pd.Series(df_test['Id'], name='Id')
cnn_scores = pd.Series(yhat_test.ravel(), name='Pawpularity')
frames = [cnn_ids, cnn_scores]
submission = pd.concat(frames, axis=1)
submission.head()

In [None]:
# Write the CNN predictions without the index column.
submission.to_csv('submissionCNN.csv', index=False)

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import StratifiedKFold, train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
import numpy as np

In [None]:
# Tabular problem: target is Pawpularity, features are every column except
# the image file name and the target itself.
Y = train['Pawpularity']
non_feature_columns = ['Id', 'Pawpularity']
X = train.drop(columns=non_feature_columns)

In [None]:
# Hold out 10% of the rows for evaluating the tabular models. The seed is
# fixed so the split (and the RMSEs computed on it) is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

In [None]:
# Random-forest regressor tuned over tree count and depth with 5-fold CV.
# - random_state makes the forests (and hence the selected model) reproducible.
# - scoring selects by (negated) RMSE, the metric this notebook reports,
#   instead of the regressor's default score.
# - n_jobs=-1 evaluates the grid candidates in parallel.
random_forest = RandomForestRegressor(random_state=42)
param_grid = {
                 'n_estimators': [10, 50, 100, 500, 1000],
                 'max_depth': [2, 5, 7, 9, 10]
             }

grid_clf = GridSearchCV(
    random_forest,
    param_grid,
    cv=5,
    scoring='neg_root_mean_squared_error',
    n_jobs=-1,
)
grid_clf.fit(X_train, Y_train)

In [None]:
# Show the forest configuration selected by the grid search.
grid_clf.best_estimator_

In [None]:
# Hold-out predictions from the grid search object.
y_pred = grid_clf.predict(X_test)

In [None]:
# Hold-out RMSE of the random forest: square root of the mean squared error.
rf_mse = mean_squared_error(Y_test, y_pred)
RF_RMSE = np.sqrt(rf_mse)
RF_RMSE

In [None]:
# Test features: everything except the id and the derived image file name.
test_X = df_test.drop(columns=['Id','IdImg'])

In [None]:
# Random-forest predictions for the test rows.
test_pred = grid_clf.predict(test_X)

In [None]:
# Random-forest submission: test ids next to the forest predictions, aligned on the row index.
rf_ids = pd.Series(df_test['Id'], name='Id')
rf_scores = pd.Series(test_pred, name='Pawpularity')
frames2 = [rf_ids, rf_scores]
submission2 = pd.concat(frames2, axis=1)
submission2.head()

In [None]:
# Write the random-forest predictions without the index column.
submission2.to_csv('submissionRF.csv', index=False)

# Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares baseline fitted on the same training split as the forest.
LR = LinearRegression()
LR.fit(X=X_train, y=Y_train)

In [None]:
# Hold-out RMSE of the linear model: square root of the mean squared error.
y_pred2 = LR.predict(X_test)
lr_mse = mean_squared_error(Y_test, y_pred2)
LR_RMSE = np.sqrt(lr_mse)
LR_RMSE

In [None]:
# Linear-regression predictions for the test rows.
test_pred2 = LR.predict(test_X)

In [None]:
# Linear-regression submission: test ids next to the linear predictions, aligned on the row index.
lr_ids = pd.Series(df_test['Id'], name='Id')
lr_scores = pd.Series(test_pred2, name='Pawpularity')
frames3 = [lr_ids, lr_scores]
submission3 = pd.concat(frames3, axis=1)
submission3.head()

In [None]:
# Write the linear-regression predictions without the index column.
submission3.to_csv('submissionLR.csv', index=False)

In [None]:
# Put the three prediction sets side by side, keyed on Id. After both merges
# the CNN scores are in Pawpularity_x, the forest scores in Pawpularity_y and
# the linear-regression scores in Pawpularity.
df_join = (
    submission
    .merge(submission2, how='left', on='Id')
    .merge(submission3, how='left', on='Id')
)

In [None]:
# Preview only the first rows of the joined predictions instead of the whole frame.
df_join.head()

In [None]:
# Ensemble: unweighted mean of the three model predictions, stored in
# Pawpularity; the per-model helper columns are dropped afterwards.
prediction_total = df_join['Pawpularity_x'] + df_join['Pawpularity_y'] + df_join['Pawpularity']
df_join['Pawpularity'] = prediction_total/3
merged_helper_columns = ['Pawpularity_x', 'Pawpularity_y']
df_join = df_join.drop(columns=merged_helper_columns)

In [None]:
# Preview only the first rows of the averaged predictions instead of the whole frame.
df_join.head()

In [None]:
# Write the averaged (ensemble) predictions without the index column.
df_join.to_csv('submissionJoin.csv', index=False)