# PetFinder.my
- Hola amigos, this notebook covers my code for the **PetFinder.my - Pawpularity Contest**, which can be found [here](https://www.kaggle.com/c/petfinder-pawpularity-score).
- In this notebook, I have used only the images and dropped all the meta features. I first used a **VGG-16** to extract an 8192-dimensional representation of each image.
- Then I used a custom multi-layer perceptron (MLP) to predict the final value of the `Pawpularity` variable for each of the points in the dataset.
- Other experiments that I did include:
    - Using ResNets for feature extraction
    - Determining the 10th and 90th percentiles of the `Pawpularity` variable, and using those percentiles to clip the predicted values of the variable itself.

# Importing Packages

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tqdm import tqdm
from joblib import dump, load
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.activations import relu
from tensorflow.keras.metrics import RootMeanSquaredError as RMSE
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.resnet_v2 import ResNet101V2, ResNet50V2
from tensorflow.keras.layers import Dropout, Conv2D, Dense, MaxPool2D, Flatten

# Importing the CSV(s) & Image(s)

In [None]:
# Folders holding the competition images
train_images_path = '../input/petfinder-pawpularity-score/train'
test_images_path = '../input/petfinder-pawpularity-score/test'

# Tabular data shipped with the competition; `Pawpularity` is the regression target
train = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
test = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")
train_label = train['Pawpularity']
print(train.shape, test.shape)

# File names present in each image folder (os.listdir gives no ordering guarantee)
train_images_list, test_images_list = (
    os.listdir(folder) for folder in (train_images_path, test_images_path)
)
print(len(train_images_list), len(test_images_list), train_label.shape)

In [None]:
# # Training Only
# NOTE(review): if this cell is re-enabled, train_images_list comes from os.listdir(),
# whose order is arbitrary and not guaranteed to match the row order of train.csv
# (and therefore of train_label) — confirm the image/label pairing before training.
# train_images = []
# for i in tqdm(range(train.shape[0])):
#     path = os.path.join(train_images_path, train_images_list[i])
#     image = cv2.imread(path)
#     image = image / 255
#     image = cv2.resize(image, (128, 128))
#     train_images.append(image)
# train_images = np.array(train_images)  
# print(train_images.shape)

In [None]:
# Training + Inferencing
# Load the test images in the row order of test.csv. os.listdir() returns entries
# in arbitrary order, so picking files by row position would pair an image with
# the wrong `Id` once the predictions are written next to test['Id'].
# NOTE(review): assumes every image file is named "<Id>.<extension>" — confirm against the dataset.
test_files_by_id = {os.path.splitext(name)[0]: name for name in test_images_list}

test_images = []
for image_id in tqdm(test['Id']):
    # A missing file raises KeyError here instead of silently misaligning rows
    path = os.path.join(test_images_path, test_files_by_id[image_id])
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None rather than raising
        raise FileNotFoundError(f"Could not read image: {path}")
    # Same preprocessing as before: scale to [0, 1], then resize to the 128x128 VGG16 input
    image = image / 255
    image = cv2.resize(image, (128, 128))
    test_images.append(image)
test_images = np.array(test_images)
print(test_images.shape)

# Visualization(s)

In [None]:
# Distribution of the target variable; labelled so the figure can be read on its own
fig, ax = plt.subplots()
ax.hist(train_label, bins = 50)
ax.set(title = 'Distribution of Pawpularity', xlabel = 'Pawpularity', ylabel = 'Count');

# Feature Extraction using VGG16

In [None]:
# # To be used for training
# model = VGG16(weights = 'imagenet', include_top = False, input_shape = (128, 128, 3))

# # Freezing the weights of all the layers
# for layer in model.layers:
#     layer.trainable = False
    
# # Saving the model: Training
# model.save('vgg16_model.h5')

In [None]:
# Inferencing: restore the saved VGG16 feature extractor from the attached dataset
vgg16_model_path = '../input/petfindermy-pawpularity-contest/vgg16_model.h5'
model = load_model(vgg16_model_path)

In [None]:
# # Training only
# # The top layer of VGG16 gives us (4*4*512) feature vector for each of the images
# train_fea = model.predict(train_images)

# # Reshaping the feature vectors
# train_fea = train_fea.reshape(train_fea.shape[0], -1)
# train_fea = pd.DataFrame(train_fea)
# train_fea.to_csv('train_fea_vgg16.csv', index = False)

In [None]:
# Read the cached VGG16 feature table for the training images
train_fea_path = '../input/petfindermy-pawpularity-contest/train_fea_vgg16.csv'
train_fea = pd.read_csv(train_fea_path)
print(train_fea.shape)

In [None]:
# Training + Inferencing
# Run the test images through the VGG16 extractor; the MLP below expects
# 8192 (= 4*4*512) inputs per image, so each feature map is flattened to one row.
test_fea = model.predict(test_images)
n_test_images = len(test_fea)
test_fea = test_fea.reshape(n_test_images, -1)
print(test_fea.shape)

# Training the Model

In [None]:
# MLP regressor on top of the 8192-dimensional VGG16 features.
# Three hidden layers (32 -> 64 -> 64), each followed by heavy dropout, then a
# 4-unit bottleneck and a single output unit.
# NOTE(review): the output unit uses ReLU, so predictions cannot go below 0; a linear
# output is the more common choice for regression — confirm this is intentional.
model = Sequential()
model.add(Dense(units = 32, activation = 'relu', input_shape = (8192,)))
model.add(Dropout(0.45))
for hidden_units in (64, 64):
    model.add(Dense(units = hidden_units, activation = 'relu'))
    model.add(Dropout(0.45))
model.add(Dense(units = 4, activation = 'relu'))
model.add(Dense(units = 1, activation = 'relu'))

model.summary()

In [None]:
# Defining the callbacks and optimizers
adam = Adam(learning_rate = 0.001)
# Multiply the learning rate by 0.9 when val_loss has not improved by at least
# 0.001 for 5 consecutive epochs, never going below 1e-4.
reduce_lr = ReduceLROnPlateau(monitor = 'val_loss', factor = 0.9, patience = 5, 
    min_lr = 0.0001, min_delta = 0.001)

# `metrics` is documented as a list; newer Keras versions reject a bare metric object.
model.compile(loss = "mse", optimizer = adam, metrics = [RMSE()])
# The last 20% of the rows are held out for validation (validation_split takes them
# from the end of the data, before shuffling).
predictor = model.fit(train_fea, train_label, validation_split = 0.2, 
    epochs = 100, batch_size = 32, callbacks = [reduce_lr])

In [None]:
# Reference scores from the training run:
# Training (RMSE) = 17.96 | Validation (RMSE) = 20.5583
# Saving the model: Training
# model.save('mlp8_model_v2.h5')

# Loading the model: Inferencing (replaces the model fitted above with the saved one)
mlp_model_path = '../input/petfindermy-pawpularity-contest/mlp8_model_v2.h5'
model = load_model(mlp_model_path)

# Making the Submission

In [None]:
# model.predict returns a 2-D array (presumably one column, since the MLP has a single
# output unit — confirm for the loaded model); flatten it so the column is a plain 1-D vector.
y_test_preds = model.predict(test_fea).ravel()

# One row per test Id, in the same order as test.csv
submission = pd.DataFrame({'Id': test['Id'], 'Pawpularity': y_test_preds})
submission.to_csv('submission.csv', index = False)