# Convolutional Neural Network and Random Forest on Evaluating Pet Pawpularity (PetFinder.my)
In this notebook, I will build a Convolutional Neural Network and a Random Forest to predict pet popularity from images and metadata.

## Basic Imports

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # plotting images
import cv2 # used for downloading and resizing images

# libraries for CNN
import tensorflow as tf
from tensorflow import keras
from keras import layers

# libraries for RF
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Data Handling
First I will import the data from the csv files.

In [None]:
# Read the train and test metadata tables (one row per pet photo) into DataFrames.
metadata_train, metadata_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/petfinder-pawpularity-score/train.csv",
        "../input/petfinder-pawpularity-score/test.csv",
    )
)

In [None]:
# Preview the first five rows of the training metadata.
metadata_train.head(n=5)

I'll then import the image data.

In [None]:
train_str = "../input/petfinder-pawpularity-score/train/"
test_str = "../input/petfinder-pawpularity-score/test/"

IMAGE_SIDE = 28  # every image is resized to IMAGE_SIDE x IMAGE_SIDE (3 channels)


def _files_in_metadata_order(directory, ids):
    """Return the image file names found in `directory`, ordered like `ids`.

    `os.listdir` returns entries in arbitrary order, so reading images in that
    order would not line up with the metadata rows (and therefore not with the
    targets or the submission ids). Files are matched to ids by their name
    without the extension.

    Raises:
        FileNotFoundError: if any id has no matching file in `directory`.
    """
    by_stem = {os.path.splitext(name)[0]: name for name in os.listdir(directory)}
    wanted = [str(pet_id) for pet_id in ids]
    missing = [pet_id for pet_id in wanted if pet_id not in by_stem]
    if missing:
        raise FileNotFoundError(
            f"{len(missing)} image(s) missing in {directory}, e.g. {missing[:5]}"
        )
    return [by_stem[pet_id] for pet_id in wanted]


def _load_images(directory, filenames, label, report_every):
    """Read, convert to RGB and resize each file; return an (n, side, side, 3) array.

    Progress is printed every `report_every` images, tagged with `label`.

    Raises:
        ValueError: if OpenCV cannot decode one of the files.
    """
    images = np.zeros((len(filenames), IMAGE_SIDE, IMAGE_SIDE, 3))
    for index, filename in enumerate(filenames):
        img = cv2.imread(directory + filename)
        if img is None:  # cv2.imread signals failure by returning None, not by raising
            raise ValueError(f"could not read image {directory + filename}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR
        img = cv2.resize(img, dsize=(IMAGE_SIDE, IMAGE_SIDE), interpolation=cv2.INTER_NEAREST)
        images[index] = img
        if (index + 1) % report_every == 0:
            print(f"{index+1} completed in {label}")
    return images


# Column 0 of the metadata is the photo identifier (it is the column kept for the
# submission file), so row i of each image array corresponds to metadata row i.
train_filenames = _files_in_metadata_order(train_str, metadata_train.iloc[:, 0])
test_filenames = _files_in_metadata_order(test_str, metadata_test.iloc[:, 0])

train_size = len(train_filenames)
test_size = len(test_filenames)

images_train = _load_images(train_str, train_filenames, "train", report_every=100)
images_test = _load_images(test_str, test_filenames, "test", report_every=1)

Then I will split the training data into training and validation data.

In [None]:
from sklearn.model_selection import train_test_split

# Features are metadata columns 1..12; the target is the Pawpularity score.
X_metadata = metadata_train.iloc[:, 1:13]
y_train = metadata_train["Pawpularity"]

# Split metadata, images and targets in ONE call so the three partitions are
# guaranteed to contain the same rows. shuffle=False keeps the original row
# order: the first 80% is used for training, the last 20% for validation.
(
    metadata_training, metadata_valid,
    images_training, images_valid,
    y_training, y_valid,
) = train_test_split(X_metadata, images_train, y_train, test_size = 0.2, shuffle = False)

In [None]:
# Display the first nine training images in a 3x3 grid, titled with their target score.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    # Pixel values are stored as floats; imshow expects 0-255 integers for RGB data.
    ax.imshow(images_training[i].astype("uint8"))
    # .iloc is positional, so the title matches image i whatever the Series index is.
    ax.set_title(y_training.iloc[i])
    ax.axis("off")

## Training the Convolutional Neural Network
First, I will train the convolutional neural network. The architecture is shown below.

In [None]:
# building the model
# Inputs are channels-last (height, width, channels), so BatchNormalization must
# normalise over the last axis (the Keras default, axis=-1). axis=1 would
# normalise per image row instead of per channel / feature map.
cnn_model = keras.Sequential([
    layers.BatchNormalization(input_shape=[28, 28, 3]),
    layers.Conv2D(filters=16, kernel_size=3, activation='relu'),
    layers.Dropout(0.2),
    layers.BatchNormalization(),
    layers.Conv2D(filters=32, kernel_size=5, activation='relu'),
    layers.Dropout(0.2),
    layers.BatchNormalization(),
    layers.Conv2D(filters=64, kernel_size=7, activation='relu'),
    layers.Dropout(0.2),
    layers.BatchNormalization(),
    layers.Conv2D(filters=64, kernel_size=7, activation='relu'),
    layers.MaxPool2D(),
    layers.BatchNormalization(),
    layers.Flatten(),
    layers.Dense(units=1024, activation='relu'),
    # Linear output for regression: a ReLU on the single output unit can get
    # stuck at 0 with zero gradient, leaving the model unable to learn.
    layers.Dense(units=1),
])

# compiling the model with 'adam' optimizer,
# "mean_squared_error" loss, and RootMeanSquaredError metric
cnn_model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=[keras.metrics.RootMeanSquaredError()]
)

In [None]:
# Render the network as a diagram with tensor shapes shown and layer names hidden.
keras.utils.plot_model(
    cnn_model,
    show_layer_names=False,
    show_shapes=True,
)

In [None]:
# Train the CNN; stop once the validation loss has not improved for 10 epochs
# and roll the weights back to the best epoch seen.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

history = cnn_model.fit(
    x=images_training,
    y=y_training,
    validation_data=(images_valid, y_valid),
    epochs=100,
    batch_size=16,
    callbacks=[callback],
)

In [None]:
# Score the test images with the trained CNN and show the raw predictions.
predictions_cnn = cnn_model.predict(x=images_test)
predictions_cnn

## Training the Random Forest
Next, I will train the random forest.

In [None]:
import math

# Random forest on the metadata features: 100 trees, trained on 4 parallel jobs,
# with a fixed seed so the fit is reproducible.
rf_model = RandomForestRegressor(
    n_estimators=100,
    n_jobs=4,
    random_state=0,
).fit(metadata_training, y_training)

# Report the root-mean-squared error on the held-out validation rows.
predictions_rf_valid = rf_model.predict(metadata_valid)
rf_valid_mse = mean_squared_error(y_valid, predictions_rf_valid)
print("Random Forest error: " + str(math.sqrt(rf_valid_mse)))

In [None]:
# Score the test set using the same metadata columns (1..12) the forest was trained on.
test_features = metadata_test.iloc[:, 1:13]
predictions_rf = rf_model.predict(test_features)

In [None]:
# Turn the forest predictions into a (test_size, 1) column.
# np.reshape raises if the number of predictions differs from test_size, whereas
# np.resize would silently repeat or truncate values to fit the requested shape.
predictions_rf = np.reshape(predictions_rf, (test_size, 1))
predictions_rf

## Combining the Models
Here I will combine the results of the two trained models with an 80:20 weighting (80% CNN, 20% random forest).

In [None]:
# Blend the two models (80% CNN, 20% random forest) and write the submission file.
final_predictions = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")

# Flatten both prediction arrays first: mixing an (n, 1) array with an (n,) array
# would broadcast to an (n, n) matrix instead of an element-wise blend.
final_predictions["Pawpularity"] = (
    0.8 * np.ravel(predictions_cnn) + 0.2 * np.ravel(predictions_rf)
)

# Keep only the identifier (first column) and the prediction, selected by name
# rather than by a hard-coded column position.
final_predictions = final_predictions[[final_predictions.columns[0], "Pawpularity"]]
final_predictions.to_csv("submission.csv", index = False)
print(final_predictions)
print("Final submission created!")