In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, Conv2D, MaxPooling2D
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import concatenate
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
from matplotlib import image 
import seaborn as sns
import cv2

In [None]:
# Image directories. The trailing slash is kept because file names are
# appended to these strings directly when images are loaded.
train_image_path = "../input/petfinder-pawpularity-score/train/"
test_image_path = "../input/petfinder-pawpularity-score/test/"

# Tabular metadata files that accompany the images.
train_csv_path = "../input/petfinder-pawpularity-score/train.csv"
test_csv_path = "../input/petfinder-pawpularity-score/test.csv"

In [None]:
# Load each metadata table, then turn its "Id" values into image file names
# by appending the ".jpg" extension (they are later concatenated onto the
# image directory paths).
train_df = pd.read_csv(train_csv_path)
train_df["Id"] = train_df["Id"].map(lambda image_id: image_id + ".jpg")

test_df = pd.read_csv(test_csv_path)
test_df["Id"] = test_df["Id"].map(lambda image_id: image_id + ".jpg")

# Show the first rows of the training table as the cell output.
train_df.head()

In [None]:
# Count the missing values in each column of the training table.
train_df.isna().sum()

In [None]:
# Summary statistics (count, mean, spread, quartiles) of the training table.
train_df.describe()

In [None]:
# Column dtypes, non-null counts and memory footprint of the training table.
train_df.info()

In [None]:
# Preview the first nine training images on a 3x3 grid, each titled with the
# score stored in its "Pawpularity" column.
fig, axs = plt.subplots(3, 3, figsize=(20, 20))

# ravel() walks the grid row by row, so position i lands at (i // 3, i % 3).
for i, ax in enumerate(axs.ravel()):
    sample = train_df.iloc[i]
    ax.imshow(image.imread(train_image_path + sample["Id"]))
    ax.set_title("Pawpularity :" + str(sample["Pawpularity"]))
    ax.axis("off")



In [None]:
# One histogram per metadata column (everything except the file name and the
# target), laid out on a 3x4 grid of axes.
fig, aix = plt.subplots(3, 4, figsize=(15, 15))

df_columns = train_df.columns.drop(["Id", "Pawpularity"])
for position, feature in enumerate(df_columns):
    # Fill the grid row by row: four panels per row.
    grid_row, grid_col = divmod(position, 4)
    panel = aix[grid_row][grid_col]
    panel.hist(train_df[feature])
    panel.set_title(feature)

In [None]:
# Distribution of the regression target, drawn on the current axes.
target_ax = plt.gca()
target_ax.hist(train_df["Pawpularity"])
target_ax.set_title("Pawpularity")
target_ax.grid(True)
plt.show()

In [None]:
# Rescale the target by a factor of 1/100 (predictions are multiplied by 100
# again before submission). NOTE: the column is overwritten in place, so
# executing this cell a second time divides the values once more.
train_df["Pawpularity"] = train_df["Pawpularity"] / 100

In [None]:
# Hold out 20% of the rows for validation.
# `shuffle` is a boolean flag; the previous `shuffle=5` only "worked" because 5
# is truthy (and type-validating scikit-learn releases reject it), leaving the
# split unseeded. The 5 is used as `random_state` so that the split is
# reproducible across kernel restarts.
train_X, validation_X, train_y, Validation_Y = train_test_split(
    train_df.drop(columns=["Pawpularity"]),  # features: file name + metadata
    train_df[["Pawpularity"]],               # target, kept as a one-column frame
    test_size=0.2,
    shuffle=True,
    random_state=5,
)

In [None]:
# (rows, columns) of the training, validation and test feature tables.
train_X.shape, validation_X.shape, test_df.shape

In [None]:
# Give the training features a fresh 0..n-1 index after the shuffled split.
train_X = train_X.reset_index(drop=True)

# Tabular inputs for the MLP branch: every column except the image file name.
train_X_mlp = train_X.drop(columns="Id")
validation_X_mlp = validation_X.drop(columns="Id")

In [None]:
def _load_resized_images(image_dir, image_names, image_size):
    """Read every named image under ``image_dir`` and resize it to ``image_size``.

    Args:
        image_dir: Directory prefix; file names are appended to it directly,
            so it must end with a path separator.
        image_names: Iterable of image file names.
        image_size: Target size tuple passed to ``cv2.resize``.

    Returns:
        ``np.array`` built from the list of resized images, in input order.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read one of the files.
    """
    resized = []
    for image_name in image_names:
        image_file = f"{image_dir}{image_name}"
        pixels = cv2.imread(image_file)
        if pixels is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the offending path instead of inside cv2.resize.
            raise FileNotFoundError(f"Could not read image: {image_file}")
        resized.append(cv2.resize(pixels, image_size))
    return np.array(resized)


def get_image_array(train_df, validation_df, test_df, image_size=(64, 64),
                    train_image_path='../input/petfinder-pawpularity-score/train/',
                    test_image_path='../input/petfinder-pawpularity-score/test/'):
    """Load and resize the images listed in the ``'Id'`` column of each table.

    Training and validation images are both read from ``train_image_path``;
    test images are read from ``test_image_path``.

    Args:
        train_df: Table whose ``'Id'`` column lists training image file names.
        validation_df: Table whose ``'Id'`` column lists validation image file
            names.
        test_df: Table whose ``'Id'`` column lists test image file names.
        image_size: Size every image is resized to (default ``(64, 64)``).
        train_image_path: Directory prefix of the train/validation images.
        test_image_path: Directory prefix of the test images.

    Returns:
        Tuple ``(train_images, validation_images, test_images)`` of NumPy
        arrays, each in the row order of the corresponding table.

    Raises:
        FileNotFoundError: If any listed image cannot be read.
    """
    train_images = _load_resized_images(train_image_path, train_df['Id'], image_size)
    validation_images = _load_resized_images(train_image_path, validation_df['Id'], image_size)
    test_images = _load_resized_images(test_image_path, test_df['Id'], image_size)
    return train_images, validation_images, test_images

# Load the three image sets and divide every pixel value by 255.0.
train_images, validation_images, test_images = (
    pixels / 255.0
    for pixels in get_image_array(train_X, validation_X, test_df)
)

In [None]:
# The file names were only needed to load the images; keep just the metadata
# columns in the test table. NOTE: "Id" is gone after this, so the cell cannot
# be executed twice without reloading test_df.
test_df = test_df.drop(columns="Id")

In [None]:
def create_mlp(dims):
    """Build the dense branch used for the tabular inputs.

    Args:
        dims: Number of input features (passed as ``input_dim`` to the first
            layer).

    Returns:
        An uncompiled ``Sequential`` model: Dense(512) -> Dropout(0.2) ->
        Dense(256) -> Dropout(0.2) -> Dense(4), all dense layers using ReLU.
    """
    model = Sequential()
    model.add(Dense(512, input_dim=dims, activation="relu"))
    model.add(Dropout(0.2))
    model.add(Dense(256, activation="relu"))
    model.add(Dropout(0.2))
    # Four-unit output that is later concatenated with the CNN branch.
    model.add(Dense(4, activation="relu"))
    return model

In [None]:
def create_cnn(dims):
    """Build the convolutional branch used for the image inputs.

    Args:
        dims: Input shape handed to the first convolution as ``input_shape``.

    Returns:
        An uncompiled ``Sequential`` model made of two
        Conv2D -> MaxPool2D -> Dropout(0.2) -> BatchNormalization stages
        (128 and 40 filters, 3x3 kernels, ReLU) followed by
        Flatten -> Dense(40, ReLU) -> Dense(4).
    """
    layers = tf.keras.layers

    conv_stack = []
    # (filters, extra kwargs): only the first convolution declares the input shape.
    for n_filters, conv_kwargs in ((128, {"input_shape": dims}), (40, {})):
        conv_stack.extend([
            layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu', **conv_kwargs),
            layers.MaxPool2D(),
            layers.Dropout(0.2),
            layers.BatchNormalization(),
        ])

    head = [
        layers.Flatten(),
        layers.Dense(40, activation='relu'),
        # Four linear units, later concatenated with the MLP branch.
        layers.Dense(4),
    ]
    return tf.keras.models.Sequential(conv_stack + head)

In [None]:
# Build the tabular (12 features) and image (64x64x3) branches, then join
# their outputs into a single tensor.
mlp, cnn = create_mlp(12), create_cnn((64, 64, 3))
combinedInput = concatenate([branch.output for branch in (mlp, cnn)])

In [None]:
# Regression head on top of the concatenated branch outputs: a small ReLU
# layer followed by a single linear unit.
fusion_hidden = Dense(4, activation="relu")
regression_output = Dense(1, activation="linear")
x = regression_output(fusion_hidden(combinedInput))

In [None]:
# Two-input model: tabular features first, images second; the output is the
# single-unit regression head.
model = Model(inputs=[branch.input for branch in (mlp, cnn)], outputs=x)

In [None]:
# Print the layer-by-layer structure and parameter counts of the combined model.
model.summary()

In [None]:
# The `decay=` keyword is not accepted by the newer Keras optimizers. An
# InverseTimeDecay schedule with decay_steps=1 gives the same rule the legacy
# argument applied: lr = 1e-3 / (1 + (1e-3 / 200) * step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=1e-3,
    decay_steps=1,
    decay_rate=1e-3 / 200,
)
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Train on mean squared error and report RMSE; `metrics` is documented as a
# list, so the metric object is wrapped instead of being passed bare.
model.compile(loss='mse', optimizer=opt, metrics=[tf.keras.metrics.RootMeanSquaredError()])

In [None]:

# Train for 100 epochs with mini-batches of 100. Inputs are given in the same
# order as the model's inputs: tabular features first, images second.
model.fit(
    x=[train_X_mlp.to_numpy(), train_images],
    y=train_y,
    validation_data=(
        [validation_X_mlp.to_numpy(), validation_images],
        Validation_Y,
    ),
    epochs=100,
    batch_size=100,
)


In [None]:
# Predict on the test metadata and images, then multiply by 100 to undo the
# division by 100 that was applied to the training target.
predictions = 100 * model.predict([test_df.values, test_images])
predictions

In [None]:
# Start from the provided sample submission and overwrite its target column.
submission_output = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')

# Scores are written with two decimals.
predictions_round = predictions.round(2)
submission_output['Pawpularity'] = predictions_round

# Write the submission file without the DataFrame index.
submission_output.to_csv('submission.csv', index=False)