In [None]:
import math
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers


In [None]:
# Read the train and test CSV tables into DataFrames.
train_csv_path = '../input/petfinder-pawpularity-score/train.csv'
test_csv_path = '../input/petfinder-pawpularity-score/test.csv'
train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)

In [None]:
# Derive each image's path from its Id. Vectorized string concatenation
# replaces the per-row lambda: same paths, no Python-level call per row.
train["file_path"] = "../input/petfinder-pawpularity-score/train/" + train["Id"] + ".jpg"
test["file_path"] = "../input/petfinder-pawpularity-score/test/" + test["Id"] + ".jpg"

In [None]:
# Histogram of the Pawpularity target with its mean and median marked.
sns.set(rc={'figure.figsize': (15, 5)})
fig, ax = plt.subplots()
sns.histplot(data=train, x='Pawpularity', bins=100, ax=ax)
pawpularity = train['Pawpularity']
ax.axvline(pawpularity.mean(), c='blue', ls='-', lw=3, label='Mean Pawpularity')
ax.axvline(pawpularity.median(), c='red', ls='-', lw=3, label='Median Pawpularity')
ax.set_title('Distribution of Pawpularity Scores', fontsize=20, fontweight='bold')
ax.legend()
plt.show()

In [None]:
# Correlation heatmap of the numeric columns.
# `train` also carries the string columns `Id` and `file_path`; calling
# DataFrame.corr() on them raises on pandas >= 2.0, so restrict the frame to
# numeric dtypes first (this works on older pandas versions as well).
sns.set(rc={'figure.figsize': (30, 10)})
numeric_corr = train.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, fmt='.1g', cmap='coolwarm', square=True)
plt.title('Correlation Matrix', fontsize=20, fontweight='bold')
plt.show()


In [None]:
image_size = 128


def preprocess(image_url):
    """Read one JPEG file and return it as a scaled, square float32 image.

    Args:
        image_url: Path of the JPEG file to read.

    Returns:
        A float32 tensor with 3 colour channels, pixel values divided by
        255, resized to ``image_size`` x ``image_size``.
    """
    raw_bytes = tf.io.read_file(image_url)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.cast(pixels, tf.float32) / 255.0
    # A central fraction of 1.0 keeps the full frame.
    pixels = tf.image.central_crop(pixels, 1.0)
    return tf.image.resize(pixels, (image_size, image_size))
# Preprocess every training image, in the row order of `train`.
x_train = [preprocess(path) for path in train['file_path']]

In [None]:
# Preprocess every test image and stack the results into a single array.
test2 = np.array([preprocess(path) for path in test['file_path']])

In [None]:
# Stack the preprocessed images into one array and hold out 20% of the rows
# for validation. A fixed random_state makes the split, and therefore the
# reported validation metrics, reproducible across runs.
x_train = np.array(x_train)
y_train = train['Pawpularity']
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

In [None]:
# Small CNN regressor: three stride-2 convolutions, then a dense head with
# dropout and a single linear output unit.
# The input shape is taken from `image_size` so the model always matches the
# size the images are resized to, instead of a second hard-coded 128.
inputs = keras.Input(shape=(image_size, image_size, 3))
x = inputs
for n_filters in (8, 16, 32):
    x = layers.Conv2D(
        filters=n_filters,
        kernel_size=3,
        strides=2,
        padding='same',
        activation='relu',
    )(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.5)(x)
output = layers.Dense(1)(x)
model = keras.Model(inputs=inputs, outputs=output)

In [None]:
# Draw the model's layer graph with tensor shapes shown and layer names hidden.
# NOTE(review): plot_model relies on pydot/graphviz being installed — confirm
# they are available in this environment.
keras.utils.plot_model(model, show_shapes=True, show_layer_names=False)

In [None]:
# Stop training once val_loss has not improved for 5 epochs, and restore the
# weights of the best epoch; without restore_best_weights the model would keep
# the weights from the last (worse) epoch and predict with those.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
# Cut the learning rate tenfold after 2 epochs without val_loss improvement.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=2,
    min_lr=1e-9,
)
callbacks = [early_stop, reduce_lr]

In [None]:
# Optimise mean squared error; track RMSE, MAE and MAPE alongside it.
rmse_metric = tf.keras.metrics.RootMeanSquaredError(name="rmse")
model.compile(
    loss='mse',
    optimizer='Adam',
    metrics=[rmse_metric, "mae", "mape"],
)

In [None]:
# Train for up to 50 epochs, validating on the held-out split each epoch.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=64,
    callbacks=callbacks,
)

In [None]:
# Predict scores for the preprocessed test images with the trained CNN.
# `test2` must still be the image array here; a later cell rebinds that name.
cnn_pred=model.predict(test2)

In [None]:
# Pair each test Id with its CNN prediction.
cnn = pd.DataFrame({'Id': test['Id']}).assign(Pawpularity=cnn_pred)
cnn.head()

Ridge regression (note: the cells below actually fit a plain, unregularized LinearRegression)

In [None]:
# Use every remaining column as a regression feature: drop the identifier,
# the target, and the image path.
cols = ['Id', 'Pawpularity', 'file_path']
x_train = train.drop(columns=cols)

In [None]:
# Regression target: the Pawpularity column of the full training table.
y_train=train['Pawpularity']

In [None]:
# Hold out 20% of the tabular rows for evaluation. A fixed random_state keeps
# the split, and the RMSE reported from it, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42
)

In [None]:
# Fit a linear-regression baseline on the tabular features, time the fit, and
# report RMSE on the held-out split.
lr_reg = LinearRegression()

# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()
lr_reg.fit(x_train, y_train)
stop = time.perf_counter()

lr_reg_pred = lr_reg.predict(x_test)

print(f'Training time: {round((stop - start),3)} seconds')
lr_reg_RMSE = math.sqrt(mean_squared_error(y_test, lr_reg_pred))
print(f'Liner_reg_RMSE: {round(lr_reg_RMSE,3)}')

In [None]:
# Build the test feature table by dropping the non-feature columns by name.
# Dropping "the last column" by position silently breaks if `file_path` is
# ever not last, and reusing the name `test2` would overwrite the image array
# that the CNN prediction cell feeds to model.predict.
test_features = test.drop(columns=['Id', 'file_path'])
Pred = lr_reg.predict(test_features)

In [None]:
# Pair each test Id with its linear-regression prediction.
rid = pd.DataFrame({'Id': test['Id']}).assign(Pawpularity=Pred)
rid.head()

In [None]:
# Blend the two models with an unweighted average and write the submission.
blended_score = (cnn['Pawpularity'] + rid['Pawpularity']) / 2
merge = pd.DataFrame({'Id': rid['Id']}).assign(Pawpularity=blended_score)
merge.to_csv('submission.csv', index=False)
merge.head()