In [None]:
import os
import warnings

# Set before TensorFlow is imported further down; presumably this quiets
# TensorFlow's native (C++) log output.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Suppress every Python warning for the rest of the notebook.
warnings.filterwarnings("ignore")

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler,PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.callbacks import LearningRateScheduler
import xgboost as xg
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error

# Reading data

In [None]:
# Load the competition's train and test tables in one pass.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/petfinder-pawpularity-score/train.csv",
        "../input/petfinder-pawpularity-score/test.csv",
    )
)

In [None]:
# Preview the first five rows of the training table.
train_df.head(n=5)

In [None]:
# Preview the first five rows of the test table.
test_df.head(n=5)

In [None]:
# Print (rows, columns) of the training table, then of the test table.
for frame in (train_df, test_df):
    print(frame.shape)

# Preparing data

In [None]:
# Feature matrix: every column except the first and the last one.
X = np.array(train_df.iloc[:, 1:-1])
# Regression target: the 'Pawpularity' column.
y = np.array(train_df.loc[:, 'Pawpularity'])

In [None]:
# Hold out 15% of the rows for evaluation; the fixed seed keeps the split
# identical between runs.
xtr, xte, ytr, yte = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=244,
)

# Building models and checking performance

## Random Forest Regressor

In [None]:
# Random forest baseline.
# random_state is pinned (same seed as the train/test split) so that the
# RMSE printed below and any predictions later taken from model1 are
# reproducible; without it every run grows a different forest.
model1 = RandomForestRegressor(max_depth=15, random_state=244)

model1.fit(xtr, ytr)

predictions = model1.predict(xte)

# Root-mean-squared error on the hold-out split.
print("Error: ", np.sqrt(mean_squared_error(yte, predictions)))

## XGBoost

In [None]:
# Gradient-boosted trees with a squared-error objective.
xgb_params = {
    "n_estimators": 512,
    "max_depth": 20,
    "objective": "reg:squarederror",
}
model2 = xg.XGBRegressor(**xgb_params)
model2.fit(xtr, ytr)

predictions = model2.predict(xte)

# Root-mean-squared error on the hold-out split.
xgb_rmse = np.sqrt(mean_squared_error(yte, predictions))
print("Error: ", xgb_rmse)

## Linear Regression (polynomial space)

In [None]:
# Compare plain linear regression on polynomial expansions of the features.
# Only degrees 2-4 are evaluated. The degree 5-7 expansions that used to be
# built here were never fed to a model, and the number of generated columns
# grows combinatorially with the degree, so they only cost time and memory.
poly2 = PolynomialFeatures(degree=2)
poly3 = PolynomialFeatures(degree=3)
poly4 = PolynomialFeatures(degree=4)

model3 = LinearRegression()

for degree, poly in ((2, poly2), (3, poly3), (4, poly4)):
    # Fit the expansion on the training split only, then apply that same
    # fitted transformer to the hold-out split (transform, not fit_transform).
    xtr_poly = poly.fit_transform(xtr)
    xte_poly = poly.transform(xte)

    # model3 is refitted for every degree; after the loop it holds the
    # degree-4 fit and `predictions` holds the degree-4 predictions.
    model3.fit(xtr_poly, ytr)
    predictions = model3.predict(xte_poly)
    print(f"Polynomial Regression error(degree={degree}): ", np.sqrt(mean_squared_error(yte, predictions)))

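As we can see, the degree-2 polynomial gave fairly good results. Let's fit it on the whole dataset and measure its error there (note that this is the error on the same data the model was fitted on, not a hold-out estimate).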

In [None]:
# Degree-2 expansion of the full training table, computed once and reused
# for both fitting and prediction.
X2 = poly2.fit_transform(X)

# Refit on all rows. The error printed below is therefore measured on the
# very data the model was fitted on (in-sample), not on held-out rows.
model3.fit(X2, y)
predictions = model3.predict(X2)
print("Polynomial Regression error(degree=2): ", np.sqrt(mean_squared_error(y, predictions)))

## Neural network — why not?

In [None]:
# Small fully connected regressor: three hidden ReLU layers of 128 units
# followed by a single linear output unit.
model4 = Sequential()
model4.add(InputLayer(input_shape=(xtr.shape[1],)))

for _ in range(3):
    model4.add(Dense(128, activation="relu", kernel_initializer="normal"))

model4.add(Dense(1, activation="linear", kernel_initializer="normal"))

optim = Adam(
    learning_rate=0.0005,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    name="Adam",
)

# Pass the optimizer object configured above. The string 'adam' would build
# a fresh default Adam and silently ignore the learning rate set here.
# `metrics` is given as a list; the later evaluate() calls read entry 1 of
# the result, i.e. the value that follows the loss.
model4.compile(loss="mse", optimizer=optim, metrics=["mse"])
model4.summary()

In [None]:
# Train for 100 epochs in mini-batches of 32, reporting loss and metric on
# the hold-out split after every epoch.
model4.fit(
    xtr,
    ytr,
    batch_size=32,
    epochs=100,
    validation_data=(xte, yte),
)

In [None]:
# Entry 1 of evaluate()'s result is taken as the MSE; its square root is
# reported, first for the hold-out split and then for all training rows.
holdout_scores = model4.evaluate(xte, yte, verbose=0)
print("Error on test data: ", np.sqrt(holdout_scores[1]))

full_scores = model4.evaluate(X, y, verbose=0)
print("Error on whole data: ", np.sqrt(full_scores[1]))

# Saving results

In [None]:
# Features for the submission: every test column except the first one.
submission_x = np.array(test_df.iloc[:, 1:])
# Identifiers that accompany each prediction in the submission file.
Id = np.array(test_df.loc[:, 'Id'])

Here we can pick any of the models above and find out which one is best by submitting its predictions. As an example, let's use the Random Forest results.

In [None]:
# Predict with the random forest and write the two-column submission file.
predictions = model1.predict(submission_x)

submission_df = pd.DataFrame({
    'Id': Id,
    'Pawpularity': predictions,
})
submission_df.to_csv('submission.csv', index=False)