# PetFinder: Meta-features
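- Hola amigos, this notebook contains my code for the **PetFinder.my - Pawpularity Contest**, which can be found [here](https://www.kaggle.com/c/petfinder-pawpularity-score). It trains a regressor on the tabular meta-features from `train.csv` to predict the `Pawpularity` score and writes a `submission.csv`.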

# Importing Packages

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tqdm import tqdm
from joblib import dump, load
from catboost import CatBoostRegressor

# Scikit-Learn Imports
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

# Importing the CSV(s)

In [None]:
# Read the competition tables and keep the regression target as its own Series.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/petfinder-pawpularity-score/train.csv",
        "../input/petfinder-pawpularity-score/test.csv",
    )
)
train_label = train.loc[:, 'Pawpularity']

# Quick shape check: train frame, test frame, target vector.
print(*(obj.shape for obj in (train, test, train_label)))

In [None]:
# Preview the first rows of the training table.
train.head()

# Extracting the meta-features

In [None]:
# Feature matrices: everything except the image identifier and, for the
# training table, the target column.
train_meta = train.drop(['Id', 'Pawpularity'], axis = 1)
test_meta = test.drop(['Id'], axis = 1)

# Shapes of the two feature matrices.
print(*(frame.shape for frame in (train_meta, test_meta)))

# Training the Model

In [None]:
# Hold out 15% of the training rows for validation.
# random_state is pinned so the split (and therefore every validation RMSE
# computed from it) is identical on each Restart & Run All; without it the
# split is reshuffled on every run and scores are not comparable between runs.
X_train, X_val, y_train, y_val = train_test_split(
    train_meta, train_label, test_size = 0.15, random_state = 7)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

In [None]:
# Disabled experiment (kept for reference, not executed): fits a plain
# LinearRegression on the hold-out split and prints training / validation RMSE.
# NOTE(review): `squared=False` may be deprecated or removed in newer
# scikit-learn releases -- confirm against the installed version before re-enabling.
# # Linear Regression
# lr = LinearRegression()
# lr.fit(X_train, y_train)
# y_train_preds = lr.predict(X_train)
# y_val_preds = lr.predict(X_val)

# print("Training RMSE ->", mean_squared_error(y_train, y_train_preds, squared=False))
# print("Validation RMSE ->", mean_squared_error(y_val, y_val_preds, squared=False))

In [None]:
# Disabled experiment (kept for reference, not executed): RandomForestRegressor.
# The doubly-commented lines are the GridSearchCV over `params`; the singly-
# commented lines refit with one fixed parameter set and print training /
# validation RMSE on the hold-out split.
# NOTE(review): `max_features="auto"` and `squared=False` may be rejected by
# newer scikit-learn releases -- confirm against the installed version before
# re-enabling.
# # Random Forest Model
# params = {
#     'n_estimators': [50, 100, 150, 200, 250],
#     'max_depth': [None, 25, 20, 15],
#     'min_samples_split': [2, 4, 6],
#     'max_features': ["auto", "sqrt", "log2"]
# }

# # rfr = RandomForestRegressor()
# # clf = GridSearchCV(rfr, param_grid = params, n_jobs = -1)
# # clf.fit(X_train, y_train)
# # print(clf.best_params_)

# # Training with best params
# rfr = RandomForestRegressor(max_depth = 25, max_features = 'log2', 
#     min_samples_split = 6, n_estimators = 100)
# rfr.fit(X_train, y_train)

# y_train_preds = rfr.predict(X_train)
# y_val_preds = rfr.predict(X_val)

# print("Training RMSE ->", mean_squared_error(y_train, y_train_preds, squared=False))
# print("Validation RMSE ->", mean_squared_error(y_val, y_val_preds, squared=False))

In [None]:
# Disabled experiment (kept for reference, not executed): GradientBoostingRegressor.
# The doubly-commented lines are the GridSearchCV over `params`; the singly-
# commented lines refit with one fixed parameter set and print training /
# validation RMSE on the hold-out split.
# NOTE(review): `max_features="auto"` and `squared=False` may be rejected by
# newer scikit-learn releases -- confirm against the installed version before
# re-enabling.
# # Gradient Boosting Model
# params = {
#     'n_estimators': [50, 100, 150, 200, 250],
#     'max_depth': [None, 3, 5, 7],
#     'min_samples_split': [2, 4, 6],
#     'max_features': ["auto", "sqrt", "log2"]
# }

# # gbr = GradientBoostingRegressor()
# # clf = GridSearchCV(gbr, param_grid = params, n_jobs = -1)
# # clf.fit(X_train, y_train)
# # print(clf.best_params_)

# # Training with best params
# gbr = GradientBoostingRegressor(max_depth = 3, max_features = 'log2',
#     min_samples_split = 2, n_estimators = 50)
# gbr.fit(X_train, y_train)

# y_train_preds = gbr.predict(X_train)
# y_val_preds = gbr.predict(X_val)

# print("Training RMSE ->", mean_squared_error(y_train, y_train_preds, squared=False))
# print("Validation RMSE ->", mean_squared_error(y_val, y_val_preds, squared=False))

In [None]:
# Disabled experiment (kept for reference, not executed): CatBoostRegressor
# grid search over iterations, learning rate, random seed and depth on the
# hold-out training split, followed by training / validation RMSE.
# NOTE(review): `squared=False` may be deprecated or removed in newer
# scikit-learn releases -- confirm against the installed version before re-enabling.
# # CatBoost
# params = {
#     'iterations': [50, 100, 150, 200, 250, 300],
#     'learning_rate': [None, 0.0001, 0.001, 0.01, 0.1, 1],
#     'random_seed': [7, 42],
#     'depth': [11, None],
#     'verbose': [0]
# }

# model = CatBoostRegressor()
# grid_search_result = model.grid_search(params, X=X_train, y=y_train)

# y_train_preds = model.predict(X_train)
# y_val_preds = model.predict(X_val)

# print("Training RMSE ->", mean_squared_error(y_train, y_train_preds, squared=False))
# print("Validation RMSE ->", mean_squared_error(y_val, y_val_preds, squared=False))

In [None]:
# Disabled inspection call (not executed); presumably lists the parameters of
# the CatBoost `model` -- TODO confirm against the CatBoost docs.
# model.get_all_params()

# Making the Submission

In [None]:
# Final model: refit CatBoost on every training row (no hold-out) using a
# fixed set of hyper-parameters, so the test predictions use all labelled data.
final_params = {
    'iterations': 50,
    'learning_rate': 0.1,
    'random_seed': 7,
    'depth': 11,
    'verbose': 0,
}
model = CatBoostRegressor(**final_params)
model.fit(train_meta, train_label)

In [None]:
# Predict the test set and write the two-column submission file
# (Id, Pawpularity) without the DataFrame index.
y_test_preds = model.predict(test_meta)
submission = pd.DataFrame({
    'Id': test['Id'],
    'Pawpularity': y_test_preds,
})
submission.to_csv('submission.csv', index = False)

In [None]:
# Sanity-check the first rows of the submission table.
submission.head()