## Importação de Bibliotecas

In [None]:
import pandas as pd
import numpy as np

## Importação dos Dados

In [None]:
# Training set: every column except the last is used as a feature, and the
# last column is used as the regression target.
# copy=True guarantees writable arrays that do not alias the DataFrames: the
# scaling step further down assigns into these arrays in place, and `.values`
# may hand back a view (or a read-only array under pandas Copy-on-Write).
df_train = pd.read_csv("data/train.csv")
X_train = df_train.iloc[:, :-1].to_numpy(copy=True)
y_train = df_train.iloc[:, -1].to_numpy(copy=True)

# Kaggle submission set: all columns are features (no target column is read).
df_kaggle = pd.read_csv("data/test.csv")
X_kaggle = df_kaggle.to_numpy(copy=True)

## Utilização de *Scaler* para padronizar colunas

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize feature columns 84..88 (slice 84:89) in place; all other columns
# are left untouched. Because the arrays are overwritten, re-running this cell
# without reloading the data would standardize already-standardized values.
# NOTE(review): writing floats back into X_train presumes a float (or object)
# dtype; an integer array would silently truncate the scaled values — confirm.
sc = StandardScaler()
X_train[:, 84:89] = sc.fit_transform(X_train[:, 84:89])

# Standardize the target too. The scaler needs 2-D input, so y_train becomes a
# single-column array of shape (n, 1). sc_y is kept to undo the scaling on
# predictions.
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train.reshape(-1, 1))

# The Kaggle features must be scaled with the statistics learned from the
# training data. Fitting a separate scaler on them would feed the model inputs
# standardized with a different mean/std than the ones it was trained on.
X_kaggle[:, 84:89] = sc.transform(X_kaggle[:, 84:89])
# Kept as an alias so any code that still refers to `sc_kaggle` keeps working.
sc_kaggle = sc

## Utilização de *Gradient Boosting* com a biblioteca *CatBoost* para treinar modelo de regressão

In [None]:
from catboost import CatBoostRegressor

# Regressor built with the library's default settings (no arguments are
# passed) and trained on the full training arrays.
regressor = CatBoostRegressor()
regressor.fit(X=X_train, y=y_train)

## Avaliação preliminar do modelo de regressão utilizando *cross validation score*

In [None]:
from sklearn.model_selection import cross_val_score
from datetime import datetime as dt

# Taken once so that everything stamped during this run uses the same value.
timestamp = dt.today()

# 10-fold cross-validation on the training data. No `scoring` argument is
# passed, so each fold uses the estimator's own default score.
# NOTE(review): for a regressor that default is presumably R^2, not a
# classification accuracy — the "Accuracy" label / "accuracy" log column are
# kept unchanged so existing log files stay consistent; confirm and rename.
accuracies = cross_val_score(estimator=regressor, X=X_train, y=y_train, cv=10)
mean_score = accuracies.mean()
score_std = accuracies.std()
print(f"Accuracy: {mean_score}")
print(f"StdDev: {score_std}")

log_entry = f"\n{timestamp},{mean_score},{score_std}"
try:
    # Mode "x" creates the file and raises FileExistsError if it is already
    # there, so the CSV header is written exactly once.
    with open("data/cross_val.log", mode="x") as cvlog:
        cvlog.write("timestamp,accuracy,stddev")
        cvlog.write(log_entry)
except FileExistsError:
    # Only the "log already exists" case falls back to appending; any other
    # failure (missing directory, permissions, ...) is allowed to surface.
    with open("data/cross_val.log", mode="a") as cvlog:
        cvlog.write(log_entry)

## Predição da regressão do conjunto de teste utilizando o modelo treinado

In [None]:
# Predict in the standardized target space, then map back to the original
# scale. sc_y was fitted on a single-column 2-D array, so the predictions are
# reshaped to a column before inverse_transform and flattened again afterwards,
# leaving y_kaggle one-dimensional (one scalar per row).
y_kaggle_scaled = regressor.predict(X_kaggle)
y_kaggle = sc_y.inverse_transform(y_kaggle_scaled.reshape(-1, 1)).ravel()

## Exportação do CSV para submissão no Kaggle

In [None]:
# Write the submission file, named after the run timestamp (day-month-year and
# hour/minute). The header has no trailing newline; each row starts with one,
# so the file does not end with a blank line.
submission_path = f"data/kaggle/kaggle_{timestamp.strftime('%d-%m-%Y_%Hh%M')}.csv"
with open(submission_path, mode="w") as out:
    out.write("Id,preco")
    # One row per prediction: zero-based position as Id, value with 2 decimals.
    out.writelines(
        f"\n{row_id},{round(price, 2):.2f}"
        for row_id, price in enumerate(y_kaggle)
    )