## Imports 

In [4]:
import common.common_machine_learning as common
import pandas as pd
import numpy as np
from category_encoders import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import time

### Ignore Future Warnings 

In [6]:
from warnings import simplefilter

# Install an "ignore" filter for FutureWarning so those messages are not
# shown by the cells that run after this one.
simplefilter('ignore', FutureWarning)

## Constantes 

In [5]:
# Name of the column that is split off the training set as the label.
TARGET = "precio"

# Random seed constant; 3 is the author's favourite (lucky) number.
RANDOM_SEMILLA = 3

## Carga y preprocesamiento inicial del set de datos

In [15]:
# Read both CSV files through the project loader, passing index_col=0
# (presumably the first CSV column becomes the index -- confirm in `common`).
train = common.cargar_set_optimizado('sets_de_datos/train.csv', index_col=0)
X_test = common.cargar_set_optimizado('sets_de_datos/test.csv', index_col=0)

# Separate the label from the features; copies keep both independent of `train`.
y_train = train[TARGET].copy()
X_train = train.drop(columns=[TARGET]).copy()

# Apply the project's column-removal helper to both feature sets
# (which columns it removes is defined in `common`, not here).
X_train, X_test = (
    common.eliminar_columnas_complejas(frame) for frame in (X_train, X_test)
)

# Missing values in the categorical columns are replaced by the literal
# string 'nan', identically for train and test.
_categorical_fill = {
    column: 'nan' for column in ('tipodepropiedad', 'provincia', 'ciudad')
}
X_train = X_train.fillna(value=_categorical_fill)
X_test = X_test.fillna(value=_categorical_fill)

## Pipeline 

In [8]:
# Feature groups, named once so every step below refers to the same columns.
categorical_columns = ['tipodepropiedad', 'provincia', 'ciudad']
mean_imputed_columns = ['metrostotales', 'metroscubiertos', 'antiguedad']
zero_imputed_columns = ['habitaciones', 'banos', 'garages']

# Per-column preprocessing.
# ColumnTransformer drops every column that is not listed in one of its
# transformers (default remainder='drop'). The ordinal-encoded categorical
# columns are therefore listed explicitly with 'passthrough'; without that
# entry the encoder's output is discarded and never reaches the model.
# Any other column of the input is still dropped, as before.
columns_pipe = ColumnTransformer(transformers=[
    # Missing numeric values replaced by the column mean.
    ('nan_to_mean', SimpleImputer(strategy='mean'), mean_imputed_columns),
    # Missing values replaced by the constant 0.
    ('nan_to_cero', SimpleImputer(strategy='constant', fill_value=0), zero_imputed_columns),
    # Keep the already-encoded categorical columns unchanged.
    ('encoded_categoricals', 'passthrough', categorical_columns),
])

# Encode the categorical columns as integers first, then select/impute.
pre_processor_pipe = Pipeline(steps=[
    ('ordinal_encoder', OrdinalEncoder(cols=categorical_columns)),
    ('columns_pipe', columns_pipe),
])

# Gradient-boosted regressor. The model subsamples rows and columns, so the
# seed is fixed with the notebook's RANDOM_SEMILLA constant to keep runs
# reproducible and tied to a single, visible value.
xgb_model = XGBRegressor(
    objective='reg:squarederror',
    subsample=0.5,
    n_estimators=750,
    min_child_weight=5,
    max_depth=9,
    learning_rate=0.03,
    reg_lambda=1,
    gamma=0.75,
    colsample_bytree=0.6,
    random_state=RANDOM_SEMILLA,
)

# Full pipeline: preprocessing followed by the regressor.
pipe = Pipeline(steps=[
    ('preprocessing', pre_processor_pipe),
    ('xgb_regressor', xgb_model),
])


## Entrenamiento 

In [16]:
# Fit the preprocessing steps and the regressor on the training data.
# The result is assigned back to `pipe`, which also keeps the cell output empty.
pipe = pipe.fit(X=X_train, y=y_train)

## Predicción

In [17]:
# Run the fitted pipeline on the test features to obtain the predictions.
y_pred = pipe.predict(X=X_test)

In [18]:
from pathlib import Path

# Destination of the submission file.
submit_path = Path("submits/submit_xgboost_0.csv")

# DataFrame.to_csv does not create missing directories; make sure the output
# folder exists so the predictions are not lost to an OSError after training.
submit_path.parent.mkdir(parents=True, exist_ok=True)

# One row per test sample, indexed like X_test, with a single 'target' column.
res = pd.DataFrame(y_pred, index=X_test.index, columns=['target'])
res.to_csv(submit_path, header=True)