In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import ExtraTreeRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor

In [4]:
def leer_csv(ruta):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    ruta : str, path-like or file-like
        Location of the CSV; forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, using ``pd.read_csv`` default options.
    """
    tabla = pd.read_csv(ruta)
    return tabla

In [5]:
# Fraction of the rows held out for validation, and the seed that makes the split repeatable.
TEST_SIZE = 0.25
RANDOM_STATE = 42

def separar_train_test(df_train, logaritmico):
    """Split the training frame into train/validation features and target.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Frame that must contain both the 'precio' and 'log_precio' columns;
        every other column is used as a feature.
    logaritmico : bool
        If truthy the target is 'log_precio', otherwise 'precio'.

    Returns
    -------
    The four-element result of ``train_test_split(X, y, ...)``, i.e.
    (X_train, X_test, y_train, y_test), split with TEST_SIZE and RANDOM_STATE.

    Raises
    ------
    KeyError
        If either target column is missing from ``df_train``.
    """
    # DataFrame.drop returns a new frame (inplace defaults to False), so the
    # caller's frame is untouched and no defensive copy is needed beforehand.
    X = df_train.drop(columns = ['precio', 'log_precio'])
    y = df_train['log_precio'] if logaritmico else df_train['precio']
    return train_test_split(X, y, test_size = TEST_SIZE, random_state = RANDOM_STATE)
    

In [10]:
RUTA_TRAIN = 'data/train_preproc.csv'
# Not read by predecir(); kept so other cells that reference it keep working.
RUTA_TEST = 'data/test_preproc.csv'

def predecir(modelos, nombres_modelos, logaritmico):
    """Fit each model on a split of the training CSV and report its validation MAE.

    The training data is read from RUTA_TRAIN and split by
    ``separar_train_test``. Every model is fitted on the training part and
    scored on the held-out part with mean absolute error. Progress is printed
    to stdout. No predictions are produced for the RUTA_TEST file.

    Parameters
    ----------
    modelos : iterable
        Estimators exposing ``fit(X, y)`` and ``predict(X)``. They are fitted
        in place.
    nombres_modelos : iterable of str
        Display names, parallel to ``modelos``.
    logaritmico : bool
        If truthy, models are trained on 'log_precio' and both predictions and
        targets are passed through ``np.exp`` before scoring.

    Returns
    -------
    list of (str, str)
        One ``(name, score)`` tuple per model, both already converted to str.

    Raises
    ------
    ValueError
        If ``modelos`` and ``nombres_modelos`` have different lengths.
    """
    # Materialise so plain iterators are accepted and lengths can be compared.
    modelos = list(modelos)
    nombres_modelos = list(nombres_modelos)
    # zip() would silently skip the unmatched models; fail before any I/O instead.
    if len(modelos) != len(nombres_modelos):
        raise ValueError(
            'modelos and nombres_modelos must have the same length, got '
            + str(len(modelos)) + ' and ' + str(len(nombres_modelos))
        )

    df_train = leer_csv(RUTA_TRAIN)

    X_train, X_test, Y_train, Y_test = separar_train_test(df_train, logaritmico)

    resultado = []

    for nombre, modelo in zip(nombres_modelos, modelos):
        print('------------------------------------------------')
        print('Comienza a entrenar: '+ nombre)
        print()

        modelo.fit(X_train, Y_train)
        prediccion = modelo.predict(X_test)

        # mean_absolute_error expects (y_true, y_pred).
        if logaritmico:
            # Undo the log so the error is on the original price scale.
            # NOTE(review): assumes log_precio is the natural log of precio - confirm
            # against the preprocessing step (np.log1p would need np.expm1 here).
            score = mean_absolute_error(np.exp(Y_test), np.exp(prediccion))
        else:
            score = mean_absolute_error(Y_test, prediccion)

        resultado.append((str(nombre), str(score)))

        print('**FINALIZADO** ' + nombre + ' Score: ' + str(score))
        print()
    return resultado



In [13]:
# Train on 'log_precio' rather than 'precio'.
CON_PRECIO_LOGARITMICO = True

# Every estimator is seeded with RANDOM_STATE so that, together with the seeded
# train/validation split, the printed scores are reproducible on a fresh kernel.
# MODELOS and NOMBRES_MODELOS are parallel lists: keep them in the same order.
MODELOS = [
    RandomForestRegressor(n_estimators = 200, max_depth = 500, n_jobs = -1, random_state = RANDOM_STATE),
    XGBRegressor(random_state = RANDOM_STATE),
    BaggingRegressor(random_state = RANDOM_STATE),
]
NOMBRES_MODELOS = ['RandomForestRegressor', 'XGBoost', 'Bagging']

resultado = predecir(MODELOS, NOMBRES_MODELOS, CON_PRECIO_LOGARITMICO)

------------------------------------------------
Comienza a entrenar: RandomForestRegressor

**FINALIZADO** RandomForestRegressor Score: 573173.0531633393

------------------------------------------------
Comienza a entrenar: XGBoost

**FINALIZADO** XGBoost Score: 674079.7111432704

------------------------------------------------
Comienza a entrenar: Bagging

**FINALIZADO** Bagging Score: 607716.6846137382

