In [1]:
from pandas import MultiIndex, Int16Dtype
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use("fivethirtyeight")
sns.set_style('darkgrid')

import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib

from sklearn.preprocessing import MinMaxScaler, StandardScaler, Normalizer, Binarizer, RobustScaler
from sklearn.preprocessing import OneHotEncoder, LabelEncoder, PowerTransformer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.compose import ColumnTransformer


from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import ElasticNet, SGDRegressor, LinearRegression, Lasso
from sklearn.svm import SVR
from sklearn.linear_model import BayesianRidge,LinearRegression, LogisticRegression
from sklearn.kernel_ridge import KernelRidge
from lightgbm import LGBMRegressor
# from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score
from sklearn.pipeline import make_pipeline, Pipeline

from sklearn.model_selection import KFold, ShuffleSplit, LeaveOneOut, StratifiedKFold
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Warnings configuration
# ==============================================================================
import warnings
# warnings.filterwarnings('ignore')

In [2]:
datos = pd.read_csv("../../Data/DataFrame_Final_Cierre_Cluster.csv",parse_dates=['FECHA'])

In [3]:
datos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2117155 entries, 0 to 2117154
Data columns (total 24 columns):
 #   Column               Dtype         
---  ------               -----         
 0   ESTACION             int64         
 1   ANIO                 int64         
 2   MES                  int64         
 3   DIA                  int64         
 4   HORA                 int64         
 5   FECHA                datetime64[ns]
 6   DIA_SEMANA           int64         
 7   AM_PM                object        
 8   TEMPORADA            object        
 9   TEMPORADA_NUM        int64         
 10  Es_Festivo           int64         
 11  Es_FinSemana         int64         
 12  TEMPERATURA          float64       
 13  VIENTO               float64       
 14  PRESION              int64         
 15  HUMEDAD              int64         
 16  PRECIPITACION_1h     float64       
 17  PRECIPITACION_3h     float64       
 18  DESC_TIEMPO          object        
 19  DESC_TIEMPO_detalle  

### Preparación datos

In [9]:

bicimad_def = datos.groupby(['ESTACION','ANIO','MES','DIA','TEMPORADA','DIA_SEMANA', 'Es_Festivo', 'Es_FinSemana'
                          ]).agg(DEMANDA=('DEMANDA', 'sum'),
                                 TEMP_MAX= ('TEMPERATURA','max'),
                                 TEMP_MIN = ('TEMPERATURA','min'),
                                 HUMEDAD = ('HUMEDAD','mean'),
                                 VIENTO = ('VIENTO','mean'),
                                 PRESION = ('PRESION', 'mean'),
                                 PRECIPITACION_1h = ('PRECIPITACION_1h', 'mean'),
                                 PRECIPITACION_3h = ('PRECIPITACION_3h', 'mean'),
                                 DESC_TIEMPO = ('DESC_TIEMPO', lambda x: x.value_counts().index[0]))
bicimad_def = bicimad_def.reset_index()


In [10]:
bicimad_def.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129331 entries, 0 to 129330
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   ESTACION          129331 non-null  int64  
 1   ANIO              129331 non-null  int64  
 2   MES               129331 non-null  int64  
 3   DIA               129331 non-null  int64  
 4   TEMPORADA         129331 non-null  object 
 5   DIA_SEMANA        129331 non-null  int64  
 6   Es_Festivo        129331 non-null  int64  
 7   Es_FinSemana      129331 non-null  int64  
 8   DEMANDA           129331 non-null  int64  
 9   TEMP_MAX          129331 non-null  float64
 10  TEMP_MIN          129331 non-null  float64
 11  HUMEDAD           129331 non-null  float64
 12  VIENTO            129331 non-null  float64
 13  PRESION           129331 non-null  float64
 14  PRECIPITACION_1h  129331 non-null  float64
 15  PRECIPITACION_3h  129331 non-null  float64
 16  DESC_TIEMPO       12

In [11]:
#cat_cols= ['ANIO', 'MES', 'DIA_SEMANA', 'TEMPORADA','DESC_TIEMPO','AM_PM', 'Es_Festivo', 'Es_FinSemana']
#cat_cols= ['MES', 'DIA_SEMANA', 'TEMPORADA', 'Es_Festivo', 'Es_FinSemana','DESC_TIEMPO']
cat_cols= ['TEMPORADA', 'MES', 'DIA_SEMANA','Es_Festivo', 'Es_FinSemana', 'DESC_TIEMPO']

num_cols= ['TEMP_MAX','TEMP_MIN','HUMEDAD','VIENTO','PRESION']

In [14]:
bicimad = bicimad_def.copy()

#bicimad = datos.drop(['CLUSTER_FullCols','TEMPORADA_NUM','CLUSTER_soloDemanda','CLUSTER_soloGeo','DIA','ANIO','DESC_TIEMPO_detalle'], axis=1)


bicimad['FECHA'] =pd.to_datetime({'year': bicimad['ANIO'],
                                          'month': bicimad["MES"]
                                          ,'day':  bicimad['DIA']
                                          },
                                          format='%d-%m-%Y', errors='coerce')

bicimad['MES']   = bicimad['MES'].astype('category')
bicimad['DIA_SEMANA'] = bicimad['DIA_SEMANA'].astype('category')
bicimad['Es_Festivo'] = bicimad['Es_Festivo'].astype('category')
bicimad['Es_FinSemana'] = bicimad['Es_FinSemana'].astype('category')
bicimad['DESC_TIEMPO'] = bicimad['DESC_TIEMPO'].astype('category')

bicimad = pd.get_dummies(bicimad, columns=cat_cols, drop_first=True)

X = bicimad[['TEMP_MAX','TEMP_MIN','HUMEDAD','VIENTO','PRESION','PRECIPITACION_1h','PRECIPITACION_3h']]

scaler = StandardScaler()
bicimad[['TEMP_MAX','TEMP_MIN','HUMEDAD','VIENTO','PRESION','PRECIPITACION_1h','PRECIPITACION_3h']] = scaler.fit_transform(X)

bicimad = bicimad.drop(['ANIO','DIA'], axis=1)

bicimad

Unnamed: 0,ESTACION,DEMANDA,TEMP_MAX,TEMP_MIN,HUMEDAD,VIENTO,PRESION,PRECIPITACION_1h,PRECIPITACION_3h,FECHA,...,DIA_SEMANA_7,Es_Festivo_1,Es_FinSemana_1,DESC_TIEMPO_Clouds,DESC_TIEMPO_Drizzle,DESC_TIEMPO_Fog,DESC_TIEMPO_Mist,DESC_TIEMPO_Rain,DESC_TIEMPO_Snow,DESC_TIEMPO_Thunderstorm
0,1,39,-1.026930,-1.067735,0.725362,1.669834,1.987357,-0.250454,-0.203229,2018-01-01,...,0,1,0,1,0,0,0,0,0,0
1,1,75,-0.832167,-0.403777,0.907454,0.805625,2.143212,-0.250454,-0.203229,2018-01-02,...,0,0,0,1,0,0,0,0,0,0
2,1,68,-0.730798,-0.274473,1.818144,0.479964,2.107721,-0.250454,-0.203229,2018-01-03,...,0,0,0,1,0,0,0,0,0,0
3,1,77,-0.684101,-0.252680,1.492257,0.760986,1.297043,-0.250454,-0.203229,2018-01-04,...,0,0,0,1,0,0,0,0,0,0
4,1,55,-1.038320,-0.226529,1.506126,0.818633,-0.624397,1.093123,0.580350,2018-01-05,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129326,175,87,-0.286600,-0.597008,-0.082428,1.111414,0.251076,-0.250454,-0.203229,2020-02-25,...,0,0,0,1,0,0,0,0,0,0
129327,175,158,-0.548563,-0.551969,-0.413577,0.754113,0.557564,-0.250454,-0.203229,2020-02-26,...,0,0,0,0,0,0,0,0,0,0
129328,175,173,-0.180676,-0.533082,-0.386994,1.215193,0.574207,-0.250454,-0.203229,2020-02-27,...,0,0,0,0,0,0,0,0,0,0
129329,175,166,-0.184093,-0.621707,-0.369102,-0.882947,0.427748,-0.250454,-0.203229,2020-02-28,...,0,0,0,0,0,0,0,0,0,0


In [15]:
bicimad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129331 entries, 0 to 129330
Data columns (total 39 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   ESTACION                  129331 non-null  int64         
 1   DEMANDA                   129331 non-null  int64         
 2   TEMP_MAX                  129331 non-null  float64       
 3   TEMP_MIN                  129331 non-null  float64       
 4   HUMEDAD                   129331 non-null  float64       
 5   VIENTO                    129331 non-null  float64       
 6   PRESION                   129331 non-null  float64       
 7   PRECIPITACION_1h          129331 non-null  float64       
 8   PRECIPITACION_3h          129331 non-null  float64       
 9   FECHA                     129331 non-null  datetime64[ns]
 10  TEMPORADA_OTONO           129331 non-null  uint8         
 11  TEMPORADA_PRIMAVERA       129331 non-null  uint8         
 12  TE

In [16]:
seed = 99

models = list()
models.append(('RFR', RandomForestRegressor(random_state=seed)))
models.append(('GBR', GradientBoostingRegressor(random_state=seed)))
models.append(('LGBMR', LGBMRegressor(random_state=seed)))
models.append(('XGBR', XGBRegressor(random_state=seed)))

In [17]:
Estaciones = [145,58,148,157,163]

# Cluster 0 -> 145
# Cluster 1 -> 58
# Cluster 2 -> 148
# Cluster 3 -> 157
# Cluster 4 -> 163

In [18]:
from sklearn.model_selection import GridSearchCV

# Create the parameter grid based on the results of random search 
param_grid_RF = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3,5,7],
    'min_samples_leaf': [1,2,3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100,500,1000]
}

param_grid_GB = {'learning_rate': [0.01,0.02,0.04, 0.1],
              'subsample'    : [0.9, 0.5, 0.2, 0.1],
              'n_estimators' : [100,500,1000, 1500],
               'max_depth'    : [80, 90, 100,110]
                 }

param_grid_LGBM = {
    'learning_rate': [0.005, 0.01],
    'n_estimators': [8,16,24],
    'num_leaves': [6,8,12,16],
    'boosting_type' : ['gbdt', 'dart'],
    'objective' : ['binary'],
    'max_bin':[255, 510],
    'random_state' : [500],
    'colsample_bytree' : [0.64, 0.65, 0.66],
    'subsample' : [0.7,0.75],
    'reg_alpha' : [1,1.2],
    'reg_lambda' : [1,1.2,1.4],
    }

param_grid_XGB = {
        'min_child_weight': [1, 5, 10],
        'gamma': [0.5, 1, 1.5, 2, 5],
        'subsample': [0.6, 0.8, 1.0],
        'colsample_bytree': [0.6, 0.8, 1.0],
        'max_depth': [80, 90, 100,110]
        }

## Tuneado modelo Random Forest

In [24]:
for i in Estaciones:
    
    bicimad_est = bicimad[bicimad['ESTACION'] == i]
    
    X_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    X_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    y_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')]['DEMANDA']
    y_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')]['DEMANDA']
    
    print()
    print(f'Estacion: {i}')
    print('Columnas: '+str(len(bicimad_est.columns)))
    
    # Create a based model
    rf = RandomForestRegressor(random_state=seed)
    # Instantiate the grid search model
    grid_search = GridSearchCV(estimator = rf, param_grid = param_grid_RF, cv = 3, n_jobs = -1, verbose = 2)
    grid_search.fit(X_train, y_train)
    print(grid_search.best_params_)


Estacion: 145
Columnas: 39
Fitting 3 folds for each of 720 candidates, totalling 2160 fits
{'bootstrap': True, 'max_depth': 80, 'max_features': 7, 'min_samples_leaf': 1, 'min_samples_split': 8, 'n_estimators': 1000}

Estacion: 58
Columnas: 39
Fitting 3 folds for each of 720 candidates, totalling 2160 fits
{'bootstrap': True, 'max_depth': 80, 'max_features': 7, 'min_samples_leaf': 2, 'min_samples_split': 10, 'n_estimators': 500}

Estacion: 148
Columnas: 39
Fitting 3 folds for each of 720 candidates, totalling 2160 fits
{'bootstrap': True, 'max_depth': 80, 'max_features': 7, 'min_samples_leaf': 1, 'min_samples_split': 8, 'n_estimators': 1000}

Estacion: 157
Columnas: 39
Fitting 3 folds for each of 720 candidates, totalling 2160 fits
{'bootstrap': True, 'max_depth': 80, 'max_features': 7, 'min_samples_leaf': 2, 'min_samples_split': 8, 'n_estimators': 100}

Estacion: 163
Columnas: 39
Fitting 3 folds for each of 720 candidates, totalling 2160 fits
{'bootstrap': True, 'max_depth': 80, 'max_

## Tuneado modelo Gradient Boosting

In [26]:
for i in Estaciones:
    
    bicimad_est = bicimad[bicimad['ESTACION'] == i]
    
    X_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    X_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    y_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')]['DEMANDA']
    y_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')]['DEMANDA']
    
    print()
    print(f'Estacion: {i}')
    print('Columnas: '+str(len(bicimad_est.columns)))
    
    # Create a based model
    GB = GradientBoostingRegressor(random_state=seed)
    # Instantiate the grid search model
    grid_search = GridSearchCV(estimator = GB, param_grid = param_grid_GB, cv = 3, n_jobs = -1, verbose = 2)
    grid_search.fit(X_train, y_train)
    print(grid_search.best_params_)
    



Estacion: 145
Columnas: 39
Fitting 3 folds for each of 256 candidates, totalling 768 fits
{'learning_rate': 0.01, 'max_depth': 80, 'n_estimators': 500, 'subsample': 0.2}

Estacion: 58
Columnas: 39
Fitting 3 folds for each of 256 candidates, totalling 768 fits
{'learning_rate': 0.02, 'max_depth': 80, 'n_estimators': 100, 'subsample': 0.2}

Estacion: 148
Columnas: 39
Fitting 3 folds for each of 256 candidates, totalling 768 fits
{'learning_rate': 0.02, 'max_depth': 80, 'n_estimators': 100, 'subsample': 0.2}

Estacion: 157
Columnas: 39
Fitting 3 folds for each of 256 candidates, totalling 768 fits
{'learning_rate': 0.02, 'max_depth': 80, 'n_estimators': 100, 'subsample': 0.2}

Estacion: 163
Columnas: 39
Fitting 3 folds for each of 256 candidates, totalling 768 fits
{'learning_rate': 0.02, 'max_depth': 80, 'n_estimators': 100, 'subsample': 0.2}


## Tuneado modelo LBGM

In [22]:
for i in Estaciones:
    
    bicimad_est = bicimad[bicimad['ESTACION'] == i]
    
    X_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    X_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    y_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')]['DEMANDA']
    y_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')]['DEMANDA']
    
    print()
    print(f'Estacion: {i}')
    print('Columnas: '+str(len(bicimad_est.columns)))
    
    # Create a based model
    LGBM = LGBMRegressor(random_state=seed)
    # Instantiate the grid search model
    grid_search = GridSearchCV(estimator = LGBM, param_grid = param_grid_LGBM, cv = 3, n_jobs = -1, verbose = 2)
    grid_search.fit(X_train, y_train)
    print(grid_search.best_params_)


Estacion: 145
Columnas: 39
Fitting 3 folds for each of 3456 candidates, totalling 10368 fits
{'boosting_type': 'gbdt', 'colsample_bytree': 0.64, 'learning_rate': 0.005, 'max_bin': 255, 'n_estimators': 8, 'num_leaves': 6, 'objective': 'binary', 'random_state': 500, 'reg_alpha': 1, 'reg_lambda': 1, 'subsample': 0.7}

Estacion: 58
Columnas: 39
Fitting 3 folds for each of 3456 candidates, totalling 10368 fits
{'boosting_type': 'gbdt', 'colsample_bytree': 0.64, 'learning_rate': 0.005, 'max_bin': 255, 'n_estimators': 8, 'num_leaves': 6, 'objective': 'binary', 'random_state': 500, 'reg_alpha': 1, 'reg_lambda': 1, 'subsample': 0.7}

Estacion: 148
Columnas: 39
Fitting 3 folds for each of 3456 candidates, totalling 10368 fits
{'boosting_type': 'gbdt', 'colsample_bytree': 0.64, 'learning_rate': 0.005, 'max_bin': 255, 'n_estimators': 8, 'num_leaves': 6, 'objective': 'binary', 'random_state': 500, 'reg_alpha': 1, 'reg_lambda': 1, 'subsample': 0.7}

Estacion: 157
Columnas: 39
Fitting 3 folds for ea

## Tuneado XGBoost

In [27]:
for i in Estaciones:
    
    bicimad_est = bicimad[bicimad['ESTACION'] == i]
    
    X_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    X_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    y_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')]['DEMANDA']
    y_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')]['DEMANDA']
    
    print()
    print(f'Estacion: {i}')
    print('Columnas: '+str(len(bicimad_est.columns)))
    
    # Create a based model
    XGB = XGBRegressor(random_state=seed)
    # Instantiate the grid search model
    grid_search = GridSearchCV(estimator = XGB, param_grid = param_grid_XGB, cv = 3, n_jobs = -1, verbose = 2)
    grid_search.fit(X_train, y_train)
    print(grid_search.best_params_)


Estacion: 145
Columnas: 39
Fitting 3 folds for each of 540 candidates, totalling 1620 fits
{'colsample_bytree': 1.0, 'gamma': 1, 'max_depth': 80, 'min_child_weight': 1, 'subsample': 0.8}

Estacion: 58
Columnas: 39
Fitting 3 folds for each of 540 candidates, totalling 1620 fits
{'colsample_bytree': 0.8, 'gamma': 2, 'max_depth': 80, 'min_child_weight': 1, 'subsample': 0.8}

Estacion: 148
Columnas: 39
Fitting 3 folds for each of 540 candidates, totalling 1620 fits
{'colsample_bytree': 1.0, 'gamma': 1, 'max_depth': 80, 'min_child_weight': 5, 'subsample': 1.0}

Estacion: 157
Columnas: 39
Fitting 3 folds for each of 540 candidates, totalling 1620 fits
{'colsample_bytree': 0.6, 'gamma': 1, 'max_depth': 80, 'min_child_weight': 5, 'subsample': 1.0}

Estacion: 163
Columnas: 39
Fitting 3 folds for each of 540 candidates, totalling 1620 fits
{'colsample_bytree': 1.0, 'gamma': 5, 'max_depth': 80, 'min_child_weight': 5, 'subsample': 0.6}


# NO BORRAR

In [None]:
# Create a based model
rf = RandomForestRegressor()
# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 3, n_jobs = -1, verbose = 2)

In [None]:
for i in Estaciones:
    
    bicimad_est = bicimad[bicimad['ESTACION'] == i]
    
    X_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    X_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
    y_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')]['DEMANDA']
    y_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')]['DEMANDA']
    
    print(f'Estacion: {i}')
    print('Columnas: '+str(len(bicimad_est.columns)))
    
    for name, model in models:
        resultado = model.fit(X_train, y_train)
        score = model.score(X_train, y_train, sample_weight=None)
        #score_cv = cross_val_score(model, X, y, cv=5)
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        rmse = mse**(0.5)
        print(f'Model: {name} ') 
        print('R^2 train: {:0.2f}.'.format(score))
        print("MSE: %.2f" % mse)
        print("RMSE: %.2f" % rmse)
        print()

In [None]:
bicimad_est = bicimad[bicimad['ESTACION'] == 58]
    
X_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
X_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')].drop(['DEMANDA',"FECHA",'ESTACION'], axis=1)
y_train = bicimad_est.loc[(bicimad["FECHA"] <= '2020-02-15 23:59:00')]['DEMANDA']
y_test = bicimad_est.loc[(bicimad["FECHA"] >= '2020-02-16 00:00:00')]['DEMANDA']


model = RandomForestRegressor(random_state=seed,
                                 bootstrap = True,
                                 max_depth = 100,
                              max_features = 3,
                          min_samples_leaf = 3,
                         min_samples_split = 12,
                              n_estimators = 100)
resultado = model.fit(X_train, y_train)
predictions = resultado.predict(X_test)
score =model.score(X_test, y_test, sample_weight=None)


print(score)
plt.figure(figsize=(20,8))
plt.plot(y_test.values, label='Original')
plt.plot(predictions, color='red', label='Predicction')
plt.legend()

feat_importances = pd.DataFrame(model.feature_importances_, index=X_train.columns, columns=["Importance"])
feat_importances.sort_values(by='Importance', ascending=False, inplace=True)
feat_importances[:30]


In [None]:
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedKFold

cv = RepeatedKFold(n_splits=3, n_repeats=5, random_state=123)
cv_scores = cross_validate(
                estimator = model,
                X         = X_train,
                y         = y_train,
                scoring   = ('r2', 'neg_root_mean_squared_error'),
                cv        = cv,
                return_train_score = True
            )

cv_scores = pd.DataFrame(cv_scores)
cv_scores