In [3]:
import os
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import VotingRegressor
import joblib

# Crear la carpeta para guardar los modelos si no existe
output_directory = 'modelos_voting'
os.makedirs(output_directory, exist_ok=True)

# Cargar los datos desde los archivos proporcionados
costos_df = pd.read_excel('Costos_Subprocesos.xlsx')
indicadores_minas_df = pd.read_excel('Indicadores_Minas.xlsx')
indicadores_carguio_df = pd.read_excel('Indicadores_Carguio.xlsx')
indicadores_perforacion_df = pd.read_excel('Indicadores_Perforación.xlsx')
indicadores_transporte_df = pd.read_excel('Indicadores_Transporte.xlsx')

# Convertir las fechas a formato datetime en costos
costos_df['Subproceso_Costo'] = pd.to_datetime(costos_df['Subproceso_Costo'])

# Convertir las fechas a formato datetime en los DataFrames de indicadores
for df in [indicadores_minas_df, indicadores_carguio_df, indicadores_perforacion_df, indicadores_transporte_df]:
    df['Fecha'] = pd.to_datetime(df['Fecha'])

# Unir todos los DataFrames de indicadores en uno solo
indicadores_df = indicadores_minas_df.copy()
indicadores_df = pd.merge(indicadores_df, indicadores_carguio_df, on='Fecha', how='inner', suffixes=('_minas', '_carguio'))
indicadores_df = pd.merge(indicadores_df, indicadores_perforacion_df, on='Fecha', how='inner', suffixes=('', '_perforacion'))
indicadores_df = pd.merge(indicadores_df, indicadores_transporte_df, on='Fecha', how='inner', suffixes=('', '_transporte'))

# Unir el DataFrame de costos con el DataFrame combinado de indicadores
df_unificado = pd.merge(costos_df, indicadores_df, left_on='Subproceso_Costo', right_on='Fecha', how='inner')
df_unificado = df_unificado.drop(columns=['Fecha'])  # Eliminar la columna duplicada de Fecha

# Calcular la correlación entre todos los subprocesos y los indicadores usando Spearman
subprocesos_cols = costos_df.columns[1:]  # Excluyendo la columna de fecha
indicadores_cols = indicadores_df.columns[1:]  # Excluyendo la columna de fecha
correlation_matrix = df_unificado.corr(method='spearman')
correlation_filtered = correlation_matrix.loc[subprocesos_cols, indicadores_cols]
top_correlations = correlation_filtered.apply(lambda x: x.nlargest(3), axis=1)

# Definir los modelos base para el Voting Regressor
modelos_base = [
    ('lr', LinearRegression()),
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('gb', GradientBoostingRegressor(n_estimators=100, random_state=42)),
    ('xgb', XGBRegressor(n_estimators=100, random_state=42)),
    ('lgbm', LGBMRegressor(n_estimators=100, random_state=42)),
    ('mlp', MLPRegressor(hidden_layer_sizes=(64, 32), max_iter=500, random_state=42)),
    ('svm', SVR())
]

# Crear el Voting Regressor
voting_regressor = VotingRegressor(estimators=modelos_base)

# Crear un diccionario para almacenar los resultados del Voting Regressor
resultados_voting = {}
indicadores_utilizados = {}
predicciones_por_subproceso = {}

# Escalar los datos
scaler = StandardScaler()

# Crear imputador para reemplazar NaNs con la media
imputer = SimpleImputer(strategy='mean')

X_train_scaled = scaler.fit_transform(imputer.fit_transform(df_unificado[indicadores_cols]))
X_test_scaled = scaler.transform(imputer.transform(df_unificado[indicadores_cols]))

# Realizar predicciones usando el Voting Regressor
for subproceso in subprocesos_cols:
    indicadores_seleccionados = top_correlations.loc[subproceso].dropna().index.tolist()
    
    if len(indicadores_seleccionados) > 0:
        # Guardar los indicadores seleccionados
        indicadores_utilizados[subproceso] = indicadores_seleccionados
        
        # Utilizar todos los indicadores seleccionados para el entrenamiento
        X_train = df_unificado.loc[df_unificado['Subproceso_Costo'] < '2022-01-01', indicadores_seleccionados]
        y_train = df_unificado.loc[df_unificado['Subproceso_Costo'] < '2022-01-01', subproceso]
        X_test = df_unificado.loc[(df_unificado['Subproceso_Costo'] >= '2022-01-01') & (df_unificado['Subproceso_Costo'] < '2023-01-01'), indicadores_seleccionados]
        
        # Imputar valores faltantes en los conjuntos de datos
        X_train = imputer.fit_transform(X_train)
        X_test = imputer.transform(X_test)
        
        # Verificar si y_train tiene valores constantes
        if y_train.nunique() == 1:
            print(f"Advertencia: El subproceso {subproceso} tiene un objetivo constante en el conjunto de entrenamiento.")
            continue  # Omitir este subproceso
        
        # Entrenar el Voting Regressor
        voting_regressor.fit(X_train, y_train)
        
        # Guardar el modelo entrenado para futuras predicciones
        joblib.dump(voting_regressor, os.path.join(output_directory, f'{subproceso}_voting_model.pkl'))
        
        # Predecir para 2022
        y_pred_2022 = voting_regressor.predict(X_test)

        costo_real_2022 = df_unificado.loc[(df_unificado['Subproceso_Costo'] >= '2022-01-01') & (df_unificado['Subproceso_Costo'] < '2023-01-01'), subproceso].values
        
        # Calcular las diferencias y diferencias porcentuales
        diferencias = y_pred_2022 - costo_real_2022
        diferencias_pct = [(dif / real) * 100 if real != 0 else np.inf for dif, real in zip(diferencias, costo_real_2022)]
        
        # Guardar resultados en el diccionario
        for mes, pred, real, dif, dif_pct in zip(pd.date_range('2022-01-01', '2022-12-01', freq='MS').strftime("%B-%Y"), y_pred_2022, costo_real_2022, diferencias, diferencias_pct):
            resultados_voting.setdefault(subproceso, {}).update({
                f'Predicción {mes}': pred,
                f'Costo Real {mes}': real,
                f'Diferencia {mes}': dif,
                f'Diferencia % {mes}': dif_pct
            })
        
        # Guardar predicciones por subproceso para el segundo archivo
        predicciones_por_subproceso[subproceso] = y_pred_2022

# Convertir los resultados a un DataFrame
resultados_voting_df = pd.DataFrame(resultados_voting).T

# Crear un DataFrame con las predicciones por subproceso y mes
predicciones_df = pd.DataFrame(predicciones_por_subproceso, index=pd.date_range('2022-01-01', '2022-12-01', freq='MS').strftime("%B-%Y"))

# Guardar los resultados del Voting Regressor en Excel
output_file_path = 'Resultados_Voting_Regressor.xlsx'
costos_output_path = 'costos_output.xlsx'

with pd.ExcelWriter(output_file_path) as writer:
    resultados_voting_df.to_excel(writer, sheet_name='Voting Regressor Resultados')

with pd.ExcelWriter(costos_output_path) as writer:
    predicciones_df.to_excel(writer, sheet_name='Predicciones')

# Imprimir la precisión total del Voting Regressor
precision_voting = resultados_voting_df.replace([np.inf, 0], np.nan).mean().mean()
print(f"Precisión total del Voting Regressor: {precision_voting:.2f}%")

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000026 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 28
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 2096037.888204




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 8927.441654




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000038 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 54
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 71561.957995




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 663863.909143




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 206333.743343




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 27
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 44222.329115




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000021 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 158.248964
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000011 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 25
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 1
[LightGBM] [Info] Start training from score -8177.180524




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 59
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score -1053681.014965




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000018 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 59
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 26804.568747




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000023 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 48
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 3099379.349032




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 1592.648905




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 50
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 267413.581364




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000054 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 763746.741569




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000033 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 26
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 99843.868075




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000026 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 53
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score -424379.378631




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 60
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score -28511.950993




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 59
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 1140245.026408




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 43
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score -452614.231734




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000027 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 33
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 2
[LightGBM] [Info] Start training from score 4419.559474




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000031 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 30
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 1635327.253521




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 2395.057824




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 62
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 76408.711185




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 172152.482173




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 7.215477




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000031 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 25
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 32488.039778




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 33
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score -299383.182191




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000029 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 29
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 12530.959991




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 620843.702766




Advertencia: El subproceso SERVICIOS TERCEROS LLANO tiene un objetivo constante en el conjunto de entrenamiento.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000030 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 22
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 145.688909




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 63
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 4866286.535211




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000020 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 9
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 1
[LightGBM] [Info] Start training from score 2607.664278




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000023 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 36
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 7344.702252




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 2611.895136




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000020 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 61
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 3242788.214789




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000022 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 50
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 259986.235035




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000028 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 17
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 2
[LightGBM] [Info] Start training from score 114131.410982




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 654518.553037




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000025 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 52
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score -2132579.272007




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 32
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 2
[LightGBM] [Info] Start training from score 58030.332334




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 61
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 10313200.985915




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 2177.378727




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000028 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 50
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 525884.621699




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000019 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 70
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 1201823.479313




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000037 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 25
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 206173.385288




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 61
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 514469.238996




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000017 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 65
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 1805589.748019




[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 0
[LightGBM] [Info] Start training from score 51086.248239




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000022 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 50
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 103318.359455




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000015 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 262665.343709




Advertencia: El subproceso TRONADURA TESORO NOR ESTE tiene un objetivo constante en el conjunto de entrenamiento.
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000031 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 25
[LightGBM] [Info] Number of data points in the train set: 71, number of used features: 3
[LightGBM] [Info] Start training from score 43871.248519
Precisión total del Voting Regressor: 476106257578.32%




In [5]:
import numpy as np

# Reemplaza los valores infinitos o extremadamente grandes en las diferencias porcentuales
resultados_voting_df = resultados_voting_df.replace([np.inf, -np.inf], np.nan)

# Limita el rango de las diferencias porcentuales para evitar distorsiones
diferencias_limited = resultados_voting_df.applymap(lambda x: np.nan if abs(x) > 100 else x)

# Calcular la precisión total como la media de todas las diferencias (excluyendo valores fuera del rango establecido)
precision_voting = diferencias_limited.mean().mean()
print(f"Precisión total del Voting Regressor: {precision_voting:.2f}%")

Precisión total del Voting Regressor: -10.04%


  diferencias_limited = resultados_voting_df.applymap(lambda x: np.nan if abs(x) > 100 else x)


In [7]:
import os
print(os.getcwd())

C:\Users\l.vilches\Documents\GitHub\Hackathon2-2024\1. Desarollo\3. Analisis de bases de indicadores
