In [1]:
# Usa un modelo de Gradient Boosted Trees
# para predecir el consumo eléctrico
# y la cantidad de usuarios.

import pandas as pd
import numpy as np

import xgboost as xgb

from joblib import load

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [2]:
# Data locations.
path_drive = "/content/drive/MyDrive/Colab/Subsidio_electricidad/"
fname = "data.csv"

# Climate scenarios and the future years evaluated for each of them.
RCP = ["RCP4p5", "RCP8p5"]
fut = [2030, 2050, 2070]

# Scenario directories to process.
#dirs = ["Actual"] + RCP
dirs = [RCP[1]]
csv = ".csv"

# One file-name prefix per scenario directory: <drive>/data/<scenario>/data.
path_data = [f"{path_drive}data/{scenario}/data." for scenario in dirs]

# One CSV per (scenario, year) pair: <prefix><year>.csv
f_path = [f"{prefix}{year}{csv}" for prefix in path_data for year in fut]

# Serialized regressors: index 0 is the consumption model,
# index 1 is the users model.
model_name = [
    "reg_model_consumo.joblib",
    "reg_model_usuarios.joblib",
]

# Load the trained models.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code;
# only load model files that come from a trusted source.
reg_model = [load(path_drive + name) for name in model_name]

In [13]:
# Run the trained models on every scenario file and write the predictions
# back into that same file.
#
# The cell is safe to re-run: each file is overwritten in place, so on a
# second pass the DAC columns are already gone and must not be required.
for scenario_path in f_path:
    data = pd.read_csv(scenario_path)

    # Tariffs 1 and DAC are reported as a single series, so the separate DAC
    # columns are removed. Their values are not summed into "Consumo_1*" /
    # "Usuarios_1*" because both columns are replaced by model predictions
    # below and neither is used as a feature.
    # errors="ignore" keeps the cell working on files that were already
    # processed (and therefore no longer contain the DAC columns).
    data = data.drop(columns=["Consumo_DAC", "Usuarios_DAC"],
        errors="ignore")

    # Feature set used for the prediction, in the column order given here,
    # renamed to the feature names passed to the models.
    X = data[["Tmean", "CDD_mean", "Pre_Tmean", "$GLP",
        "Poblacion", "PIB", "Año", "CVE_INEGI"]].rename(
        columns={"Poblacion": "Población", "Tmean": "T_mean"})

    # Population and GDP are fed to the models on a log scale.
    X[["Población", "PIB"]] = np.log(X[["Población", "PIB"]])

    # Predict with both models.
    # NOTE(review): the consumption prediction is exponentiated, presumably
    # because that model was fitted on log-consumption, while the users
    # prediction is stored as returned — confirm against the training code.
    data["Consumo_1*"] = np.exp(reg_model[0].predict(X))
    data["Usuarios_1*"] = reg_model[1].predict(X)

    # Overwrite the scenario file with the predicted values.
    data.to_csv(scenario_path, index=False)