In [1]:
#!pip install xgboost

In [2]:
import os
import pandas as pd
import numpy as np

import psycopg2
import sqlalchemy 
from sqlalchemy import create_engine
from sqlalchemy import text

import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error

from datetime import timedelta

In [3]:
def conexion_RDS():

    with open("credentials/bbdd.json") as f:
        bbdd = json.load(f)
    
    host=bbdd["host"]
    database=bbdd["database"]
    user=bbdd["user"]
    password=bbdd["password"]

    engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}/{database}")
    #Bloque de test - cierra automáticamente la conexión
    with engine.connect() as conn:
        rows = conn.execute(text("SELECT 1")).all()
        df = pd.read_sql_table(table_name = "demanda_electrica", con = conn)
        print(df.head())

    connection = engine.connect()
    df = pd.read_sql_table(table_name = "demanda_electrica", con = connection)
    connection.close()
    engine.dispose()

    return df
    


In [4]:
def clean_df(df):
    df_clean=df.copy()
    #limpiamos dataframe de lo que no nos interesa
    df_clean["converted_datetime_utc_date"] = pd.to_datetime(df_clean["datetime_utc"], utc=True)
    df_clean = df_clean.drop(columns = ["datetime", "datetime_utc", "tz_time", "geo_id", "geo_name"])
    #introducimos 1 dias anterior 2 y 7
    df_clean["past_day"]=df_clean["value"].shift(1)
    df_clean["past_2day"]=df_clean["value"].shift(2)
    df_clean["past_7day"]=df_clean["value"].shift(7)
    #features especiales
    df_clean["day_week"] = df_clean["converted_datetime_utc_date"].dt.dayofweek    # 0=lunes, 6=domingo
    df_clean["day"] = df_clean["converted_datetime_utc_date"].dt.day
    df_clean["month"] = df_clean["converted_datetime_utc_date"].dt.month
    df_clean["year"] = df_clean["converted_datetime_utc_date"].dt.year
    #dias a futuro
    df_clean["nextday"]=df_clean["value"].shift(-1)
    df_clean["next2day"]=df_clean["value"].shift(-2)
    df_clean["next3day"]=df_clean["value"].shift(-3)

    return df_clean
    
    
    

In [5]:
def train_test(df):
    corte  = df_clean["converted_datetime_utc_date"].max() - pd.Timedelta(days=60)

    test_df = df_clean[df_clean["converted_datetime_utc_date"] >= corte]
    train_df = df_clean[df_clean["converted_datetime_utc_date"] <  corte]
    
    print(f"Train: \n Fecha min: {train_df["converted_datetime_utc_date"].min()} \n Fecha max: {train_df["converted_datetime_utc_date"].max()} \n Len: {len(train_df)}")
    print(f"Test: \n Fecha min: {test_df["converted_datetime_utc_date"].min()} \n Fecha max: {test_df["converted_datetime_utc_date"].max()} \n Len: {len(test_df)}")
        
    X_train=train_df[["value","past_day","past_2day","past_7day","day_week","day","month","year"]]
    y_train=train_df["nextday"]
    y_train2=train_df["next2day"]
    y_train3=train_df["next3day"]
    X_test=test_df[["value","past_day","past_2day","past_7day","day_week","day","month","year"]]
    y_test=test_df["nextday"]
    y_test2=test_df["next2day"]
    y_test3=test_df["next3day"]

    return X_train,X_test,y_train,y_train2,y_train3,y_test,y_test2,y_test3



In [6]:
def modelo(X_train,X_test,y_train,y_train2,y_train3,y_test,y_test2,y_test3):
    #para un dia
    model1 = xgb.XGBRegressor(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8
    )
    
    model1.fit(X_train,y_train,eval_set=[(X_test,y_test)],verbose=False)
    
    #para 2 dias
    model2 = xgb.XGBRegressor(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8
    )
    
    model2.fit(X_train,y_train2,eval_set=[(X_test,y_test2)],verbose=False)
    
    #para 3 dias
    model3 = xgb.XGBRegressor(
        n_estimators=500,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.8,
        colsample_bytree=0.8
    )
    
    model3.fit(X_train,y_train3,eval_set=[(X_test,y_test3)],verbose=False)

    return(model1,model2,model3)
    

In [7]:
def pred_dias(dias,prediccion,model1,model2,model3,X_test,y_test):
    if dias==1:
        nextday=model1.predict(prediccion)[0]
        y_pred=model1.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred)
        return(nextday,mae)
        
    if dias==2:
        next2days=model2.predict(prediccion)[0]
        y_pred2=model2.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred2)
        return(next2days,mae)
        
    if dias==3:
        next3days=model3.predict(prediccion)[0]
        y_pred3=model3.predict(X_test)
        mae = mean_absolute_error(y_test, y_pred3)
        return(next3days,mae)

In [8]:
if not os.path.isfile("demanda.csv"):

    df=conexion_RDS()
    df.to_csv("demanda.csv",index=False)

else:
    df=pd.read_csv("demanda.csv")
    print(df.head(20))

        value                       datetime          datetime_utc  \
0   3187298.0  2014-01-01T00:00:00.000+01:00  2013-12-31T23:00:00Z   
1   3916726.0  2014-01-02T00:00:00.000+01:00  2014-01-01T23:00:00Z   
2   3957472.0  2014-01-03T00:00:00.000+01:00  2014-01-02T23:00:00Z   
3   3726719.0  2014-01-04T00:00:00.000+01:00  2014-01-03T23:00:00Z   
4   3511386.0  2014-01-05T00:00:00.000+01:00  2014-01-04T23:00:00Z   
5   3309795.0  2014-01-06T00:00:00.000+01:00  2014-01-05T23:00:00Z   
6   4153137.0  2014-01-07T00:00:00.000+01:00  2014-01-06T23:00:00Z   
7   4255484.0  2014-01-08T00:00:00.000+01:00  2014-01-07T23:00:00Z   
8   4298819.0  2014-01-09T00:00:00.000+01:00  2014-01-08T23:00:00Z   
9   4292097.0  2014-01-10T00:00:00.000+01:00  2014-01-09T23:00:00Z   
10  3867730.0  2014-01-11T00:00:00.000+01:00  2014-01-10T23:00:00Z   
11  3601448.0  2014-01-12T00:00:00.000+01:00  2014-01-11T23:00:00Z   
12  4334742.0  2014-01-13T00:00:00.000+01:00  2014-01-12T23:00:00Z   
13  4433714.0  2014-

In [9]:
df_testeo=clean_df(df)
df_clean=df_testeo.dropna()
X_train,X_test,y_train,y_train2,y_train3,y_test,y_test2,y_test3=train_test(df_clean)
model1,model2,model3=modelo(X_train,X_test,y_train,y_train2,y_train3,y_test,y_test2,y_test3)

Train: 
 Fecha min: 2014-01-07 23:00:00+00:00 
 Fecha max: 2025-07-13 22:00:00+00:00 
 Len: 4206
Test: 
 Fecha min: 2025-07-14 22:00:00+00:00 
 Fecha max: 2025-09-12 22:00:00+00:00 
 Len: 61


In [10]:
X_train

Unnamed: 0,value,past_day,past_2day,past_7day,day_week,day,month,year
7,4255484.0,4153137.0,3309795.0,3187298.0,1,7,1,2014
8,4298819.0,4255484.0,4153137.0,3916726.0,2,8,1,2014
9,4292097.0,4298819.0,4255484.0,3957472.0,3,9,1,2014
10,3867730.0,4292097.0,4298819.0,3726719.0,4,10,1,2014
11,3601448.0,3867730.0,4292097.0,3511386.0,5,11,1,2014
...,...,...,...,...,...,...,...,...
4208,8705226.0,8742427.0,8484419.0,9245079.0,2,9,7,2025
4209,8460125.0,8705226.0,8742427.0,9069531.0,3,10,7,2025
4210,7140880.0,8460125.0,8705226.0,8000435.0,4,11,7,2025
4211,6469054.0,7140880.0,8460125.0,7337905.0,5,12,7,2025


In [12]:
prediccion=df_testeo.iloc[-1:][["value","past_day","past_2day","past_7day","day_week","day","month","year"]]
dias=2
valor_predecido,error=pred_dias(dias,prediccion,model1,model2,model3,X_test,y_test)
print(f"la prediccion para {dias} dias seria: {valor_predecido} con el MAE: {error}")

la prediccion para 2 dias seria: 8092269.0 con el MAE: 472785.6967213115
