In [27]:
import sys
import os

# Add the src directory to the system path
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

# Now you can import the functions from utils.py
from utils import load_predictions, get_gold_data, score_data, save_predictions

import loguru as logging
import pandas as pd
import glob
import joblib
import os
import numpy as np
import requests
import zipfile
import pendulum

# Initialize the logger
logger = logging.logger

# set plotly as the default plotting library
pd.options.plotting.backend = "plotly"

# Variables
LEVELS = [0.9, 0.95, 0.99]

In [28]:
def download_and_extract_model():
    github_token = os.environ.get('GITHUB_TOKEN')
    if not github_token:
        logger.warning("GITHUB_TOKEN not found in environment variables. Skipping model download.")
        return

    url = "https://api.github.com/repos/pedroachagas/energy_demand/actions/artifacts"
    headers = {"Authorization": f"token {github_token}"}

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        artifacts = response.json()["artifacts"]

        logger.info(f"Found {len(artifacts)} artifacts")
        artifact_names = [artifact["name"] for artifact in artifacts]
        logger.info(f"Available artifacts: {', '.join(artifact_names)}")

        model_artifacts = [artifact for artifact in artifacts if artifact["name"] == "trained-model"]
        if not model_artifacts:
            raise ValueError("No 'trained-model' artifact found")

        model_artifact = model_artifacts[0]
        logger.info(f"Downloading artifact: {model_artifact['name']}")
        download_url = model_artifact["archive_download_url"]
        zip_content = requests.get(download_url, headers=headers).content

        # Save as zip file
        with open("model.zip", "wb") as zip_file:
            zip_file.write(zip_content)

        # Extract zip file
        with zipfile.ZipFile("model.zip", "r") as zip_ref:
            zip_ref.extractall("model_folder")

        # Find the extracted joblib file
        joblib_files = glob.glob("model_folder/*.joblib")
        if not joblib_files:
            raise ValueError("No .joblib file found in the extracted contents")

        logger.info(f"Model file extracted: {joblib_files[0]}")

    except Exception as e:
        logger.error(f"Error in download_and_extract_model: {str(e)}")
        raise

def update_predictions(df_hist, preds):
    # Ensure 'ds' column is of datetime type in both dataframes
    df_hist['ds'] = pd.to_datetime(df_hist['ds'])
    preds['ds'] = pd.to_datetime(preds['ds'])

    # Merge the dataframes on 'ds'
    merged = pd.merge(preds, df_hist[['ds', 'y']], on='ds', how='left', suffixes=('_pred', '_hist'))

    # Update 'y' column in merged dataframe
    merged['y'] = merged['y_hist'].fillna(merged['y_pred'])

    # Drop unnecessary columns
    updated_preds = merged.drop(['y_pred', 'y_hist'], axis=1)

    # Ensure the columns are in the same order as the original preds dataframe
    updated_preds = updated_preds[preds.columns]

    return updated_preds

In [29]:
# Download and extract the trained model
download_and_extract_model()

# Find the extracted joblib file
joblib_files = glob.glob("model_folder/*.joblib")
if not joblib_files:
    logger.error("No .joblib file found. Unable to proceed with scoring.")

model_path = joblib_files[0]

# Load the trained model
try:
    model = joblib.load(model_path)
    logger.info(f"Model loaded successfully from {model_path}")
except Exception as e:
    logger.error(f"Error loading model: {str(e)}")
    raise

[32m2024-08-26 11:52:28.905[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_and_extract_model[0m:[36m15[0m - [1mFound 2 artifacts[0m
[32m2024-08-26 11:52:28.905[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_and_extract_model[0m:[36m17[0m - [1mAvailable artifacts: trained-model, trained-model[0m
[32m2024-08-26 11:52:28.906[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_and_extract_model[0m:[36m24[0m - [1mDownloading artifact: trained-model[0m
[32m2024-08-26 11:52:28.905[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_and_extract_model[0m:[36m17[0m - [1mAvailable artifacts: trained-model, trained-model[0m
[32m2024-08-26 11:52:28.906[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_and_extract_model[0m:[36m24[0m - [1mDownloading artifact: trained-model[0m
[32m2024-08-26 11:52:32.718[0m | [1mINFO    [0m | [36m__main__[0m:[36mdownload_and_extract_model[0m:[36m41[0m - [1mModel file extracted: model_folder

In [30]:
# Get the latest data
df_hist = get_gold_data()
df_hist

[32m2024-08-26 11:52:32.797[0m | [1mINFO    [0m | [36mutils[0m:[36mget_gold_data[0m:[36m181[0m - [1mFetching data from Gold layer for date[0m


Unnamed: 0,ds,y,unique_id
0,2021-01-01,517292.1985,0
1,2021-01-02,663260.0435,0
2,2021-01-03,656525.3775,0
3,2021-01-04,796937.2295,0
4,2021-01-05,875973.0590,0
...,...,...,...
1328,2024-08-21,931550.5060,0
1329,2024-08-22,929876.4890,0
1330,2024-08-23,934259.6355,0
1331,2024-08-24,855712.3575,0


In [31]:
# Load existing predictions
existing_predictions = load_predictions()
existing_predictions

[32m2024-08-26 11:52:34.208[0m | [1mINFO    [0m | [36mutils[0m:[36mload_predictions[0m:[36m313[0m - [1mLoading predictions[0m
[32m2024-08-26 11:52:34.584[0m | [1mINFO    [0m | [36mutils[0m:[36mload_predictions[0m:[36m321[0m - [1mLoading predictions from: data/energy_consumption/predictions/predictions_20240826.parquet[0m


Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,2024-10-20,,0,712766.358043,692467.552712,689902.6250,724257.242590,659443.844252,663527.283062,668631.581575,...,733314.735485,735089.810519,736864.885553,738639.960586,740415.035620,747402.363645,754389.691670,761377.019695,764870.683708,767665.614918
128,2024-10-21,,0,834457.443438,837472.069932,813441.5000,847489.372095,781134.929646,785218.368456,790322.666970,...,856546.864990,858321.940024,860097.015057,861872.090091,863647.165125,870634.493150,877621.821175,884609.149200,888102.813213,890897.744423
129,2024-10-22,,0,857685.169414,877190.085476,870827.6250,881413.783335,804362.655623,808446.094433,813550.392946,...,890471.276230,892246.351264,894021.426297,895796.501331,897571.576365,904558.904390,911546.232415,918533.560440,922027.224453,924822.155663
130,2024-10-23,,0,867049.261881,868184.734182,897830.4375,884675.751780,813726.748089,817810.186900,822914.485413,...,893733.244675,895508.319709,897283.394742,899058.469776,900833.544810,907820.872835,914808.200860,921795.528885,925289.192898,928084.124108


In [32]:
from utils import get_gold_data, train_model, split_data
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from load_dotenv import load_dotenv

import joblib

# Initialize logger
logger = logging.logger

# Load environment variables
load_dotenv()

LEVELS = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
RANDOM_STATE = 0
N_ESTIMATORS = 100

# Load environment variables
MODEL_START_DATE = os.environ["MODEL_START_DATE"]
MODEL_SPLIT_DATE = '2024-08-13'

print('Dates:\n')
print(f'MODEL_START_DATE: {MODEL_START_DATE}')
print(f'MODEL_SPLIT_DATE: {MODEL_SPLIT_DATE}')

data = get_gold_data()
df_train, df_oot = split_data(data, MODEL_START_DATE, MODEL_SPLIT_DATE)
models = [
        CatBoostRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS),
        LGBMRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS),
        XGBRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS),
        RandomForestRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS)
    ]

def create_model(data, models):
    # Train the models
    model_path = "../model_folder/local_model.joblib"
    model = train_model(data, models)
    joblib.dump(model, model_path)

    return model_path

model_path = create_model(df_train, models)

[32m2024-08-26 11:52:35.418[0m | [1mINFO    [0m | [36mutils[0m:[36mget_gold_data[0m:[36m181[0m - [1mFetching data from Gold layer for date[0m


Dates:

MODEL_START_DATE: 2021-02-01
MODEL_SPLIT_DATE: 2024-08-13


[32m2024-08-26 11:52:35.966[0m | [1mINFO    [0m | [36mutils[0m:[36mtrain_model[0m:[36m243[0m - [1mTraining model[0m


Learning rate set to 0.275978
0:	learn: 70774.6686066	total: 779us	remaining: 77.2ms
1:	learn: 58068.3323165	total: 1.19ms	remaining: 58.3ms
2:	learn: 50160.6278422	total: 1.85ms	remaining: 59.9ms
3:	learn: 43511.3050968	total: 2.48ms	remaining: 59.5ms
4:	learn: 38833.1035645	total: 3.14ms	remaining: 59.7ms
5:	learn: 35419.0805799	total: 3.76ms	remaining: 58.9ms
6:	learn: 33237.4263180	total: 4.47ms	remaining: 59.4ms
7:	learn: 31375.5576179	total: 5.07ms	remaining: 58.3ms
8:	learn: 29826.4886356	total: 5.92ms	remaining: 59.9ms
9:	learn: 28855.5233298	total: 6.62ms	remaining: 59.6ms
10:	learn: 27963.6056265	total: 7.2ms	remaining: 58.3ms
11:	learn: 27308.4279913	total: 7.78ms	remaining: 57.1ms
12:	learn: 26763.3891598	total: 8.48ms	remaining: 56.7ms
13:	learn: 26327.5390211	total: 9.29ms	remaining: 57.1ms
14:	learn: 26027.7827140	total: 9.96ms	remaining: 56.4ms
15:	learn: 25626.6781128	total: 10.5ms	remaining: 55.3ms
16:	learn: 25290.1811191	total: 11.1ms	remaining: 54.3ms
17:	learn: 24

In [33]:
# load the model
model = joblib.load(model_path)
model.predict(1)

Unnamed: 0,unique_id,ds,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor
0,0,2024-08-13,859596.398764,850939.848937,839761.3125,851227.58824


In [34]:
levels = [10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 99]
forecasts = model.predict(60, level=levels)
forecasts


Prediction intervals are calculated using 1-step ahead cross-validation, with a constant width for all horizons. To vary the error by horizon, pass PredictionIntervals(h=h) to the `prediction_intervals` argument when refitting the model.



Unnamed: 0,unique_id,ds,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,CatBoostRegressor-lo-80,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,0,2024-08-13,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,825670.219322,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
1,0,2024-08-14,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,841294.452362,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
2,0,2024-08-15,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,836357.428666,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473
3,0,2024-08-16,861137.980859,839947.253419,877438.4375,850462.025335,807815.467067,811898.905878,817003.204391,827211.801418,...,859519.51823,861294.593264,863069.668298,864844.743331,866619.818365,873607.14639,880594.474415,887581.80244,891075.466453,893870.397663
4,0,2024-08-17,764442.585897,773187.398652,807096.375,777417.84734,711120.072106,715203.510916,720307.809429,730516.406456,...,786475.340235,788250.415269,790025.490302,791800.565336,793575.64037,800562.968395,807550.29642,814537.624445,818031.288458,820826.219668
5,0,2024-08-18,671796.906147,689779.036894,712674.625,693787.78856,618474.392356,622557.831166,627662.129679,637870.726706,...,702845.281455,704620.356489,706395.431522,708170.506556,709945.58159,716932.909615,723920.23764,730907.565665,734401.229677,737196.160887
6,0,2024-08-19,784012.475135,795677.922422,816174.6875,810691.769685,730689.961343,734773.400154,739877.698667,750086.295693,...,819749.26258,821524.337614,823299.412647,825074.487681,826849.562715,833836.89074,840824.218765,847811.54679,851305.210803,854100.142013
7,0,2024-08-20,820548.768365,837593.494161,862734.125,859579.515265,767226.254573,771309.693384,776413.991897,786622.588923,...,868637.00816,870412.083194,872187.158227,873962.233261,875737.308295,882724.63632,889711.964345,896699.29237,900192.956383,902987.887593
8,0,2024-08-21,845948.581725,857932.816201,863036.1875,871205.35526,792626.067933,796709.506744,801813.805257,812022.402283,...,880262.848155,882037.923189,883812.998223,885588.073256,887363.14829,894350.476315,901337.80434,908325.132365,911818.796378,914613.727588
9,0,2024-08-22,861441.177374,866629.548279,862055.0,875694.910505,808118.663583,812202.102393,817306.400906,827514.997933,...,884752.4034,886527.478434,888302.553467,890077.628501,891852.703535,898840.03156,905827.359585,912814.68761,916308.351623,919103.282833


In [35]:
existing_predictions.groupby('ds')['CatBoostRegressor'].apply(lambda x: pd.isna(x).sum()).plot()

In [36]:
merged = existing_predictions.merge(forecasts, on=['ds', 'unique_id'], how='left', suffixes=('', '_forecast'))

for col in forecasts.columns:
    if col in ['ds', 'unique_id']:
        continue
    merged[col] = merged[col].fillna(merged[col + '_forecast'])

In [37]:
merged.groupby('ds')['CatBoostRegressor'].apply(lambda x: pd.isna(x).sum()).plot()

In [38]:
backfilled_preds = merged.drop([col + '_forecast' for col in forecasts.columns if col not in ['unique_id', 'ds']], axis=1)
backfilled_preds

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127,2024-10-20,,0,712766.358043,692467.552712,689902.6250,724257.242590,659443.844252,663527.283062,668631.581575,...,733314.735485,735089.810519,736864.885553,738639.960586,740415.035620,747402.363645,754389.691670,761377.019695,764870.683708,767665.614918
128,2024-10-21,,0,834457.443438,837472.069932,813441.5000,847489.372095,781134.929646,785218.368456,790322.666970,...,856546.864990,858321.940024,860097.015057,861872.090091,863647.165125,870634.493150,877621.821175,884609.149200,888102.813213,890897.744423
129,2024-10-22,,0,857685.169414,877190.085476,870827.6250,881413.783335,804362.655623,808446.094433,813550.392946,...,890471.276230,892246.351264,894021.426297,895796.501331,897571.576365,904558.904390,911546.232415,918533.560440,922027.224453,924822.155663
130,2024-10-23,,0,867049.261881,868184.734182,897830.4375,884675.751780,813726.748089,817810.186900,822914.485413,...,893733.244675,895508.319709,897283.394742,899058.469776,900833.544810,907820.872835,914808.200860,921795.528885,925289.192898,928084.124108


In [39]:
backfilled_preds.loc[backfilled_preds['ds'].between('2024-08-13', '2024-08-25'), 'y'] = backfilled_preds.loc[backfilled_preds['ds'].between('2024-08-13', '2024-08-25'), 'y'] = np.nan
backfilled_preds = backfilled_preds.drop(backfilled_preds.loc[backfilled_preds['ds'].gt('2024-10-11')].index)
backfilled_preds


Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,2024-10-07,,0,835403.689371,816476.905099,708308.7500,848569.050770,782081.175580,786164.614390,791268.912903,...,857626.543665,859401.618699,861176.693732,862951.768766,864726.843800,871714.171825,878701.499850,885688.827875,889182.491888,891977.423098
115,2024-10-08,,0,855254.904895,865086.493751,804361.9375,881233.646340,801932.391103,806015.829914,811120.128427,...,890291.139235,892066.214269,893841.289302,895616.364336,897391.439370,904378.767395,911366.095420,918353.423445,921847.087458,924642.018668
116,2024-10-09,,0,873553.527089,872980.046435,811485.8125,882957.684500,820231.013297,824314.452107,829418.750621,...,892015.177395,893790.252429,895565.327462,897340.402496,899115.477530,906102.805555,913090.133580,920077.461605,923571.125618,926366.056828
117,2024-10-10,,0,885442.576177,865368.084743,817702.1250,879821.229445,832120.062385,836203.501195,841307.799709,...,888878.722340,890653.797374,892428.872407,894203.947441,895979.022475,902966.350500,909953.678525,916941.006550,920434.670563,923229.601772


In [40]:
backfilled_preds.loc[backfilled_preds['ds'].between('2024-08-10', '2024-08-15')]

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
56,2024-08-10,795148.508,0,,,,,,,,...,,,,,,,,,,
57,2024-08-11,698073.596,0,,,,,,,,...,,,,,,,,,,
58,2024-08-12,805498.244,0,,,,,,,,...,,,,,,,,,,
59,2024-08-13,,0,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
60,2024-08-14,,0,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
61,2024-08-15,,0,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473


In [41]:
backfilled_preds.tail(10)

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
109,2024-10-02,,0,852874.999796,842990.327793,827229.9375,883976.191775,799552.486004,803635.924814,808740.223328,...,893033.68467,894808.759704,896583.834737,898358.909771,900133.984805,907121.31283,914108.640855,921095.96888,924589.632893,927384.564103
110,2024-10-03,,0,865294.664666,832279.583563,835079.375,881943.90568,811972.150875,816055.589685,821159.888198,...,891001.398575,892776.473609,894551.548642,896326.623676,898101.69871,905089.026735,912076.35476,919063.682785,922557.346798,925352.278007
111,2024-10-04,,0,853766.099312,825303.539127,814772.0625,868345.122255,800443.58552,804527.024331,809631.322844,...,877402.61515,879177.690184,880952.765217,882727.840251,884502.915285,891490.24331,898477.571335,905464.89936,908958.563372,911753.494582
112,2024-10-05,,0,795684.658214,757989.036783,746983.125,816786.776935,742362.144422,746445.583233,751549.881746,...,825844.26983,827619.344864,829394.419897,831169.494931,832944.569965,839931.89799,846919.226015,853906.55404,857400.218052,860195.149262
113,2024-10-06,,0,714372.49707,677737.625985,654663.75,723839.63348,661049.983279,665133.422089,670237.720602,...,732897.126375,734672.201409,736447.276443,738222.351476,739997.42651,746984.754535,753972.08256,760959.410585,764453.074598,767248.005808
114,2024-10-07,,0,835403.689371,816476.905099,708308.75,848569.05077,782081.17558,786164.61439,791268.912903,...,857626.543665,859401.618699,861176.693732,862951.768766,864726.8438,871714.171825,878701.49985,885688.827875,889182.491888,891977.423098
115,2024-10-08,,0,855254.904895,865086.493751,804361.9375,881233.64634,801932.391103,806015.829914,811120.128427,...,890291.139235,892066.214269,893841.289302,895616.364336,897391.43937,904378.767395,911366.09542,918353.423445,921847.087458,924642.018668
116,2024-10-09,,0,873553.527089,872980.046435,811485.8125,882957.6845,820231.013297,824314.452107,829418.750621,...,892015.177395,893790.252429,895565.327462,897340.402496,899115.47753,906102.805555,913090.13358,920077.461605,923571.125618,926366.056828
117,2024-10-10,,0,885442.576177,865368.084743,817702.125,879821.229445,832120.062385,836203.501195,841307.799709,...,888878.72234,890653.797374,892428.872407,894203.947441,895979.022475,902966.3505,909953.678525,916941.00655,920434.670563,923229.601772
118,2024-10-11,,0,879483.560061,849598.463143,799233.8125,867437.20173,826161.046269,830244.48508,835348.783593,...,876494.694625,878269.769659,880044.844692,881819.919726,883594.99476,890582.322785,897569.65081,904556.978835,908050.642847,910845.574057


In [42]:
# Update the predictions
updated_predictions = update_predictions(df_hist, backfilled_preds)
updated_predictions

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,2024-10-07,,0,835403.689371,816476.905099,708308.7500,848569.050770,782081.175580,786164.614390,791268.912903,...,857626.543665,859401.618699,861176.693732,862951.768766,864726.843800,871714.171825,878701.499850,885688.827875,889182.491888,891977.423098
115,2024-10-08,,0,855254.904895,865086.493751,804361.9375,881233.646340,801932.391103,806015.829914,811120.128427,...,890291.139235,892066.214269,893841.289302,895616.364336,897391.439370,904378.767395,911366.095420,918353.423445,921847.087458,924642.018668
116,2024-10-09,,0,873553.527089,872980.046435,811485.8125,882957.684500,820231.013297,824314.452107,829418.750621,...,892015.177395,893790.252429,895565.327462,897340.402496,899115.477530,906102.805555,913090.133580,920077.461605,923571.125618,926366.056828
117,2024-10-10,,0,885442.576177,865368.084743,817702.1250,879821.229445,832120.062385,836203.501195,841307.799709,...,888878.722340,890653.797374,892428.872407,894203.947441,895979.022475,902966.350500,909953.678525,916941.006550,920434.670563,923229.601772


In [43]:
updated_predictions.loc[backfilled_preds['ds'].between('2024-08-10', '2024-08-30')]

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
56,2024-08-10,795148.508,0,,,,,,,,...,,,,,,,,,,
57,2024-08-11,698073.596,0,,,,,,,,...,,,,,,,,,,
58,2024-08-12,805498.244,0,,,,,,,,...,,,,,,,,,,
59,2024-08-13,844564.07,0,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
60,2024-08-14,856677.381,0,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
61,2024-08-15,870365.0395,0,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473
62,2024-08-16,895989.212,0,861137.980859,839947.253419,877438.4375,850462.025335,807815.467067,811898.905878,817003.204391,...,859519.51823,861294.593264,863069.668298,864844.743331,866619.818365,873607.14639,880594.474415,887581.80244,891075.466453,893870.397663
63,2024-08-17,832527.095,0,764442.585897,773187.398652,807096.375,777417.84734,711120.072106,715203.510916,720307.809429,...,786475.340235,788250.415269,790025.490302,791800.565336,793575.64037,800562.968395,807550.29642,814537.624445,818031.288458,820826.219668
64,2024-08-18,744501.777,0,671796.906147,689779.036894,712674.625,693787.78856,618474.392356,622557.831166,627662.129679,...,702845.281455,704620.356489,706395.431522,708170.506556,709945.58159,716932.909615,723920.23764,730907.565665,734401.229677,737196.160887
65,2024-08-19,880713.222,0,784012.475135,795677.922422,816174.6875,810691.769685,730689.961343,734773.400154,739877.698667,...,819749.26258,821524.337614,823299.412647,825074.487681,826849.562715,833836.89074,840824.218765,847811.54679,851305.210803,854100.142013


In [44]:
data = updated_predictions[['ds', 'y', 'unique_id']].dropna()
data

Unnamed: 0,ds,y,unique_id
0,2024-06-15,816371.1310,0
1,2024-06-16,727934.9640,0
2,2024-06-17,851129.9800,0
3,2024-06-18,886222.7220,0
4,2024-06-19,890934.7610,0
...,...,...,...
67,2024-08-21,931550.5060,0
68,2024-08-22,929876.4890,0
69,2024-08-23,934259.6355,0
70,2024-08-24,855712.3575,0


In [45]:
from mlforecast.utils import PredictionIntervals
model.update(data
)

TypeError: MLForecast.update() got an unexpected keyword argument 'prediction_intervals'

In [None]:
# Make predictions
forecast_df = model.predict(h=60, level=LEVELS)
forecast_df.head()


Prediction intervals are calculated using 1-step ahead cross-validation, with a constant width for all horizons. To vary the error by horizon, pass PredictionIntervals(h=h) to the `prediction_intervals` argument when refitting the model.



Unnamed: 0,unique_id,ds,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,CatBoostRegressor-lo-80,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,0,2024-08-26,892136.042255,894759.774012,913286.625,875502.80911,838813.528463,842896.967274,848001.265787,858209.862813,...,884560.302005,886335.377039,888110.452072,889885.527106,891660.60214,898647.930165,905635.25819,912622.586215,916116.250227,918911.181437
1,0,2024-08-27,903343.867499,938535.886747,950724.625,896651.166825,850021.353707,854104.792518,859209.091031,869417.688057,...,905708.65972,907483.734754,909258.809787,911033.884821,912808.959855,919796.28788,926783.615905,933770.94393,937264.607943,940059.539153
2,0,2024-08-28,903253.2379,909894.508262,927987.125,904956.81628,849930.724108,854014.162919,859118.461432,869327.058458,...,914014.309175,915789.384209,917564.459242,919339.534276,921114.60931,928101.937335,935089.26536,942076.593385,945570.257398,948365.188608
3,0,2024-08-29,910292.73336,880569.838494,890455.5,910689.598865,856970.219568,861053.658379,866157.956892,876366.553918,...,919747.09176,921522.166794,923297.241828,925072.316861,926847.391895,933834.71992,940822.047945,947809.37597,951303.039983,954097.971193
4,0,2024-08-30,909484.002948,820248.122717,856603.9375,916236.12301,856161.489157,860244.927967,865349.22648,875557.823507,...,925293.615905,927068.690939,928843.765973,930618.841006,932393.91604,939381.244065,946368.57209,953355.900115,956849.564128,959644.495338


In [25]:
# Merge the predictions with the original data
forecast_df = pd.concat([updated_predictions, forecast_df], axis=0).drop_duplicates(subset=['ds'], keep='last')
forecast_df

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55,2024-10-20,,0,712766.358043,692467.552712,689902.6250,724257.242590,659443.844252,663527.283062,668631.581575,...,733314.735485,735089.810519,736864.885553,738639.960586,740415.035620,747402.363645,754389.691670,761377.019695,764870.683708,767665.614918
56,2024-10-21,,0,834457.443438,837472.069932,813441.5000,847489.372095,781134.929646,785218.368456,790322.666970,...,856546.864990,858321.940024,860097.015057,861872.090091,863647.165125,870634.493150,877621.821175,884609.149200,888102.813213,890897.744423
57,2024-10-22,,0,857685.169414,877190.085476,870827.6250,881413.783335,804362.655623,808446.094433,813550.392946,...,890471.276230,892246.351264,894021.426297,895796.501331,897571.576365,904558.904390,911546.232415,918533.560440,922027.224453,924822.155663
58,2024-10-23,,0,867049.261881,868184.734182,897830.4375,884675.751780,813726.748089,817810.186900,822914.485413,...,893733.244675,895508.319709,897283.394742,899058.469776,900833.544810,907820.872835,914808.200860,921795.528885,925289.192898,928084.124108


In [22]:
forecast_df.loc[forecast_df['ds'].between('2024-08-10', '2024-08-25')]

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
56,2024-08-10,795148.508,0,,,,,,,,...,,,,,,,,,,
57,2024-08-11,698073.596,0,,,,,,,,...,,,,,,,,,,
58,2024-08-12,805498.244,0,,,,,,,,...,,,,,,,,,,
59,2024-08-13,844564.07,0,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
60,2024-08-14,856677.381,0,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
61,2024-08-15,870365.0395,0,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473
62,2024-08-16,895989.212,0,861137.980859,839947.253419,877438.4375,850462.025335,807815.467067,811898.905878,817003.204391,...,859519.51823,861294.593264,863069.668298,864844.743331,866619.818365,873607.14639,880594.474415,887581.80244,891075.466453,893870.397663
63,2024-08-17,832527.095,0,764442.585897,773187.398652,807096.375,777417.84734,711120.072106,715203.510916,720307.809429,...,786475.340235,788250.415269,790025.490302,791800.565336,793575.64037,800562.968395,807550.29642,814537.624445,818031.288458,820826.219668
64,2024-08-18,744501.777,0,671796.906147,689779.036894,712674.625,693787.78856,618474.392356,622557.831166,627662.129679,...,702845.281455,704620.356489,706395.431522,708170.506556,709945.58159,716932.909615,723920.23764,730907.565665,734401.229677,737196.160887
65,2024-08-19,880713.222,0,784012.475135,795677.922422,816174.6875,810691.769685,730689.961343,734773.400154,739877.698667,...,819749.26258,821524.337614,823299.412647,825074.487681,826849.562715,833836.89074,840824.218765,847811.54679,851305.210803,854100.142013


In [26]:
# Save updated predictions
process_date = pendulum.now().to_date_string().replace("-", "")
save_predictions(forecast_df, process_date)

[32m2024-08-26 11:43:51.708[0m | [1mINFO    [0m | [36mutils[0m:[36msave_predictions[0m:[36m299[0m - [1mSaving predictions to Gold layer[0m
[32m2024-08-26 11:43:52.978[0m | [1mINFO    [0m | [36mutils[0m:[36msave_predictions[0m:[36m310[0m - [1mPredictions saved to Gold layer: abfs://data/energy_consumption/predictions/predictions_20240826.parquet[0m
