In [29]:
import sys
from pathlib import Path

# Add the src folder to the system path
sys.path.append(str(Path().resolve().parent / 'src'))

from config.config import config

import loguru as logging
import pandas as pd
import pyarrow.dataset as ds
import glob
import joblib
import os
import numpy as np
import requests
import zipfile
import pendulum
from adlfs import AzureBlobFileSystem

# Initialize the logger
logger = logging.logger

# set plotly as the default plotting library
pd.options.plotting.backend = "plotly"

In [30]:
def get_azure_blob_fs() -> AzureBlobFileSystem:
    """
    Initialize and return an AzureBlobFileSystem object for the configured Azure storage account.

    Returns:
        AzureBlobFileSystem: The Azure Blob File System object.
    """
    return AzureBlobFileSystem(
        account_name=config.ACC_NAME,
        account_key=config.ACC_KEY,
        container_name=config.CONTAINER_NAME
    )

def load_gold_data() -> pd.DataFrame:
    """
    Load the latest Gold layer data from Azure Blob Storage into a Pandas DataFrame.

    Returns:
        pd.DataFrame: The Gold layer data with columns renamed and sorted by date.
    """
    gold_blob_path = f"{config.CONTAINER_NAME}/{config.FOLDER}/gold/"

    # Find the latest file
    logger.info(f"Finding the latest file in {gold_blob_path}")
    abfs = get_azure_blob_fs()
    files = abfs.ls(gold_blob_path)
    latest_file = max(files, key=lambda x: x.split("/")[-1])
    gold_blob_path = latest_file

    # Load the Gold Parquet file
    logger.info(f"Loading Gold data from {gold_blob_path}")
    pqdata = ds.dataset(gold_blob_path, filesystem=abfs)

    return (
        pqdata
        .to_table()
        .to_pandas()
        .reset_index(drop=True)
        .assign(
            date=lambda x: pd.to_datetime(x["date"]),
            unique_id=0
        )
        .rename(columns={
            "date": "ds",
            "daily_carga_mw": "y",
        })
        .sort_values("ds")
    )

def load_predictions() -> pd.DataFrame:
    """
    Load the latest predictions data from Azure Blob Storage into a Pandas DataFrame.

    Returns:
        pd.DataFrame: The predictions data sorted by date.
    """
    predictions_blob_path = f"{config.CONTAINER_NAME}/{config.FOLDER}/predictions/"

    # Find the latest file
    abfs = get_azure_blob_fs()
    files = abfs.ls(predictions_blob_path)
    latest_file = max(files, key=lambda x: x.split("/")[-1])
    predictions_blob_path = latest_file

    # Load the predictions Parquet file
    logger.info(f"Loading predictions from {predictions_blob_path}")
    pqdata = ds.dataset(predictions_blob_path, filesystem=abfs)

    return (
        pqdata
        .to_table()
        .to_pandas()
        .reset_index(drop=True)
        .assign(
            ds=lambda x: pd.to_datetime(x["ds"]),
        )
        .sort_values("ds")
    )

In [31]:
def download_and_extract_model():
    github_token = os.environ.get('GITHUB_TOKEN')
    if not github_token:
        logger.warning("GITHUB_TOKEN not found in environment variables. Skipping model download.")
        return

    url = "https://api.github.com/repos/pedroachagas/energy_demand/actions/artifacts"
    headers = {"Authorization": f"token {github_token}"}

    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        artifacts = response.json()["artifacts"]

        logger.info(f"Found {len(artifacts)} artifacts")
        artifact_names = [artifact["name"] for artifact in artifacts]
        logger.info(f"Available artifacts: {', '.join(artifact_names)}")

        model_artifacts = [artifact for artifact in artifacts if artifact["name"] == "trained-model"]
        if not model_artifacts:
            raise ValueError("No 'trained-model' artifact found")

        model_artifact = model_artifacts[0]
        logger.info(f"Downloading artifact: {model_artifact['name']}")
        download_url = model_artifact["archive_download_url"]
        zip_content = requests.get(download_url, headers=headers).content

        # Save as zip file
        with open("model.zip", "wb") as zip_file:
            zip_file.write(zip_content)

        # Extract zip file
        with zipfile.ZipFile("model.zip", "r") as zip_ref:
            zip_ref.extractall("model_folder")

        # Find the extracted joblib file
        joblib_files = glob.glob("model_folder/*.joblib")
        if not joblib_files:
            raise ValueError("No .joblib file found in the extracted contents")

        logger.info(f"Model file extracted: {joblib_files[0]}")

    except Exception as e:
        logger.error(f"Error in download_and_extract_model: {str(e)}")
        raise

def update_predictions(df_hist, preds):
    # Ensure 'ds' column is of datetime type in both dataframes
    df_hist['ds'] = pd.to_datetime(df_hist['ds'])
    preds['ds'] = pd.to_datetime(preds['ds'])

    # Merge the dataframes on 'ds'
    merged = pd.merge(preds, df_hist[['ds', 'y']], on='ds', how='left', suffixes=('_pred', '_hist'))

    # Update 'y' column in merged dataframe
    merged['y'] = merged['y_hist'].fillna(merged['y_pred'])

    # Drop unnecessary columns
    updated_preds = merged.drop(['y_pred', 'y_hist'], axis=1)

    # Ensure the columns are in the same order as the original preds dataframe
    updated_preds = updated_preds[preds.columns]

    return updated_preds

In [32]:
# # Download and extract the trained model
# download_and_extract_model()

# # Find the extracted joblib file
# joblib_files = glob.glob("model_folder/*.joblib")
# if not joblib_files:
#     logger.error("No .joblib file found. Unable to proceed with scoring.")

# model_path = joblib_files[0]

# # Load the trained model
# try:
#     model = joblib.load(model_path)
#     logger.info(f"Model loaded successfully from {model_path}")
# except Exception as e:
#     logger.error(f"Error loading model: {str(e)}")
#     raise

model_path = "model_folder/model.joblib"

In [33]:
# Get the latest data
df_hist = load_gold_data()
df_hist

[32m2024-08-27 13:55:41.863[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_gold_data[0m:[36m24[0m - [1mFinding the latest file in data/energy_consumption/gold/[0m
[32m2024-08-27 13:55:42.769[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_gold_data[0m:[36m31[0m - [1mLoading Gold data from data/energy_consumption/gold/aggregated_data_20240827.parquet[0m


Unnamed: 0,ds,y,unique_id
0,2021-01-01,517292.1985,0
1,2021-01-02,663260.0435,0
2,2021-01-03,656525.3775,0
3,2021-01-04,796937.2295,0
4,2021-01-05,875973.0590,0
...,...,...,...
1329,2024-08-22,929876.4890,0
1330,2024-08-23,934259.6355,0
1331,2024-08-24,855712.3575,0
1332,2024-08-25,748091.7795,0


In [34]:
# Load existing predictions
existing_predictions = load_predictions()
existing_predictions

[32m2024-08-27 13:55:46.313[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_predictions[0m:[36m66[0m - [1mLoading predictions from data/energy_consumption/predictions/predictions_20240827.parquet[0m


Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128,2024-10-21,,0,803651.407382,769239.075103,816328.0625,798513.861240,762473.822842,763912.531213,765710.916677,...,805537.953570,806356.363486,807174.773403,807993.183319,808811.593235,813422.510235,818033.427235,822644.344235,824949.802735,826794.169535
129,2024-10-22,,0,832695.684901,816920.088404,880758.5000,839956.220215,791518.100361,792956.808732,794755.194196,...,846980.312545,847798.722461,848617.132378,849435.542294,850253.952210,854864.869210,859475.786210,864086.703210,866392.161710,868236.528510
130,2024-10-23,,0,823444.996255,826470.780959,893525.3750,843281.566845,782267.411714,783706.120086,785504.505550,...,850305.659175,851124.069091,851942.479008,852760.888924,853579.298840,858190.215840,862801.132840,867412.049840,869717.508340,871561.875140
131,2024-10-24,,0,815474.443484,821612.337913,891047.3125,831031.642050,774296.858944,775735.567315,777533.952779,...,838055.734380,838874.144296,839692.554213,840510.964129,841329.374045,845940.291045,850551.208045,855162.125045,857467.583545,859311.950345


# Debug database

In [7]:
import os
import numpy as np
from numba import njit
from mlforecast import MLForecast
from mlforecast.utils import PredictionIntervals
from statsmodels.tsa.seasonal import seasonal_decompose
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
import joblib
import pandas as pd
from typing import Tuple, List
from numpy.typing import NDArray

@njit # type: ignore
def diff(x: NDArray[np.float64], lag: int) -> NDArray[np.float64]:
    """
    Compute the difference between each element and its lag.

    Args:
        x (NDArray[np.float64]): Input array.
        lag (int): Number of periods to lag.

    Returns:
        NDArray[np.float64]: Array of lag differences.
    """
    x2 = np.full_like(x, np.nan)
    for i in range(lag, len(x)):
        x2[i] = x[i] - x[i-lag]
    return x2

@njit # type: ignore
def rolling_mean(x: NDArray[np.float64], window: int) -> NDArray[np.float64]:
    """
    Compute the rolling mean over a specified window.

    Args:
        x (NDArray[np.float64]): Input array.
        window (int): Rolling window size.

    Returns:
        NDArray[np.float64]: Array of rolling means.
    """
    x2 = np.full_like(x, np.nan)
    for i in range(window - 1, len(x)):
        x2[i] = np.mean(x[i-window+1:i+1])
    return x2

@njit # type: ignore
def rolling_std(x: NDArray[np.float64], window: int) -> NDArray[np.float64]:
    """
    Compute the rolling standard deviation over a specified window.

    Args:
        x (NDArray[np.float64]): Input array.
        window (int): Rolling window size.

    Returns:
        NDArray[np.float64]: Array of rolling standard deviations.
    """
    x2 = np.full_like(x, np.nan)
    for i in range(window - 1, len(x)):
        x2[i] = np.std(x[i-window+1:i+1])
    return x2

def seasonal_decomposition_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add seasonal decomposition features (trend, seasonality, residual) to the dataframe.

    Args:
        df (pd.DataFrame): Input dataframe with 'ds' as date and 'y' as target columns.

    Returns:
        pd.DataFrame: Dataframe with additional columns for trend, seasonality, and residual.
    """
    result = seasonal_decompose(df.set_index('ds')['y'], model='additive')
    df['trend'] = result.trend
    df['seasonality'] = result.seasonal
    df['residual'] = result.resid
    return df

def train_model(df_train: pd.DataFrame, models: List[object]) -> MLForecast:
    """
    Train the machine learning models using MLForecast.

    Args:
        df_train (pd.DataFrame): Training dataset.
        models (List[object]): List of models to be trained.

    Returns:
        MLForecast: Trained MLForecast object.
    """
    logger.info("Training model")
    return MLForecast(
        models=models,
        freq='D',
        lags=[1, 7, 14, 28],
        lag_transforms={
            1: [
                (rolling_mean, 3),
                (rolling_mean, 7),
                (rolling_mean, 14),
                (rolling_mean, 28),
                (rolling_std, 7),
                (rolling_std, 14),
                (rolling_std, 28),
                (diff, 1),
                (diff, 7),
                (diff, 15),
                (diff, 28)
            ],
        },
        date_features=[
            'month',
            'day',
            'week',
            'dayofyear',
            'quarter',
            'dayofweek',
        ],
        num_threads=12
    ).fit(
        df_train,
        id_col='unique_id',
        time_col='ds',
        target_col='y',
        static_features=[],
        prediction_intervals=PredictionIntervals(n_windows=3, method="conformal_distribution"),
        fitted=True
    )

def split_data(data: pd.DataFrame, start_date: str, split_date: str) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Split the data into training and out-of-time datasets.

    Args:
        data (pd.DataFrame): The complete dataset.
        start_date (str): Start date for the training data.
        split_date (str): Date to split the training and out-of-time data.

    Returns:
        Tuple[pd.DataFrame, pd.DataFrame]: Training data and out-of-time data.
    """
    df_train = data[(data["ds"] >= start_date) & (data["ds"] < split_date)]
    df_oot = data[data["ds"] >= split_date]
    return df_train, df_oot

In [8]:
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from load_dotenv import load_dotenv

import joblib

# Initialize logger
logger = logging.logger

# Load environment variables
load_dotenv()

LEVELS = config.LEVELS
RANDOM_STATE = 0
N_ESTIMATORS = 100

# Load environment variables
MODEL_START_DATE = os.environ["MODEL_START_DATE"]
MODEL_SPLIT_DATE = '2024-08-13'

print('Dates:\n')
print(f'MODEL_START_DATE: {MODEL_START_DATE}')
print(f'MODEL_SPLIT_DATE: {MODEL_SPLIT_DATE}')

data = load_gold_data()
df_train, df_oot = split_data(data, MODEL_START_DATE, MODEL_SPLIT_DATE)
models = [
        CatBoostRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS),
        LGBMRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS),
        XGBRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS),
        RandomForestRegressor(random_state=RANDOM_STATE, n_estimators=N_ESTIMATORS)
    ]

def create_model(data, models):
    # Train the models
    model_path = "../model_folder/local_model.joblib"
    model = train_model(data, models)
    joblib.dump(model, model_path)

    return model_path

model_path = create_model(df_train, models)

[32m2024-08-27 11:38:31.285[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_gold_data[0m:[36m24[0m - [1mFinding the latest file in data/energy_consumption/gold/[0m
[32m2024-08-27 11:38:31.286[0m | [1mINFO    [0m | [36m__main__[0m:[36mload_gold_data[0m:[36m31[0m - [1mLoading Gold data from data/energy_consumption/gold/aggregated_data_20240825.parquet[0m


Dates:

MODEL_START_DATE: 2021-02-01
MODEL_SPLIT_DATE: 2024-08-13


[32m2024-08-27 11:38:37.175[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_model[0m:[36m94[0m - [1mTraining model[0m


Learning rate set to 0.275978
0:	learn: 70774.6686066	total: 49.1ms	remaining: 4.86s
1:	learn: 58068.3323165	total: 50ms	remaining: 2.45s
2:	learn: 50160.6278422	total: 51.3ms	remaining: 1.66s
3:	learn: 43511.3050968	total: 52.1ms	remaining: 1.25s
4:	learn: 38833.1035645	total: 52.9ms	remaining: 1s
5:	learn: 35419.0805799	total: 54.7ms	remaining: 857ms
6:	learn: 33237.4263180	total: 55.9ms	remaining: 743ms
7:	learn: 31375.5576179	total: 57.8ms	remaining: 665ms
8:	learn: 29826.4886356	total: 58.8ms	remaining: 595ms
9:	learn: 28855.5233298	total: 59.8ms	remaining: 538ms
10:	learn: 27963.6056265	total: 62.5ms	remaining: 505ms
11:	learn: 27308.4279913	total: 63.4ms	remaining: 465ms
12:	learn: 26763.3891598	total: 64.8ms	remaining: 434ms
13:	learn: 26327.5390211	total: 66.4ms	remaining: 408ms
14:	learn: 26027.7827140	total: 67.4ms	remaining: 382ms
15:	learn: 25626.6781128	total: 68.8ms	remaining: 361ms
16:	learn: 25290.1811191	total: 70.2ms	remaining: 343ms
17:	learn: 24954.8842590	total: 7

In [9]:
# load the model
model = joblib.load(model_path)
model.predict(1,level=config.LEVELS)

Unnamed: 0,unique_id,ds,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,CatBoostRegressor-lo-80,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,0,2024-08-13,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,825670.219322,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568


In [10]:
forecasts = model.predict(60, level=config.LEVELS)
forecasts



Unnamed: 0,unique_id,ds,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,CatBoostRegressor-lo-80,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,0,2024-08-13,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,825670.219322,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
1,0,2024-08-14,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,841294.452362,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
2,0,2024-08-15,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,836357.428666,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473
3,0,2024-08-16,861137.980859,839947.253419,877438.4375,850462.025335,807815.467067,811898.905878,817003.204391,827211.801418,...,859519.51823,861294.593264,863069.668298,864844.743331,866619.818365,873607.14639,880594.474415,887581.80244,891075.466453,893870.397663
4,0,2024-08-17,764442.585897,773187.398652,807096.375,777417.84734,711120.072106,715203.510916,720307.809429,730516.406456,...,786475.340235,788250.415269,790025.490302,791800.565336,793575.64037,800562.968395,807550.29642,814537.624445,818031.288458,820826.219668
5,0,2024-08-18,671796.906147,689779.036894,712674.625,693787.78856,618474.392356,622557.831166,627662.129679,637870.726706,...,702845.281455,704620.356489,706395.431522,708170.506556,709945.58159,716932.909615,723920.23764,730907.565665,734401.229677,737196.160887
6,0,2024-08-19,784012.475135,795677.922422,816174.6875,810691.769685,730689.961343,734773.400154,739877.698667,750086.295693,...,819749.26258,821524.337614,823299.412647,825074.487681,826849.562715,833836.89074,840824.218765,847811.54679,851305.210803,854100.142013
7,0,2024-08-20,820548.768365,837593.494161,862734.125,859579.515265,767226.254573,771309.693384,776413.991897,786622.588923,...,868637.00816,870412.083194,872187.158227,873962.233261,875737.308295,882724.63632,889711.964345,896699.29237,900192.956383,902987.887593
8,0,2024-08-21,845948.581725,857932.816201,863036.1875,871205.35526,792626.067933,796709.506744,801813.805257,812022.402283,...,880262.848155,882037.923189,883812.998223,885588.073256,887363.14829,894350.476315,901337.80434,908325.132365,911818.796378,914613.727588
9,0,2024-08-22,861441.177374,866629.548279,862055.0,875694.910505,808118.663583,812202.102393,817306.400906,827514.997933,...,884752.4034,886527.478434,888302.553467,890077.628501,891852.703535,898840.03156,905827.359585,912814.68761,916308.351623,919103.282833


In [11]:
existing_predictions.groupby('ds')['CatBoostRegressor'].apply(lambda x: pd.isna(x).sum()).plot()

In [12]:
merged = existing_predictions.merge(forecasts, on=['ds', 'unique_id'], how='left', suffixes=('', '_forecast'))

for col in forecasts.columns:
    if col in ['ds', 'unique_id']:
        continue
    merged[col] = merged[col].fillna(merged[col + '_forecast'])

In [13]:
merged.groupby('ds')['CatBoostRegressor'].apply(lambda x: pd.isna(x).sum()).plot()

In [14]:
backfilled_preds = merged.drop([col + '_forecast' for col in forecasts.columns if col not in ['unique_id', 'ds']], axis=1)
backfilled_preds

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,2024-10-19,,0,813300.714372,768951.685558,739190.2500,820605.372455,772123.129832,773561.838203,775360.223667,...,827629.464785,828447.874701,829266.284618,830084.694534,830903.104450,835514.021450,840124.938450,844735.855450,847041.313950,848885.680750
127,2024-10-20,,0,740781.227397,678130.031306,655829.0000,726541.013285,699603.642857,701042.351228,702840.736693,...,733565.105615,734383.515531,735201.925448,736020.335364,736838.745280,741449.662280,746060.579280,750671.496280,752976.954780,754821.321580
128,2024-10-21,,0,878809.206535,795610.518417,703844.1875,844205.421525,837631.621995,839070.330366,840868.715831,...,851229.513855,852047.923771,852866.333688,853684.743604,854503.153520,859114.070520,863724.987520,868335.904520,870641.363020,872485.729820
129,2024-10-22,,0,907374.865009,841200.623286,754059.5000,861379.821175,866197.280468,867635.988840,869434.374304,...,868403.913505,869222.323421,870040.733338,870859.143254,871677.553170,876288.470170,880899.387170,885510.304170,887815.762670,889660.129470


In [15]:
backfilled_preds.loc[backfilled_preds['ds'].between('2024-08-13', '2024-08-25'), 'y'] = backfilled_preds.loc[backfilled_preds['ds'].between('2024-08-13', '2024-08-25'), 'y'] = np.nan
backfilled_preds = backfilled_preds.drop(backfilled_preds.loc[backfilled_preds['ds'].gt('2024-10-11')].index)
backfilled_preds


Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,2024-10-07,,0,870130.807750,801542.865477,805180.0625,842691.828025,828953.223210,830391.931581,832190.317045,...,849715.920355,850534.330271,851352.740188,852171.150104,852989.560020,857600.477020,862211.394020,866822.311020,869127.769520,870972.136320
115,2024-10-08,,0,905251.586985,853410.778033,853097.6250,860778.412295,864074.002444,865512.710816,867311.096280,...,867802.504625,868620.914541,869439.324458,870257.734374,871076.144290,875687.061290,880297.978290,884908.895290,887214.353790,889058.720590
116,2024-10-09,,0,912329.451314,852037.026171,858994.8125,866929.837190,871151.866773,872590.575145,874388.960609,...,873953.929520,874772.339436,875590.749352,876409.159269,877227.569185,881838.486185,886449.403185,891060.320185,893365.778685,895210.145485
117,2024-10-10,,0,905886.661509,847537.209630,845019.8750,865526.354005,864709.076969,866147.785340,867946.170805,...,872550.446335,873368.856251,874187.266168,875005.676084,875824.086000,880435.003000,885045.920000,889656.837000,891962.295500,893806.662300


In [16]:
backfilled_preds.loc[backfilled_preds['ds'].between('2024-08-10', '2024-08-15')]

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
56,2024-08-10,795148.508,0,,,,,,,,...,,,,,,,,,,
57,2024-08-11,698073.596,0,,,,,,,,...,,,,,,,,,,
58,2024-08-12,805498.244,0,,,,,,,,...,,,,,,,,,,
59,2024-08-13,,0,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
60,2024-08-14,,0,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
61,2024-08-15,,0,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473


In [17]:
backfilled_preds.tail(10)

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
109,2024-10-02,,0,878796.153283,817205.376189,872048.6875,872498.36136,837618.568742,839057.277114,840855.662578,...,879522.45369,880340.863606,881159.273523,881977.683439,882796.093355,887407.010355,892017.927355,896628.844355,898934.302855,900778.669655
110,2024-10-03,,0,878615.606608,836221.525239,873835.625,871369.682955,837438.022067,838876.730439,840675.115903,...,878393.775285,879212.185201,880030.595117,880849.005034,881667.41495,886278.33195,890889.24895,895500.16595,897805.62445,899649.99125
111,2024-10-04,,0,877127.035922,829780.349219,855304.3125,870283.203905,835949.451381,837388.159753,839186.545217,...,877307.296235,878125.706151,878944.116067,879762.525984,880580.9359,885191.8529,889802.7699,894413.6869,896719.1454,898563.5122
112,2024-10-05,,0,821069.450213,759460.127326,774917.5625,811725.73854,779891.865672,781330.574044,783128.959508,...,818749.83087,819568.240786,820386.650702,821205.060619,822023.470535,826634.387535,831245.304535,835856.221535,838161.680035,840006.046835
113,2024-10-06,,0,737291.255673,678094.731698,692929.6875,724457.549545,696113.671132,697552.379504,699350.764968,...,731481.641875,732300.051791,733118.461708,733936.871624,734755.28154,739366.19854,743977.11554,748588.03254,750893.49104,752737.85784
114,2024-10-07,,0,870130.80775,801542.865477,805180.0625,842691.828025,828953.22321,830391.931581,832190.317045,...,849715.920355,850534.330271,851352.740188,852171.150104,852989.56002,857600.47702,862211.39402,866822.31102,869127.76952,870972.13632
115,2024-10-08,,0,905251.586985,853410.778033,853097.625,860778.412295,864074.002444,865512.710816,867311.09628,...,867802.504625,868620.914541,869439.324458,870257.734374,871076.14429,875687.06129,880297.97829,884908.89529,887214.35379,889058.72059
116,2024-10-09,,0,912329.451314,852037.026171,858994.8125,866929.83719,871151.866773,872590.575145,874388.960609,...,873953.92952,874772.339436,875590.749352,876409.159269,877227.569185,881838.486185,886449.403185,891060.320185,893365.778685,895210.145485
117,2024-10-10,,0,905886.661509,847537.20963,845019.875,865526.354005,864709.076969,866147.78534,867946.170805,...,872550.446335,873368.856251,874187.266168,875005.676084,875824.086,880435.003,885045.92,889656.837,891962.2955,893806.6623
118,2024-10-11,,0,890941.541102,845229.141118,808483.5,863440.15007,849763.956561,851202.664933,853001.050397,...,870464.2424,871282.652316,872101.062232,872919.472149,873737.882065,878348.799065,882959.716065,887570.633065,889876.091565,891720.458365


In [18]:
# Update the predictions
updated_predictions = update_predictions(df_hist, backfilled_preds)
updated_predictions

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,2024-10-07,,0,870130.807750,801542.865477,805180.0625,842691.828025,828953.223210,830391.931581,832190.317045,...,849715.920355,850534.330271,851352.740188,852171.150104,852989.560020,857600.477020,862211.394020,866822.311020,869127.769520,870972.136320
115,2024-10-08,,0,905251.586985,853410.778033,853097.6250,860778.412295,864074.002444,865512.710816,867311.096280,...,867802.504625,868620.914541,869439.324458,870257.734374,871076.144290,875687.061290,880297.978290,884908.895290,887214.353790,889058.720590
116,2024-10-09,,0,912329.451314,852037.026171,858994.8125,866929.837190,871151.866773,872590.575145,874388.960609,...,873953.929520,874772.339436,875590.749352,876409.159269,877227.569185,881838.486185,886449.403185,891060.320185,893365.778685,895210.145485
117,2024-10-10,,0,905886.661509,847537.209630,845019.8750,865526.354005,864709.076969,866147.785340,867946.170805,...,872550.446335,873368.856251,874187.266168,875005.676084,875824.086000,880435.003000,885045.920000,889656.837000,891962.295500,893806.662300


In [19]:
updated_predictions.loc[backfilled_preds['ds'].between('2024-08-10', '2024-08-30')]

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
56,2024-08-10,795148.508,0,,,,,,,,...,,,,,,,,,,
57,2024-08-11,698073.596,0,,,,,,,,...,,,,,,,,,,
58,2024-08-12,805498.244,0,,,,,,,,...,,,,,,,,,,
59,2024-08-13,844564.07,0,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
60,2024-08-14,856677.381,0,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
61,2024-08-15,870365.0395,0,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473
62,2024-08-16,895989.212,0,861137.980859,839947.253419,877438.4375,850462.025335,807815.467067,811898.905878,817003.204391,...,859519.51823,861294.593264,863069.668298,864844.743331,866619.818365,873607.14639,880594.474415,887581.80244,891075.466453,893870.397663
63,2024-08-17,832527.095,0,764442.585897,773187.398652,807096.375,777417.84734,711120.072106,715203.510916,720307.809429,...,786475.340235,788250.415269,790025.490302,791800.565336,793575.64037,800562.968395,807550.29642,814537.624445,818031.288458,820826.219668
64,2024-08-18,744501.777,0,671796.906147,689779.036894,712674.625,693787.78856,618474.392356,622557.831166,627662.129679,...,702845.281455,704620.356489,706395.431522,708170.506556,709945.58159,716932.909615,723920.23764,730907.565665,734401.229677,737196.160887
65,2024-08-19,880713.222,0,784012.475135,795677.922422,816174.6875,810691.769685,730689.961343,734773.400154,739877.698667,...,819749.26258,821524.337614,823299.412647,825074.487681,826849.562715,833836.89074,840824.218765,847811.54679,851305.210803,854100.142013


In [20]:
data = updated_predictions[['ds', 'y', 'unique_id']].dropna()
data

Unnamed: 0,ds,y,unique_id
0,2024-06-15,816371.1310,0
1,2024-06-16,727934.9640,0
2,2024-06-17,851129.9800,0
3,2024-06-18,886222.7220,0
4,2024-06-19,890934.7610,0
...,...,...,...
66,2024-08-20,927637.6410,0
67,2024-08-21,931550.5060,0
68,2024-08-22,929876.4890,0
69,2024-08-23,934259.6355,0


In [21]:
model.update(data)

In [22]:
# Make predictions
forecast_df = model.predict(h=60, level=LEVELS)
forecast_df.head()


Prediction intervals are calculated using 1-step ahead cross-validation, with a constant width for all horizons. To vary the error by horizon, pass PredictionIntervals(h=h) to the `prediction_intervals` argument when refitting the model.



Unnamed: 0,unique_id,ds,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,CatBoostRegressor-lo-80,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,0,2024-08-25,756643.400015,755860.569434,754300.9375,761426.75108,703320.886224,707404.325034,712508.623547,722717.220574,...,770484.243975,772259.319009,774034.394042,775809.469076,777584.54411,784571.872135,791559.20016,798546.528185,802040.192198,804835.123408
1,0,2024-08-26,893786.571687,891934.345361,905230.375,879332.662615,840464.057895,844547.496706,849651.795219,859860.392245,...,888390.15551,890165.230544,891940.305578,893715.380611,895490.455645,902477.78367,909465.111695,916452.43972,919946.103733,922741.034943
2,0,2024-08-27,901077.563952,920073.677448,943490.125,896957.4149,847755.050161,851838.488971,856942.787484,867151.384511,...,906014.907795,907789.982829,909565.057862,911340.132896,913115.20793,920102.535955,927089.86398,934077.192005,937570.856018,940365.787228
3,0,2024-08-28,903778.424491,908241.067421,925722.0,906927.04114,850455.910699,854539.34951,859643.648023,869852.245049,...,915984.534035,917759.609069,919534.684102,921309.759136,923084.83417,930072.162195,937059.49022,944046.818245,947540.482258,950335.413468
4,0,2024-08-29,912555.330091,879506.440335,889839.1875,917171.05462,859232.816299,863316.25511,868420.553623,878629.150649,...,926228.547515,928003.622549,929778.697583,931553.772616,933328.84765,940316.175675,947303.5037,954290.831725,957784.495738,960579.426948


In [23]:
# Merge the predictions with the original data
forecast_df = pd.concat([updated_predictions, forecast_df], axis=0).drop_duplicates(subset=['ds'], keep='last')
forecast_df

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
0,2024-06-15,816371.131,0,,,,,,,,...,,,,,,,,,,
1,2024-06-16,727934.964,0,,,,,,,,...,,,,,,,,,,
2,2024-06-17,851129.980,0,,,,,,,,...,,,,,,,,,,
3,2024-06-18,886222.722,0,,,,,,,,...,,,,,,,,,,
4,2024-06-19,890934.761,0,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55,2024-10-19,,0,754175.132901,751651.980316,766546.3125,811006.961265,700852.619109,704936.057919,710040.356433,...,820064.454160,821839.529194,823614.604227,825389.679261,827164.754295,834152.082320,841139.410345,848126.738370,851620.402383,854415.333593
56,2024-10-20,,0,670630.834602,655836.289006,676325.9375,723512.885705,617308.320810,621391.759621,626496.058134,...,732570.378600,734345.453634,736120.528668,737895.603701,739670.678735,746658.006760,753645.334785,760632.662810,764126.326823,766921.258033
57,2024-10-21,,0,789091.363382,755816.895602,791475.5000,853087.918280,735768.849590,739852.288401,744956.586914,...,862145.411175,863920.486209,865695.561243,867470.636276,869245.711310,876233.039335,883220.367360,890207.695385,893701.359398,896496.290608
58,2024-10-22,,0,831133.281960,807678.145166,819193.2500,887860.236550,777810.768168,781894.206979,786998.505492,...,896917.729445,898692.804479,900467.879512,902242.954546,904018.029580,911005.357605,917992.685630,924980.013655,928473.677667,931268.608877


In [24]:
forecast_df.loc[forecast_df['ds'].between('2024-08-10', '2024-08-25')]

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
56,2024-08-10,795148.508,0,,,,,,,,...,,,,,,,,,,
57,2024-08-11,698073.596,0,,,,,,,,...,,,,,,,,,,
58,2024-08-12,805498.244,0,,,,,,,,...,,,,,,,,,,
59,2024-08-13,844564.07,0,859596.398764,850939.848937,839761.3125,851227.58824,806273.884972,810357.323783,815461.622296,...,860285.081135,862060.156169,863835.231203,865610.306236,867385.38127,874372.709295,881360.03732,888347.365345,891841.029358,894635.960568
60,2024-08-14,856677.381,0,875220.631804,856238.799488,855502.3125,860289.549795,821898.118012,825981.556822,831085.855336,...,869347.04269,871122.117724,872897.192758,874672.267791,876447.342825,883434.67085,890421.998875,897409.3269,900902.990913,903697.922123
61,2024-08-15,870365.0395,0,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473
62,2024-08-16,895989.212,0,861137.980859,839947.253419,877438.4375,850462.025335,807815.467067,811898.905878,817003.204391,...,859519.51823,861294.593264,863069.668298,864844.743331,866619.818365,873607.14639,880594.474415,887581.80244,891075.466453,893870.397663
63,2024-08-17,832527.095,0,764442.585897,773187.398652,807096.375,777417.84734,711120.072106,715203.510916,720307.809429,...,786475.340235,788250.415269,790025.490302,791800.565336,793575.64037,800562.968395,807550.29642,814537.624445,818031.288458,820826.219668
64,2024-08-18,744501.777,0,671796.906147,689779.036894,712674.625,693787.78856,618474.392356,622557.831166,627662.129679,...,702845.281455,704620.356489,706395.431522,708170.506556,709945.58159,716932.909615,723920.23764,730907.565665,734401.229677,737196.160887
65,2024-08-19,880713.222,0,784012.475135,795677.922422,816174.6875,810691.769685,730689.961343,734773.400154,739877.698667,...,819749.26258,821524.337614,823299.412647,825074.487681,826849.562715,833836.89074,840824.218765,847811.54679,851305.210803,854100.142013


In [27]:
forecast_df.dropna(subset=['y']).tail(10)

Unnamed: 0,ds,y,unique_id,CatBoostRegressor,LGBMRegressor,XGBRegressor,RandomForestRegressor,CatBoostRegressor-lo-99,CatBoostRegressor-lo-95,CatBoostRegressor-lo-90,...,RandomForestRegressor-hi-20,RandomForestRegressor-hi-30,RandomForestRegressor-hi-40,RandomForestRegressor-hi-50,RandomForestRegressor-hi-60,RandomForestRegressor-hi-70,RandomForestRegressor-hi-80,RandomForestRegressor-hi-90,RandomForestRegressor-hi-95,RandomForestRegressor-hi-99
61,2024-08-15,870365.0395,0,870283.608108,852988.14601,860784.625,857293.592145,816961.094316,821044.533127,826148.83164,...,866351.08504,868126.160074,869901.235108,871676.310141,873451.385175,880438.7132,887426.041225,894413.36925,897907.033263,900701.964473
62,2024-08-16,895989.212,0,861137.980859,839947.253419,877438.4375,850462.025335,807815.467067,811898.905878,817003.204391,...,859519.51823,861294.593264,863069.668298,864844.743331,866619.818365,873607.14639,880594.474415,887581.80244,891075.466453,893870.397663
63,2024-08-17,832527.095,0,764442.585897,773187.398652,807096.375,777417.84734,711120.072106,715203.510916,720307.809429,...,786475.340235,788250.415269,790025.490302,791800.565336,793575.64037,800562.968395,807550.29642,814537.624445,818031.288458,820826.219668
64,2024-08-18,744501.777,0,671796.906147,689779.036894,712674.625,693787.78856,618474.392356,622557.831166,627662.129679,...,702845.281455,704620.356489,706395.431522,708170.506556,709945.58159,716932.909615,723920.23764,730907.565665,734401.229677,737196.160887
65,2024-08-19,880713.222,0,784012.475135,795677.922422,816174.6875,810691.769685,730689.961343,734773.400154,739877.698667,...,819749.26258,821524.337614,823299.412647,825074.487681,826849.562715,833836.89074,840824.218765,847811.54679,851305.210803,854100.142013
66,2024-08-20,927637.641,0,820548.768365,837593.494161,862734.125,859579.515265,767226.254573,771309.693384,776413.991897,...,868637.00816,870412.083194,872187.158227,873962.233261,875737.308295,882724.63632,889711.964345,896699.29237,900192.956383,902987.887593
67,2024-08-21,931550.506,0,845948.581725,857932.816201,863036.1875,871205.35526,792626.067933,796709.506744,801813.805257,...,880262.848155,882037.923189,883812.998223,885588.073256,887363.14829,894350.476315,901337.80434,908325.132365,911818.796378,914613.727588
68,2024-08-22,929876.489,0,861441.177374,866629.548279,862055.0,875694.910505,808118.663583,812202.102393,817306.400906,...,884752.4034,886527.478434,888302.553467,890077.628501,891852.703535,898840.03156,905827.359585,912814.68761,916308.351623,919103.282833
69,2024-08-23,934259.6355,0,879355.668097,877282.736118,872430.8125,871447.7678,826033.154306,830116.593116,835220.891629,...,880505.260695,882280.335729,884055.410763,885830.485796,887605.56083,894592.888855,901580.21688,908567.544905,912061.208918,914856.140128
70,2024-08-24,855712.3575,0,800287.659443,809168.435764,808452.6875,791932.23425,746965.145652,751048.584462,756152.882975,...,800989.727145,802764.802179,804539.877213,806314.952246,808090.02728,815077.355305,822064.68333,829052.011355,832545.675368,835340.606578


In [25]:
import tempfile

def save_predictions(df: pd.DataFrame, date: str) -> None:
    """Saves predictions to Azure Blob Storage."""
    logger.info("Saving predictions to Gold layer")
    predictions_blob_path = f"{config.CONTAINER_NAME}/{config.FOLDER}/predictions/predictions_{date}.parquet"

    abfs = get_azure_blob_fs()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as tmp_file:
        local_predictions_path = tmp_file.name
        df.to_parquet(local_predictions_path, index=False)

    abfs.put(local_predictions_path, predictions_blob_path)

    logger.info(f"Predictions saved to Gold layer: abfs://{predictions_blob_path}")


In [28]:
# Save updated predictions
process_date = pendulum.date(2024, 8, 25).to_date_string().replace("-", "")
# save_predictions(forecast_df, process_date)

[32m2024-08-27 11:42:38.201[0m | [1mINFO    [0m | [36m__main__[0m:[36msave_predictions[0m:[36m5[0m - [1mSaving predictions to Gold layer[0m
[32m2024-08-27 11:42:39.672[0m | [1mINFO    [0m | [36m__main__[0m:[36msave_predictions[0m:[36m15[0m - [1mPredictions saved to Gold layer: abfs://data/energy_consumption/predictions/predictions_20240825.parquet[0m


# Data quality