In [3]:
%load_ext autoreload
%autoreload 2
# Put the parent of the current working directory on the module search path
# (at position 1, right after the first entry). Presumably this makes the
# local skforecast checkout importable from the notebook -- confirm that the
# notebook is always launched from a direct sub-folder of the repository.
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

import numpy as np
import pandas as pd
import skforecast

# Show which skforecast version was actually imported.
print(skforecast.__version__)

/home/joaquin/Documents/GitHub/skforecast
0.19.0


In [37]:
def reshape_series_exog_dict_to_long(series: dict | None, exog: dict | None) -> pd.DataFrame:
    """
    Convert dictionaries of series and exogenous variables to a long format
    pandas DataFrame with MultiIndex. The first level of the MultiIndex contains the
    series identifiers, and the second level contains the temporal index.

    Parameters
    ----------
    series: dict | None
        Dictionary with multiple time series (expected: dict[str, pd.Series]).
    exog: dict | None
        Dictionary with exogenous variables (expected: dict[str, pd.Series or pd.DataFrame]).
    Returns
    -------
    pd.DataFrame
        Long format DataFrame with MultiIndex.
    """
    if series is None and exog is None:
        raise ValueError("Both series and exog cannot be None.")

    if series is not None:
        for k, v in series.items():
            if not isinstance(v, (pd.Series, pd.DataFrame)):
                raise TypeError(f"series['{k}'] must be pd.Series or pd.DataFrame.")
        series = pd.concat(series.values(), keys=series.keys()).to_frame()
        series.index.names = ['series_id', 'datetime']
        series.columns = ['series_value']

    if exog is not None:
        for k, v in exog.items():
            if not isinstance(v, (pd.Series, pd.DataFrame)):
                raise TypeError(f"exog['{k}'] must be pd.Series or pd.DataFrame.")
        exog = pd.concat(exog.values(), keys=exog.keys())
        if isinstance(exog, pd.Series):
            exog = exog.to_frame()
        exog.index.names = ['series_id', 'datetime']

    if series is None:
        results = exog
    elif exog is None:
        results = series
    else:
        results = pd.merge(
                    series,
                    exog,
                    left_index=True,
                    right_index=True,
                    how='outer'
                )

    return results

In [39]:
# Target series: 12 consecutive daily observations starting on 2020-01-01.
y = pd.Series(
    np.arange(12),
    index=pd.date_range('2020-01-01', periods=12, freq='D'),
)

# Exogenous features span only the first 10 days, two fewer than `y`.
exog = pd.DataFrame(
    {'exog_1': np.arange(10, 20), 'exog_2': np.arange(20, 30)},
    index=pd.date_range('2020-01-01', periods=10, freq='D'),
)

# Three shifted copies of the target series.
series_dict = dict(series_1=y, series_2=y + 10, series_3=y + 20)

# 'series_4' has exogenous data but no matching target series.
exog_dict = dict(
    series_1=exog,
    series_2=exog + 10,
    series_3=exog + 20,
    series_4=exog + 30,
)

series_df = reshape_series_exog_dict_to_long(series=series_dict, exog=exog_dict)
series_df

Unnamed: 0_level_0,Unnamed: 1_level_0,series_value,exog_1,exog_2
series_id,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
series_1,2020-01-01,0.0,10.0,20.0
series_1,2020-01-02,1.0,11.0,21.0
series_1,2020-01-03,2.0,12.0,22.0
series_1,2020-01-04,3.0,13.0,23.0
series_1,2020-01-05,4.0,14.0,24.0
series_1,2020-01-06,5.0,15.0,25.0
series_1,2020-01-07,6.0,16.0,26.0
series_1,2020-01-08,7.0,17.0,27.0
series_1,2020-01-09,8.0,18.0,28.0
series_1,2020-01-10,9.0,19.0,29.0
