In [1]:
import polars as pl
import pandas as pd
from statsmodels.datasets import macrodata
from temporalscope.core.temporal_data_loader import TimeFrame as tf
from temporalscope.core.utils import print_divider

def load_macrodata():
    """Return the statsmodels macrodata table with a leading 'ds' timestamp column.

    The 'year' and 'quarter' columns are folded into a single 'ds' column
    holding the first day of each quarter, and are then removed.

    Returns:
        pandas.DataFrame: 'ds' first, followed by the remaining columns in
        their original order.
    """
    raw = macrodata.load_pandas().data

    # Quarter q starts in month (q - 1) * 3 + 1, i.e. 1, 4, 7 or 10.
    year_text = raw['year'].astype(int).astype(str)
    month_text = ((raw['quarter'] - 1) * 3 + 1).astype(int).astype(str)
    quarter_start = pd.to_datetime(year_text + '-' + month_text + '-01')

    # Remove the source columns, then attach the combined timestamp.
    frame = raw.drop(columns=['year', 'quarter'])
    frame['ds'] = quarter_start

    # Select 'ds' first and keep every other column in its existing order.
    ordered = ['ds'] + [name for name in frame.columns if name != 'ds']
    return frame[ordered]

def initialize_time_frame(df, backend, time_col='ds', target_col='realgdp'):
    """Build a TimeFrame on the requested backend and verify its configuration.

    Args:
        df: DataFrame to wrap; passed unchanged to ``TimeFrame``.
        backend: Backend identifier forwarded to ``TimeFrame`` (this notebook
            passes "polars" and "pandas").
        time_col: Name of the time column. Defaults to 'ds'.
        target_col: Name of the target column. Defaults to 'realgdp', which
            keeps existing calls working exactly as before.

    Returns:
        The constructed ``TimeFrame`` instance.

    Raises:
        AssertionError: If the constructed object does not report the
            backend, time column, or target column it was given.
    """
    macro_tf = tf(df, time_col=time_col, target_col=target_col, backend=backend)

    # Sanity checks: the TimeFrame must echo back the configuration it received.
    assert macro_tf.backend == backend, (
        f"expected backend {backend!r}, got {macro_tf.backend!r}"
    )
    assert macro_tf.time_col == time_col, (
        f"expected time_col {time_col!r}, got {macro_tf.time_col!r}"
    )
    assert macro_tf.target_col == target_col, (
        f"expected target_col {target_col!r}, got {macro_tf.target_col!r}"
    )

    print(f"{backend.capitalize()} backend initialized successfully with the macrodata dataset.")
    return macro_tf

# Driver: build the macrodata frame once, then wrap it in a TimeFrame for each backend.
# The recorded output under this cell shows the guard passing when the cell is run.
if __name__ == "__main__":
    # Load the macrodata dataset as a pandas DataFrame whose first column is 'ds'
    macro_df = load_macrodata()

    # Initialize TimeFrame with the Polars backend; the pandas frame is first
    # converted by passing it to the pl.DataFrame constructor
    macro_df_polars = pl.DataFrame(macro_df)
    macro_pl_tf = initialize_time_frame(macro_df_polars, backend="polars")

    # Preview the Polars DataFrame: the head rows, printed as a plain dict
    # mapping each column name to a list of values
    # NOTE(review): print_divider is an imported helper, presumably a visual
    # separator; no divider text appears in the recorded output - confirm
    print_divider()
    print("Preview of the Polars DataFrame (macrodata):")
    print(macro_pl_tf.get_data().head().to_dict(as_series=False))
    print_divider()

    # Initialize TimeFrame with the Pandas backend, reusing the original pandas frame
    macro_pd_tf = initialize_time_frame(macro_df, backend="pandas")


Polars backend initialized successfully with the macrodata dataset.
Preview of the Polars DataFrame (macrodata):
{'ds': [datetime.datetime(1959, 1, 1, 0, 0), datetime.datetime(1959, 4, 1, 0, 0), datetime.datetime(1959, 7, 1, 0, 0), datetime.datetime(1959, 10, 1, 0, 0), datetime.datetime(1960, 1, 1, 0, 0)], 'realgdp': [2710.349, 2778.801, 2775.488, 2785.204, 2847.699], 'realcons': [1707.4, 1733.7, 1751.8, 1753.7, 1770.5], 'realinv': [286.898, 310.859, 289.226, 299.356, 331.722], 'realgovt': [470.045, 481.301, 491.26, 484.052, 462.199], 'realdpi': [1886.9, 1919.7, 1916.4, 1931.3, 1955.5], 'cpi': [28.98, 29.15, 29.35, 29.37, 29.54], 'm1': [139.7, 141.7, 140.5, 140.0, 139.6], 'tbilrate': [2.82, 3.08, 3.82, 4.33, 3.5], 'unemp': [5.8, 5.1, 5.3, 5.6, 5.2], 'pop': [177.146, 177.83, 178.657, 179.386, 180.007], 'infl': [0.0, 2.34, 2.74, 0.27, 2.31], 'realint': [0.0, 0.74, 1.09, 4.06, 1.19]}
Pandas backend initialized successfully with the macrodata dataset.


In [2]:
# Show the DataFrame held by the Polars-backed TimeFrame as the cell's output
macro_pl_tf.get_data()

ds,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
datetime[ns],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1959-01-01 00:00:00,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1959-04-01 00:00:00,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
1959-07-01 00:00:00,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
1959-10-01 00:00:00,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
1960-01-01 00:00:00,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19
…,…,…,…,…,…,…,…,…,…,…,…,…
2008-07-01 00:00:00,13324.6,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.27,-3.16,4.33
2008-10-01 00:00:00,13141.92,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
2009-01-01 00:00:00,12925.41,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
2009-04-01 00:00:00,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19


In [5]:
# Inspect the TimeFrame's instance attributes; vars() returns the object's __dict__
params = vars(macro_pl_tf)
params

{'_cfg': {'BACKENDS': {'pl': 'polars', 'pd': 'pandas'}},
 '_df': shape: (203, 13)
 ┌─────────────────────┬───────────┬──────────┬──────────┬───┬───────┬─────────┬───────┬─────────┐
 │ ds                  ┆ realgdp   ┆ realcons ┆ realinv  ┆ … ┆ unemp ┆ pop     ┆ infl  ┆ realint │
 │ ---                 ┆ ---       ┆ ---      ┆ ---      ┆   ┆ ---   ┆ ---     ┆ ---   ┆ ---     │
 │ datetime[ns]        ┆ f64       ┆ f64      ┆ f64      ┆   ┆ f64   ┆ f64     ┆ f64   ┆ f64     │
 ╞═════════════════════╪═══════════╪══════════╪══════════╪═══╪═══════╪═════════╪═══════╪═════════╡
 │ 1959-01-01 00:00:00 ┆ 2710.349  ┆ 1707.4   ┆ 286.898  ┆ … ┆ 5.8   ┆ 177.146 ┆ 0.0   ┆ 0.0     │
 │ 1959-04-01 00:00:00 ┆ 2778.801  ┆ 1733.7   ┆ 310.859  ┆ … ┆ 5.1   ┆ 177.83  ┆ 2.34  ┆ 0.74    │
 │ 1959-07-01 00:00:00 ┆ 2775.488  ┆ 1751.8   ┆ 289.226  ┆ … ┆ 5.3   ┆ 178.657 ┆ 2.74  ┆ 1.09    │
 │ 1959-10-01 00:00:00 ┆ 2785.204  ┆ 1753.7   ┆ 299.356  ┆ … ┆ 5.6   ┆ 179.386 ┆ 0.27  ┆ 4.06    │
 │ 1960-01-01 00:00:00 ┆ 28