In [1]:
import pandas as pd
import polars as pl
from statsmodels.datasets import macrodata
from temporalscope.core.temporal_data_loader import TimeFrame as tf
from temporalscope.partition.sliding_window_partioner import (
    SlidingWindowPartitioner as SWP,
)
from temporalscope.core.utils import print_divider


def load_macrodata():
    """Load the statsmodels macrodata dataset with a leading ``ds`` timestamp column.

    The ``year`` and ``quarter`` columns are combined into ``ds``, the timestamp
    of the first day of each quarter (quarter 1 -> January, 2 -> April,
    3 -> July, 4 -> October), and are then dropped from the result.

    The frame obtained from ``macrodata.load_pandas().data`` is never modified:
    all transformations produce new objects, so the function can be called
    repeatedly even if the loader hands back the same frame each time.

    Returns
    -------
    pandas.DataFrame
        A new frame whose first column is ``ds`` followed by every original
        column except ``year`` and ``quarter``, in their original order.
    """
    raw_df = macrodata.load_pandas().data

    # Assemble the date from integer components instead of parsing
    # concatenated strings, so no date-format inference is involved.
    quarter_start = pd.DataFrame(
        {
            "year": raw_df["year"].astype(int),
            # First month of the quarter: 1 -> 1, 2 -> 4, 3 -> 7, 4 -> 10.
            "month": ((raw_df["quarter"] - 1) * 3 + 1).astype(int),
            "day": 1,
        }
    )
    ds = pd.to_datetime(quarter_start)

    # drop() and assign() both return new frames, leaving raw_df untouched.
    macro_df = raw_df.drop(columns=["year", "quarter"]).assign(ds=ds)

    # Reorder columns to place 'ds' first.
    cols = ["ds"] + [col for col in macro_df.columns if col != "ds"]
    return macro_df[cols]


if __name__ == "__main__":
    # End-to-end demo: load macrodata, wrap it in a Polars-backed TimeFrame,
    # split it with a sliding-window partitioner, and print what comes out.
    # NOTE: names assigned in this block (e.g. `partitioned_data`) are reused
    # by a later notebook cell, so they must keep their current spelling.

    # Load the macrodata dataset and preprocess ('ds' becomes the first column)
    macro_df = load_macrodata()

    # Convert the pandas DataFrame returned above to a Polars DataFrame
    macro_polars_df = pl.DataFrame(macro_df)

    # Initialize TimeFrame with the Polars backend, using 'ds' as the time
    # column and 'realgdp' as the target column
    macro_polars_tf = tf(
        macro_polars_df, time_col="ds", target_col="realgdp", backend="polars"
    )

    # Preview the first rows held by the TimeFrame; to_dict(as_series=False)
    # prints plain Python lists keyed by column name
    print_divider()
    print("Preview of the Polars DataFrame (macrodata):")
    print(macro_polars_tf.get_data().head().to_dict(as_series=False))
    print_divider()

    # Configure the sliding-window partitioner over the TimeFrame's data.
    # NOTE(review): presumably window_size is the number of rows per partition
    # and stride the row offset between consecutive partitions — confirm
    # against the SlidingWindowPartitioner documentation.
    window_size = 10  # Example window size
    stride = 5  # Example stride
    partitioner = SWP(
        data=macro_polars_tf.get_data(),
        time_col="ds",
        window_size=window_size,
        stride=stride,
    )

    # Apply the partitioner to the TimeFrame
    macro_polars_tf.apply_partitioning(partitioner)

    # Get the partitioned data; it is used below as a sized, indexable,
    # iterable collection of frames
    partitioned_data = macro_polars_tf.get_partitioned_data()

    # Verify the partitions: report how many there are and dump the first one
    print_divider()
    print(f"Number of partitions: {len(partitioned_data)}")
    print("Preview of the first partition:")
    print(partitioned_data[0].to_dict(as_series=False))
    print_divider()

    # Iterate through the partitions and print the head of each one, labelled
    # with a 1-based partition number
    for i, partition in enumerate(partitioned_data):
        print(f"Partition {i + 1}:")
        print(partition.head().to_dict(as_series=False))
        print_divider()

Preview of the Polars DataFrame (macrodata):
{'ds': [datetime.datetime(1959, 1, 1, 0, 0), datetime.datetime(1959, 4, 1, 0, 0), datetime.datetime(1959, 7, 1, 0, 0), datetime.datetime(1959, 10, 1, 0, 0), datetime.datetime(1960, 1, 1, 0, 0)], 'realgdp': [2710.349, 2778.801, 2775.488, 2785.204, 2847.699], 'realcons': [1707.4, 1733.7, 1751.8, 1753.7, 1770.5], 'realinv': [286.898, 310.859, 289.226, 299.356, 331.722], 'realgovt': [470.045, 481.301, 491.26, 484.052, 462.199], 'realdpi': [1886.9, 1919.7, 1916.4, 1931.3, 1955.5], 'cpi': [28.98, 29.15, 29.35, 29.37, 29.54], 'm1': [139.7, 141.7, 140.5, 140.0, 139.6], 'tbilrate': [2.82, 3.08, 3.82, 4.33, 3.5], 'unemp': [5.8, 5.1, 5.3, 5.6, 5.2], 'pop': [177.146, 177.83, 178.657, 179.386, 180.007], 'infl': [0.0, 2.34, 2.74, 0.27, 2.31], 'realint': [0.0, 0.74, 1.09, 4.06, 1.19]}
Number of partitions: 39
Preview of the first partition:
{'ds': [datetime.datetime(1959, 1, 1, 0, 0), datetime.datetime(1959, 4, 1, 0, 0), datetime.datetime(1959, 7, 1, 0, 0)

In [2]:
# Spot-check a single partition: index 30 is zero-based, i.e. the one labelled
# "Partition 31" by the loop in the previous cell. Left as the cell's last
# expression so the notebook renders the frame.
partitioned_data[30]

ds,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
datetime[ns],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1996-07-01 00:00:00,9488.879,6314.6,1422.059,692.741,6908.9,158.2,1086.1,5.04,5.3,270.581,3.05,2.0
1996-10-01 00:00:00,9592.458,6366.1,1418.193,690.744,6946.8,159.4,1081.5,4.99,5.3,271.36,3.02,1.97
1997-01-01 00:00:00,9666.235,6430.2,1451.304,681.445,7008.9,159.9,1063.8,5.1,5.2,272.083,1.25,3.85
1997-04-01 00:00:00,9809.551,6456.2,1543.976,693.525,7061.5,160.4,1066.2,5.01,5.0,272.912,1.25,3.76
1997-07-01 00:00:00,9932.672,6566.0,1571.426,691.261,7142.4,161.5,1065.5,5.02,4.9,273.852,2.73,2.29
1997-10-01 00:00:00,10008.874,6641.1,1596.523,690.311,7241.5,162.0,1074.4,5.11,4.7,274.626,1.24,3.88
1998-01-01 00:00:00,10103.425,6707.2,1672.732,668.783,7406.2,162.2,1076.1,5.02,4.6,275.304,0.49,4.53
1998-04-01 00:00:00,10194.277,6822.6,1652.716,687.184,7512.0,163.2,1075.0,4.98,4.4,276.115,2.46,2.52
1998-07-01 00:00:00,10328.787,6913.1,1700.071,681.472,7591.0,163.9,1086.0,4.49,4.5,277.003,1.71,2.78
1998-10-01 00:00:00,10507.575,7019.1,1754.743,688.147,7646.5,164.7,1097.8,4.38,4.4,277.79,1.95,2.43
