# Temporal Scope Tutorial

## Loading and Testing Macro Data Across Multiple Backends

This tutorial demonstrates how to load macroeconomic data and test it across **Pandas**, **Polars**, and **Modin** backends using TemporalScope’s `TimeFrame`.

- **Different Backends**:
  - **Pandas**: Ideal for small-to-medium datasets with seamless integration into the Python ecosystem.
  - **Polars**: Optimized for speed, suitable for handling large datasets using multi-threading.
  - **Modin**: Scales Pandas operations across multiple cores, ideal for large datasets.
- **TimeFrame**:
  - Consistent handling of temporal data across all backends.
  - Facilitates switching between backends based on dataset size and performance needs.
- **Strict Assumptions for Default Models**:
  - **One-step-ahead forecasting**: Requires shifting the target variable by one step so that each row is paired with the next period's value.
  - **Predictive performance**: Requires proper data cleaning and preparation before using TemporalScope partitioning or SHAP-based temporal analysis pipelines.


In [None]:
import modin.pandas as mpd
import pandas as pd
import polars as pl
from statsmodels.datasets import macrodata

from temporalscope.core.core_utils import print_divider
from temporalscope.core.temporal_data_loader import TimeFrame as tf


def load_macrodata(target_col: str = "realgdp") -> tuple[pd.DataFrame, str]:
    """Load the statsmodels macro dataset and prepare it for one-step-ahead use.

    The ``year`` and ``quarter`` columns are merged into a single datetime
    column ``ds`` (first day of each quarter) placed first in the frame, and a
    new column ``target_<target_col>`` is added holding ``target_col`` shifted
    up by one row. Rows containing any NaN are then dropped, which removes the
    final row left without a target by the shift.

    :param target_col: The column to be used as the target for prediction.
        Defaults to ``'realgdp'``.
    :type target_col: str, optional

    :return: A tuple ``(macro_df, shifted_target_col)`` with the preprocessed
        DataFrame and the name of the shifted target column.
    :rtype: tuple[pd.DataFrame, str]
    :raises KeyError: If ``target_col`` is not a column of the dataset once
        ``year`` and ``quarter`` have been dropped.
    """
    print_divider()
    print("Loading the 'macrodata' dataset from the open-license statsmodels package.")
    print(f"Using '{target_col}' as the target column for future prediction.")
    print_divider()

    # Work on a copy so the object returned by statsmodels is never mutated.
    macro_df = macrodata.load_pandas().data.copy()

    # Build 'ds' from 'year' and 'quarter': quarter q starts in month (q-1)*3+1,
    # giving strings such as "1959-1-01", "1959-4-01", ...
    macro_df["ds"] = pd.to_datetime(
        macro_df["year"].astype(int).astype(str)
        + "-"
        + ((macro_df["quarter"] - 1) * 3 + 1).astype(int).astype(str)
        + "-01"
    )

    # 'year' and 'quarter' are now redundant with 'ds'.
    macro_df = macro_df.drop(columns=["year", "quarter"])

    # Reorder columns to place 'ds' first
    cols = ["ds"] + [col for col in macro_df.columns if col != "ds"]
    macro_df = macro_df[cols].copy()

    # shift(-1) pulls the next row's value into the current row, so each row
    # carries the value to be predicted one step ahead.
    shifted_target_col = f"target_{target_col}"
    macro_df[shifted_target_col] = macro_df[target_col].shift(-1)

    # Drop any rows with NaN (the last row has no next value after shifting)
    macro_df = macro_df.dropna().copy()

    print(f"Loaded DataFrame shape: {macro_df.shape}")

    print_divider()
    # Single-line message: a triple-quoted f-string here would leak the source
    # indentation and a line break into the printed output.
    print(
        f"Shifted '{target_col}' to create a new target column "
        f"'{shifted_target_col}' for future prediction."
    )
    print_divider()

    return macro_df, shifted_target_col


def init_timeframes_for_backends(macro_df, target_col: str):
    """Build one TimeFrame per supported backend from the same dataset.

    The input is converted to a Pandas, a Polars and a Modin DataFrame in
    turn, and each conversion is wrapped in a TimeFrame that uses ``"ds"`` as
    the time column.

    :param macro_df: Preprocessed macro dataset.
    :type macro_df: pd.DataFrame
    :param target_col: The target column for prediction.
    :type target_col: str
    :return: A dictionary keyed by ``"pandas"``, ``"polars"`` and ``"modin"``
        whose values are the corresponding TimeFrame objects.
    :rtype: dict
    """
    # (result key, DataFrame constructor, TimeFrame backend code), listed in
    # the order the frames are built.
    backend_specs = (
        ("pandas", pd.DataFrame, "pd"),
        ("polars", pl.DataFrame, "pl"),
        ("modin", mpd.DataFrame, "mpd"),
    )

    timeframes = {}
    for label, make_frame, backend_code in backend_specs:
        timeframes[label] = tf(
            make_frame(macro_df),
            time_col="ds",
            target_col=target_col,
            backend=backend_code,
        )
    return timeframes


if __name__ == "__main__":
    # Load the macrodata dataset and preprocess
    macro_df, shifted_target_col = load_macrodata()

    # Init TimeFrames with the target column name returned by the loader, so
    # the two stay in sync if a different target is ever requested.
    timeframes = init_timeframes_for_backends(macro_df, target_col=shifted_target_col)

    # We will only demonstrate detailed output for Modin
    print_divider()
    print("Using Modin backend:")
    macro_modin_tf = timeframes["modin"]

    # Sanity check: the TimeFrame should report the Modin backend code
    assert macro_modin_tf.backend == "mpd", "Backend is not Modin!"

    print("Preview of the Modin DataFrame (macrodata):")
    print(macro_modin_tf.get_data().head())
    print_divider()

    # Print object's attributes (metadata)
    print("Metadata for Modin TimeFrame object:")
    print(macro_modin_tf.__dict__)
    print_divider()

In [None]:
# Show the data held by the Modin-backed TimeFrame (cell output).
macro_modin_tf.get_data()

In [None]:
# Show the TimeFrame object's instance attributes (cell output).
macro_modin_tf.__dict__