<div style="text-align: center;">
    <img src="temporalscope_github_banner.svg" alt="TemporalScope" width="400"/>
</div>

# TemporalScope Tutorial: Loading Data into the TimeFrame Object

This tutorial demonstrates how to load and preprocess data into a `TimeFrame` object, which supports multiple DataFrame backends: Polars, Pandas, and Modin.

1. **Load Data**: We begin by loading the `macrodata` macroeconomic dataset from `statsmodels`.
2. **Initialize TimeFrame**: We then initialize a `TimeFrame` object with each backend in turn.
3. **Preview Data**: Finally, we preview the data to ensure it is loaded correctly.


In [1]:
import polars as pl
import pandas as pd
import modin.pandas as mpd
from statsmodels.datasets import macrodata
from temporalscope.core.temporal_data_loader import TimeFrame as tf
from temporalscope.core.utils import print_divider


def load_macrodata():
    """Return the macrodata table with a leading 'ds' datetime column.

    The 'year' and 'quarter' columns are merged into one datetime column
    named 'ds' (the first day of the first month of each quarter), then
    removed. All other columns keep their original relative order after 'ds'.

    Returns:
        pandas.DataFrame: the preprocessed macrodata table.
    """
    raw = macrodata.load_pandas().data

    # Quarter q starts in month (q - 1) * 3 + 1, i.e. 1, 4, 7 or 10.
    year_text = raw["year"].astype(int).astype(str)
    month_text = ((raw["quarter"] - 1) * 3 + 1).astype(int).astype(str)
    stamped = raw.assign(ds=pd.to_datetime(year_text + "-" + month_text + "-01"))

    # The source columns are redundant once 'ds' exists.
    trimmed = stamped.drop(columns=["year", "quarter"])

    # Move 'ds' to the front without disturbing the order of the rest.
    ordered = ["ds", *(name for name in trimmed.columns if name != "ds")]
    return trimmed[ordered]


def initialize_time_frame(df, backend, time_col="ds"):
    """Build a TimeFrame around ``df`` and confirm it kept the requested settings.

    Args:
        df: DataFrame handed to ``TimeFrame`` as-is.
        backend: Backend identifier forwarded to ``TimeFrame``
            (this notebook passes "pl", "pd" and "mpd").
        time_col: Name of the time column. Defaults to "ds".

    Returns:
        The constructed ``TimeFrame``, created with "realgdp" as target column.

    Raises:
        AssertionError: If the TimeFrame's ``backend``, ``time_col`` or
            ``target_col`` attribute differs from the value passed in.
    """
    target = "realgdp"
    frame = tf(df, time_col=time_col, target_col=target, backend=backend)

    # Each attribute must echo back exactly what was supplied to the constructor.
    expectations = (
        ("backend", backend),
        ("time_col", time_col),
        ("target_col", target),
    )
    for attribute, expected in expectations:
        assert getattr(frame, attribute) == expected

    message = f"{backend.capitalize()} backend initialized successfully with the macrodata dataset."
    print(message)
    return frame


if __name__ == "__main__":
    # Build the preprocessed pandas table once; every backend starts from it.
    macro_df = load_macrodata()

    # Backend keys to exercise, in order, and the DataFrame constructor for each.
    backends = ["pl", "pd", "mpd"]
    frame_builders = {"pl": pl.DataFrame, "pd": pd.DataFrame, "mpd": mpd.DataFrame}

    for backend in backends:
        print_divider()
        print(f"Testing {backend} backend:")

        # Re-wrap the pandas table in the DataFrame type of the current backend.
        df = frame_builders[backend](macro_df)

        # Create the TimeFrame; the helper also asserts its attributes.
        macro_tf = initialize_time_frame(df, backend=backend)

        # Print the first rows as a dict.
        print_divider()
        print(f"Preview of the {backend} DataFrame (macrodata):")
        head_rows = macro_tf.get_data().head()
        # Only the Polars call passes as_series=False (its output shows plain lists).
        dict_options = {"as_series": False} if backend == "pl" else {}
        print(head_rows.to_dict(**dict_options))
        print_divider()

Testing pl backend:
Pl backend initialized successfully with the macrodata dataset.
Preview of the pl DataFrame (macrodata):
{'ds': [datetime.datetime(1959, 1, 1, 0, 0), datetime.datetime(1959, 4, 1, 0, 0), datetime.datetime(1959, 7, 1, 0, 0), datetime.datetime(1959, 10, 1, 0, 0), datetime.datetime(1960, 1, 1, 0, 0)], 'realgdp': [2710.349, 2778.801, 2775.488, 2785.204, 2847.699], 'realcons': [1707.4, 1733.7, 1751.8, 1753.7, 1770.5], 'realinv': [286.898, 310.859, 289.226, 299.356, 331.722], 'realgovt': [470.045, 481.301, 491.26, 484.052, 462.199], 'realdpi': [1886.9, 1919.7, 1916.4, 1931.3, 1955.5], 'cpi': [28.98, 29.15, 29.35, 29.37, 29.54], 'm1': [139.7, 141.7, 140.5, 140.0, 139.6], 'tbilrate': [2.82, 3.08, 3.82, 4.33, 3.5], 'unemp': [5.8, 5.1, 5.3, 5.6, 5.2], 'pop': [177.146, 177.83, 178.657, 179.386, 180.007], 'infl': [0.0, 2.34, 2.74, 0.27, 2.31], 'realint': [0.0, 0.74, 1.09, 4.06, 1.19]}
Testing pd backend:
Pd backend initialized successfully with the macrodata dataset.
Preview of

2024-09-04 23:22:46,411	INFO worker.py:1783 -- Started a local Ray instance.


Mpd backend initialized successfully with the macrodata dataset.
Preview of the mpd DataFrame (macrodata):
{'ds': {0: Timestamp('1959-01-01 00:00:00'), 1: Timestamp('1959-04-01 00:00:00'), 2: Timestamp('1959-07-01 00:00:00'), 3: Timestamp('1959-10-01 00:00:00'), 4: Timestamp('1960-01-01 00:00:00')}, 'realgdp': {0: 2710.349, 1: 2778.801, 2: 2775.488, 3: 2785.204, 4: 2847.699}, 'realcons': {0: 1707.4, 1: 1733.7, 2: 1751.8, 3: 1753.7, 4: 1770.5}, 'realinv': {0: 286.898, 1: 310.859, 2: 289.226, 3: 299.356, 4: 331.722}, 'realgovt': {0: 470.045, 1: 481.301, 2: 491.26, 3: 484.052, 4: 462.199}, 'realdpi': {0: 1886.9, 1: 1919.7, 2: 1916.4, 3: 1931.3, 4: 1955.5}, 'cpi': {0: 28.98, 1: 29.15, 2: 29.35, 3: 29.37, 4: 29.54}, 'm1': {0: 139.7, 1: 141.7, 2: 140.5, 3: 140.0, 4: 139.6}, 'tbilrate': {0: 2.82, 1: 3.08, 2: 3.82, 3: 4.33, 4: 3.5}, 'unemp': {0: 5.8, 1: 5.1, 2: 5.3, 3: 5.6, 4: 5.2}, 'pop': {0: 177.146, 1: 177.83, 2: 178.657, 3: 179.386, 4: 180.007}, 'infl': {0: 0.0, 1: 2.34, 2: 2.74, 3: 0.27, 



In [2]:
# Display the data held by `macro_tf`. After the loop in the previous cell this
# is the TimeFrame from the final iteration, i.e. the "mpd" backend.
macro_tf.get_data()

Unnamed: 0,ds,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959-01-01,2710.349,1707.4,286.898,470.045,1886.9,28.980,139.7,2.82,5.8,177.146,0.00,0.00
1,1959-04-01,2778.801,1733.7,310.859,481.301,1919.7,29.150,141.7,3.08,5.1,177.830,2.34,0.74
2,1959-07-01,2775.488,1751.8,289.226,491.260,1916.4,29.350,140.5,3.82,5.3,178.657,2.74,1.09
3,1959-10-01,2785.204,1753.7,299.356,484.052,1931.3,29.370,140.0,4.33,5.6,179.386,0.27,4.06
4,1960-01-01,2847.699,1770.5,331.722,462.199,1955.5,29.540,139.6,3.50,5.2,180.007,2.31,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,2008-07-01,13324.600,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.270,-3.16,4.33
199,2008-10-01,13141.920,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
200,2009-01-01,12925.410,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
201,2009-04-01,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19


In [3]:
# Inspect the TimeFrame's instance attributes through its attribute dictionary
params = vars(macro_tf)
params

{'_cfg': {'BACKENDS': {'pl': 'polars', 'pd': 'pandas', 'mpd': 'modin'}},
 '_backend': 'mpd',
 '_df':             ds    realgdp  realcons   realinv  realgovt  realdpi      cpi  \
 0   1959-01-01   2710.349    1707.4   286.898   470.045   1886.9   28.980   
 1   1959-04-01   2778.801    1733.7   310.859   481.301   1919.7   29.150   
 2   1959-07-01   2775.488    1751.8   289.226   491.260   1916.4   29.350   
 3   1959-10-01   2785.204    1753.7   299.356   484.052   1931.3   29.370   
 4   1960-01-01   2847.699    1770.5   331.722   462.199   1955.5   29.540   
 ..         ...        ...       ...       ...       ...      ...      ...   
 198 2008-07-01  13324.600    9267.7  1990.693   991.551   9838.3  216.889   
 199 2008-10-01  13141.920    9195.3  1857.661  1007.273   9920.4  212.174   
 200 2009-01-01  12925.410    9209.2  1558.494   996.287   9926.4  212.671   
 201 2009-04-01  12901.504    9189.0  1456.678  1023.528  10077.5  214.469   
 202 2009-07-01  12990.341    9256.0  1486