# Load Data

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Per-dataset settings, keyed by the config name that is passed to `load_dataset`.
dataset_metadata = dict(
    monash_tourism_monthly=dict(prediction_length=24),
    m4_hourly=dict(prediction_length=48),
)

# Which dataset this notebook runs on, and how many of its series to keep.
dataset_choice = "monash_tourism_monthly"
num_time_series_subset = 2

# Looked up once here so later cells can use it without going through the dict.
prediction_length = dataset_metadata[dataset_choice]["prediction_length"]

# LOADING DATA
def load_data(dataset_choice, num_time_series_subset):
    """Load a subset of one Chronos dataset and split it for forecasting.

    Args:
        dataset_choice: Key into the module-level ``dataset_metadata`` dict; also
            used as the config name for the ``autogluon/chronos_datasets`` repo.
        num_time_series_subset: Number of leading ``item_ids`` to keep.

    Returns:
        A 4-tuple ``(tsdf, train_tsdf, test_tsdf_ground_truth, test_tsdf)``:
        the subsetted frame, the two frames returned by ``train_test_split``,
        and the output of ``generate_test_X(train_tsdf, prediction_length)``.

    Raises:
        KeyError: If ``dataset_choice`` is not a key of ``dataset_metadata``.
    """
    # Heavy dependencies are imported lazily, only when data is actually loaded.
    from datasets import load_dataset
    from autogluon.timeseries import TimeSeriesDataFrame

    from tabpfn_time_series.data_preparation import to_gluonts_univariate, generate_test_X

    # Resolve the horizon first so an unknown dataset name fails before any download.
    horizon = dataset_metadata[dataset_choice]["prediction_length"]
    raw_dataset = load_dataset("autogluon/chronos_datasets", dataset_choice)

    full_tsdf = TimeSeriesDataFrame(to_gluonts_univariate(raw_dataset["train"]))

    # Keep only the rows belonging to the first `num_time_series_subset` series.
    kept_item_ids = full_tsdf.item_ids[:num_time_series_subset]
    row_in_subset = full_tsdf.index.get_level_values("item_id").isin(kept_item_ids)
    tsdf = full_tsdf[row_in_subset]

    train_tsdf, test_tsdf_ground_truth = tsdf.train_test_split(prediction_length=horizon)
    test_tsdf = generate_test_X(train_tsdf, horizon)

    return tsdf, train_tsdf, test_tsdf_ground_truth, test_tsdf


# Load the configured dataset subset and unpack the four frames used by the cells below.
(
    tsdf,
    train_tsdf,
    test_tsdf_ground_truth,
    test_tsdf,
) = load_data(dataset_choice, num_time_series_subset)

# Original Feature Transformers

In [7]:
# Original Feature Transformer
from tabpfn_time_series import FeatureTransformer
from tabpfn_time_series.features import (
    RunningIndexFeature,
    CalendarFeature,
    AutoSeasonalFeature,
)

# Baseline feature set: one default-configured instance of each original feature.
selected_features = [RunningIndexFeature(), CalendarFeature(), AutoSeasonalFeature()]
feature_transformer = FeatureTransformer(selected_features)

# Featurised frames from the original transformer. `train_tsdf_original` is the
# reference that the sklearn-style pipeline is compared against further down.
train_tsdf_original, test_tsdf_original = feature_transformer.transform(
    train_tsdf,
    test_tsdf,
)


# Example usage of feature transformations

In [8]:
from tabpfn_time_series.features.utils_pipeline import from_autogluon_tsdf_to_df, from_df_to_autogluon_tsdf 


In [9]:
from tabpfn_time_series.features.feature_pipeline import (
    RunningIndexFeature,
    AutoSeasonalFeatureSklearn,
    CalendarFeatureSklearn,
)
from tabpfn_time_series.features.utils_pipeline import train_test_split_time_series
from tabpfn_time_series.features.tests.test_pipelines import test_train_test_split_time_series,test_feature_transformer

# Feature steps under test. Each one is used through fit(df) / transform(df) below.
pipeline = [
    RunningIndexFeature(mode="per_item"),  # alternative: mode="global_timestamp"
    AutoSeasonalFeatureSklearn(),
    CalendarFeatureSklearn(),
]

# Reload the series so this cell starts from freshly loaded frames.
tsdf, train_tsdf, test_tsdf_ground_truth, test_tsdf = load_data(
    dataset_choice, num_time_series_subset
)

# Flatten to a plain pandas frame (columns: item_id, timestamp, target) and split it.
df = from_autogluon_tsdf_to_df(tsdf)
train_df, test_df, ground_truth = train_test_split_time_series(df, prediction_length)

# NOTE(review): never filled in this cell; presumably reserved for a later predictor step.
PREDICTIONS = []

# Run the pipeline one series at a time.
unique_item_id = train_df["item_id"].unique()
for item_id in unique_item_id:
    train_item_id = train_df[train_df["item_id"] == item_id]
    test_item_id = test_df[test_df["item_id"] == item_id]

    # Fit every step on this series' training rows only.
    for feat in pipeline:
        feat.fit(train_item_id)

    # Apply the fitted steps, in order, to copies of both splits so the
    # per-item slices themselves are left untouched.
    train_feat = train_item_id.copy()
    test_feat = test_item_id.copy()
    for feat in pipeline:
        train_feat = feat.transform(train_feat)
        test_feat = feat.transform(test_feat)

    # Compare this series' train features with the original transformer's output.
    test_feature_transformer(train_feat, train_tsdf_original)

Found 5 seasonal periods: [(12.0, 72368.19862358051), (6.0, 31799.917445186442), (4.0, 18986.89682967007), (2.0, 7689.506995250422), (3.0, 7597.16411492776)]
All common columns match between train_df and train_tsdf_original for all item_ids.
Found 4 seasonal periods: [(12.0, 1806838.9715574959), (163.0, 677915.7804036293), (6.0, 571950.6753520791), (3.0, 236132.8087148125)]
All common columns match between train_df and train_tsdf_original for all item_ids.
