# GIFT-Eval Dataset Exploration

Simple notebook to explore GIFT-Eval datasets and test evaluation.

In [1]:
import numpy as np
import jax.numpy as jnp
from pathlib import Path
import sys

# Add project root to path
sys.path.insert(0, str(Path.cwd().parent))

from src.config import load_config, cfg_to_training_config
from src.tsf import TimeSeriesForecaster
from src.data.time_features import compute_batch_time_features
from src.data.frequency import parse_frequency

## 1. Load GIFT-Eval Dataset

In [2]:
# Load a univariate dataset
try:
    from gift_eval import Dataset
    dataset_name = "electricity"  # Change to any univariate dataset
    dataset = Dataset(dataset_name)
    print(f"Loaded GIFT-Eval dataset: {dataset_name}")
except ImportError:
    print("GIFT-Eval not installed. Using GluonTS dataset instead.")
    from gluonts.dataset.repository import get_dataset
    dataset_name = "electricity"
    dataset = get_dataset(dataset_name)
    print(f"Loaded GluonTS dataset: {dataset_name}")

GIFT-Eval not installed. Using GluonTS dataset instead.
Loaded GluonTS dataset: electricity


## 2. Explore Dataset Structure

In [3]:
# Get test data
test_data = dataset.test_data if hasattr(dataset, 'test_data') else dataset.test

# Look at first item
first_item = next(iter(test_data))
print("Keys in dataset item:", list(first_item.keys()))
print("\nTarget shape:", np.array(first_item['target']).shape)
print("Start:", first_item['start'])
print("Frequency:", str(first_item['start'].freq) if hasattr(first_item['start'], 'freq') else 'N/A')

Keys in dataset item: ['target', 'start', 'feat_static_cat', 'item_id']

Target shape: (21068,)
Start: 2012-01-01 00:00
Frequency: <Hour>


  return pd.Period(val, freq)


In [4]:
# Check if dataset has metadata about prediction length
print("\nDataset attributes:")
for attr in dir(dataset):
    if not attr.startswith('_'):
        try:
            value = getattr(dataset, attr)
            if not callable(value):
                print(f"  {attr}: {value}")
        except:
            pass


Dataset attributes:
  metadata: freq='1H' target=None feat_static_cat=[CategoricalFeatureInfo(name='feat_static_cat_0', cardinality='321')] feat_static_real=[] feat_dynamic_real=[] feat_dynamic_cat=[] prediction_length=24
  test: Map(fn=<gluonts.dataset.common.ProcessDataEntry object at 0x1065a9d90>, iterable=JsonLinesFile(path=PosixPath('/Users/mariana/.gluonts/datasets/electricity/test/data.json.gz'), start=0, n=None, line_starts=[np.int32(0), np.int32(99873), np.int32(219668), np.int32(311560), np.int32(438110), np.int32(564118), np.int32(693652), np.int32(798498), np.int32(936090), np.int32(1061990), np.int32(1187633), np.int32(1313149), np.int32(1439625), np.int32(1564963), np.int32(1687218), np.int32(1813654), np.int32(1950313), np.int32(2055726), np.int32(2181339), np.int32(2309195), np.int32(2424945), np.int32(2530372), np.int32(2663496), np.int32(2774028), np.int32(2884049), np.int32(3010527), np.int32(3136573), np.int32(3263051), np.int32(3368786), np.int32(3495282), np.int3

## 3. Extract Prediction Length

GIFT-Eval datasets typically have a `prediction_length` attribute.

In [5]:
# Try to get prediction length from dataset
if hasattr(dataset, 'prediction_length'):
    prediction_length = dataset.prediction_length
    print(f"Prediction length from dataset: {prediction_length}")
elif hasattr(dataset, 'metadata'):
    prediction_length = dataset.metadata.prediction_length
    print(f"Prediction length from metadata: {prediction_length}")
else:
    # Default fallback
    prediction_length = 96
    print(f"Using default prediction length: {prediction_length}")

print(f"\nWill forecast {prediction_length} steps ahead")

Prediction length from metadata: 24

Will forecast 24 steps ahead


## 4. Load Model

In [6]:
# Load config and model
config_path = "../conf/training.yaml"
checkpoint_path = "../checkpoints/toy_model.pkl"  # Update with your checkpoint path

cfg = load_config(config_path)
training_config = cfg_to_training_config(cfg)

forecaster = TimeSeriesForecaster(config=training_config)
# forecaster.load(checkpoint_path)  # Uncomment when you have a checkpoint

print(f"Model loaded with time_dim={training_config.time_dim}")

Model loaded with time_dim=6


In [7]:
training_config

TrainingConfig(learning_rate=0.001, weight_decay=0.0, dropout=0.1, quantiles=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], model_config=ModelConfig(input_dim=1, embedding_dim=32, head_embedding_dim=8, num_heads=2, n_layers=4, output_dim=9, weaving_block_config=WeavingBlockLSTMConfig(n_layers=4, embedding_dim=8, num_heads=2, weaving_layer_config=mLSTMWeavingLayerConfig(conv1d_kernel_size=4, qkv_proj_blocksize=4, num_heads=2, embedding_dim=8, bias=False, dropout=0.1, dtype='bfloat16', _num_blocks=1, _inner_embedding_dim=16, mlstm_cell=mLSTMWeavingCellConfig(embedding_dim=16, num_heads=2, dtype='bfloat16')))), log_every=10, num_epochs=1, batch_size=32, time_dim=6)

## 5. Make a Single Prediction

In [16]:
# Get one time series
data = first_item
target = np.array(data['target'])
start = data['start']
freq = str(start.freq) if hasattr(start, 'freq') else 'D'

print(f"Time series length: {len(target)}")
print(f"Start: {start}")
print(f"Frequency: {freq}")

# Use last 512 points as context
context_length = 512
history = target[-context_length:]
history = jnp.array(history[None, ..., None, None])  # (1, seq, 1, 1)

print(f"\nHistory shape: {history.shape}")
print(f"Will predict {prediction_length} steps")

Time series length: 21068
Start: 2012-01-01 00:00
Frequency: <Hour>

History shape: (1, 512, 1, 1)
Will predict 24 steps


In [17]:
target.shape

(21068,)

In [18]:
prediction_length

24

In [21]:
# Compute time features
frequency = parse_frequency("H")

history_tf, future_tf = compute_batch_time_features(
    start=[np.datetime64(start)],
    history_length=context_length,
    future_length=prediction_length,
    batch_size=1,
    frequency=[frequency],
    K_max=training_config.time_dim,
    include_extra=False,
)

print(f"History time features shape: {history_tf.shape}")
print(f"Future time features shape: {future_tf.shape}")

History time features shape: (1, 512, 6)
Future time features shape: (1, 24, 6)


  offset = pd.tseries.frequencies.to_offset(freq_str)


'<Hour>'

In [None]:
# Make prediction
# preds = forecaster.predict(history, history_tf, future_tf)
# preds = np.array(preds[0, :, 0, :])  # (pred_len, num_quantiles)

# print(f"\nPrediction shape: {preds.shape}")
# print(f"Quantiles: {forecaster.quantiles}")

print("Uncomment the above when you have a trained model")

## 6. Run Full Evaluation

In [None]:
from gluonts.model.forecast import QuantileForecast
from gluonts.evaluation import make_evaluation_predictions, Evaluator

class GluonTSPredictor:
    """Minimal wrapper for GluonTS compatibility."""

    def __init__(self, forecaster, prediction_length, context_length=512, time_dim=6):
        self.forecaster = forecaster
        self.prediction_length = prediction_length
        self.context_length = context_length
        self.time_dim = time_dim
        self.lead_time = 0  # Required by GluonTS evaluator

    def predict(self, dataset, num_samples=None):
        """Generate quantile forecasts."""
        for data in dataset:
            target = np.array(data['target'])
            start = data['start']
            freq = str(start.freq) if hasattr(start, 'freq') else 'D'

            # Use last context_length points
            history = target[-self.context_length:]
            history = jnp.array(history[None, ..., None, None])  # (1, seq, 1, 1)

            # Compute time features
            frequency = parse_frequency(freq)
            hist_len = history.shape[1]

            history_tf, future_tf = compute_batch_time_features(
                start=[np.datetime64(start)],
                history_length=hist_len,
                future_length=self.prediction_length,
                batch_size=1,
                frequency=[frequency],
                K_max=self.time_dim,
                include_extra=False,
            )

            # Predict all quantiles
            preds = self.forecaster.predict(history, history_tf, future_tf)
            preds = np.array(preds[0, :, 0, :])  # (pred_len, num_quantiles)

            yield QuantileForecast(
                forecast_arrays=preds.T,  # (num_quantiles, pred_len)
                start_date=start + len(target),
                forecast_keys=[str(q) for q in self.forecaster.quantiles],
                item_id=data.get('item_id', ''),
            )

# Create predictor
predictor = GluonTSPredictor(
    forecaster=forecaster,
    prediction_length=prediction_length,
    context_length=512,
    time_dim=training_config.time_dim,
)

print("Predictor created")

In [None]:
# Run evaluation (uncomment when you have a trained model)
# print("Generating predictions...")
# forecast_it, ts_it = make_evaluation_predictions(
#     dataset=test_data,
#     predictor=predictor,
#     num_samples=None,
# )
# forecasts = list(forecast_it)
# targets = list(ts_it)

# print("Computing metrics...")
# evaluator = Evaluator()
# metrics, _ = evaluator(targets, forecasts)

# print("\n=== Results ===")
# for key, val in metrics.items():
#     print(f"{key}: {val:.4f}")

print("Uncomment the above when you have a trained model")

## Summary

- GIFT-Eval datasets have a `prediction_length` attribute
- Use this instead of hardcoding the prediction length
- The evaluation script should extract this from the dataset automatically