# Running Time-MoE models on the BOOM benchmark

This notebook is adapted from the [GiftEval repository](https://github.com/SalesforceAIResearch/gift-eval/tree/main/notebooks) and shows how to run the Time-MOE models on the BOOM benchmark.

Make sure you download the BOOM benchmark and set the `BOOM` environment variable correctly before running this notebook.

We will use the `Dataset` class from GiftEval to load the data and run the model.

Download BOOM datasets. Calling `download_boom_benchmark` also sets the `BOOM` environment variable with the correct path, which is needed for running the evals below.

In [None]:
import os
import json
from dotenv import load_dotenv
import importlib.util
import os
import json
from dotenv import load_dotenv
import numpy as np
import torch
import torch.cuda.amp



from gluonts.itertools import batcher
from gluonts.model.forecast import QuantileForecast, SampleForecast

from typing import List, Union, Tuple
import torch.nn.functional as F

from gluonts.itertools import batcher
from transformers import AutoModelForCausalLM
from dataset_utils import download_boom_benchmark

# Directory the BOOM benchmark is downloaded into; replace the placeholder
# with a real path before running.
boom_path = "ChangeMe"
download_boom_benchmark(boom_path)
load_dotenv()

# Read the per-dataset properties (term, frequency, domain, ...). The context
# manager closes the file handle instead of leaving it to the garbage collector.
with open("eval/boom/dataset_properties.json") as properties_file:
    dataset_properties_map = json.load(properties_file)
all_datasets = list(dataset_properties_map)
print(len(all_datasets))

In [None]:
from gluonts.ev.metrics import (
    MAE,
    MAPE,
    MASE,
    MSE,
    MSIS,
    ND,
    NRMSE,
    RMSE,
    SMAPE,
    MeanWeightedSumQuantileLoss,
)

# Quantile levels (deciles) scored by the weighted quantile loss below.
_decile_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Metric objects handed to `evaluate_model`; the order matches the columns
# written to the results CSV further down.
metrics = [
    # Squared error against the mean forecast and against the median.
    MSE(forecast_type="mean"),
    MSE(forecast_type=0.5),
    # Absolute / scaled / percentage errors with their default forecast type.
    MAE(),
    MASE(),
    MAPE(),
    SMAPE(),
    MSIS(),
    RMSE(),
    NRMSE(),
    ND(),
    MeanWeightedSumQuantileLoss(quantile_levels=_decile_levels),
]

In [None]:
class timemoe_wrapper:
    """Predictor wrapper that runs a pretrained Time-MoE model on GluonTS-style data.

    The wrapper loads ``Maple728/TimeMoE-50M``, batches the input series,
    brings every series to a common context length, normalizes each context,
    generates ``prediction_length`` new steps and returns one
    ``SampleForecast`` (with a single sample path) per input series.

    Args:
        prediction_length: Number of future steps to generate per series.
        truncate: Fixed context length. Series are cropped to their last
            ``truncate`` points and shorter ones are left-padded with zeros.
            A falsy value disables truncation; series are then right-padded
            to the longest series of the batch instead.
        batch_size: Initial number of series per forward pass. It is halved
            (and kept halved on the instance) whenever a ``RuntimeError`` is
            raised during generation.
    """

    def __init__(self, prediction_length, truncate=2000, batch_size=32):
        model_name = "Maple728/TimeMoE-50M"
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, device_map=self.device, trust_remote_code=True
        )
        self.batch_size = batch_size
        self.truncate = truncate
        self.prediction_length = prediction_length

    def calculate_max_shape(self, values: List[Union[torch.Tensor, np.ndarray]]) -> Tuple[int, ...]:
        """Return the element-wise maximum shape over a list of tensors or arrays.

        All entries are assumed to have the same number of dimensions as the
        first one; the rank is taken from ``values[0]``.
        """
        return tuple(
            max(v.size(dim) if isinstance(v, torch.Tensor) else v.shape[dim] for v in values)
            for dim in range(len(values[0].shape))
        )

    def pad_and_stack(self, values: List[Union[torch.Tensor, np.ndarray]], max_shape: Tuple[int, ...]) -> torch.Tensor:
        """Zero-pad every value at the end of each dimension to ``max_shape`` and stack them.

        Args:
            values: Tensors or NumPy arrays of identical rank.
            max_shape: Target shape, e.g. the result of ``calculate_max_shape``.

        Returns:
            A tensor of shape ``(len(values), *max_shape)``.
        """
        padded_values: List[torch.Tensor] = []
        for v in values:
            if isinstance(v, torch.Tensor):
                # F.pad takes (before, after) pairs starting from the LAST
                # dimension, so walk the axes in reverse and look up the
                # size of that same axis (the previous code indexed the
                # tensor with the reversed position, which is only correct
                # for 1-D inputs).
                pad_spec: List[int] = []
                for axis in reversed(range(len(max_shape))):
                    pad_spec.extend((0, max_shape[axis] - v.size(axis)))
                padded_values.append(F.pad(v, pad_spec))
            else:
                pad_width = [(0, max_dim - v.shape[axis]) for axis, max_dim in enumerate(max_shape)]
                padded_values.append(torch.tensor(np.pad(v, pad_width, mode="constant")))
        return torch.stack(padded_values)

    def truncate_and_stack(self, tensor_list, maxlength):
        """Bring every tensor to exactly ``maxlength`` along its last dimension and stack.

        Tensors longer than ``maxlength`` keep only their last ``maxlength``
        entries; shorter ones are left-padded with zeros.

        Args:
            tensor_list (list of torch.Tensor): Tensors with shape (length) or (num_channel, length).
            maxlength (int): The fixed length to truncate or pad tensors to.

        Returns:
            torch.Tensor: Stacked tensor with shape (N, maxlength) for 1D tensors
            or (N, num_channel, maxlength) for 2D tensors.
        """
        def left_pad_and_crop(t):
            missing = maxlength - t.shape[-1]
            if missing > 0:
                zeros = torch.zeros((*t.shape[:-1], missing), dtype=t.dtype, device=t.device)
                t = torch.cat([zeros, t], dim=-1)
            return t[..., -maxlength:]

        # A plain loop replaces the former torch.jit.fork/wait pair: outside
        # TorchScript the forked callable runs synchronously, so nothing was
        # actually executed in parallel.
        return torch.stack([left_pad_and_crop(t) for t in tensor_list], dim=0)

    def normalize_input(self, inputs):
        """Standardize ``inputs`` along the last dimension.

        Returns:
            Tuple ``(normed_input, mean, std)`` where ``mean`` and ``std`` keep
            the reduced dimension so that ``normed_input * std + mean``
            restores the input.
        """
        mean = inputs.mean(dim=-1, keepdim=True)
        std = inputs.std(dim=-1, keepdim=True)
        # A constant series has std == 0 and a single point has std == NaN;
        # dividing by either would fill the context with NaN/inf. Use a unit
        # scale in those cases (NaN > 0 is False, so NaN is replaced as well).
        std = torch.where(std > 0, std, torch.ones_like(std))
        normed_input = (inputs - mean) / std
        return normed_input, mean, std

    def _forecast_batches(self, data):
        """Run the model over ``data`` in batches and return an array of shape (N, prediction_length)."""
        chunks = []
        for batch in batcher(data, batch_size=self.batch_size):
            series = [torch.tensor(entry["target"]) for entry in batch]
            if self.truncate:
                context = self.truncate_and_stack(series, self.truncate)
            else:
                context = self.pad_and_stack(series, self.calculate_max_shape(series))
            if context.dim() > 2:
                # Drop the channel axis of (N, 1, length) inputs.
                context = context.squeeze(1)
            context = context.to(self.device).float()
            # NOTE(review): zero padding added above is included in the
            # mean/std statistics — confirm this is intended.
            context, means, stds = self.normalize_input(context)
            with torch.no_grad():
                generated = self.model.generate(context, max_new_tokens=self.prediction_length)
                # Keep only the last prediction_length steps and undo the
                # normalization.
                horizon = generated[:, -self.prediction_length:] * stds + means
            chunks.append(horizon.cpu().numpy())
        if not chunks:
            return np.empty((0, self.prediction_length))
        return np.concatenate(chunks)

    def predict(self, data):
        """Forecast every series in ``data``.

        Args:
            data: Re-iterable collection of entries with ``"target"`` and
                ``"start"`` fields. It is iterated once per attempted batch
                size and once more to build the forecast objects.

        Returns:
            list of SampleForecast, one per entry, each holding a single
            sample path of length ``prediction_length``. Empty input yields
            an empty list.

        Raises:
            ValueError: If generation keeps raising ``RuntimeError`` until
                the batch size drops below 1; the last ``RuntimeError`` is
                attached as the cause.
        """
        self.model.eval()
        self.model = self.model.to(self.device)

        forecasts = None
        last_error = None
        while self.batch_size >= 1:
            try:
                print("Trying batch size", self.batch_size)
                forecasts = self._forecast_batches(data)
                break
            except RuntimeError as err:
                # Treated as "batch too large" (e.g. out of memory): retry
                # with half the batch size.
                last_error = err
                self.batch_size = self.batch_size // 2
                print(f"Batch size too large, reducing to {self.batch_size}")
        if forecasts is None:
            raise ValueError("Batch size too small") from last_error

        forecasts_date = []
        for sample_path, ts in zip(forecasts, data):
            # The forecast starts right after the last observed point.
            forecast_start_date = ts["start"] + len(ts["target"])
            forecasts_date.append(
                SampleForecast(samples=sample_path[np.newaxis, :], start_date=forecast_start_date)
            )
        return forecasts_date

## Evaluation

Now that we have our predictor class, we can use it to predict on the BOOM benchmark datasets. We will use the `evaluate_model` function from `gluonts` to evaluate the model. We are going to store the results in a csv file called `all_results.csv` under the `time-moe` subfolder of the output directory configured in the cell below (`ChangeMe/time-moe` until you replace the placeholder).

The first column in the csv file is the dataset config name which is a combination of the dataset name, frequency and the term:

```python
f"{dataset_name}/{freq}/{term}"
```
Note that we replace the results with the seasonal naive baseline results whenever the model yields NaN forecasts.

In [None]:
from gluonts.model import evaluate_model
import csv
import os
from gluonts.time_feature import get_seasonality
from gift_eval.data import Dataset
import torch
import pandas as pd

torch.set_float32_matmul_precision("high")

model_name = "time-moe"
output_dir = f"ChangeMe/{model_name}"
# Seasonal naive results, used as a fallback when the model produces NaN forecasts.
baseline_csv_path = "ChangeMe/seasonalnaive/all_results.csv"
# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Define the path for the CSV file
csv_file_path = os.path.join(output_dir, "all_results.csv")

# Metric columns in the order they are written; the header and every row are
# derived from this single list so they cannot drift apart.
metric_prefix = "eval_metrics/"
metric_columns = [
    "MSE[mean]",
    "MSE[0.5]",
    "MAE[0.5]",
    "MASE[0.5]",
    "MAPE[0.5]",
    "sMAPE[0.5]",
    "MSIS",
    "RMSE[mean]",
    "NRMSE[mean]",
    "ND[0.5]",
    "mean_weighted_sum_quantile_loss",
    "MAAPE[0.5]",
]

# Opening in "w" mode starts a fresh results file with only the header.
with open(csv_file_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(
        ["dataset", "model"]
        + [metric_prefix + name for name in metric_columns]
        + ["domain", "num_variates", "dataset_size"]
    )

# Iterate over all available datasets
for ds_num, ds_name in enumerate(all_datasets):
    print(f"Processing dataset: {ds_name} ({ds_num + 1} of {len(all_datasets)})")
    ds_properties = dataset_properties_map[ds_name]
    ds_freq = ds_properties["frequency"]
    # Datasets marked "short" are only evaluated on the short term.
    terms = ["short"] if ds_properties["term"] == "short" else ["short", "medium", "long"]
    for term in terms:
        ds_config = f"{ds_name}/{ds_freq}/{term}"

        # The wrapper forecasts one series at a time, so multivariate
        # datasets (target_dim != 1) are split into univariate series.
        to_univariate = (
            Dataset(name=ds_name, term=term, to_univariate=False, storage_env_var="BOOM").target_dim != 1
        )
        dataset = Dataset(name=ds_name, term=term, to_univariate=to_univariate, storage_env_var="BOOM")
        model = timemoe_wrapper(dataset.prediction_length)
        season_length = get_seasonality(dataset.freq)
        dataset_size = len(dataset.test_data)
        print(f"Dataset size: {dataset_size}")

        try:
            res = evaluate_model(
                model,
                test_data=dataset.test_data,
                metrics=metrics,
                batch_size=512,
                axis=None,
                mask_invalid_label=True,
                allow_nan_forecast=False,
                seasonality=season_length,
            )
        except Exception as e:
            # Only NaN-forecast failures are replaced by the baseline; any
            # other error is re-raised with its original traceback.
            if "NaN" not in str(e):
                raise
            print(f"replacing results of {ds_name} with seasonal naive scores due to NaN values")
            res = pd.read_csv(baseline_csv_path)
            res.columns = [
                col[len(metric_prefix):] if col.startswith(metric_prefix) else col for col in res.columns
            ]
            res = res[res["dataset"] == ds_config].reset_index(drop=True)
            if res.empty:
                raise RuntimeError(
                    f"No seasonal naive results for {ds_config} in {baseline_csv_path}"
                ) from e

        # Metrics that were not computed (e.g. no matching entry in `metrics`)
        # are written as NaN instead of aborting the run with a KeyError.
        missing_metrics = [name for name in metric_columns if name not in res]
        if missing_metrics:
            print(f"Warning: metrics {missing_metrics} unavailable for {ds_config}; writing NaN")
        metric_values = [res[name][0] if name in res else float("nan") for name in metric_columns]

        # Append the results to the CSV file
        with open(csv_file_path, "a", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(
                [ds_config, model_name]
                + metric_values
                + [ds_properties["domain"], ds_properties["num_variates"], dataset_size]
            )

        print(f"Results for {ds_name} have been written to {csv_file_path}")

## Results

Running the above cell will generate a csv file called `all_results.csv` under the `time-moe` subfolder of the configured output directory, containing the results for the Time-MoE model on the BOOM benchmark. The csv file will look like this:

In [None]:
import pandas as pd
# Load the results written by the evaluation cell and display them.
df = pd.read_csv(f"{output_dir}/all_results.csv")
df