# Chronos

In [66]:
import warnings
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
import torch
import transformers
from chronos import ChronosPipeline
from darts import TimeSeries
from darts.metrics import rmse, mae, mape, smape, mse
from pmdarima.arima import auto_arima
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, root_mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tqdm import tqdm

In [67]:
# Import the data
def load_and_prepare_data(file_path):
    """
    Load energy prices data from a CSV file and index it chronologically.

    Parameters
    ----------
    file_path : str, path-like or file-like
        CSV source containing a 'Date' column (anything ``pd.read_csv`` accepts).

    Returns
    -------
    pd.DataFrame
        The CSV contents sorted by ascending date, indexed by a
        ``DatetimeIndex`` named 'Date'.
    """
    df = pd.read_csv(file_path)
    # Parse the dates *before* sorting: sorting the raw strings is only
    # chronological when every value happens to be zero-padded ISO text.
    df['Date'] = pd.to_datetime(df['Date'])
    return df.sort_values('Date').set_index('Date')

In [68]:
# Load the pre-split train/test sets; reset_index() turns 'Date' back into a regular column
train_df = load_and_prepare_data('../../data/Final_data/train_df_no_lags.csv').reset_index()
test_df = load_and_prepare_data('../../data/Final_data/test_df_no_lags.csv').reset_index()

In [69]:
# Load the full data set and move 'Date' from the index back to a column
df = load_and_prepare_data('../../data/Final_data/final_data_no_lags.csv').reset_index()

# Column holding the series that is forecast below
target_column = "Day_ahead_price (€/MWh)"

In [70]:
# date of first forecast (inclusive); it is looked up in the 'Date' index of the
# series, so it has to be a date that exists in the data
start_date = "2022-07-01"

# date of last forecast (inclusive); looked up in the same way as start_date
end_date = "2024-07-28"

In [71]:
# Build a pandas Series of the target column indexed by 'Date'
data = (
    df.loc[:, ['Date', target_column]]
    .set_index('Date')
    .squeeze()  # collapse the single-column frame into a Series
)
data

Date
2012-01-08    26.83
2012-01-09    47.91
2012-01-10    45.77
2012-01-11    47.83
2012-01-12    43.10
              ...  
2024-07-24    66.61
2024-07-25    78.34
2024-07-26    93.04
2024-07-27    80.74
2024-07-28    43.96
Name: Day_ahead_price (€/MWh), Length: 4586, dtype: float64

In [72]:
# Sanity check: the target should be a one-dimensional series (one value per date)
data.shape

(4586,)

## Chronos Pipeline

In [73]:
# Define the size of the Chronos model; the value is interpolated into the
# checkpoint name "amazon/chronos-t5-{SIZE}" and into the output CSV file names
SIZE = "mini"

In [None]:
# Pick the best available device instead of assuming Apple Silicon ("mps"),
# so the notebook also runs on CUDA machines and on CPU-only hosts.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Load the pretrained Chronos checkpoint of the configured SIZE onto that device
chronos_model = ChronosPipeline.from_pretrained(
    f"amazon/chronos-t5-{SIZE}",
    device_map=device,
    torch_dtype=torch.bfloat16,
)

config.json:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

In [None]:
# save the start time
start_time = datetime.now()

# fix the random seed so the sampled forecasts are repeatable
transformers.set_seed(42)

# number of samples drawn for every forecast
NUM_SAMPLES = 100

# integer positions of the first and last forecast dates within the series
first_pos = data.index.get_loc(start_date)
last_pos = data.index.get_loc(end_date)

# one record per forecast date
records = []

# expanding-window backtest: forecast position t from everything before it
for t in tqdm(range(first_pos, last_pos + 1)):

    # extract the context window (all observations strictly before position t)
    context = data.iloc[:t]

    # generate the one-step-ahead forecast samples
    samples = chronos_model.predict(
        context=torch.from_numpy(context.values),
        prediction_length=1,
        num_samples=NUM_SAMPLES
    )
    chronos_forecast = samples.detach().cpu().numpy().flatten()

    # summarise the samples by their mean and sample standard deviation
    records.append(dict(
        date=data.index[t],
        actual=data.values[t],
        mean=np.mean(chronos_forecast),
        std=np.std(chronos_forecast, ddof=1),
    ))

# cast the forecasts to data frame
chronos_forecasts = pd.DataFrame(records)

# save the end time
end_time = datetime.now()

print(f"\nRunning time of Chronos model: {end_time - start_time}")

100%|██████████| 759/759 [01:19<00:00,  9.53it/s]


Running time of Chronos model: 0:01:19.685581





In [None]:
# Preview the first forecasts: date, observed value, and mean/std of the forecast samples
chronos_forecasts.head()

Unnamed: 0,date,actual,mean,std
0,2022-07-01,314.38,308.881226,12.827397
1,2022-07-02,218.92,301.165813,13.275861
2,2022-07-03,200.11,229.198733,16.286211
3,2022-07-04,293.89,229.724203,14.900106
4,2022-07-05,318.37,320.436664,13.07062


In [None]:
# Preview the last forecasts to confirm the loop ran through to the final forecast date
chronos_forecasts.tail()

Unnamed: 0,date,actual,mean,std
754,2024-07-24,66.61,86.384787,8.388112
755,2024-07-25,78.34,75.744715,8.68897
756,2024-07-26,93.04,79.477516,8.998552
757,2024-07-27,80.74,89.299353,7.97408
758,2024-07-28,43.96,81.875653,7.881426


In [None]:
# Plot the result: actual vs. predicted values with +/- 1 and +/- 2 std. dev. bands

forecast_dates = chronos_forecasts["date"].values
forecast_mean = chronos_forecasts["mean"].values
forecast_std = chronos_forecasts["std"].values

# Create traces for actual and predicted values
trace_actual = go.Scatter(
    x=forecast_dates,
    y=chronos_forecasts["actual"].values,
    mode='lines',
    name='Actual',
    line=dict(color='#3f4751', width=1)
)

trace_predicted = go.Scatter(
    x=forecast_dates,
    y=forecast_mean,
    mode='lines',
    name='Predicted',
    line=dict(color='#009ad3', width=1)
)

# Create traces for the uncertainty bands.
# fill='tonexty' shades the area between a trace and the trace added just
# before it, so each band is an (upper, lower) pair in which ONLY the lower
# bound fills. The upper bound is an invisible anchor; giving it a fill too
# would shade towards an unrelated earlier trace.
trace_std_1 = go.Scatter(
    x=forecast_dates,
    y=forecast_mean + forecast_std,
    mode='lines',
    line=dict(color='#009ad3', width=0),
    legendgroup='std_1',
    showlegend=False
)

trace_std_1_neg = go.Scatter(
    x=forecast_dates,
    y=forecast_mean - forecast_std,
    mode='lines',
    name='Predicted +/- 1 Std. Dev.',
    line=dict(color='#009ad3', width=0),
    legendgroup='std_1',
    fill='tonexty',
    fillcolor='rgba(0, 154, 211, 0.2)'
)

trace_std_2 = go.Scatter(
    x=forecast_dates,
    y=forecast_mean + 2 * forecast_std,
    mode='lines',
    line=dict(color='#009ad3', width=0),
    legendgroup='std_2',
    showlegend=False
)

trace_std_2_neg = go.Scatter(
    x=forecast_dates,
    y=forecast_mean - 2 * forecast_std,
    mode='lines',
    name='Predicted +/- 2 Std. Dev.',
    line=dict(color='#009ad3', width=0),
    legendgroup='std_2',
    fill='tonexty',
    fillcolor='rgba(0, 154, 211, 0.1)'
)

# Create the figure with all the traces.
# Each upper bound must be added immediately before its lower bound so that
# the lower bound's 'tonexty' fill targets the right trace.
fig = go.Figure()
fig.add_trace(trace_actual)
fig.add_trace(trace_predicted)
fig.add_trace(trace_std_2)
fig.add_trace(trace_std_2_neg)
fig.add_trace(trace_std_1)
fig.add_trace(trace_std_1_neg)

# Set layout options
fig.update_layout(
    xaxis_title='Time',
    yaxis_title='Value',
    legend=dict(x=1.05, y=1),
    margin=dict(t=20, b=20, l=20, r=20),
    width=1200,
    height=450
)

# Show the plot
pio.show(fig)

In [None]:
# Convert the chronos_forecasts DataFrame to Darts TimeSeries objects.
# Both columns share the frame's integer row index, so the two series are
# aligned position by position.
actual_series = TimeSeries.from_series(chronos_forecasts["actual"])
predicted_series = TimeSeries.from_series(chronos_forecasts["mean"])

# Calculate the metrics using Darts functions (actual first, prediction second)
rmse_value = rmse(actual_series, predicted_series)
mae_value = mae(actual_series, predicted_series)
# NOTE(review): MAPE divides by the actual values, so prices close to zero
# inflate it heavily; read it together with SMAPE / MAE.
mape_value = mape(actual_series, predicted_series)
smape_value = smape(actual_series, predicted_series)
mse_value = mse(actual_series, predicted_series)

# Create the metrics DataFrame: one row per metric, indexed by the metric name
chronos_metrics = pd.DataFrame(
    {
        "Metric": ["RMSE", "MAE", "MAPE", "SMAPE", "MSE"],
        "Value": [rmse_value, mae_value, mape_value, smape_value, mse_value],
    }
).set_index("Metric")

In [None]:
# Display the error metrics of the Chronos one-step-ahead forecasts
chronos_metrics

Unnamed: 0_level_0,Value
Metric,Unnamed: 1_level_1
RMSE,41.187968
MAE,26.966135
MAPE,96.261998
SMAPE,26.180439
MSE,1696.448732


In [None]:
# Save Chronos metrics as csv in the current working directory; the model SIZE is
# part of the file name so runs with different model sizes do not overwrite each other
chronos_metrics.to_csv(f'chronos_metrics_{SIZE}.csv')

In [None]:
# Keep only the forecast date and the point forecast, exposing the mean as 'Day_ahead_price'
date_mean_data = chronos_forecasts[['date', 'mean']].rename(columns={'mean': 'Day_ahead_price'})

# Save to CSV (without the integer row index) and report where the file went
output_path = f'chronos_forecast_{SIZE}.csv'
date_mean_data.to_csv(output_path, index=False)
print(f"Data saved to {output_path}")

Data saved to chronos_forecast_tiny.csv
