In [1]:

from data import load_all_data
filename = "all_data_smoothed.xlsx"
dataset = load_all_data(filename)

print(dataset)

  from .autonotebook import tqdm as notebook_tqdm


number of dates: 2868
The dataset has 130 features, each feature has 2868 data points
DatasetDict({
    train: Dataset({
        features: ['start', 'target', 'feat_static_cat', 'feat_dynamic_real', 'item_id'],
        num_rows: 130
    })
    test: Dataset({
        features: ['start', 'target', 'feat_static_cat', 'feat_dynamic_real', 'item_id'],
        num_rows: 130
    })
    validation: Dataset({
        features: ['start', 'target', 'feat_static_cat', 'feat_dynamic_real', 'item_id'],
        num_rows: 130
    })
})


In [2]:
freq = "1D"
prediction_length = 365

train_dataset = dataset["train"]
test_dataset = dataset["test"]

In [3]:

from functools import lru_cache

import pandas as pd
import numpy as np

@lru_cache(10_000)
def convert_to_pandas_period(date, freq):
    return pd.Period(date, freq)


def transform_start_field(batch, freq):
    batch["start"] = [convert_to_pandas_period(date, freq) for date in batch["start"]]
    return batch

In [4]:

from functools import partial

train_dataset.set_transform(partial(transform_start_field, freq=freq))
test_dataset.set_transform(partial(transform_start_field, freq=freq))


from gluonts.dataset.multivariate_grouper import MultivariateGrouper
num_of_variates = len(train_dataset)

train_grouper = MultivariateGrouper(max_target_dim=num_of_variates)
test_grouper = MultivariateGrouper(
    max_target_dim=num_of_variates,
    num_test_dates=len(test_dataset) // num_of_variates, # number of rolling test windows
)

multi_variate_train_dataset = train_grouper(train_dataset)
multi_variate_test_dataset = test_grouper(test_dataset)
print(multi_variate_train_dataset[0]["target"])


[[ 1.5908936  1.5908936  1.5908936 ...  1.3840507  1.3840507  1.3840507]
 [ 1.1921233  1.1921233  1.1921233 ...  1.1646624  1.1646624  1.1646624]
 [ 0.5        0.5        0.5       ...  0.1        0.1        0.1      ]
 ...
 [64.        64.        64.        ... 58.        58.        58.       ]
 [12.        12.        12.        ... 25.        25.        25.       ]
 [23.        23.        23.        ... 17.        17.        17.       ]]


In [5]:

from gluonts.time_feature import get_lags_for_frequency

lags_sequence = get_lags_for_frequency(freq)
print(lags_sequence)

from gluonts.time_feature import time_features_from_frequency_str

time_features = time_features_from_frequency_str(freq)
print(time_features)

[1, 2, 3, 4, 5, 6, 7, 8, 13, 14, 15, 20, 21, 22, 27, 28, 29, 30, 31, 56, 84, 363, 364, 365, 727, 728, 729, 1091, 1092, 1093]
[<function day_of_week at 0x7f4defe69f70>, <function day_of_month at 0x7f4defc420d0>, <function day_of_year at 0x7f4defc421f0>]


In [None]:

from transformers import InformerConfig, InformerForPrediction

config = InformerConfig(
    # in the multivariate setting, input_size is the number of variates in the time series per time step
    input_size=num_of_variates,
    # prediction length:
    prediction_length=prediction_length,
    # context length:
    context_length=prediction_length * 2,
    # lags value copied from 1 week before:
    lags_sequence=[1, 7],
    # we'll add 5 time features ("hour_of_day", ..., and "age"):
    num_time_features=len(time_features) + 1,
    
    # informer params:
    dropout=0.1,
    encoder_layers=6,
    decoder_layers=4,
    # project input from num_of_variates*len(lags_sequence)+num_time_features to:
    d_model=64,
)

In [None]:

# model = InformerForPrediction(config)

from dataloader import create_train_dataloader, create_backtest_dataloader
train_dataloader = create_train_dataloader(
    config=config,
    freq=freq,
    data=multi_variate_train_dataset,
    batch_size=256,
    num_batches_per_epoch=100,
    num_workers=2,
)

test_dataloader = create_backtest_dataloader(
    config=config,
    freq=freq,
    data=multi_variate_test_dataset,
    batch_size=32,
)

In [None]:

### Train the model! ###
from accelerate import Accelerator
from torch.optim import AdamW

epochs = 30
loss_history = []

accelerator = Accelerator()
device = accelerator.device
print(f"available device: {device}")

model.to(device)
optimizer = AdamW(model.parameters(), lr=6e-5, betas=(0.9, 0.95), weight_decay=1e-1)

model, optimizer, train_dataloader = accelerator.prepare(
    model,
    optimizer,
    train_dataloader,
)

model.train()
for epoch in range(epochs):
    for idx, batch in enumerate(train_dataloader):
        optimizer.zero_grad()
        outputs = model(
            static_categorical_features=batch["static_categorical_features"].to(device)
            if config.num_static_categorical_features > 0
            else None,
            static_real_features=batch["static_real_features"].to(device)
            if config.num_static_real_features > 0
            else None,
            past_time_features=batch["past_time_features"].to(device),
            past_values=batch["past_values"].to(device),
            future_time_features=batch["future_time_features"].to(device),
            future_values=batch["future_values"].to(device),
            past_observed_mask=batch["past_observed_mask"].to(device),
            future_observed_mask=batch["future_observed_mask"].to(device),
        )
        loss = outputs.loss

        # Backpropagation
        accelerator.backward(loss)
        optimizer.step()

        loss_history.append(loss.item())
        if idx % 100 == 0:
            print(loss.item())

    



In [None]:

import matplotlib.pyplot as plt
# view training
loss_history = np.array(loss_history).reshape(-1)
x = range(loss_history.shape[0])
plt.figure(figsize=(10, 5))
plt.plot(x, loss_history, label="train")
plt.title("Loss", fontsize=15)
plt.legend(loc="upper right")
plt.xlabel("iteration")
plt.ylabel("nll")
plt.savefig("./train.png")
plt.show()


In [None]:

# perform inference
model.eval()

forecasts_ = []
for batch in test_dataloader:
    outputs = model.generate(
        static_categorical_features=batch["static_categorical_features"].to(device)
        if config.num_static_categorical_features > 0
        else None,
        static_real_features=batch["static_real_features"].to(device)
        if config.num_static_real_features > 0
        else None,
        past_time_features=batch["past_time_features"].to(device),
        past_values=batch["past_values"].to(device),
        future_time_features=batch["future_time_features"].to(device),
        past_observed_mask=batch["past_observed_mask"].to(device),
    )
    forecasts_.append(outputs.sequences.cpu().numpy())

print(forecasts_[0].shape)
forecasts = np.vstack(forecasts_)
print(forecasts.shape)

In [None]:
# perform evaluate
from evaluate import load
from gluonts.time_feature import get_seasonality

mase_metric = load("evaluate-metric/mase")
smape_metric = load("evaluate-metric/smape")

forecast_median = np.median(forecasts,1).squeeze(0).T
print(f"shape for forecast_median is: {forecast_median.shape}")

mase_metrics = []
smape_metrics = []

for item_id, ts in enumerate(test_dataset):
    if item_id >= 2: continue
    print(f"item_id={item_id}, ts={ts}")
    training_data = ts["target"][:-prediction_length]
    ground_truth = ts["target"][-prediction_length:]
    mase = mase_metric.compute(
        predictions=forecast_median[item_id],
        references=np.array(ground_truth),
        training=np.array(training_data),
        periodicity=get_seasonality(freq),
    )
    mase_metrics.append(mase["mase"])

    smape = smape_metric.compute(
        predictions=forecast_median[item_id],
        references=np.array(ground_truth),
    )
    smape_metrics.append(smape["smape"])

In [None]:
print(f"MASE: {np.mean(mase_metrics)}")
print(f"sMAPE: {np.mean(smape_metrics)}")

In [7]:

import matplotlib.dates as mdates
from gluonts.dataset.field_names import FieldName
import matplotlib.pyplot as plt


df = pd.read_excel(filename, usecols=['Date'])
dates = pd.to_datetime(df['Date'])
dates

def plot(ts_index, mv_index):
    fig, ax = plt.subplots()

    index = dates
    print(f"index is {index}")
    print(f"length of index: {len(index)}")

    ax.xaxis.set_minor_locator(mdates.HourLocator())

    ax.plot(
        index[:-prediction_length],
        multi_variate_test_dataset[ts_index]["target"][mv_index,:],
        label="actual",
    )

    ax.plot(
        index[-2*prediction_length:-prediction_length],
        forecasts[ts_index, ..., mv_index].mean(axis=0),
        label="mean",
    )
    ax.fill_between(
        index[-2*prediction_length:-prediction_length],
        forecasts[ts_index, ..., mv_index].mean(0)
        - forecasts[ts_index, ..., mv_index].std(axis=0),
        forecasts[ts_index, ..., mv_index].mean(0)
        + forecasts[ts_index, ..., mv_index].std(axis=0),
        alpha=0.2,
        interpolate=True,
        label="+/- 1-std",
    )
    ax.legend()
    fig.autofmt_xdate()
    return fig, ax

In [None]:
fig, ax = plot(0,0)
ax.set_xlabel('date')
ax.set_ylabel('GBP_USD')
ax.set_ylim(0,2)

In [None]:
fig, ax = plot(0,1)
ax.set_xlabel('date')
ax.set_ylabel('GBP_EUR')

In [8]:
from accelerate import Accelerator
from torch.optim import AdamW
from evaluate import load
from gluonts.time_feature import get_seasonality
from transformers import InformerConfig, InformerForPrediction
mase_metric = load("evaluate-metric/mase")
smape_metric = load("evaluate-metric/smape")

EPOCHS = 10
accelerator = Accelerator()
device = accelerator.device
print(f"available device: {device}")
def train(lr, weight_decay, num_encoder_layers, num_decoder_layers, d_model):
    print(f"-----Start Training Trial for lr={lr}, w_d={weight_decay}, num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, d_model={d_model}-----")
    loss_history = []
    config = InformerConfig(
        # in the multivariate setting, input_size is the number of variates in the time series per time step
        input_size=num_of_variates,
        # prediction length:
        prediction_length=prediction_length,
        # context length:
        context_length=prediction_length * 2,
        # lags value copied from 1 week before:
        lags_sequence=[1, 7],
        # we'll add 5 time features ("hour_of_day", ..., and "age"):
        num_time_features=len(time_features) + 1,
        
        # informer params:
        dropout=0.1,
        encoder_layers=num_encoder_layers,
        decoder_layers=num_decoder_layers,
        # project input from num_of_variates*len(lags_sequence)+num_time_features to:
        d_model=d_model,
    )

    from dataloader import create_train_dataloader, create_backtest_dataloader
    train_dataloader = create_train_dataloader(
        config=config,
        freq=freq,
        data=multi_variate_train_dataset,
        batch_size=256,
        num_batches_per_epoch=100,
        num_workers=2,
    )

    test_dataloader = create_backtest_dataloader(
        config=config,
        freq=freq,
        data=multi_variate_test_dataset,
        batch_size=32,
    )

    model = InformerForPrediction(config)
    model.to(device)
    optimizer = AdamW(model.parameters(), lr=lr, betas=(0.9, 0.95), weight_decay=weight_decay)
    model, optimizer, train_dataloader = accelerator.prepare(
        model,
        optimizer,
        train_dataloader,
    )
    model.train()
    for epoch in range(EPOCHS):
        for idx, batch in enumerate(train_dataloader):
            optimizer.zero_grad()
            outputs = model(
                static_categorical_features=batch["static_categorical_features"].to(device)
                if config.num_static_categorical_features > 0
                else None,
                static_real_features=batch["static_real_features"].to(device)
                if config.num_static_real_features > 0
                else None,
                past_time_features=batch["past_time_features"].to(device),
                past_values=batch["past_values"].to(device),
                future_time_features=batch["future_time_features"].to(device),
                future_values=batch["future_values"].to(device),
                past_observed_mask=batch["past_observed_mask"].to(device),
                future_observed_mask=batch["future_observed_mask"].to(device),
            )
            loss = outputs.loss

            # Backpropagation
            accelerator.backward(loss)
            optimizer.step()

            loss_history.append(loss.item())
            if idx % 100 == 0:
                print(f"epoch: {epoch}, loss: {loss.item()}")

    # perform the evaluation
    model.eval()

    forecasts_ = []
    for batch in test_dataloader:
        outputs = model.generate(
            static_categorical_features=batch["static_categorical_features"].to(device)
            if config.num_static_categorical_features > 0
            else None,
            static_real_features=batch["static_real_features"].to(device)
            if config.num_static_real_features > 0
            else None,
            past_time_features=batch["past_time_features"].to(device),
            past_values=batch["past_values"].to(device),
            future_time_features=batch["future_time_features"].to(device),
            past_observed_mask=batch["past_observed_mask"].to(device),
        )
        forecasts_.append(outputs.sequences.cpu().numpy())

    forecasts = np.vstack(forecasts_)
    forecast_median = np.median(forecasts,1).squeeze(0).T


    mase_metrics = []
    smape_metrics = []

    for item_id, ts in enumerate(test_dataset):
        if item_id >= 2: continue
        print(f"item_id={item_id}, ts={ts}")
        training_data = ts["target"][:-prediction_length]
        ground_truth = ts["target"][-prediction_length:]
        mase = mase_metric.compute(
            predictions=forecast_median[item_id],
            references=np.array(ground_truth),
            training=np.array(training_data),
            periodicity=get_seasonality(freq),
        )
        mase_metrics.append(mase["mase"])

        smape = smape_metric.compute(
            predictions=forecast_median[item_id],
            references=np.array(ground_truth),
        )
        smape_metrics.append(smape["smape"])

    mase = np.mean(mase_metrics)
    print(f"-----End Training Trial, mase={mase:.1f} for lr={lr}, w_d={weight_decay}, num_encoder_layers={num_encoder_layers}, num_decoder_layers={num_decoder_layers}, d_model={d_model}-----")
    return mase, model, config



available device: cuda


In [11]:
import time
best_model = None
lrs = [6e-3, 6e-4]
weight_decays = [1e-1, 1e-2, 1e-3]
num_encoders = [3,6]
num_decoders = [4,8]
d_models = [32,64,128]

least_mase = float('inf')
best_config = None

total_trial = len(lrs) * len(weight_decays) * len(num_encoders) * len(num_decoders) * len(d_models)
count = 1

for lr in lrs:
    for weight_decay in weight_decays:
        for num_encoder_layers in num_encoders:
            for num_decoder_layers in num_decoders:
                for d_model in d_models:
                    try:
                        start = time.time()
                        mase, model, config = train(lr, weight_decay, num_encoder_layers, num_decoder_layers, d_model)
                        training_time = time.time() - start
                        if mase < least_mase:
                            print(f"MASE: {mase} is the currently best for [{lr}, {weight_decay}, {num_encoder_layers}, {num_decoder_layers}, {d_model}]")
                            least_mase = mase
                            del best_model
                            best_model = model
                            best_config = config
                        else:
                            del model
                        print(f"This trial takes {training_time}s to finish, {count}/{total_trial}")    
                        count += 1
                    except Exception as e:
                        training_time = time.time() - start
                        print(f"For [{lr}, {weight_decay}, {num_encoder_layers}, {num_decoder_layers}, {d_model}], training failed! Take time: {training_time}s, {count}/{total_trial}")
                        count += 1

print(f"Exploration finished, least_mase={least_mase}")

-----Start Training Trial for lr=0.006, w_d=0.1, num_encoder_layers=3, num_decoder_layers=4, d_model=32-----
epoch: 0, loss: -468.49285888671875
epoch: 1, loss: -745.253173828125
epoch: 2, loss: -786.5719604492188
epoch: 3, loss: -781.2622680664062
epoch: 4, loss: -790.212158203125
epoch: 5, loss: -786.1687622070312
epoch: 6, loss: -815.6360473632812
For [0.006, 0.1, 3, 4, 32], training failed! Take time: 269.4945764541626s, 1/72
-----Start Training Trial for lr=0.006, w_d=0.1, num_encoder_layers=3, num_decoder_layers=4, d_model=64-----
epoch: 0, loss: -460.0308837890625
epoch: 1, loss: -722.5523681640625
epoch: 2, loss: -771.1315307617188
epoch: 3, loss: -741.8736572265625
epoch: 4, loss: -769.6417846679688
epoch: 5, loss: -801.0233764648438
epoch: 6, loss: -804.4201049804688
epoch: 7, loss: -737.940673828125
epoch: 8, loss: -811.5033569335938
For [0.006, 0.1, 3, 4, 64], training failed! Take time: 405.3089249134064s, 2/72
-----Start Training Trial for lr=0.006, w_d=0.1, num_encoder_l

In [12]:
# perform forcasting based on best model

test_dataloader = create_backtest_dataloader(
        config=best_config,
        freq=freq,
        data=multi_variate_test_dataset,
        batch_size=32,
)
# perform inference
best_model.eval()

forecasts_ = []
for batch in test_dataloader:
    outputs = best_model.generate(
        static_categorical_features=batch["static_categorical_features"].to(device)
        if config.num_static_categorical_features > 0
        else None,
        static_real_features=batch["static_real_features"].to(device)
        if config.num_static_real_features > 0
        else None,
        past_time_features=batch["past_time_features"].to(device),
        past_values=batch["past_values"].to(device),
        future_time_features=batch["future_time_features"].to(device),
        past_observed_mask=batch["past_observed_mask"].to(device),
    )
    forecasts_.append(outputs.sequences.cpu().numpy())

forecasts = np.vstack(forecasts_)

NameError: name 'create_backtest_dataloader' is not defined

In [None]:
df = pd.read_excel('all_data.xlsx', usecols=['Date'])
dates = pd.to_datetime(df['Date'])
dates

In [None]:
fig, ax = plot(0,0)
ax.set_xlabel('date')
ax.set_ylabel('GBP_USD')
ax.set_ylim(0,2)

In [None]:
fig, ax = plot(0,1)
ax.set_xlabel('date')
ax.set_ylabel('GBP_EUR')