### Load libraries

In [1]:
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.dates as mdates
from itertools import islice
import torch
import os
import wandb
from gluonts.dataset.repository import get_dataset, dataset_names
from gluonts.dataset.util import to_pandas
from gluonts.dataset.common import ListDataset
from tqdm.autonotebook import tqdm
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.pandas import PandasDataset
from lag_llama.gluon.estimator import LagLlamaEstimator

  from tqdm.autonotebook import tqdm


### train_ds and test_ds in ListDatasets ✅ 

In [2]:
train_df = pd.read_csv(f'train_df_14.csv', index_col=0)
train_df_std = (train_df - train_df.mean())/train_df.std()
train_data = [{"start": train_df_std.index[0], "target": train_df_std[i].values} for i in train_df_std.columns]
train_ds = ListDataset(data_iter=train_data, freq='1s') # training dataset

In [3]:
test_df = pd.read_csv(f'test_df_14.csv', index_col=0)
test_df_std = (test_df - test_df.mean())/test_df.std()
test_data = [{"start": test_df_std.index[0], "target": test_df_std[i].values} for i in test_df_std.columns]
test_ds = ListDataset(data_iter=test_data, freq='1s') # testing dataset

### define sweeper in wandb

In [4]:
# Define the search space
sweep_config = {
    "method": "grid",
    "parameters": {
        "lr": {"values": [5e-4, 10e-4, 5e-5 ,10e-5]}, #4 bigger magnitude 10-2 ... 10-6, lr_scheduler
        "batch_size": {"values": [2, 8, 16, 32, 64, 96]}, #6
        "context_length": {"values": [32, 64, 96, 128]} #4
    }
}
sweep_id = wandb.sweep(sweep_config, project="lag_llama_04_07") # a new sweep is created in the sweep space under my w&b account

Create sweep with ID: 8bk4uije
Sweep URL: https://wandb.ai/junboma/lag_llama_04_07/sweeps/8bk4uije


### initialize the estimator

In [5]:
prediction_length = 24
num_samples = 100
device = "cuda:0"

In [6]:
def create_estimator(output_dir):
    
    config = wandb.config
    lr = config.lr
    context_length = config.context_length
    batch_size = config.batch_size

    ckpt = torch.load("lag-llama.ckpt", map_location=torch.device(device))

    estimator_args = ckpt["hyper_parameters"]["model_kwargs"]
    estimator = LagLlamaEstimator(
        ckpt_path="lag-llama.ckpt",
        prediction_length=prediction_length,
        context_length=context_length,
        nonnegative_pred_samples=False,
        aug_prob=0,
        lr=lr,
        input_size=estimator_args["input_size"],
        n_layer=estimator_args["n_layer"],
        n_embd_per_head=estimator_args["n_embd_per_head"],
        n_head=estimator_args["n_head"],
        time_feat=estimator_args["time_feat"],
        rope_scaling={
            "type": "linear",
            "factor": max(1.0, (context_length + prediction_length) / estimator_args["context_length"]),
        },
        batch_size=batch_size,
        num_parallel_samples=num_samples,
        trainer_kwargs={"max_epochs": 100},
    #    log_train_loss=True, 
    )

    return estimator

In [7]:
def getCPRS(dataset, predictor, num_samples=100):

    # inference step
    forecast_it, ts_it = make_evaluation_predictions( 
            dataset=dataset,
            predictor=predictor,
            num_samples=num_samples)

    forecasts = list(tqdm(forecast_it, total=len(dataset), desc="Forecasting batches"))
    tss = list(tqdm(ts_it, total=len(dataset), desc="Ground truth"))
    
    # restore mean and avg for forecasts and tss of dataset (test_ds)
    for i in range(len(tss)):
        tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
    for i in range(len(forecasts)):
        samples = forecasts[i].samples
        std_value = test_df.std()[i]
        mean_value = test_df.mean()[i]
        np.multiply(samples, std_value, out=samples)
        np.add(samples, mean_value, out=samples)
    
    # evaluate metrics
    evaluator = Evaluator()
    agg_metrics, ts_metrics = evaluator(iter(tss), iter(forecasts))
    
    return agg_metrics['mean_wQuantileLoss']

In [None]:
def train():
    with wandb.init():
        estimator = create_estimator(output_dir=".")
        predictor = estimator.train(train_ds, cache_data=False, shuffle_buffer_length=1000)
        CPRS = getCPRS(test_ds, predictor, num_samples=100)
        wandb.log({"CPRS": CPRS})
        wandb.finish() # must-have, otherwise w&b doesn't know when a search is finished and thus can't proceed with the next search
# Start the sweep agent 
wandb.agent(sweep_id, train) 

[34m[1mwandb[0m: Agent Starting Run: zinyftfc with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 32
[34m[1mwandb[0m: 	lr: 0.0005
[34m[1mwandb[0m: Currently logged in as: [33mjunboleng[0m ([33mjunboma[0m). Use [1m`wandb login --relogin`[0m to force relogin




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111244142262472, max=1.0)…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/logger_connector/logger_connector.py:67: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `lightning.pytorch` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Para

Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.83678 (best 0.83678), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_28/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.68651 (best 0.68651), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_28/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.58697 (best 0.58697), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_28/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.44757 (best 0.44757), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 252.81it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,█▄▇▆▅▆▃▃▆▆▄▆▄▅▄▂▂▂▄▆▅▅▅▅▇█▅▃▆▄▂▁▂▅▅▃▁▄▃▁

0,1
CPRS,0.00035
train_loss,-0.23716


[34m[1mwandb[0m: Agent Starting Run: blndtdqc with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 32
[34m[1mwandb[0m: 	lr: 0.001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112384270462725, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.94483 (best 0.94483), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_29/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.73799 (best 0.73799), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_29/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.60103 (best 0.60103), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_29/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.48449 (best 0.48449), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 467.34it/s]


VBox(children=(Label(value='0.003 MB of 0.022 MB uploaded\r'), FloatProgress(value=0.15332332950834143, max=1.…

0,1
CPRS,▁
train_loss,▆▅▇▆▃▅▂▆▆▇▅▆▅▆▃▆▆▃█▆▁▄▇▅▅▃▂▃▆█▄█▄▃▆▅▅▄▄▇

0,1
CPRS,0.00046
train_loss,0.29449


[34m[1mwandb[0m: Agent Starting Run: fcl5z193 with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 32
[34m[1mwandb[0m: 	lr: 5e-05




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112476895666785, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.53025 (best 0.53025), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_30/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.45021 (best 0.45021), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_30/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.43635 (best 0.43635), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_30/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' reached 0.37865 (best 0.37865), saving model to

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 607.53it/s]


VBox(children=(Label(value='0.024 MB of 0.024 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▆▅▄▄▇▃▆▂█▇▄▂▇▄▄▁▆▁▇▅▇▄▂▅▂▅▄▄▃▃▂▇▄▂▃▂▃▂▃▃

0,1
CPRS,0.00036
train_loss,0.34103


[34m[1mwandb[0m: Agent Starting Run: n0ztollz with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 32
[34m[1mwandb[0m: 	lr: 0.0001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113043284664552, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.55061 (best 0.55061), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_31/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.52414 (best 0.52414), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_31/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.28037 (best 0.28037), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_31/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' was not in top 1
Epoch 6, global step 350: 'tra

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 565.42it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▅▅█▆▆▅▅▆▄▃▄▅▅▅█▄▃▅▅▆▁▄▅▅▄▃▃▄▄▃▃▂▄▃▇▂▃▃▃▁

0,1
CPRS,0.00037
train_loss,-0.42521


[34m[1mwandb[0m: Agent Starting Run: lgyn7imm with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 64
[34m[1mwandb[0m: 	lr: 0.0005




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113145388662815, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.75623 (best 0.75623), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_32/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.51077 (best 0.51077), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_32/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.50163 (best 0.50163), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_32/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 0.47915 (best 0.47915), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesi

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 218.67it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,█▅▇▄▅▆▇▆▆▄▇▂▆▅▅▃▁▅▁▃▆▃▄▇▄▅▅▃▅▃▁▄▅▁▄▃▂▁▃▃

0,1
CPRS,0.00034
train_loss,0.0543


[34m[1mwandb[0m: Agent Starting Run: x5v5azsk with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 64
[34m[1mwandb[0m: 	lr: 0.001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112496029171678, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.91883 (best 0.91883), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_33/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.63023 (best 0.63023), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_33/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.55274 (best 0.55274), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_33/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.51614 (best 0.51614), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 226.63it/s]


VBox(children=(Label(value='0.003 MB of 0.022 MB uploaded\r'), FloatProgress(value=0.15025727504648248, max=1.…

0,1
CPRS,▁
train_loss,█▇▅▆▆▃▇▄▅▄▅▄▆▅▃▃▄▃▁▇▇▆█▅▆▅▁▅▇▂▆▆▃▄▆▂▄▅▆▅

0,1
CPRS,0.00038
train_loss,0.47211


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u1ih0uny with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 64
[34m[1mwandb[0m: 	lr: 5e-05




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112489023556311, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.66386 (best 0.66386), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_34/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.48555 (best 0.48555), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_34/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.39192 (best 0.39192), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_34/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 0.34310 (best 0.34310), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesi

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 198.37it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▇▅██▆█▇▅█▃▆█▇███▇▆▆▅▁▇▄▄▅▅▄▄▄▃▂▆▅▃▄█▄▄▅▃

0,1
CPRS,0.00042
train_loss,-0.23449


[34m[1mwandb[0m: Agent Starting Run: 4jpopdn0 with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 64
[34m[1mwandb[0m: 	lr: 0.0001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112518360217412, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.45431 (best 0.45431), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_35/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.43305 (best 0.43305), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_35/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.39602 (best 0.39602), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_35/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.38190 (best 0.38190), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 260.69it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▇█▄▂▄▇▂▅█▇▇▃▃▅▅▄▅▃▅▃▂▃▆▃▃▄▄▅▆▃▅▃▄▂▂▂▁▂▄▂

0,1
CPRS,0.00039
train_loss,-0.67933


[34m[1mwandb[0m: Agent Starting Run: zu00ivxs with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 96
[34m[1mwandb[0m: 	lr: 0.0005




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112463691582282, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.82486 (best 0.82486), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_36/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.48950 (best 0.48950), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_36/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.47932 (best 0.47932), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_36/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 0.37876 (best 0.37876), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesi

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 424.40it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▄▂▂▅▆▆█▅▆▆▇▄▆▅▂▅▇▆▆▄▄▃▆▃▄▇▅▇▄▄▅▄▃▃▄▅▄▁▃▃

0,1
CPRS,0.00038
train_loss,-0.74512


[34m[1mwandb[0m: Agent Starting Run: w8sjezgv with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 96
[34m[1mwandb[0m: 	lr: 0.001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112463205224937, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.81326 (best 0.81326), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_37/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.54981 (best 0.54981), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_37/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' was not in top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 0.50731 (best 0.50731), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_37/checkpoints/epoch=4-step=250.ckpt' as top 1
Epoch 5, global step 300: 'train_loss' reached 0.48523 (best 0.48523), saving model to

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 257.16it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▄█▂▃▅▅▆▃▃▂▅▁▄▃▆▃▇█▅▆▃▅▄▃▅▅▇▅▇▂▄▆▅▂▂▇▄▅▇▅

0,1
CPRS,0.00038
train_loss,0.47894


[34m[1mwandb[0m: Agent Starting Run: dbqxc6rf with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 96
[34m[1mwandb[0m: 	lr: 5e-05




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112426097194353, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.55414 (best 0.55414), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_38/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.34776 (best 0.34776), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_38/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' was not in top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 0.32997 (best 0.32997), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_38/checkpoints/epoch=4-step=250.ckpt' as top 1
Epoch 5, global step 300: 'train_loss' was not in top 1
Epoch 6, global step 350: 'tra

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 336.04it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▃▇▂▅▃█▆▄▇▆▃▃▄▃▄▄▇▄▇▃▅▂▄▃▅▃▃▆▄▄▁▅▄▃▃▂▃▂▂▂

0,1
CPRS,0.00032
train_loss,-0.73992


[34m[1mwandb[0m: Agent Starting Run: 2edy6d8a with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 96
[34m[1mwandb[0m: 	lr: 0.0001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111245917984181, max=1.0)…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.57974 (best 0.57974), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_39/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.40710 (best 0.40710), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_39/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.37832 (best 0.37832), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_39/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' reached 0.36034 (best 0.36034), saving model to

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 235.05it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▆█▆▆▇▄▄▅▅▃▄▇▆▁▆▆▂▆▆▄▃▅▄▄▃▄▄▄▂▃▃▃▃▂▃▂▃▂▂▂

0,1
CPRS,0.00035
train_loss,-0.36253


[34m[1mwandb[0m: Agent Starting Run: hf77gcou with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 128
[34m[1mwandb[0m: 	lr: 0.0005




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112442674736182, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.58442 (best 0.58442), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_40/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' was not in top 1
Epoch 2, global step 150: 'train_loss' reached 0.47533 (best 0.47533), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_40/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' was not in top 1
Epoch 6, global step 350: 'train_loss' reached 0.41832 (best 0.41832), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_40/checkpoints/epoch=

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 241.12it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▆▃█▃▃▅█▅▄▃▅▅▇▅▄▇▃▆▆▄▃▆▄▄▅▃▄▃▃▄▃▁▅▃▁▃▂▃▁▃

0,1
CPRS,0.00045
train_loss,0.07142


[34m[1mwandb[0m: Agent Starting Run: x9hq3kq7 with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 128
[34m[1mwandb[0m: 	lr: 0.001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112643333358898, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.92493 (best 0.92493), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_41/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.59044 (best 0.59044), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_41/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.58357 (best 0.58357), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_41/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.57417 (best 0.57417), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 324.65it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,▆▁▂▇▆▄▁▄▂▅▄▅▆▄▃▁▇█▆▅▆▆▂▅▄▄▄▃▅▂▄▅▆▅▄▂█▂▄▆

0,1
CPRS,0.0003
train_loss,0.64872


[34m[1mwandb[0m: Agent Starting Run: 7od1mpdr with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 128
[34m[1mwandb[0m: 	lr: 5e-05




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112381973200374, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.46329 (best 0.46329), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_42/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.37364 (best 0.37364), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_42/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' was not in top 1
Epoch 3, global step 200: 'train_loss' reached 0.35866 (best 0.35866), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_42/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.32845 (best 0.32845), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesi

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 515.97it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,█▃▃▇▃▆▅▇▆▇▄▃▇▄▂▃▇▆▄█▅▇▅▆▅▂▄▆▄▅▅▃▄▄▁▃▅▅▂▄

0,1
CPRS,0.00033
train_loss,0.2427


[34m[1mwandb[0m: Agent Starting Run: g5hkebiu with config:
[34m[1mwandb[0m: 	batch_size: 2
[34m[1mwandb[0m: 	context_length: 128
[34m[1mwandb[0m: 	lr: 0.0001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113177995300955, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.67074 (best 0.67074), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_43/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.43925 (best 0.43925), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_43/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' was not in top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 0.37522 (best 0.37522), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_43/checkpoints/epoch=4-step=250.ckpt' as top 1
Epoch 5, global step 300: 'train_loss' reached 0.34466 (best 0.34466), saving model to

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 524.11it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,█▇▅▂▆▂▅▃▄▂█▃▅▄▅▅▆▁▅▄▆▄▂▅▅▃▅▄▅▃▃▁▂▄▃▃▃▅▂▃

0,1
CPRS,0.00034
train_loss,-0.30742


[34m[1mwandb[0m: Agent Starting Run: yrb1pjvj with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	context_length: 32
[34m[1mwandb[0m: 	lr: 0.0005




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112601734283897, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.51383 (best 0.51383), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_44/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.45910 (best 0.45910), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_44/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.37429 (best 0.37429), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_44/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' reached 0.36325 (best 0.36325), saving model to

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 531.53it/s]


VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
CPRS,▁
train_loss,███▇▇▆█▅▆▆▆▆▅▄▄▄▅▄▅▄▃▃▃▄▄▃▄▃▂▂▂▂▂▂▂▂▂▂▂▁

0,1
CPRS,0.00042
train_loss,-1.54718


[34m[1mwandb[0m: Agent Starting Run: 6y1amb60 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	context_length: 32
[34m[1mwandb[0m: 	lr: 0.001




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111308919886748, max=1.0)…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.72077 (best 0.72077), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_45/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.49130 (best 0.49130), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_45/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.43374 (best 0.43374), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_45/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.42797 (best 0.42797), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/

Forecasting batches:   0%|          | 0/60 [00:00<?, ?it/s]

Ground truth:   0%|          | 0/60 [00:00<?, ?it/s]

  tss[i].values[:] = tss[i].values * test_df.std()[i] + test_df.mean()[i]
  std_value = test_df.std()[i]
  mean_value = test_df.mean()[i]
Running evaluation: 60it [00:00, 577.45it/s]


VBox(children=(Label(value='0.003 MB of 0.012 MB uploaded\r'), FloatProgress(value=0.26887120451209146, max=1.…

0,1
CPRS,▁
train_loss,██▇▇█▇▆▆▆▆▅▅▆▅▅▄▅▃▄▃▃▃▃▃▃▃▃▁▂▃▂▁▃▂▂▁▁▂▁▁

0,1
CPRS,0.00043
train_loss,-1.2612


[34m[1mwandb[0m: Agent Starting Run: inyeg6fx with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	context_length: 32
[34m[1mwandb[0m: 	lr: 5e-05




VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113044298771356, max=1.0…

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/eragroup/anaconda3/envs/lag_llama/lib/python3.12/site-packages/lightning/pytorch/trainer/configuration_validator.py:74: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                               | 0/? [00:00<?, ?it/s]

Epoch 0, global step 50: 'train_loss' reached 0.46759 (best 0.46759), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_46/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.35810 (best 0.35810), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_46/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' was not in top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' was not in top 1
Epoch 6, global step 350: 'train_loss' reached 0.33221 (best 0.33221), saving model to '/home/eragroup/Documents/Documents/Koutas_Master_Thesis_Supervision/Junbo_Leng/lag_llama_model/lag-llama-main/lightning_logs/version_46/checkpoints/epoch=

### experiment 0: would the format of training datasets have an impact on training results?
- PandasDatasets: CRPS = 0.0311
- ListDatasets: CRPS = 0.0028
  
Conclusion: **stay with ListDatasets**

### experiment 1: would the format of training datasets have an impact on training results?
- #Epochs100: CRPS = 0.0028
- #Epochs500: CRPS = 0.0021

More training epochs
- can improve CPRS score
- the predicitions show more visible fanning-out across the prediciton horizon
- still very much input-independent :(((

### adjusting the learning rate schedule.
### Reducing or removing regularization terms/techniques.
### Tuning the regularization hyperparameters.