In [2]:
cd lag-llama

/home/user/energygpt/lagllama/lag-llama


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
import os
import glob
from collections import defaultdict
from datetime import datetime
from itertools import islice

from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from tqdm.autonotebook import tqdm

import torch
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.repository.datasets import get_dataset

from gluonts.dataset.pandas import PandasDataset
import pandas as pd

from lag_llama.gluon.estimator import LagLlamaEstimator

import warnings  
warnings.filterwarnings('ignore') 

seed = 42
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True

torch.set_warn_always(False)

  from tqdm.autonotebook import tqdm


In [5]:
def get_lag_llama_predictions(dataset, prediction_length, device, context_length=32, use_rope_scaling=True, num_samples=100):
    ckpt = torch.load("./checkpoints/lag-llama.ckpt", map_location=device) # Uses GPU since in this Colab we use a GPU.
    estimator_args = ckpt["hyper_parameters"]["model_kwargs"]

    rope_scaling_arguments = {
        "type": "linear",
        "factor": max(1.0, (context_length + prediction_length) / estimator_args["context_length"]),
    }

    estimator = LagLlamaEstimator(
        ckpt_path="./checkpoints/lag-llama.ckpt",
        prediction_length=prediction_length,
        context_length=context_length, # Lag-Llama was trained with a context length of 32, but can work with any context length

        # estimator args
        input_size=estimator_args["input_size"],
        n_layer=estimator_args["n_layer"],
        n_embd_per_head=estimator_args["n_embd_per_head"],
        n_head=estimator_args["n_head"],
        scaling=estimator_args["scaling"],
        time_feat=estimator_args["time_feat"],
        rope_scaling=rope_scaling_arguments if use_rope_scaling else None,

        batch_size=32,
        num_parallel_samples=100,
        device=device,
    )

    lightning_module = estimator.create_lightning_module()
    transformation = estimator.create_transformation()
    predictor = estimator.create_predictor(transformation, lightning_module)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset,
        predictor=predictor,
        num_samples=num_samples
    )
    forecasts = list(forecast_it)
    tss = list(ts_it)

    return forecasts, tss

In [7]:
# Data pipelining
def get_batched_data_fn(sub_df,
    batch_size: int = 128, 
    context_len: int = 168, 
    horizon_len: int = 24):
    
    examples = defaultdict(list)
    num_examples = 0
    for start in range(0, len(sub_df) - (context_len + horizon_len), horizon_len):
      num_examples += 1
      examples["inputs"].append(sub_df["y"][start:(context_end := start + context_len)].tolist())
      examples["outputs"].append(sub_df["y"][context_end:(context_end + horizon_len)].tolist())
      examples['inputs_ts'].append(sub_df.index[start:(context_end := start + context_len)])
      examples["outputs_ts"].append(sub_df.index[context_end:(context_end + horizon_len)])

    return examples


In [8]:
def forecast_building(df):

    # Set numerical columns as float32
    for col in df.columns:
        # Check if column is not of string type
        if df[col].dtype != 'object' and pd.api.types.is_string_dtype(df[col]) == False:
            df[col] = df[col].astype('float32')
    
    # Create the Pandas
    dataset = PandasDataset.from_long_dataframe(df, target="target", item_id="item_id")

    
    backtest_dataset = dataset
    prediction_length = 24  # Define your prediction length. We use 24 here since the data is of hourly frequency
    num_samples = 10 # number of samples sampled from the probability distribution for each timestep 
    device = torch.device("cuda:0") # You can switch this to CPU or other GPUs if you'd like, depending on your environment    
    
    ckpt = torch.load("./checkpoints_bdg/lag-llama.ckpt", map_location=device) # Uses GPU since in this Colab we use a GPU.

    forecasts, tss = get_lag_llama_predictions(backtest_dataset, prediction_length, device, num_samples=num_samples)

    evaluator = Evaluator()
    agg_metrics, ts_metrics = evaluator(iter(tss), iter(forecasts))     
    
    res_all = []
    for ts, fc in zip(tss, forecasts):
        res = ts[ts.index.isin(fc.index)]
        res.columns = ['y_true']
        res.insert(1, 'y_pred', fc.median)
        res_all.append(res)
   
    res_all_df = pd.concat(res_all).sort_index()
    return res_all_df, agg_metrics, ts_metrics

In [9]:
def fine_tune_and_forecast_building(train, test):

    prediction_length = 24
    context_length = 168
    num_samples = 20
    device = "cuda:0"
    
    ckpt = torch.load("./checkpoints_bdg/lag-llama.ckpt", map_location=device)
    estimator_args = ckpt["hyper_parameters"]["model_kwargs"]
    
    estimator = LagLlamaEstimator(
            ckpt_path="./checkpoints_bdg/lag-llama.ckpt",
            prediction_length=prediction_length,
            context_length=context_length,
            nonnegative_pred_samples=True,
            aug_prob=0,
            lr=5e-4,
    
            # estimator args
            input_size=estimator_args["input_size"],
            n_layer=estimator_args["n_layer"],
            n_embd_per_head=estimator_args["n_embd_per_head"],
            n_head=estimator_args["n_head"],
            time_feat=estimator_args["time_feat"],
    
            batch_size=64,
            num_parallel_samples=num_samples,
            trainer_kwargs = {"max_epochs": 50,}, # <- lightning trainer arguments. For modification refer Lag-llama github repo
        )    
    
    # Create the Pandas
    dataset_train = PandasDataset.from_long_dataframe(train, target="target", item_id="item_id")    
    predictor = estimator.train(dataset_train, cache_data=True, shuffle_buffer_length=1000)    
  

    # Create the Pandas
    dataset_test = PandasDataset.from_long_dataframe(test, target="target", item_id="item_id")    
    forecast_it, ts_it = make_evaluation_predictions(
            dataset=dataset_test,
            predictor=predictor,
            num_samples=num_samples
        )


    forecasts = list(forecast_it)
    tss = list(ts_it)

    evaluator = Evaluator()
    agg_metrics, ts_metrics = evaluator(iter(tss), iter(forecasts))     
    
    res_all = []
    for ts, fc in zip(tss, forecasts):
        res = ts[ts.index.isin(fc.index)]
        res.columns = ['y_true']
        res.insert(1, 'y_pred', fc.median)
        res_all.append(res)
     
    res_all_df = pd.concat(res_all).sort_index()
    return res_all_df, agg_metrics, ts_metrics

In [10]:
def process_building(df): 
    building_name = df.columns[0]
    df.columns = ['y']
    input_data = get_batched_data_fn(df, batch_size=500)
    
    windows_all = []
    counter = 1
    for inputs_ts, inputs, outputs_ts, outputs in zip(input_data['inputs_ts'], 
                                                      input_data['inputs'], 
                                                      input_data['outputs_ts'], 
                                                      input_data['outputs']):
        
        input_df = pd.DataFrame({'timestamp': inputs_ts, 
                                 'target': inputs})
        
        output_df = pd.DataFrame({'timestamp': outputs_ts, 
                                 'target': outputs})
        combined = pd.concat([input_df, output_df], axis=0)
        combined['item_id'] = str(building_name) + '_' + str(counter)
        combined['item_id_no'] = counter
        counter += 1
        windows_all.append(combined)
        
    windows_all_df = pd.concat(windows_all)
    windows_all_df.timestamp = pd.to_datetime(windows_all_df.timestamp)
    windows_all_df.set_index('timestamp', inplace=True)

    df = windows_all_df
    # Set numerical columns as float32
    for col in df.columns:
        # Check if column is not of string type
        if df[col].dtype != 'object' and pd.api.types.is_string_dtype(df[col]) == False:
            df[col] = df[col].astype('float32')

    th = df.item_id_no.max()/2
    train = df[df.item_id_no <= th]
    test  = df[df.item_id_no > th]    
    print(datetime.now(), '#items', df.item_id_no.max(), 'split', th)
 
    
    test_res, test_agg_metrics, test_ts_metrics = forecast_building(test)
    ft_res, ft_agg_metrics, ft_ts_metrics = fine_tune_and_forecast_building(train, test)
    return test_res, test_agg_metrics, test_ts_metrics, ft_res, ft_agg_metrics, ft_ts_metrics



def process_file(filename):
    df = pd.read_csv(filename)
    df = df.set_index(['timestamp'])    
    df.index = pd.to_datetime(df.index)    
    ix = pd.date_range(start = df.index.min(), end = df.index.max(), freq = 'H')
    df = df.reindex(ix)
    df = df.ffill()

    if df.shape[1] < 2:
        return None
        
    print(datetime.now(), df.shape, flush=True)

    test_res_all = []
    test_agg_metrics_all = []
    test_ts_metrics_all = []

    finetuned_res_all = []
    finetuned_agg_metrics_all = []
    finetuned_ts_metrics_all = []
    
    i = 0
    for building_name in df.columns:
        print(datetime.now(), i, '/', len(df.columns), building_name, flush=True)
        df1 = df[[building_name]]#.head(24*200)

        test_res, test_agg_metrics, test_ts_metrics, finetuned_res, finetuned_agg_metrics, finetuned_ts_metrics = process_building(df1)
        test_res['building'] = building_name
        test_res['filename'] = filename
        test_res_all.append(test_res)
        
        test_ts_metrics.insert(0, 'building', building_name)
        test_ts_metrics.insert(0, 'filename', filename)
        test_ts_metrics = test_ts_metrics.sort_values(['forecast_start'])
        test_ts_metrics_all.append(test_ts_metrics)
        
        test_agg_metrics_df = pd.DataFrame([test_agg_metrics])
        test_agg_metrics_df.insert(0, 'building', building_name)
        test_agg_metrics_df.insert(0, 'filename', filename)
        test_agg_metrics_all.append(test_agg_metrics_df)

        finetuned_res['building'] = building_name
        finetuned_res['filename'] = filename
        finetuned_res_all.append(finetuned_res)
        
        finetuned_ts_metrics.insert(0, 'building', building_name)
        finetuned_ts_metrics.insert(0, 'filename', filename)
        finetuned_ts_metrics = finetuned_ts_metrics.sort_values(['forecast_start'])
        finetuned_ts_metrics_all.append(finetuned_ts_metrics)
        
        finetuned_agg_metrics_df = pd.DataFrame([finetuned_agg_metrics])
        finetuned_agg_metrics_df.insert(0, 'building', building_name)
        finetuned_agg_metrics_df.insert(0, 'filename', filename)
        finetuned_agg_metrics_all.append(finetuned_agg_metrics_df)        

        i += 1
        if i % 2 == 0:
            print(datetime.now(), 'Saving...')

            test_res_all_df = pd.concat(test_res_all).round(6)
            test_res_all_df = test_res_all_df.reset_index()
            test_res_all_df = test_res_all_df.rename(columns={test_res_all_df.columns[0]: "timestamp" })
            test_res_all_df.to_csv(f'../forecasts_finetuned/{dataset}/{os.path.basename(filename)}', index=False)            

            test_ts_metrics_all_df = pd.concat(test_ts_metrics_all).round(6)
            test_ts_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/test_ts_metrics_{os.path.basename(filename)}', index=False)            

            test_agg_metrics_all_df = pd.concat(test_agg_metrics_all).round(6)            
            test_agg_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/test_agg_metrics_{os.path.basename(filename)}', index=False)            

            finetuned_res_all_df = pd.concat(finetuned_res_all).round(6)
            finetuned_res_all_df = finetuned_res_all_df.reset_index()
            finetuned_res_all_df = finetuned_res_all_df.rename(columns={finetuned_res_all_df.columns[0]: "timestamp" })
            finetuned_res_all_df.to_csv(f'../forecasts_finetuned/{dataset}/{os.path.basename(filename)}', index=False)            

            finetuned_ts_metrics_all_df = pd.concat(finetuned_ts_metrics_all).round(6)
            finetuned_ts_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/finetuned_ts_metrics_{os.path.basename(filename)}', index=False)            

            finetuned_agg_metrics_all_df = pd.concat(finetuned_agg_metrics_all).round(6)            
            finetuned_agg_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/finetuned_agg_metrics_{os.path.basename(filename)}', index=False)            
    
    test_res_all_df = pd.concat(test_res_all).round(6)
    test_res_all_df = test_res_all_df.reset_index()
    test_res_all_df = test_res_all_df.rename(columns={test_res_all_df.columns[0]: "timestamp" })
    test_res_all_df.to_csv(f'../forecasts_finetuned/{dataset}/{os.path.basename(filename)}', index=False)            

    test_ts_metrics_all_df = pd.concat(test_ts_metrics_all).round(6)
    test_ts_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/test_ts_metrics_{os.path.basename(filename)}', index=False)            

    test_agg_metrics_all_df = pd.concat(test_agg_metrics_all).round(6)            
    test_agg_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/test_agg_metrics_{os.path.basename(filename)}', index=False)            

    finetuned_res_all_df = pd.concat(finetuned_res_all).round(6)
    finetuned_res_all_df = finetuned_res_all_df.reset_index()
    finetuned_res_all_df = finetuned_res_all_df.rename(columns={finetuned_res_all_df.columns[0]: "timestamp" })
    finetuned_res_all_df.to_csv(f'../forecasts_finetuned/{dataset}/{os.path.basename(filename)}', index=False)            

    finetuned_ts_metrics_all_df = pd.concat(finetuned_ts_metrics_all).round(6)
    finetuned_ts_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/finetuned_ts_metrics_{os.path.basename(filename)}', index=False)            

    finetuned_agg_metrics_all_df = pd.concat(finetuned_agg_metrics_all).round(6)            
    finetuned_agg_metrics_all_df.to_csv(f'../results_finetuned/{dataset}/finetuned_agg_metrics_{os.path.basename(filename)}', index=False)            

    return test_res_all_df, test_ts_metrics_all_df, test_agg_metrics_all_df, finetuned_res_all_df, finetuned_ts_metrics_all_df, finetuned_agg_metrics_all_df

In [11]:
files_list = glob.glob('/home/user/BuildingsBench/BuildingsBenchData/BuildingsBench/Buildings-900K-test-csv/*.csv')   #Enter Data set location in your comptuer in CSV format
print(files_list)

dataset = 'Buildings-900K-test-50epoch'  # dataset Name to be processed
os.makedirs(f'../forecasts_finetuned/{dataset}/', exist_ok = True)
os.makedirs(f'../results_finetuned/{dataset}/', exist_ok = True)

for filename in files_list:
    print(datetime.now(), filename)
    results = process_file(filename)
 
    print('')    

['/home/user/BuildingsBench/BuildingsBenchData/BuildingsBench/Buildings-900K-test-csv/G51059305.csv']
2024-08-15 11:19:22.735552 /home/user/BuildingsBench/BuildingsBenchData/BuildingsBench/Buildings-900K-test-csv/G51059305.csv
2024-08-15 11:19:22.781498 (8760, 83)
2024-08-15 11:19:22.781918 0 / 83 4390
2024-08-15 11:19:22.960478 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 2269.25it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.82763 (best 4.82763), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10922/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.87346 (best 3.87346), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10922/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.74765 (best 3.74765), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10922/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.60593 (best 3.60593), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10922/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.54087 (best 3.54087), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10922/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:20:51.331225 1 / 83 16464
2024-08-15 11:20:51.498708 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3977.54it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.91413 (best 1.91413), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10924/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.28793 (best 1.28793), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10924/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.11769 (best 1.11769), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10924/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.97641 (best 0.97641), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10924/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.89602 (best 0.89602), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10924/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:22:19.782083 Saving...
2024-08-15 11:22:19.828898 2 / 83 19360
2024-08-15 11:22:20.023804 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8285.75it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.99215 (best 2.99215), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10926/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.35942 (best 2.35942), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10926/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.22207 (best 2.22207), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10926/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.14049 (best 2.14049), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10926/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.01869 (best 2.01869), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10926/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:23:47.138365 3 / 83 23149
2024-08-15 11:23:47.307054 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8752.39it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.98324 (best 3.98324), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10928/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.39107 (best 3.39107), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10928/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.23291 (best 3.23291), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10928/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.10656 (best 3.10656), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10928/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.09239 (best 3.09239), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10928/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:25:15.768956 Saving...
2024-08-15 11:25:15.859736 4 / 83 23184
2024-08-15 11:25:16.027536 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6369.56it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.07459 (best 3.07459), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10930/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.32995 (best 2.32995), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10930/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.21838 (best 2.21838), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10930/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.13005 (best 2.13005), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10930/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.07478 (best 2.07478), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10930/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:26:45.692333 5 / 83 27072
2024-08-15 11:26:45.856216 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4392.84it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.25358 (best 2.25358), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10932/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.35597 (best 1.35597), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10932/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.21258 (best 1.21258), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10932/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.97199 (best 0.97199), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10932/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.80706 (best 0.80706), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10932/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:28:16.347284 Saving...
2024-08-15 11:28:16.475950 6 / 83 32240
2024-08-15 11:28:16.648292 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3905.55it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.01395 (best 3.01395), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10934/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.77463 (best 1.77463), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10934/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.69282 (best 1.69282), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10934/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.51784 (best 1.51784), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10934/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.40679 (best 1.40679), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10934/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:29:47.811579 7 / 83 33667
2024-08-15 11:29:47.970650 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8742.51it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.77040 (best 4.77040), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10935/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.28982 (best 4.28982), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10935/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.08749 (best 4.08749), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10935/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.95458 (best 3.95458), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10935/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.66199 (best 3.66199), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10935/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:31:17.376727 Saving...
2024-08-15 11:31:17.547626 8 / 83 41024
2024-08-15 11:31:17.791285 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8786.09it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.79534 (best 2.79534), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10937/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.18443 (best 2.18443), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10937/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.05097 (best 2.05097), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10937/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.94568 (best 1.94568), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10937/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.81411 (best 1.81411), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10937/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:32:48.819828 9 / 83 41181
2024-08-15 11:32:49.057153 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8886.87it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.49969 (best 4.49969), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10939/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.05888 (best 4.05888), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10939/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.79662 (best 3.79662), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10939/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.70317 (best 3.70317), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10939/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.50382 (best 3.50382), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10939/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:34:19.012013 Saving...
2024-08-15 11:34:19.320758 10 / 83 44720
2024-08-15 11:34:19.483040 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8552.30it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.99346 (best 4.99346), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10941/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.31778 (best 4.31778), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10941/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.22726 (best 4.22726), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10941/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.09643 (best 4.09643), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10941/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.08082 (best 4.08082), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10941/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:35:49.137887 11 / 83 45014
2024-08-15 11:35:49.347056 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8931.91it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.12673 (best 3.12673), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10943/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.46463 (best 2.46463), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10943/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.21234 (best 2.21234), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10943/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.07507 (best 2.07507), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10943/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.96868 (best 1.96868), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10943/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:37:16.165820 Saving...
2024-08-15 11:37:16.435806 12 / 83 46221
2024-08-15 11:37:16.677082 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6478.89it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.45026 (best 4.45026), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10945/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.91615 (best 3.91615), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10945/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.68080 (best 3.68080), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10945/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.51940 (best 3.51940), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10945/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.33776 (best 3.33776), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10945/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:38:44.122463 13 / 83 60979
2024-08-15 11:38:44.392789 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7097.97it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.37347 (best 5.37347), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10947/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.68201 (best 4.68201), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10947/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.57312 (best 4.57312), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10947/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.44837 (best 4.44837), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10947/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.39403 (best 4.39403), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10947/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:40:10.604349 Saving...
2024-08-15 11:40:10.997452 14 / 83 61645
2024-08-15 11:40:11.166453 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7372.23it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.86396 (best 1.86396), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10949/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.30427 (best 1.30427), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10949/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.11885 (best 1.11885), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10949/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.99283 (best 0.99283), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10949/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.80382 (best 0.80382), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10949/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:41:38.068265 15 / 83 65315
2024-08-15 11:41:38.242242 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8444.27it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.81630 (best 3.81630), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10951/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.17379 (best 3.17379), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10951/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.06965 (best 3.06965), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10951/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.69904 (best 2.69904), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10951/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' reached 2.46727 (best 2.46727), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs

2024-08-15 11:43:03.467546 Saving...
2024-08-15 11:43:03.965730 16 / 83 65731
2024-08-15 11:43:04.134566 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4385.37it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.30279 (best 3.30279), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10953/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.57650 (best 2.57650), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10953/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.44479 (best 2.44479), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10953/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.35800 (best 2.35800), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10953/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.30294 (best 2.30294), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10953/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:44:32.464485 17 / 83 66947
2024-08-15 11:44:32.664314 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7751.03it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.74000 (best 4.74000), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10955/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.19265 (best 4.19265), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10955/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.99830 (best 3.99830), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10955/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.67041 (best 3.67041), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10955/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.57044 (best 3.57044), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10955/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:45:59.798584 Saving...
2024-08-15 11:46:00.671587 18 / 83 69584
2024-08-15 11:46:00.992492 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6982.19it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.52860 (best 3.52860), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10957/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.88753 (best 2.88753), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10957/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.69481 (best 2.69481), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10957/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.42061 (best 2.42061), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10957/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.32691 (best 2.32691), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10957/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:47:30.559275 19 / 83 69942
2024-08-15 11:47:30.733896 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3661.72it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 6.16026 (best 6.16026), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10959/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 5.59442 (best 5.59442), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10959/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 5.40057 (best 5.40057), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10959/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 5.19798 (best 5.19798), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10959/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 5.05363 (best 5.05363), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10959/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:48:57.272158 Saving...
2024-08-15 11:48:57.787376 20 / 83 85712
2024-08-15 11:48:57.953154 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4951.30it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.15919 (best 4.15919), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10961/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.33045 (best 3.33045), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10961/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.18949 (best 3.18949), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10961/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.01531 (best 3.01531), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10961/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.93467 (best 2.93467), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10961/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:50:25.764109 21 / 83 92493
2024-08-15 11:50:26.074587 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3500.29it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.03404 (best 3.03404), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10963/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.32227 (best 2.32227), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10963/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.08662 (best 2.08662), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10963/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.89595 (best 1.89595), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10963/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.82828 (best 1.82828), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10963/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:51:54.523979 Saving...
2024-08-15 11:51:55.115584 22 / 83 93197
2024-08-15 11:51:55.271535 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8435.36it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.02475 (best 3.02475), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10965/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.34501 (best 2.34501), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10965/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.17374 (best 2.17374), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10965/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 1.91929 (best 1.91929), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10965/checkpoints/epoch=4-step=250.ckpt' as top 1
Epoch 5, global step 300: 'train_loss' reached 1.80094 (best 1.80094), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs

2024-08-15 11:53:24.618710 23 / 83 94963
2024-08-15 11:53:24.779917 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7519.91it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.29711 (best 5.29711), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10967/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.50593 (best 4.50593), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10967/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.41993 (best 4.41993), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10967/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.28617 (best 4.28617), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10967/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.16434 (best 4.16434), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10967/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:54:53.205054 Saving...
2024-08-15 11:54:53.859570 24 / 83 95088
2024-08-15 11:54:54.014865 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 1537.21it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.97063 (best 5.97063), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10969/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 5.53873 (best 5.53873), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10969/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 5.28183 (best 5.28183), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10969/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 5.08676 (best 5.08676), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10969/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.94548 (best 4.94548), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10969/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 11:56:26.868183 25 / 83 99053
2024-08-15 11:56:27.468432 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8820.16it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.03813 (best 3.03813), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10970/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.44245 (best 2.44245), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10970/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.28898 (best 2.28898), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10970/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 1.95099 (best 1.95099), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10970/checkpoints/epoch=4-step=250.ckpt' as top 1
Epoch 5, global step 300: 'train_loss' reached 1.80768 (best 1.80768), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs

2024-08-15 11:57:57.526134 Saving...
2024-08-15 11:57:58.123974 26 / 83 100419
2024-08-15 11:57:58.285162 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8824.72it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.09082 (best 5.09082), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10972/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.45049 (best 4.45049), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10972/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.24156 (best 4.24156), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10972/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 3.99420 (best 3.99420), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10972/checkpoints/epoch=4-step=250.ckpt' as top 1
Epoch 5, global step 300: 'train_loss' reached 3.85520 (best 3.85520), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs

2024-08-15 11:59:26.261117 27 / 83 104032
2024-08-15 11:59:26.423294 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8441.24it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.58060 (best 4.58060), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10974/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.68727 (best 3.68727), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10974/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.56466 (best 3.56466), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10974/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.45455 (best 3.45455), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10974/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.31940 (best 3.31940), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10974/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:00:54.325845 Saving...
2024-08-15 12:00:55.036018 28 / 83 109155
2024-08-15 12:00:55.194945 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8701.27it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.82995 (best 1.82995), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10976/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.23785 (best 1.23785), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10976/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.09029 (best 1.09029), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10976/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.99690 (best 0.99690), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10976/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.88631 (best 0.88631), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10976/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:02:22.460554 29 / 83 116304
2024-08-15 12:02:23.037866 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8636.71it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.23773 (best 2.23773), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10978/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.67219 (best 1.67219), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10978/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.50521 (best 1.50521), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10978/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.35871 (best 1.35871), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10978/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.18844 (best 1.18844), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10978/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:03:49.805775 Saving...
2024-08-15 12:03:50.628359 30 / 83 122131
2024-08-15 12:03:50.796846 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8855.32it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.85632 (best 3.85632), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10980/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.28059 (best 3.28059), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10980/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.14336 (best 3.14336), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10980/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.04893 (best 3.04893), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10980/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.91983 (best 2.91983), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10980/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:05:17.280972 31 / 83 124045
2024-08-15 12:05:17.473377 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4898.80it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.40773 (best 5.40773), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10982/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.71669 (best 4.71669), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10982/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.58976 (best 4.58976), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10982/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.49501 (best 4.49501), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10982/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.31983 (best 4.31983), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10982/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:06:44.069821 Saving...
2024-08-15 12:06:44.931432 32 / 83 126813
2024-08-15 12:06:45.099859 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3719.87it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.28684 (best 3.28684), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10984/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.81902 (best 2.81902), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10984/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.61340 (best 2.61340), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10984/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.39565 (best 2.39565), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10984/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.24040 (best 2.24040), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10984/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:08:12.464398 33 / 83 129651
2024-08-15 12:08:12.629696 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4376.53it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.82681 (best 2.82681), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10986/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.25738 (best 2.25738), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10986/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.12046 (best 2.12046), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10986/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.86318 (best 1.86318), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10986/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.82758 (best 1.82758), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10986/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:09:39.177281 Saving...
2024-08-15 12:09:40.237116 34 / 83 131315
2024-08-15 12:09:40.504321 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7323.26it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.21885 (best 2.21885), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10988/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.59971 (best 1.59971), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10988/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.44175 (best 1.44175), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10988/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.26582 (best 1.26582), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10988/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.02534 (best 1.02534), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10988/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:11:06.702094 35 / 83 139830
2024-08-15 12:11:06.949598 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4284.05it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.45885 (best 3.45885), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10990/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.80647 (best 2.80647), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10990/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.69211 (best 2.69211), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10990/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.60308 (best 2.60308), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10990/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.56036 (best 2.56036), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10990/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:12:32.950708 Saving...
2024-08-15 12:12:33.964406 36 / 83 141203
2024-08-15 12:12:34.227157 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6241.21it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.33249 (best 3.33249), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10992/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.63775 (best 2.63775), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10992/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.46913 (best 2.46913), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10992/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.31771 (best 2.31771), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10992/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.14105 (best 2.14105), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10992/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:14:03.412740 37 / 83 144166
2024-08-15 12:14:03.864854 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7005.18it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.04724 (best 2.04724), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10994/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.32955 (best 1.32955), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10994/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.16172 (best 1.16172), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10994/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.01246 (best 1.01246), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10994/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.87524 (best 0.87524), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10994/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:15:30.304302 Saving...
2024-08-15 12:15:31.340789 38 / 83 147955
2024-08-15 12:15:31.586025 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 5773.41it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.82347 (best 4.82347), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10996/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.94630 (best 3.94630), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10996/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.80591 (best 3.80591), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10996/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.67085 (best 3.67085), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10996/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.57945 (best 3.57945), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10996/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:17:01.147318 39 / 83 150464
2024-08-15 12:17:01.317393 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4971.92it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.39047 (best 1.39047), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10998/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.76935 (best 0.76935), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10998/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.66350 (best 0.66350), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10998/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.56102 (best 0.56102), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10998/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.48665 (best 0.48665), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_10998/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:18:28.578507 Saving...
2024-08-15 12:18:29.673877 40 / 83 156470
2024-08-15 12:18:29.914221 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7976.76it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.75239 (best 3.75239), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11000/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.02177 (best 3.02177), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11000/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.82487 (best 2.82487), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11000/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.69610 (best 2.69610), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11000/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.54727 (best 2.54727), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11000/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:19:58.738419 41 / 83 160294
2024-08-15 12:19:58.918096 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4981.23it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.79354 (best 3.79354), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11002/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.97263 (best 2.97263), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11002/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.79990 (best 2.79990), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11002/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.69240 (best 2.69240), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11002/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.53133 (best 2.53133), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11002/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:21:28.604495 Saving...
2024-08-15 12:21:29.759060 42 / 83 162294
2024-08-15 12:21:29.924737 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6879.37it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.87270 (best 1.87270), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11004/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.13766 (best 1.13766), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11004/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.92299 (best 0.92299), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11004/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.85097 (best 0.85097), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11004/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.70580 (best 0.70580), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11004/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:22:59.929387 43 / 83 164573
2024-08-15 12:23:00.165947 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3905.82it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.07837 (best 5.07837), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11006/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.49961 (best 4.49961), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11006/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.35924 (best 4.35924), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11006/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.21884 (best 4.21884), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11006/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.08231 (best 4.08231), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11006/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:24:31.457824 Saving...
2024-08-15 12:24:32.618751 44 / 83 166003
2024-08-15 12:24:32.784218 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8335.43it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 6.15992 (best 6.15992), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11008/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 5.70633 (best 5.70633), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11008/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 5.55267 (best 5.55267), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11008/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 5.32205 (best 5.32205), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11008/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 5.19350 (best 5.19350), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11008/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:26:01.582280 45 / 83 167382
2024-08-15 12:26:01.741530 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8135.10it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 0.01074 (best 0.01074), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11009/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached -0.61543 (best -0.61543), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11009/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached -0.76176 (best -0.76176), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11009/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached -0.82818 (best -0.82818), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11009/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached -1.00841 (best -1.00841), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11009/checkpoints/epoch=4-step=250.ckpt

2024-08-15 12:27:30.910488 Saving...
2024-08-15 12:27:32.079887 46 / 83 168752
2024-08-15 12:27:32.234794 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8837.39it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.14449 (best 5.14449), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11011/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.58070 (best 4.58070), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11011/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.40339 (best 4.40339), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11011/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.24212 (best 4.24212), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11011/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.16339 (best 4.16339), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11011/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:29:03.431653 47 / 83 179110
2024-08-15 12:29:03.686400 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 2026.73it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.27834 (best 2.27834), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11013/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.76132 (best 1.76132), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11013/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.62669 (best 1.62669), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11013/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.50521 (best 1.50521), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11013/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.43031 (best 1.43031), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11013/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:30:33.788694 Saving...
2024-08-15 12:30:36.236872 48 / 83 182643
2024-08-15 12:30:36.407716 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6088.07it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.19706 (best 2.19706), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11015/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.60851 (best 1.60851), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11015/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.38568 (best 1.38568), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11015/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.33826 (best 1.33826), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11015/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.15375 (best 1.15375), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11015/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:32:04.207775 49 / 83 186253
2024-08-15 12:32:04.408393 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 9083.08it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.31410 (best 3.31410), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11017/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.60670 (best 2.60670), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11017/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.43016 (best 2.43016), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11017/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.24383 (best 2.24383), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11017/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.14722 (best 2.14722), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11017/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:33:34.118593 Saving...
2024-08-15 12:33:35.487531 50 / 83 188704
2024-08-15 12:33:35.652671 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4966.92it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.95281 (best 3.95281), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11019/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.34564 (best 3.34564), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11019/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.25806 (best 3.25806), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11019/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.08733 (best 3.08733), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11019/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.00413 (best 3.00413), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11019/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:35:03.029640 51 / 83 189891
2024-08-15 12:35:03.201993 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4992.36it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.87543 (best 5.87543), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11021/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 5.34829 (best 5.34829), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11021/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 5.12865 (best 5.12865), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11021/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.92100 (best 4.92100), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11021/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 4.83752 (best 4.83752), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11021/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:36:29.019080 Saving...
2024-08-15 12:36:31.809650 52 / 83 212192
2024-08-15 12:36:32.043711 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4209.54it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.89449 (best 2.89449), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11023/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.23793 (best 2.23793), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11023/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.03069 (best 2.03069), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11023/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.84615 (best 1.84615), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11023/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.78352 (best 1.78352), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11023/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:37:59.501812 53 / 83 215885
2024-08-15 12:37:59.753576 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7256.23it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 0.70662 (best 0.70662), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11025/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.23181 (best 0.23181), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11025/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached -0.05973 (best -0.05973), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11025/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached -0.28038 (best -0.28038), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11025/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached -0.44994 (best -0.44994), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11025/checkpoints/epoch=4-step=250.ckpt' 

2024-08-15 12:39:28.669840 Saving...
2024-08-15 12:39:31.725648 54 / 83 216589
2024-08-15 12:39:32.054346 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7698.97it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.68919 (best 3.68919), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11027/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.95908 (best 2.95908), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11027/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.72835 (best 2.72835), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11027/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.71535 (best 2.71535), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11027/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.52624 (best 2.52624), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11027/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:41:03.923989 55 / 83 216816
2024-08-15 12:41:04.103709 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3823.35it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.12202 (best 4.12202), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11029/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.42201 (best 3.42201), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11029/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.16730 (best 3.16730), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11029/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.01770 (best 3.01770), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11029/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.94873 (best 2.94873), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11029/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:42:33.522046 Saving...
2024-08-15 12:42:36.095241 56 / 83 216947
2024-08-15 12:42:36.415563 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7188.22it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.39400 (best 2.39400), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11031/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.77418 (best 1.77418), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11031/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.51253 (best 1.51253), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11031/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.38615 (best 1.38615), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11031/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.16462 (best 1.16462), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11031/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:44:03.800976 57 / 83 217110
2024-08-15 12:44:03.965503 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3939.39it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.61700 (best 2.61700), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11033/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.99985 (best 1.99985), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11033/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.74097 (best 1.74097), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11033/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.65891 (best 1.65891), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11033/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.54328 (best 1.54328), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11033/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:45:31.569320 Saving...
2024-08-15 12:45:34.091914 58 / 83 221792
2024-08-15 12:45:34.410813 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6008.12it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.62220 (best 3.62220), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11035/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.81570 (best 2.81570), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11035/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.61940 (best 2.61940), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11035/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.47134 (best 2.47134), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11035/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.34014 (best 2.34014), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11035/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:47:02.190997 59 / 83 245267
2024-08-15 12:47:02.381569 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 5957.87it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.64923 (best 3.64923), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11037/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.71479 (best 2.71479), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11037/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.35923 (best 2.35923), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11037/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.35208 (best 2.35208), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11037/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.05091 (best 2.05091), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11037/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:48:29.386520 Saving...
2024-08-15 12:48:31.022930 60 / 83 247437
2024-08-15 12:48:31.277650 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6704.95it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 6.52210 (best 6.52210), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11039/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 5.20193 (best 5.20193), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11039/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' was not in top 1
Epoch 3, global step 200: 'train_loss' reached 5.09518 (best 5.09518), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11039/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' reached 4.82111 (best 4.82111), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11039/checkpoints/epoch=5-step=300.ckpt' as top 1
Epoch 6, global step 350: 'train_loss' reached 4.73221 (best 4.73221), saving model to

2024-08-15 12:49:58.595188 61 / 83 250589
2024-08-15 12:49:58.810974 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3282.10it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.03192 (best 5.03192), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11041/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.25627 (best 4.25627), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11041/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.07033 (best 4.07033), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11041/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.92224 (best 3.92224), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11041/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.70622 (best 3.70622), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11041/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:51:29.575729 Saving...
2024-08-15 12:51:33.143154 62 / 83 251776
2024-08-15 12:51:33.344922 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3568.26it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.81569 (best 2.81569), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11043/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.14286 (best 2.14286), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11043/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.94743 (best 1.94743), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11043/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.78472 (best 1.78472), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11043/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.68882 (best 1.68882), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11043/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:53:01.927386 63 / 83 257539
2024-08-15 12:53:02.196579 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4374.72it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.31192 (best 4.31192), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11045/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.57641 (best 3.57641), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11045/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.41498 (best 3.41498), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11045/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.31356 (best 3.31356), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11045/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.22017 (best 3.22017), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11045/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:54:30.445263 Saving...
2024-08-15 12:54:33.118915 64 / 83 260691
2024-08-15 12:54:33.275145 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3070.24it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 6.65517 (best 6.65517), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11047/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 5.94502 (best 5.94502), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11047/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 5.55985 (best 5.55985), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11047/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 5.46687 (best 5.46687), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11047/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' reached 5.35005 (best 5.35005), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs

2024-08-15 12:56:04.436400 65 / 83 275363
2024-08-15 12:56:04.595450 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4094.03it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.27210 (best 1.27210), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11049/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 0.82373 (best 0.82373), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11049/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.69001 (best 0.69001), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11049/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.54092 (best 0.54092), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11049/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.40178 (best 0.40178), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11049/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:57:33.858207 Saving...
2024-08-15 12:57:35.566907 66 / 83 285392
2024-08-15 12:57:35.860476 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 5217.59it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.76644 (best 2.76644), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11050/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.94214 (best 1.94214), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11050/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.80547 (best 1.80547), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11050/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.79702 (best 1.79702), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11050/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.61903 (best 1.61903), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11050/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 12:59:04.398545 67 / 83 289885
2024-08-15 12:59:04.552807 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 9119.49it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.22547 (best 4.22547), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11052/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.68332 (best 3.68332), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11052/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.38489 (best 3.38489), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11052/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.24276 (best 3.24276), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11052/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.05713 (best 3.05713), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11052/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:00:32.721022 Saving...
2024-08-15 13:00:34.394577 68 / 83 293104
2024-08-15 13:00:34.633179 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8407.68it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.05014 (best 3.05014), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11054/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.43021 (best 2.43021), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11054/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.23174 (best 2.23174), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11054/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.10691 (best 2.10691), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11054/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.02948 (best 2.02948), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11054/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:02:07.029799 69 / 83 297206
2024-08-15 13:02:07.287315 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8444.18it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.66945 (best 2.66945), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11056/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.13301 (best 2.13301), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11056/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.96488 (best 1.96488), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11056/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.80646 (best 1.80646), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11056/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.67605 (best 1.67605), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11056/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:03:36.443507 Saving...
2024-08-15 13:03:40.068421 70 / 83 298192
2024-08-15 13:03:40.234562 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8790.62it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.69302 (best 1.69302), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11058/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.05874 (best 1.05874), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11058/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.92157 (best 0.92157), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11058/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.82946 (best 0.82946), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11058/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.65787 (best 0.65787), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11058/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:05:07.792909 71 / 83 299856
2024-08-15 13:05:07.968592 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3615.05it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.38292 (best 2.38292), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11060/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.56640 (best 1.56640), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11060/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.44574 (best 1.44574), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11060/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.30152 (best 1.30152), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11060/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.09287 (best 1.09287), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11060/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:06:38.905252 Saving...
2024-08-15 13:06:42.031005 72 / 83 301315
2024-08-15 13:06:42.386305 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8353.14it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.35850 (best 4.35850), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11062/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.62943 (best 3.62943), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11062/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.53103 (best 3.53103), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11062/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.43159 (best 3.43159), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11062/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.30182 (best 3.30182), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11062/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:08:11.828327 73 / 83 307568
2024-08-15 13:08:12.035281 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6597.08it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.52490 (best 2.52490), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11064/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.89560 (best 1.89560), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11064/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.63938 (best 1.63938), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11064/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' was not in top 1
Epoch 4, global step 250: 'train_loss' reached 1.38120 (best 1.38120), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11064/checkpoints/epoch=4-step=250.ckpt' as top 1
Epoch 5, global step 300: 'train_loss' reached 1.28818 (best 1.28818), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs

2024-08-15 13:09:43.138070 Saving...
2024-08-15 13:09:46.696932 74 / 83 307597
2024-08-15 13:09:46.871626 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 7340.30it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.07877 (best 4.07877), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11066/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.42969 (best 3.42969), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11066/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.26733 (best 3.26733), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11066/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.14150 (best 3.14150), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11066/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.97093 (best 2.97093), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11066/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:11:18.064743 75 / 83 312160
2024-08-15 13:11:18.230528 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3729.29it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 5.62317 (best 5.62317), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11068/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 4.63749 (best 4.63749), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11068/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 4.35710 (best 4.35710), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11068/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 4.19125 (best 4.19125), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11068/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' was not in top 1
Epoch 5, global step 300: 'train_loss' reached 3.97664 (best 3.97664), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs

2024-08-15 13:12:46.578398 Saving...
2024-08-15 13:12:49.819740 76 / 83 313846
2024-08-15 13:12:50.354388 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8399.40it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 4.49882 (best 4.49882), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11070/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 3.91617 (best 3.91617), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11070/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 3.78093 (best 3.78093), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11070/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 3.58072 (best 3.58072), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11070/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 3.43172 (best 3.43172), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11070/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:14:24.742933 77 / 83 337536
2024-08-15 13:14:24.915546 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 3175.16it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.29222 (best 2.29222), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11072/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.74486 (best 1.74486), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11072/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.59491 (best 1.59491), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11072/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.47525 (best 1.47525), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11072/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.32704 (best 1.32704), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11072/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:15:54.311695 Saving...
2024-08-15 13:15:58.026784 78 / 83 337933
2024-08-15 13:15:58.350233 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6890.93it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.66228 (best 2.66228), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11074/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.86827 (best 1.86827), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11074/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 1.72349 (best 1.72349), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11074/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 1.66172 (best 1.66172), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11074/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 1.57188 (best 1.57188), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11074/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:17:31.675809 79 / 83 338160
2024-08-15 13:17:32.239787 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8220.88it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 1.56194 (best 1.56194), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11076/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 1.12420 (best 1.12420), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11076/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 0.87304 (best 0.87304), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11076/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 0.68048 (best 0.68048), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11076/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 0.46601 (best 0.46601), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11076/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:19:02.247759 Saving...
2024-08-15 13:19:06.549825 80 / 83 343232
2024-08-15 13:19:06.814576 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 8614.41it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.68567 (best 3.68567), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11078/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.88281 (best 2.88281), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11078/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.68169 (best 2.68169), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11078/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.54478 (best 2.54478), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11078/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.50792 (best 2.50792), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11078/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:20:34.028196 81 / 83 345283
2024-08-15 13:20:34.207736 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 6485.94it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 2.93140 (best 2.93140), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11080/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.22748 (best 2.22748), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11080/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.18219 (best 2.18219), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11080/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.13416 (best 2.13416), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11080/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.00265 (best 2.00265), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11080/checkpoints/epoch=4-step=250.ckpt' as top

2024-08-15 13:22:03.374074 Saving...
2024-08-15 13:22:07.405247 82 / 83 348278
2024-08-15 13:22:07.747006 #items 357.0 split 178.5


Running evaluation: 179it [00:00, 4740.07it/s]
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [1]

  | Name          | Type               | Params
-----------------------------------------------------
0 | model         | LagLlamaModel      | 2.4 M 
1 | augmentations | ApplyAugmentations | 0     
-----------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.797     Total estimated model params size (MB)


Training: |                                                                                                 | …

Epoch 0, global step 50: 'train_loss' reached 3.83763 (best 3.83763), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11082/checkpoints/epoch=0-step=50.ckpt' as top 1
Epoch 1, global step 100: 'train_loss' reached 2.84873 (best 2.84873), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11082/checkpoints/epoch=1-step=100.ckpt' as top 1
Epoch 2, global step 150: 'train_loss' reached 2.66314 (best 2.66314), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11082/checkpoints/epoch=2-step=150.ckpt' as top 1
Epoch 3, global step 200: 'train_loss' reached 2.66217 (best 2.66217), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11082/checkpoints/epoch=3-step=200.ckpt' as top 1
Epoch 4, global step 250: 'train_loss' reached 2.45759 (best 2.45759), saving model to '/home/user/energygpt/lagllama/lag-llama/lightning_logs/version_11082/checkpoints/epoch=4-step=250.ckpt' as top




In [12]:
assert(False)

AssertionError: 