In [0]:
%pip install bitsandbytes

Note: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.0-py3-none-manylinux_2_24_x86_64.whl (102.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 102.2/102.2 MB 9.1 MB/s eta 0:00:00
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.43.0
Note: you may need to restart the kernel using dbutils.library.restartPython() to use updated packages.


In [0]:
import torch
# from accelerate import Accelerator, DeepSpeedPlugin
# from accelerate import DistributedDataParallelKwargs
from torch import optim
from torch.optim import lr_scheduler

from data_provider.m4 import M4Meta
from models import Autoformer, DLinear, TimeLLM

from data_provider.data_factory import data_provider
import time
import numpy as np
import pandas

from utils.losses import smape_loss
from utils.m4_summary import M4Summary
import os
from transformers import AutoTokenizer, AutoModelForCausalLM

# tokenizer = AutoTokenizer.from_pretrained("luodian/llama-7b-hf")
# model = AutoModelForCausalLM.from_pretrained("luodian/llama-7b-hf")

# Process-wide environment tweaks, applied before any model download or CUDA allocation
# happens in this notebook.
os.environ.update({
    # NOTE(review): an empty CURL_CA_BUNDLE is a commonly used way to switch off TLS
    # certificate verification for `requests`-based downloads -- confirm this is really
    # intended before running outside a trusted network.
    'CURL_CA_BUNDLE': '',
    # Option string for PyTorch's CUDA caching allocator (max_split_size_mb = 64).
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:64",
})

from utils.tools import del_files, EarlyStopping, adjust_learning_rate, load_content, test

class Args:
    """Settings container for the TimeLLM M4 short-term forecasting run.

    Plays the role of a parsed command-line namespace: every setting is a plain
    instance attribute. ``Args()`` yields the defaults listed below; keyword
    arguments replace individual defaults, e.g. ``Args(batch_size=8)``.

    Note that when ``data == 'm4'`` the training cell overwrites ``seq_len``,
    ``label_len`` and ``pred_len`` from ``M4Meta``, so the values given here
    only end up in the experiment name.

    Raises:
        TypeError: if a keyword does not name one of the known settings
            (protects against silently ignored typos).
    """

    def __init__(self, **overrides):
        # --- experiment identity ---
        self.task_name = 'short_term_forecast'
        self.is_training = 1
        self.model_id = 'm4_Weekly'
        self.model_comment = 'TimeLLM-M4'
        self.model = 'TimeLLM'
        self.seed = 0

        # --- data ---
        self.data = 'm4'
        self.root_path = './dataset/m4'
        self.data_path = 'm4.csv'
        self.features = 'M'
        self.target = 'OT'
        self.loader = 'modal'
        self.freq = 'h'
        self.checkpoints = './checkpoints/'

        # --- forecasting window ---
        self.seq_len = 96
        self.label_len = 48
        self.pred_len = 96
        self.seasonal_patterns = 'Weekly'

        # --- model ---
        self.enc_in = 1
        self.dec_in = 1
        self.c_out = 1
        self.d_model = 8
        self.n_heads = 8
        self.e_layers = 2
        self.d_layers = 1
        self.d_ff = 32
        self.moving_avg = 25
        self.factor = 1
        self.dropout = 0.1
        self.embed = 'timeF'
        self.activation = 'gelu'
        self.output_attention = True
        self.patch_len = 1
        self.stride = 1
        self.prompt_domain = 0

        # --- optimisation ---
        self.num_workers = 10
        self.itr = 1
        self.train_epochs = 50
        self.align_epochs = 10
        self.batch_size = 32
        self.eval_batch_size = 8
        self.patience = 20
        self.learning_rate = 0.001
        self.des = 'test'
        self.loss = 'SMAPE'
        self.lradj = 'type1'
        self.pct_start = 0.2
        self.use_amp = False
        self.llm_layers = 32
        self.percent = 10

        # Apply caller overrides last; only names defined above are accepted.
        unknown = sorted(set(overrides) - set(vars(self)))
        if unknown:
            raise TypeError('unknown Args setting(s): ' + ', '.join(unknown))
        vars(self).update(overrides)


args = Args()



[2024-03-08 09:56:20,889] [INFO] [real_accelerator.py:191:get_accelerator] Setting ds_accelerator to cuda (auto detect)


2024-03-08 09:56:29.647744: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  warn("The installed version of bitsandbytes was compiled without GPU support. "


/local_disk0/.ephemeral_nfs/envs/pythonEnv-68e38c56-5d3d-43ab-9e2c-fc6ca917bc26/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


In [0]:
from accelerate import Accelerator, DeepSpeedPlugin
from accelerate import DistributedDataParallelKwargs
# Extra options for DistributedDataParallel: ask it to look for parameters that did not
# take part in the backward pass.
ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
# DeepSpeed settings are read from a JSON file next to the notebook
# (the file name suggests ZeRO stage 2 -- confirm against the file itself).
deepspeed_plugin = DeepSpeedPlugin(hf_ds_config='./ds_config_zero2.json')

In [0]:
# Accelerator used by the training cell below; it is configured with the DDP kwargs
# handler and the DeepSpeed plugin created in the previous cell.
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs], deepspeed_plugin=deepspeed_plugin)

In [0]:
# Train, checkpoint, forecast and evaluate once per requested repetition (args.itr).
# NOTE(review): args.seed is defined but no RNG is seeded in this cell.
for ii in range(args.itr):
    # setting record of experiments
    # NOTE: built before the M4 overrides below, so the name records the configured
    # seq_len/label_len/pred_len, not the values actually used for training.
    setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_{}_{}'.format(
        args.task_name,
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.seq_len,
        args.label_len,
        args.pred_len,
        args.d_model,
        args.n_heads,
        args.e_layers,
        args.d_layers,
        args.d_ff,
        args.factor,
        args.embed,
        args.des, ii)

    if args.data == 'm4':
        # Window lengths are dictated by the M4 seasonal pattern, not by the Args defaults.
        args.pred_len = M4Meta.horizons_map[args.seasonal_patterns]  # Up to M4 config
        args.seq_len = 2 * args.pred_len
        args.label_len = args.pred_len
        args.frequency_map = M4Meta.frequency_map[args.seasonal_patterns]

    train_data, train_loader = data_provider(args, 'train')
    vali_data, vali_loader = data_provider(args, 'val')
    test_data, test_loader = data_provider(args, 'test')

    # The Autoformer / DLinear selection is disabled in this notebook: TimeLLM is always
    # built, whatever args.model says.
    model = TimeLLM.Model(args).float()

    path = os.path.join(args.checkpoints,
                        setting + '-' + args.model_comment)  # unique checkpoint saving path
    args.content = load_content(args)
    # exist_ok avoids the check-then-create race when several processes reach this point.
    if accelerator.is_local_main_process:
        os.makedirs(path, exist_ok=True)

    time_now = time.time()

    train_steps = len(train_loader)
    early_stopping = EarlyStopping(accelerator=accelerator, patience=args.patience, verbose=True)

    model_optim = optim.Adam(model.parameters(), lr=args.learning_rate)

    if args.lradj == 'COS':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=20, eta_min=1e-8)
    else:
        scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
                                            steps_per_epoch=train_steps,
                                            pct_start=args.pct_start,
                                            epochs=args.train_epochs,
                                            max_lr=args.learning_rate)

    criterion = smape_loss()

    # test_loader is deliberately not prepared: only its dataset is read further down.
    train_loader, vali_loader, model, model_optim, scheduler = accelerator.prepare(
        train_loader, vali_loader, model, model_optim, scheduler)

    for epoch in range(args.train_epochs):
        iter_count = 0
        train_loss = []

        model.train()
        epoch_time = time.time()

        for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
            iter_count += 1
            model_optim.zero_grad()
            batch_x = batch_x.float().to(accelerator.device)

            batch_y = batch_y.float().to(accelerator.device)
            batch_y_mark = batch_y_mark.float().to(accelerator.device)

            # decoder input: the first label_len steps of batch_y followed by pred_len zeros
            dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float().to(accelerator.device)
            dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(
                accelerator.device)

            outputs = model(batch_x, None, dec_inp, None)

            # Keep only the forecast horizon (and only the last feature when features == 'MS').
            f_dim = -1 if args.features == 'MS' else 0
            outputs = outputs[:, -args.pred_len:, f_dim:]
            batch_y = batch_y[:, -args.pred_len:, f_dim:]

            batch_y_mark = batch_y_mark[:, -args.pred_len:, f_dim:]
            loss = criterion(batch_x, args.frequency_map, outputs, batch_y, batch_y_mark)

            train_loss.append(loss.item())

            if (i + 1) % 100 == 0:
                accelerator.print(
                    "\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())
                )
                # Average seconds per iteration since the last report, extrapolated to the
                # iterations that remain.
                speed = (time.time() - time_now) / iter_count
                left_time = speed * ((args.train_epochs - epoch) * train_steps - i)
                accelerator.print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                iter_count = 0
                time_now = time.time()

            accelerator.backward(loss)
            model_optim.step()

            # NOTE(review): the scheduler is stepped only for lradj == 'TST'; for every other
            # setting the learning rate is handled once per epoch by adjust_learning_rate below.
            if args.lradj == 'TST':
                adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=False)
                scheduler.step()

        accelerator.print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
        train_loss = np.average(train_loss)
        vali_loss = test(args, accelerator, model, train_loader, vali_loader, criterion)
        # No separate test pass is run: the value printed as "Test Loss" is the validation loss.
        test_loss = vali_loss
        accelerator.print(
            "Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
        early_stopping(vali_loss, model, path)  # model saving
        if early_stopping.early_stop:
            accelerator.print("Early stopping")
            break

        if args.lradj != 'TST':
            adjust_learning_rate(accelerator, model_optim, scheduler, epoch + 1, args, printout=True)
        else:
            accelerator.print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

    # Reload the checkpoint stored under `path` (presumably written by EarlyStopping -- confirm).
    best_model_path = path + '/' + 'checkpoint'
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    # torch.cuda.synchronize() raises on a host without CUDA, so only touch the CUDA
    # runtime when it is actually available.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
        torch.cuda.empty_cache()
    unwrapped_model.load_state_dict(torch.load(best_model_path, map_location=lambda storage, loc: storage))

    # Forecast from the last in-sample window of every training series.
    x, _ = train_loader.dataset.last_insample_window()
    y = test_loader.dataset.timeseries
    x = torch.tensor(x, dtype=torch.float32).to(accelerator.device)
    x = x.unsqueeze(-1)

    model.eval()

    with torch.no_grad():
        B, _, C = x.shape
        dec_inp = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
        dec_inp = torch.cat([x[:, -args.label_len:, :], dec_inp], dim=1)
        outputs = torch.zeros((B, args.pred_len, C)).float().to(accelerator.device)
        # Slice boundaries for inference in chunks of eval_batch_size; B is appended so the
        # last (possibly shorter) chunk is covered as well.
        id_list = np.arange(0, B, args.eval_batch_size)
        id_list = np.append(id_list, B)
        for i in range(len(id_list) - 1):
            outputs[id_list[i]:id_list[i + 1], :, :] = model(
                x[id_list[i]:id_list[i + 1]],
                None,
                dec_inp[id_list[i]:id_list[i + 1]],
                None
            )
        accelerator.wait_for_everyone()
        f_dim = -1 if args.features == 'MS' else 0
        outputs = outputs[:, -args.pred_len:, f_dim:]
        outputs = outputs.detach().cpu().numpy()

        preds = outputs
        trues = y
        x = x.detach().cpu().numpy()

    accelerator.print('test shape:', preds.shape)

    folder_path = './m4_results/' + args.model + '-' + args.model_comment + '/'
    # Same race-free directory creation as for the checkpoint folder.
    if accelerator.is_local_main_process:
        os.makedirs(folder_path, exist_ok=True)

    if accelerator.is_local_main_process:
        forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(args.pred_len)])
        forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]]
        forecasts_df.index.name = 'id'
        # NOTE(review): this replaces the 'id' index assigned above with the first forecast
        # column (V1), so the ids are not written to the CSV. Left unchanged -- presumably
        # M4Summary expects this layout; confirm before altering.
        forecasts_df.set_index(forecasts_df.columns[0], inplace=True)
        forecasts_df.to_csv(folder_path + args.seasonal_patterns + '_forecast.csv')

        # calculate metrics
        accelerator.print(args.model)
        file_path = folder_path
        # Only the Weekly forecast file is required here; the checks for the Monthly, Yearly,
        # Daily, Hourly and Quarterly files are disabled in this notebook.
        if 'Weekly_forecast.csv' in os.listdir(file_path):
            m4_summary = M4Summary(file_path, args.root_path)
            smape_results, owa_results, mape, mase = m4_summary.evaluate()
            accelerator.print('smape:', smape_results)
            accelerator.print('mape:', mape)
            accelerator.print('mase:', mase)
            accelerator.print('owa:', owa_results)
        else:
            accelerator.print('After all 6 tasks are finished, you can calculate the averaged performance')

# Once every process is done, the local main process removes the checkpoint directory.
accelerator.wait_for_everyone()
if accelerator.is_local_main_process:
    # Use the configured checkpoint root (the directory the run folders were created under)
    # instead of a hard-coded './checkpoints', so a non-default args.checkpoints is cleaned too.
    path = args.checkpoints
    del_files(path)  # delete checkpoint files
    accelerator.print('success delete checkpoints')


local variable 'model' referenced before assignment


[0;31m---------------------------------------------------------------------------[0m
[0;31mRuntimeError[0m                              Traceback (most recent call last)
File [0;32m<command-1345708696421536>, line 36[0m
[1;32m     29[0m test_data, test_loader [38;5;241m=[39m data_provider(args, [38;5;124m'[39m[38;5;124mtest[39m[38;5;124m'[39m)
[1;32m     31[0m [38;5;66;03m# if args.model == 'Autoformer':[39;00m
[1;32m     32[0m [38;5;66;03m#     model = Autoformer.Model(args).float()[39;00m
[1;32m     33[0m [38;5;66;03m# elif args.model == 'DLinear':[39;00m
[1;32m     34[0m [38;5;66;03m#     model = DLinear.Model(args).float()[39;00m
[1;32m     35[0m [38;5;66;03m# else:[39;00m
[0;32m---> 36[0m model [38;5;241m=[39m TimeLLM[38;5;241m.[39mModel(args)[38;5;241m.[39mfloat()
[1;32m     38[0m path [38;5;241m=[39m os[38;5;241m.[39mpath[38;5;241m.[39mjoin(args[38;5;241m.[39mcheckpoints,
[1;32m     39[0m                     setting [38;5;