In [1]:
## Dataset

In [1]:
import os
import datetime
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset
!pip install -U scikit-learn
from sklearn.preprocessing import StandardScaler

import warnings

warnings.filterwarnings('ignore')

Collecting scikit-learn
  Downloading scikit_learn-1.4.1.post1-cp39-cp39-macosx_12_0_arm64.whl.metadata (11 kB)
Downloading scikit_learn-1.4.1.post1-cp39-cp39-macosx_12_0_arm64.whl (10.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.5/10.5 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.4.0
    Uninstalling scikit-learn-1.4.0:
      Successfully uninstalled scikit-learn-1.4.0
Successfully installed scikit-learn-1.4.1.post1


In [2]:



class EarlyStopping:
    """Stop training once the validation loss stops improving.

    Call the instance once per validation round. A round counts as an
    improvement when ``-val_loss >= best_score + delta`` (the first call always
    does). Each improvement saves a checkpoint and resets the counter; any other
    round (including a NaN loss) increments it. ``early_stop`` is set to True
    once the counter reaches ``patience``.

    Args:
        verbose: Print a message every time the validation loss improves.
        delta: Margin added to the best score before a round counts as an
            improvement.
        patience: Number of consecutive non-improving rounds tolerated before
            ``early_stop`` is set. Defaults to 3, the previously hard-coded value.
    """

    def __init__(self, verbose=False, delta=0, patience=3):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # float('inf') rather than np.Inf: that alias was removed in NumPy 2.0.
        self.val_loss_min = float('inf')
        self.delta = delta
        self.use_multi_gpu = False
        # Only consulted when use_multi_gpu is True. torchrun exports LOCAL_RANK;
        # fall back to rank 0 for single-process runs.
        self.local_rank = int(os.environ.get('LOCAL_RANK', 0))

    def _is_main_process(self):
        """Return True when this process is the one allowed to print and save."""
        return not self.use_multi_gpu or self.local_rank == 0

    def _save_on_main_process(self, val_loss, model, path):
        """Save a checkpoint from rank 0 and, in multi-GPU mode, sync all ranks."""
        if not self.use_multi_gpu:
            self.save_checkpoint(val_loss, model, path)
            return
        # Imported lazily so single-GPU notebooks never touch torch.distributed.
        import torch.distributed as dist
        if self.local_rank == 0:
            self.save_checkpoint(val_loss, model, path)
        dist.barrier()

    def __call__(self, val_loss, model, path):
        """Record one validation result.

        Args:
            val_loss: Validation loss of the current round (lower is better).
            model: Module whose trainable weights are checkpointed on improvement.
            path: Existing directory that receives ``checkpoint.pth``.
        """
        score = -val_loss
        # Written as ">=" so that a NaN loss compares False and is counted as
        # "no improvement" instead of overwriting the best checkpoint.
        improved = self.best_score is None or score >= self.best_score + self.delta

        if not improved:
            self.counter += 1
            if self._is_main_process():
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        self.best_score = score
        if self.verbose and self._is_main_process():
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).')
        self._save_on_main_process(val_loss, model, path)
        self.val_loss_min = val_loss
        self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write the model's state dict, minus frozen parameters, to ``path/checkpoint.pth``.

        ``val_loss`` is accepted for interface compatibility and is not used.
        """
        param_grad_dic = {k: v.requires_grad for (k, v) in model.named_parameters()}
        state_dict = model.state_dict()
        for k in list(state_dict.keys()):
            # Drop parameters that do not require gradient (e.g. a frozen backbone).
            # Entries that are not parameters (buffers) are kept.
            if k in param_grad_dic and not param_grad_dic[k]:
                del state_dict[k]
        torch.save(state_dict, os.path.join(path, 'checkpoint.pth'))


In [3]:
class Dataset_ETT_hour(Dataset):
    """Sliding-window view over an ETT-style CSV, one variable per sample.

    The first CSV column is skipped and every remaining column is treated as an
    independent series. Sample ``index`` selects variable ``index // tot_len``
    and window start ``index % tot_len``. Timestamp embeddings are loaded from
    ``<root_path>/<data_path stem>.pt`` and sliced with the same split borders.

    Args:
        root_path: Directory containing the CSV and the ``.pt`` embedding file.
        flag: One of ``'train'``, ``'val'``, ``'test'``.
        size: ``[seq_len, label_len, pred_len]``.
        data_path: CSV file name inside ``root_path``.
        scale: Standardise the data with statistics fitted on the train rows.
        seasonal_patterns: Unused; accepted for interface compatibility.
        drop_short: Unused; accepted for interface compatibility.
        steps_per_hour: Rows per hour in the CSV. The split borders are
            12/4/4 thirty-day months expressed in rows; the default of 1 keeps
            the original hourly borders, 4 would suit 15-minute data.

    Raises:
        ValueError: If ``size`` is missing or too short, or ``seq_len`` is not
            larger than ``label_len``.
    """

    def __init__(self, root_path, flag='train', size=None, data_path='ETTh1.csv',
                 scale=True, seasonal_patterns=None, drop_short=False, steps_per_hour=1):
        if size is None or len(size) < 3:
            raise ValueError('size must be [seq_len, label_len, pred_len]')
        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]
        self.token_len = self.seq_len - self.label_len
        if self.token_len <= 0:
            # token_len is both a divisor and a slice step below.
            raise ValueError('seq_len must be larger than label_len')
        self.token_num = self.seq_len // self.token_len
        self.flag = flag
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.scale = scale
        self.steps_per_hour = steps_per_hour

        self.root_path = root_path
        self.data_path = data_path
        self.read_data()
        self.enc_in = self.data_x.shape[-1]
        # Number of window start positions per variable; clamped so that a split
        # shorter than one window yields an empty dataset instead of a negative length.
        self.tot_len = max(len(self.data_x) - self.seq_len - self.pred_len + 1, 0)

    def read_data(self):
        """Load the CSV and timestamp embeddings and keep the rows of this split."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path,
                                          self.data_path))

        month_rows = 30 * 24 * self.steps_per_hour
        train_end = 12 * month_rows
        val_end = train_end + 4 * month_rows
        test_end = train_end + 8 * month_rows
        # val/test start seq_len rows early so their first window has full history.
        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]

        # Column 0 is skipped; all other columns are data.
        cols_data = df_raw.columns[1:]
        df_data = df_raw[cols_data]

        if self.scale:
            # Fit on the train rows only, then transform every row.
            train_data = df_data[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        data_name = self.data_path.split('.')[0]
        # NOTE(review): nothing checks that the embedding file has as many rows
        # as the CSV; a shorter file silently yields short or empty marks.
        self.data_stamp = torch.load(os.path.join(self.root_path, f'{data_name}.pt'))
        self.data_stamp = self.data_stamp[border1:border2]
        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for one variable and window.

        Raises:
            IndexError: If ``index`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if index < 0:
            index += n_samples
        if not 0 <= index < n_samples:
            raise IndexError(f'index out of range for dataset of length {n_samples}')

        feat_id = index // self.tot_len
        s_begin = index % self.tot_len

        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len
        seq_x = self.data_x[s_begin:s_end, feat_id:feat_id+1]
        seq_y = self.data_y[r_begin:r_end, feat_id:feat_id+1]
        # One timestamp embedding per token (every token_len rows).
        seq_x_mark = self.data_stamp[s_begin:s_end:self.token_len]
        seq_y_mark = self.data_stamp[s_end:r_end:self.token_len]

        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of (variable, window) pairs."""
        return self.tot_len * self.enc_in

    def inverse_transform(self, data):
        """Undo the standardisation; a no-op when the dataset was built with ``scale=False``."""
        if not self.scale:
            return data
        return self.scaler.inverse_transform(data)

In [4]:
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler

In [5]:
class Dataset_Preprocess(Dataset):
    """Turn the ``date`` column of a time-series CSV into text prompts.

    Item ``i`` is the sentence
    ``"This is Time Series from <start> to <end>"``, where ``<start>`` is the
    timestamp of row ``i`` and ``<end>`` lies ``token_len - 1`` sampling steps
    later. The step length is looked up from the dataset name (the part of
    ``data_path`` before the first dot).

    Args:
        root_path: Directory containing the CSV.
        flag: One of ``'train'``, ``'val'``, ``'test'``; stored but not used
            to split the data.
        size: ``[seq_len, label_len, pred_len]``.
        data_path: CSV file name inside ``root_path``.
        scale: Stored only; no numeric data is read.
        seasonal_patterns: Unused; accepted for interface compatibility.
        nrows: Number of CSV rows to read. Defaults to 5000, the previously
            hard-coded limit; pass ``None`` to read the whole file.

    Raises:
        ValueError: If ``size`` is missing or too short, or (on item access)
            the dataset name has no known sampling step.
    """

    # Minutes between two consecutive rows, per supported dataset name.
    _STEP_MINUTES = {
        'traffic': 60, 'electricity': 60, 'ETTh1': 60, 'ETTh2': 60,
        'weather': 10,
        'ETTm1': 15, 'ETTm2': 15,
    }
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S"

    def __init__(self, root_path, flag='train', size=None,
                 data_path='ETTh1.csv', scale=True, seasonal_patterns=None, nrows=5000):
        if size is None or len(size) < 3:
            raise ValueError('size must be [seq_len, label_len, pred_len]')
        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]
        self.token_len = self.seq_len - self.label_len
        self.token_num = self.seq_len // self.token_len
        self.flag = flag
        self.data_set_type = data_path.split('.')[0]
        # init
        assert flag in ['train', 'test', 'val']
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.scale = scale
        self.nrows = nrows

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()
        self.tot_len = len(self.data_stamp)

    def __read_data__(self):
        """Read the ``date`` column and store it as a list of formatted strings."""
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path),
                             usecols=['date'], nrows=self.nrows)
        # Format straight from the parsed column: avoids assigning into a
        # DataFrame slice and always matches the format parsed in __getitem__.
        stamps = pd.to_datetime(df_raw['date']).dt.strftime(self._TIME_FORMAT)
        self.data_stamp = [str(x) for x in stamps.values]

    def __getitem__(self, index):
        """Return the prompt describing the token that starts at row ``index``.

        ``index`` wraps around modulo the number of rows.
        """
        if self.data_set_type not in self._STEP_MINUTES:
            # Previously this fell through every branch and crashed on an unbound name.
            raise ValueError(
                f'unknown sampling step for dataset {self.data_set_type!r}; '
                f'expected one of {sorted(self._STEP_MINUTES)}')
        step_minutes = self._STEP_MINUTES[self.data_set_type]

        s_begin = index % self.tot_len
        start = datetime.datetime.strptime(self.data_stamp[s_begin], self._TIME_FORMAT)
        # A token of token_len points spans token_len - 1 steps.
        span = datetime.timedelta(minutes=step_minutes * (self.token_len - 1))
        end = (start + span).strftime(self._TIME_FORMAT)
        seq_x_mark = f"This is Time Series from {self.data_stamp[s_begin]} to {end}"
        return seq_x_mark

    def __len__(self):
        """Number of timestamps read from the CSV."""
        return len(self.data_stamp)

In [6]:
# Prompt dataset over ETTm1.csv in the current directory.
# size is [seq_len, label_len, pred_len], so token_len = 672 - 576 = 96.
data_set = Dataset_Preprocess(
            root_path='./',
            data_path='ETTm1.csv',
            size=[672, 576, 96])

In [7]:
# Iterate the prompts in file order, four at a time; shuffle is off so the
# outputs collected from this loader stay aligned with the CSV rows.
data_loader = DataLoader(
        data_set,
        batch_size=4,
        shuffle=False,
    )

In [8]:
import torch
import torch.nn as nn
!pip install transformers
from transformers import (
    LlamaForCausalLM,
    LlamaTokenizer,
)

class Model(nn.Module):
    """Frozen LLaMA used to turn text prompts into one embedding vector each.

    Every prompt is tokenized, embedded with LLaMA's input-embedding layer, run
    through the LLaMA decoder stack, and represented by the hidden state of its
    last token.

    Args:
        device: Device passed as ``device_map`` and used for the token ids.
            Defaults to ``'cuda:0'``, the previously hard-coded value.
        model_name: Hugging Face checkpoint to load for model and tokenizer.
        token: Optional Hugging Face access token; the default checkpoint is a
            gated repository. When ``None`` the library's own credential
            lookup is used.
    """

    def __init__(self, device='cuda:0', model_name='meta-llama/Llama-2-7b-chat-hf', token=None):
        super(Model, self).__init__()
        self.device = device
        print(self.device)

        # Only forward the token when one was given, so the default call is
        # exactly what it was before the parameter existed.
        auth_kwargs = {} if token is None else {'token': token}

        self.llama = LlamaForCausalLM.from_pretrained(
            model_name,
            device_map=self.device,
            torch_dtype=torch.float16,
            **auth_kwargs,
        )
        self.llama_tokenizer = LlamaTokenizer.from_pretrained(model_name, **auth_kwargs)
        self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
        self.vocab_size = self.llama_tokenizer.vocab_size
        # Read from the loaded config instead of assuming the 7B value (4096).
        self.hidden_dim_of_llama = self.llama.config.hidden_size

        # The language model is used as a fixed feature extractor.
        for name, param in self.llama.named_parameters():
            param.requires_grad = False

    def tokenizer(self, x):
        """Tokenize one prompt string and return its input embeddings, ``[1 x T x hidden]``."""
        output = self.llama_tokenizer(x, return_tensors="pt")['input_ids'].to(self.device)
        result = self.llama.get_input_embeddings()(output)
        return result

    def forecast(self, x_mark_enc):
        """Embed a batch of prompts.

        Args:
            x_mark_enc: Sequence of prompt strings.

        Returns:
            Tensor ``[bs x hidden]`` holding the last-token hidden state per prompt.

        Raises:
            ValueError: If the batch is empty.
        """
        if len(x_mark_enc) == 0:
            raise ValueError('x_mark_enc must contain at least one prompt')
        # Prompts are embedded one by one and stacked along the batch axis, so
        # every prompt has to tokenize to the same number of tokens.
        # x_mark_enc: [bs x T x hidden_dim_of_llama]
        x_mark_enc = torch.cat([self.tokenizer(x_mark_enc[i]) for i in range(len(x_mark_enc))], 0)
        text_outputs = self.llama.model(inputs_embeds=x_mark_enc)[0]
        # Keep only the final position of each sequence.
        text_outputs = text_outputs[:, -1, :]
        return text_outputs

    def forward(self, x_mark_enc):
        """Alias for :meth:`forecast`."""
        return self.forecast(x_mark_enc)



In [9]:
!pip install accelerate
model = Model()
model.to('cuda')
print(model)

cuda:0


OSError: You are trying to access a gated repo.
Make sure to request access at https://huggingface.co/meta-llama/Llama-2-7b-chat-hf and pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`.

In [10]:
from tqdm.notebook import tqdm

In [11]:

print(len(data_set.data_stamp))
print(data_set.tot_len)
save_dir_path = './'
output_list = []
# Inference only: no autograd bookkeeping is needed.
with torch.no_grad():
    # tqdm wraps the loader itself so the bar knows how many batches to expect.
    for data in tqdm(data_loader):
        output = model(data)
        output_list.append(output.detach().cpu())
# One embedding row per prompt, in loader (file) order.
result = torch.cat(output_list, dim=0)
print(result.shape)
torch.save(result, os.path.join(save_dir_path, 'ETTm1.pt'))

5000
5000


0it [00:00, ?it/s]

NameError: name 'model' is not defined

In [None]:
import torch.nn.utils.rnn as rnn_utils

def collate_fn(batch):
    """Collate ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` samples into padded batches.

    Samples are ordered by the length of their first element, longest first.
    Each of the four fields is then converted to tensors and zero-padded along
    the first axis to the longest item of that field.

    Returns:
        Tuple of four tensors, each with the batch dimension first.
    """
    ordered = sorted(batch, key=lambda sample: sample[0].shape[0], reverse=True)
    # Transpose the list of samples into four per-field tuples.
    sequences, targets, seq_marks, target_marks = zip(*ordered)

    def to_padded(items):
        tensors = [torch.tensor(item) for item in items]
        return rnn_utils.pad_sequence(tensors, batch_first=True)

    return (
        to_padded(sequences),
        to_padded(targets),
        to_padded(seq_marks),
        to_padded(target_marks),
    )


def data_provider(flag):
    """Build the dataset and DataLoader for one split.

    ``'test'`` is served unshuffled with batch size 1, ``'val'`` shuffled with
    batch size 1, and any other flag gets the training settings (shuffled,
    batch size 32). The last partial batch is never dropped.

    Returns:
        ``(data_set, data_loader)``.
    """
    # NOTE(review): the dataset class uses hourly split borders (12*30*24 rows)
    # but is pointed at ETTm1.csv, which the prompt dataset in this notebook
    # treats as 15-minute data — confirm this is intended.
    shuffle_flag = flag != 'test'
    batch_size = 1 if flag in ('test', 'val') else 32

    data_set = Dataset_ETT_hour(
        root_path='./',
        data_path='ETTm1.csv',
        flag=flag,
        size=[672, 576, 96],
        seasonal_patterns='Monthly',
        drop_short=False,
    )
    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=0,
        collate_fn=collate_fn,
        drop_last=False,
    )
    return data_set, data_loader

In [None]:
# Datasets and loaders for the training and validation splits.
train_data, train_loader = data_provider('train')
vali_data, vali_loader = data_provider(flag='val')

In [None]:
# Directory for model checkpoints, relative to the working directory.
path = os.path.join('.', 'Model')
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(path, exist_ok=True)

In [None]:
# Number of batches per training epoch.
train_steps = len(train_loader)
# Early-stopping tracker that prints a message whenever the validation loss improves.
early_stopping = EarlyStopping(verbose=True)


In [None]:
import torch
import torch.nn as nn
import time
from torch import optim

In [None]:
import torch.nn as nn

class MLP(nn.Module):
    """Feed-forward network applied to the last dimension of its input.

    The stack is ``Linear(f_in, hidden_dim) -> activation -> Dropout``, followed
    by ``hidden_layers - 2`` blocks of
    ``Linear(hidden_dim, hidden_dim) -> activation -> Dropout``, and a final
    ``Linear(hidden_dim, f_out)``.

    Args:
        f_in: Size of the input features.
        f_out: Size of the output features.
        hidden_dim: Width of the hidden layers.
        hidden_layers: Total number of linear layers when 2 or more; values
            below 2 still build two linear layers.
        dropout: Dropout probability after every activation.
        activation: ``'relu'``, ``'tanh'`` or ``'gelu'``.

    Raises:
        NotImplementedError: For any other ``activation`` value.
    """

    def __init__(self,
                 f_in,
                 f_out,
                 hidden_dim=256,
                 hidden_layers=2,
                 dropout=0.1,
                 activation='tanh'):
        super(MLP, self).__init__()
        self.f_in = f_in
        self.f_out = f_out
        self.hidden_dim = hidden_dim
        self.hidden_layers = hidden_layers
        self.dropout = dropout

        # Pick the activation module by name.
        known_activations = (('relu', nn.ReLU), ('tanh', nn.Tanh), ('gelu', nn.GELU))
        for label, factory in known_activations:
            if activation == label:
                self.activation = factory()
                break
        else:
            raise NotImplementedError

        # Input block; the single activation instance is reused in every block.
        stack = [nn.Linear(f_in, hidden_dim), self.activation, nn.Dropout(dropout)]
        # Extra hidden blocks beyond the input and output layers.
        for _ in range(hidden_layers - 2):
            stack.extend([
                nn.Linear(hidden_dim, hidden_dim),
                self.activation,
                nn.Dropout(dropout),
            ])
        # Output projection, with no activation after it.
        stack.append(nn.Linear(hidden_dim, f_out))
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Map ``x`` of shape ``[..., f_in]`` to ``[..., f_out]``."""
        return self.layers(x)

In [None]:
# Load llama2
from torch import cuda, bfloat16
import transformers

In [None]:
class Model(nn.Module):
    """Time-series forecaster built around a frozen, 4-bit quantized LLaMA.

    The input series is instance-normalized, cut into non-overlapping tokens of
    ``token_len`` points, embedded by a trainable MLP encoder, passed through
    the frozen LLaMA decoder stack, and projected back to ``token_len`` points
    per token by a trainable MLP decoder.

    Args:
        token_len: Points per token. Defaults to 96, the previously hard-coded value.
        device: Device for the language model; ``None`` means ``cuda:0`` as before.
        model_name: Hugging Face checkpoint to load.
    """

    def __init__(self, token_len=96, device=None, model_name='meta-llama/Llama-2-7b-chat-hf'):
        super(Model, self).__init__()
        self.token_len = token_len
        self.device = torch.device('cuda:{}'.format(0)) if device is None else torch.device(device)
        print(self.device)
        # Load llama2 with 4-bit NF4 weights, double quantization, bf16 compute.
        bnb_config = transformers.BitsAndBytesConfig(
                    load_in_4bit=True,
                    bnb_4bit_quant_type='nf4',
                    bnb_4bit_use_double_quant=True,
                    bnb_4bit_compute_dtype=bfloat16)

        # NOTE(review): trust_remote_code is probably unnecessary when loading
        # through a built-in architecture class — confirm and drop.
        self.llama = LlamaForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            quantization_config=bnb_config,
            device_map=self.device,
        )
        # Read from the loaded config instead of assuming the 7B value (4096).
        self.hidden_dim_of_llama = self.llama.config.hidden_size
        # When True, timestamp embeddings are blended into the series embeddings.
        self.mix = False
        if self.mix:
            self.add_scale = nn.Parameter(torch.ones([]))

        # Only the encoder/decoder MLPs (and add_scale) are trained.
        for name, param in self.llama.named_parameters():
            param.requires_grad = False

        self.encoder = MLP(self.token_len, self.hidden_dim_of_llama,
                            256, 2, 0.1, 'tanh')
        self.decoder = MLP(self.hidden_dim_of_llama, self.token_len, 256, 2, 0.1, 'tanh')

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Predict the next-token continuation for every input token.

        Args:
            x_enc: Series tensor ``[bs x seq_len x n_vars]``.
            x_mark_enc: Timestamp embeddings; only read when ``self.mix`` is True.
            x_dec: Unused.
            x_mark_dec: Unused.

        Returns:
            Tensor ``[bs x token_num * token_len x n_vars]`` in the input's scale.
        """
        # Per-sample, per-variable normalization over the time axis.
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(
            torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc / stdev

        bs, _, n_vars = x_enc.shape
        # x_enc: [bs x nvars x seq_len]
        x_enc = x_enc.permute(0, 2, 1)
        # x_enc: [bs * nvars x seq_len]
        x_enc = x_enc.reshape(x_enc.shape[0] * x_enc.shape[1], -1)
        # fold_out: [bs * n_vars x token_num x token_len]
        # (points left over after the last full token are dropped by unfold)
        fold_out = x_enc.unfold(dimension=-1, size=self.token_len, step=self.token_len)
        # times_embeds: [bs * n_vars x token_num x hidden_dim_of_llama]
        times_embeds = self.encoder(fold_out)
        if self.mix:
            # Blend unit-norm series and timestamp embeddings with a learned scale.
            times_embeds = times_embeds / times_embeds.norm(dim=2, keepdim=True)
            x_mark_enc = x_mark_enc / x_mark_enc.norm(dim=2, keepdim=True)
            times_embeds = times_embeds + self.add_scale * x_mark_enc
        # outputs: [bs * n_vars x token_num x hidden_dim_of_llama]
        outputs = self.llama.model(
            inputs_embeds=times_embeds)[0]
        # dec_out: [bs * n_vars x token_num x token_len]
        dec_out = self.decoder(outputs)
        dec_out = dec_out.reshape(bs, n_vars, -1)
        # dec_out: [bs x token_num * token_len x n_vars]
        dec_out = dec_out.permute(0, 2, 1)

        # Undo the normalization; stdev and means are [bs x 1 x n_vars] and
        # broadcast over the time axis.
        dec_out = dec_out * stdev + means

        return dec_out

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Alias for :meth:`forecast`."""
        return self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)

In [None]:
# First CUDA device; the model and every batch are moved here.
device = torch.device('cuda', 0)

In [None]:
import gc
# Force a full garbage-collection pass. As the cell's last expression, the
# number of unreachable objects found is displayed.
gc.collect()

In [None]:
# Instantiate whichever Model class was defined most recently in the kernel
# (the name is defined twice in this notebook), then move it to `device`.
model = Model()
model.to(device)

In [None]:
# Gather the parameters that still require gradients (the language model is frozen).
trainable = [(name, param) for name, param in model.named_parameters()
             if param.requires_grad]
for name, param in trainable:
    print(name, param.dtype, param.shape)
p_list = [param for _, param in trainable]

# Adam over the trainable parameters only, without weight decay.
model_optim = optim.Adam([{'params': p_list}], lr=0.0001, weight_decay=0)
print('next learning rate is {}'.format(0.0001))

In [None]:
# Cosine-annealing schedule for model_optim's learning rate (T_max=10, floor 1e-8).
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(model_optim, T_max=10, eta_min=1e-8)
# Mean-squared-error loss.
criterion = nn.MSELoss()

In [None]:
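# NOTE(review): disabled training loop, kept for reference. As written it never
# calls loss.backward() or model_optim.step(), so re-enabling it unchanged would
# not update any weights.
# import os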

# # Directory to save models
# save_dir = 'models'
# os.makedirs(save_dir, exist_ok=True)

# for epoch in range(1):
#     iter_count = 0
#     loss_val = torch.tensor(0., device=device)
#     count = torch.tensor(0., device=device)
#     model.train()
#     epoch_time = time.time()
    
#     # Wrap train_loader with tqdm for progress tracking
#     train_loader_iter = tqdm(train_loader, desc="loss", leave=False)
    
#     for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader_iter):
#         # gc.collect()
#         iter_count += 1
#         time_now = time.time()
#         model_optim.zero_grad()
#         batch_x = batch_x.float().to(device)
#         batch_y = batch_y.float().to(device)
#         batch_x_mark = batch_x_mark.float().to(device)
#         batch_y_mark = batch_y_mark.float().to(device)
       
#         outputs = model(batch_x, batch_x_mark, None, batch_y_mark)
#         loss = criterion(outputs, batch_y)
        
#         loss_val += loss
#         count += 1
        
#         if (i + 1) % 100 == 0:
#             speed = (time.time() - time_now) / iter_count
#             left_time = speed * ((1 - epoch) * train_steps - i)
#             print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
#             iter_count = 0
#             time_now = time.time()
            
#             # Save the model
#             save_path = os.path.join(save_dir, f'model_epoch{epoch}_iter{i}.pt')
#             torch.save(model.state_dict(), save_path)
    
#     train_loss = loss_val.item() / count.item()
#     print(train_loss)


In [None]:
# Dataset and loader for the test split (unshuffled, batch size 1).
test_data, test_loader = data_provider(flag='test')

In [None]:
total_samples = len(test_loader.dataset)

# Number of samples to include: one tenth of the total (integer division).
num_samples_to_include = total_samples // 10

# Restrict evaluation to the first tenth of the dataset indices.
# SubsetRandomSampler visits those indices in random order.
subset_indices = list(range(num_samples_to_include))
subset_sampler = torch.utils.data.sampler.SubsetRandomSampler(subset_indices)

# New DataLoader over the same dataset using the subset sampler. Despite the
# name it covers 10% of the test set, not half. No collate_fn is passed here,
# so the DataLoader's default collation is used instead of test_loader's.
half_test_loader = torch.utils.data.DataLoader(test_loader.dataset, 
                                               batch_size=test_loader.batch_size,
                                               sampler=subset_sampler,
                                               num_workers=test_loader.num_workers)


In [None]:
model_path = os.path.join('models', 'model_epoch0_iter99.pt')
# torch.load unpickles the file: only load checkpoints you created yourself.
# map_location places the tensors on `device` regardless of where they were saved.
load_item = torch.load(model_path, map_location=device)
# Strip the 'module.' prefix that wrapped (DataParallel-style) models add to keys.
state_dict = {k.replace('module.', ''): v for k, v in load_item.items()}
# strict=False tolerates key mismatches; report them instead of hiding them.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys or load_result.unexpected_keys:
    print('checkpoint keys missing from file: {}, unexpected in file: {}'.format(
        len(load_result.missing_keys), len(load_result.unexpected_keys)))
model.to(device)

In [None]:
import os
from tqdm.notebook import tqdm

TOKEN_LEN = 96          # points produced per forward pass (the model's token length)
TEST_PRED_LEN = 96      # forecast horizon to evaluate
MAX_TEST_BATCHES = 1    # evaluate only the first batch; set to None to run them all

preds = []
trues = []
folder_path = './test_results/'
os.makedirs(folder_path, exist_ok=True)

# Report the size of the loader that is actually iterated below.
test_steps = len(half_test_loader)
print(test_steps)
model.eval()

# Autoregressive rounds needed to cover the horizon (ceiling division).
inference_steps = -(-TEST_PRED_LEN // TOKEN_LEN)

with torch.no_grad():
    # The progress bar wraps the same loader the loop consumes.
    test_loader_iter = tqdm(half_test_loader, desc="Testing", leave=False)
    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader_iter):
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)
        batch_x_mark = batch_x_mark.float().to(device)
        batch_y_mark = batch_y_mark.float().to(device)

        pred_y = []
        for j in range(inference_steps):
            if len(pred_y) != 0:
                # Slide the window: drop the oldest token, append the newest prediction
                # and the matching timestamp embedding.
                batch_x = torch.cat([batch_x[:, TOKEN_LEN:, :], pred_y[-1]], dim=1)
                tmp = batch_y_mark[:, j-1:j, :]
                batch_x_mark = torch.cat([batch_x_mark[:, 1:, :], tmp], dim=1)
            outputs = model(batch_x, batch_x_mark, None, batch_y_mark)
            # The last token of the output is the forecast for the next TOKEN_LEN points.
            pred_y.append(outputs[:, -TOKEN_LEN:, :])
        # Keep exactly the requested horizon, whatever the number of rounds.
        pred_y = torch.cat(pred_y, dim=1)[:, :TEST_PRED_LEN, :]

        preds.append(pred_y.detach().cpu())
        trues.append(batch_y[:, -TEST_PRED_LEN:, :].detach().cpu())

        if MAX_TEST_BATCHES is not None and i + 1 >= MAX_TEST_BATCHES:
            break

preds = torch.cat(preds, dim=0).numpy()
trues = torch.cat(trues, dim=0).numpy()


In [None]:
# Display the collected prediction and ground-truth arrays.
preds, trues

In [None]:
import matplotlib.pyplot as plt


def _plot_and_save(series, label, color, title, filename):
    """Draw one series on a 10x6 figure, save it to ``filename`` and show it."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(series, label=label, color=color)
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Value')
    ax.set_title(title)
    ax.legend()
    ax.grid(True)
    fig.savefig(filename)  # Save the plot as an image
    plt.show()


# First evaluated sample: forecast, then ground truth.
_plot_and_save(preds[0], 'Predictions', 'blue', 'Predicted Values', 'predicted_values.png')
_plot_and_save(trues[0], 'True Values', 'green', 'True Values', 'true_values.png')
