In [1]:
import os
import sys
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch import fft 
from Embed import DataEmbedding
from Conv_Blocks import Inception_Block_V1
from sklearn.preprocessing import StandardScaler
from timesfeatures import time_features
from torch.utils.data import DataLoader, Dataset
from torch.optim import Adam
import time 
from timesNet import Model as timesNetModel
from argparse import Namespace

# 1. loading data 
- long-term forecast
- data: [ETT-small](https://github.com/zhouhaoyi/ETDataset/tree/main)

In [2]:
# Directory holding the ETT-small CSV files. The original machine-specific
# location stays as the default so existing runs are unaffected; set the
# ETT_DATA_DIR environment variable to run the notebook on another machine.
data_dir = os.environ.get("ETT_DATA_DIR", "/home/scc/Downloads/Datas/ETDataset/ETT-small")
data_file = os.path.join(data_dir, 'ETTh1.csv')
df = pd.read_csv(data_file)
print(df.shape)
df.head()

(17420, 8)


Unnamed: 0,date,HUFL,HULL,MUFL,MULL,LUFL,LULL,OT
0,2016-07-01 00:00:00,5.827,2.009,1.599,0.462,4.203,1.34,30.531
1,2016-07-01 01:00:00,5.693,2.076,1.492,0.426,4.142,1.371,27.787001
2,2016-07-01 02:00:00,5.157,1.741,1.279,0.355,3.777,1.218,27.787001
3,2016-07-01 03:00:00,5.09,1.942,1.279,0.391,3.807,1.279,25.044001
4,2016-07-01 04:00:00,5.358,1.942,1.492,0.462,3.868,1.279,21.948


In [3]:
# Preview of the row ranges used for the three splits.
# One "month" is counted as 30 days of hourly rows.
seq_len = 96
base_ = 24 * 30
# Each split ends after 12, 16 and 20 months of rows respectively.
border2s = [months * base_ for months in (12, 12 + 4, 12 + 8)]
# The first split starts at row 0; each later split starts seq_len rows
# before the end of the previous one.
border1s = [0] + [split_end - seq_len for split_end in border2s[:-1]]
list(zip(border1s, border2s))

[(0, 8640), (8544, 11520), (11424, 14400)]

## dataSet & dataLoader

- dataset: [Time-Series-Library:data_provider/data_loader.py:Dataset_ETT_hour](https://github.com/thuml/Time-Series-Library/blob/main/data_provider/data_loader.py)
- dataLoader: [Time-Series-Library:data_provider/data_factory.py:data_provider](https://github.com/thuml/Time-Series-Library/blob/main/data_provider/data_factory.py)

In [4]:
class Dataset_ETT_hour(Dataset):
    """Sliding-window dataset over one split of an hourly ETT CSV file.

    The CSV is read once at construction time. Rows are cut into three ranges
    using fixed borders (12, 4 and 4 blocks of ``30 * 24`` rows); the second
    and third ranges start ``seq_len`` rows early so that their first window
    already has a full input history.

    Args:
        root_path: Directory containing the CSV file.
        flag: Which split to expose: ``'train'``, ``'val'`` or ``'test'``.
        size: ``[seq_len, label_len, pred_len]``.
        features: ``'M'`` or ``'MS'`` keeps every column after the first one;
            ``'S'`` keeps only the ``target`` column.
        data_path: CSV file name inside ``root_path``.
        target: Column used when ``features == 'S'``.
        scale: When true, all rows are standardised with a ``StandardScaler``
            fitted on the training range only.
        timeenc: ``0`` builds integer month/day/weekday/hour columns from the
            ``date`` column; ``1`` delegates to ``time_features``.
        freq: Forwarded to ``time_features`` when ``timeenc == 1``.
        seasonal_patterns: Accepted for call compatibility; not used here.

    Raises:
        ValueError: If ``features`` or ``timeenc`` is not a supported value.
    """

    def __init__(self, root_path, flag='train', size=[24 * 4 * 4, 24 * 4, 24 *4], features='S', data_path='ETTh1.csv',
                 target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
        # size [seq_len, label_len, pred_len]
        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]
        assert flag in ['train', 'val', 'test']
        # Fail early with a clear message; previously an unsupported value only
        # surfaced later as an UnboundLocalError inside __read_data__.
        if features not in ('M', 'MS', 'S'):
            raise ValueError(f"features must be 'M', 'MS' or 'S', got {features!r}")
        if timeenc not in (0, 1):
            raise ValueError(f"timeenc must be 0 or 1, got {timeenc!r}")
        type_map = {'train': 0, 'val': 1, 'test': 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def __read_data__(self):
        """Load the CSV, scale the values and build the time-stamp features."""
        self.scaler = StandardScaler()
        df_raw = pd.read_csv(os.path.join(self.root_path, self.data_path))
        base_ = 30 * 24
        border1s = [0, 12 * base_ - self.seq_len, (12 + 4) * base_ - self.seq_len]
        border2s = [12 * base_, (12 + 4) * base_, (12 + 8) * base_]
        border1 = border1s[self.set_type]
        border2 = border2s[self.set_type]
        print(self.set_type, f"{border1} -> {border2}")
        if self.features in ['M', 'MS']:
            # Every column except the first one.
            df_data = df_raw[df_raw.columns[1:]]
        else:  # 'S' (validated in __init__)
            df_data = df_raw[[self.target]]

        if self.scale:
            # Fit on the training range only, then transform every row.
            train_data = df_data.iloc[border1s[0]:border2s[0]]
            self.scaler.fit(train_data.values)
            data = self.scaler.transform(df_data.values)
        else:
            data = df_data.values

        print(f"df_raw.shape={df_raw.shape} data.shape={data.shape}")
        # Work on a standalone Series so nothing is assigned into a slice of
        # df_raw (which triggered SettingWithCopyWarning).
        dates = pd.to_datetime(df_raw['date'].iloc[border1:border2])
        if self.timeenc == 0:
            # Column order: month, day, weekday, hour. The .dt accessors replace
            # apply(lambda ..., 1) and drop(['date'], 1), whose positional
            # arguments are rejected or deprecated by pandas 2.x.
            data_stamp = np.stack(
                [dates.dt.month, dates.dt.day, dates.dt.weekday, dates.dt.hour],
                axis=1,
            )
        else:  # timeenc == 1 (validated in __init__)
            # The original comment describes these as values in [-0.5, 0.5];
            # that depends on the external time_features helper.
            data_stamp = time_features(pd.to_datetime(dates.values), freq=self.freq)
            data_stamp = data_stamp.transpose(1, 0)

        self.data_x = data[border1:border2]
        self.data_y = data[border1:border2]
        print("data.shape=", data.shape, "\nself.data_x.shape=", self.data_x.shape)
        self.data_stamp = data_stamp

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y, seq_x_mark, seq_y_mark)`` for window ``index``.

        ``seq_x`` covers ``seq_len`` rows starting at ``index``; ``seq_y``
        starts ``label_len`` rows before the end of ``seq_x`` and spans
        ``label_len + pred_len`` rows. The ``*_mark`` arrays are the matching
        rows of the time-stamp features.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_end - self.label_len
        r_end = r_begin + self.label_len + self.pred_len

        seq_x = self.data_x[s_begin:s_end]
        seq_y = self.data_y[r_begin:r_end]
        seq_x_mark = self.data_stamp[s_begin:s_end]
        seq_y_mark = self.data_stamp[r_begin:r_end]
        return seq_x, seq_y, seq_x_mark, seq_y_mark

    def __len__(self):
        """Number of windows for which both input and forecast rows exist."""
        return len(self.data_x) - self.seq_len - self.pred_len + 1

    def inverse_transform(self, data):
        """Undo the scaling applied by the fitted scaler."""
        return self.scaler.inverse_transform(data)

In [5]:
def simple_data_provider(args, flag):
    """Build the ETT-hour dataset for ``flag`` together with its DataLoader.

    The test split is served one sample at a time and unshuffled; every other
    split is shuffled and uses ``args.batch_size``. Incomplete final batches
    are always dropped.

    Returns:
        ``(data_set, data_loader)``
    """
    is_test = flag == 'test'
    # args.batch_size is only consulted for non-test splits.
    loader_batch_size = 1 if is_test else args.batch_size

    data_set = Dataset_ETT_hour(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        # 'timeF' embedding selects the time_features encoding (1); anything else uses 0.
        timeenc=1 if args.embed == 'timeF' else 0,
        freq=args.freq,
        seasonal_patterns=args.seasonal_patterns,
    )
    print(flag, len(data_set))

    data_loader = DataLoader(
        data_set,
        batch_size=loader_batch_size,
        shuffle=not is_test,
        num_workers=args.num_workers,
        drop_last=True,
    )
    return data_set, data_loader

# train

In [6]:


class EarlyStopping:
    """Track validation loss, checkpoint improvements and signal when to stop.

    Call the instance once per epoch with the validation loss. When the loss
    improves, the model's ``state_dict`` is written to
    ``<path>/checkpoint.pth``; otherwise an internal counter is incremented and
    ``early_stop`` is set once the counter reaches ``patience``.

    Args:
        patience: Number of consecutive non-improving calls before stopping.
        verbose: Print a message each time a checkpoint is saved.
        delta: Margin added to the best score in the improvement comparison.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss``; save ``model`` under ``path`` if it improved."""
        # Scores are negated losses, so a higher score is better.
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write the model's state dict to ``<path>/checkpoint.pth``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
        self.val_loss_min = val_loss


def vali(args, model, device, vali_data, vali_loader, criterion):
    """Return the mean of ``criterion`` over every batch of ``vali_loader``.

    The model is switched to eval mode for the pass and back to train mode
    afterwards (also when the pass raises). Gradients are disabled.

    Args:
        args: Namespace providing ``pred_len``, ``label_len``, ``use_amp``,
            ``output_attention`` and ``features``.
        model: Called as ``model(batch_x, batch_x_mark, dec_inp, batch_y_mark)``.
        device: Device the inputs are moved to.
        vali_data: Unused; kept so existing callers keep working.
        vali_loader: Iterable of ``(batch_x, batch_y, batch_x_mark, batch_y_mark)``.
        criterion: Loss callable applied to CPU ``(pred, true)`` tensors.

    Returns:
        The average batch loss, or NaN when the loader yields no batches.
    """

    def _forward(batch_x, batch_x_mark, dec_inp, batch_y_mark):
        # With output_attention the model returns a tuple; the forecast is first.
        result = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        return result[0] if args.output_attention else result

    # 'MS' keeps only the last channel; anything else keeps all channels.
    f_dim = -1 if args.features == 'MS' else 0

    total_loss = []
    model.eval()
    try:
        with torch.no_grad():
            for batch_x, batch_y, batch_x_mark, batch_y_mark in vali_loader:
                batch_x = batch_x.float().to(device)
                batch_y = batch_y.float()

                batch_x_mark = batch_x_mark.float().to(device)
                batch_y_mark = batch_y_mark.float().to(device)

                # decoder input: the first label_len target steps followed by
                # zeros for the pred_len steps to forecast
                dec_inp = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float()
                dec_inp = torch.cat([batch_y[:, :args.label_len, :], dec_inp], dim=1).float().to(device)

                if args.use_amp:
                    with torch.cuda.amp.autocast():
                        outputs = _forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    outputs = _forward(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                # The loss is computed on CPU, so the target is sliced directly
                # instead of being sent to the device and straight back.
                pred = outputs[:, -args.pred_len:, f_dim:].detach().cpu()
                true = batch_y[:, -args.pred_len:, f_dim:].detach().cpu()

                # Store plain floats rather than tensors.
                total_loss.append(criterion(pred, true).item())
    finally:
        model.train()

    if not total_loss:
        # No batches (e.g. split smaller than one batch with drop_last=True).
        return float('nan')
    return np.average(total_loss)


def adjust_learning_rate(optimizer, epoch, args):
    """Set the optimizer's learning rate for ``epoch`` according to ``args.lradj``.

    * ``'type1'``: ``args.learning_rate * 0.5 ** (epoch - 1)``, applied every epoch.
    * ``'type2'``: fixed values applied only at the listed epochs.

    Every param group receives the same rate. Nothing happens when the
    schedule has no entry for ``epoch``.

    Raises:
        ValueError: If ``args.lradj`` is not a known schedule (previously this
            surfaced as an UnboundLocalError).
    """
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    else:
        raise ValueError(f"unknown lradj schedule: {args.lradj!r}")

    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))


def train(model, args, setting):
    """Train ``model`` on the ETT data described by ``args`` and return it.

    Uses Adam with MSE loss. After every epoch the validation and test losses
    are computed with ``vali``; ``EarlyStopping`` checkpoints the model into
    ``<args.checkpoints>/<setting>`` when validation loss improves and may end
    training early. The checkpointed weights are loaded back before returning.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = model.to(device)

    train_data, train_loader = simple_data_provider(args, flag='train')
    vali_data, vali_loader = simple_data_provider(args, flag='val')
    test_data, test_loader = simple_data_provider(args, flag='test')

    ckpt_dir = os.path.join(args.checkpoints, setting)
    if not os.path.exists(ckpt_dir):
        os.makedirs(ckpt_dir)

    last_report_time = time.time()
    train_steps = len(train_loader)

    early_stopping = EarlyStopping(patience=args.patience, verbose=True)
    model_optim = Adam(model.parameters(), lr=args.learning_rate)
    criterion = nn.MSELoss()
    if args.use_amp:
        scaler = torch.cuda.amp.GradScaler()

    def forecast_loss(x, y, x_mark, y_mark, dec_inp):
        # Forward pass, then compare only the last pred_len steps.
        preds = model(x, x_mark, dec_inp, y_mark)
        if args.output_attention:
            preds = preds[0]
        f_dim = -1 if args.features == 'MS' else 0
        preds = preds[:, -args.pred_len:, f_dim:]
        target = y[:, -args.pred_len:, f_dim:].to(device)
        return criterion(preds, target)

    for epoch in range(args.train_epochs):
        steps_since_report = 0
        batch_losses = []

        model.train()
        epoch_start = time.time()
        for step, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader, start=1):
            steps_since_report += 1
            model_optim.zero_grad()

            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)
            batch_x_mark = batch_x_mark.float().to(device)
            batch_y_mark = batch_y_mark.float().to(device)

            # decoder input: known label_len steps followed by zeros for the horizon
            horizon_zeros = torch.zeros_like(batch_y[:, -args.pred_len:, :]).float()
            dec_inp = torch.cat([batch_y[:, :args.label_len, :], horizon_zeros], dim=1).float().to(device)

            # encoder - decoder
            if args.use_amp:
                with torch.cuda.amp.autocast():
                    loss = forecast_loss(batch_x, batch_y, batch_x_mark, batch_y_mark, dec_inp)
            else:
                loss = forecast_loss(batch_x, batch_y, batch_x_mark, batch_y_mark, dec_inp)
            batch_losses.append(loss.item())

            if step % 100 == 0:
                print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(step, epoch + 1, loss.item()))
                speed = (time.time() - last_report_time) / steps_since_report
                left_time = speed * ((args.train_epochs - epoch) * train_steps - (step - 1))
                print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                steps_since_report = 0
                last_report_time = time.time()

            if args.use_amp:
                scaler.scale(loss).backward()
                scaler.step(model_optim)
                scaler.update()
            else:
                loss.backward()
                model_optim.step()

        print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_start))
        epoch_train_loss = np.average(batch_losses)
        vali_loss = vali(args, model, device, vali_data, vali_loader, criterion)
        test_loss = vali(args, model, device, test_data, test_loader, criterion)

        print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
            epoch + 1, train_steps, epoch_train_loss, vali_loss, test_loss))
        early_stopping(vali_loss, model, ckpt_dir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

        adjust_learning_rate(model_optim, epoch + 1, args)

    # Reload the checkpoint written by EarlyStopping.
    best_model_path = ckpt_dir + '/' + 'checkpoint.pth'
    model.load_state_dict(torch.load(best_model_path))
    return model

In [7]:
# Experiment configuration consumed by the data provider, the model and train().
args = Namespace(
    task_name='long_term_forecast' ,
    is_training=1 ,
    root_path=data_dir,
    data_path='ETTh1.csv',
    # The id is embedded in the checkpoint directory name below. It previously
    # read 'ETTh1_96_336' although pred_len is 96, which mislabelled the run;
    # keep it in sync with seq_len / pred_len.
    model_id='ETTh1_96_96' ,
    model='TimesNet',
    data='ETTh1' ,
    features='M' ,
    seq_len=96 ,
    label_len=48 ,
    pred_len=96 ,
    e_layers=2 ,
    d_layers=1 ,
    factor=3 ,
    enc_in=7 ,
    dec_in=7 ,
    c_out=7 ,
    n_heads=8,
    d_model=16 ,
    embed='timeF',
    d_ff=32 ,
    des='Exp' ,
    distil=True,
    itr=1 ,
    num_kernels=6,
    top_k=5,
    freq='h',
    dropout=0.1,
    batch_size=32,
    target='OT',
    seasonal_patterns="Monthly",
    num_workers=10,
    checkpoints='./checkpoints/',
    patience=7,
    learning_rate=0.0001,
    use_amp=False,
    train_epochs=10,
    output_attention=False,
    lradj='type1'
)

# Name of the checkpoint sub-directory; the trailing 0 is a fixed final field.
setting = '{}_{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(
                args.task_name,
                args.model_id,
                args.model,
                args.data,
                args.features,
                args.seq_len,
                args.label_len,
                args.pred_len,
                args.d_model,
                args.n_heads,
                args.e_layers,
                args.d_layers,
                args.d_ff,
                args.factor,
                args.embed,
                args.distil,
                args.des, 0)

In [8]:
# Seed the RNGs before the model is built so that weight initialisation and
# the shuffled training order do not change from one run to the next.
# The value itself is arbitrary.
torch.manual_seed(2021)
np.random.seed(2021)

model_ = timesNetModel(args)
# train() returns the model with the checkpointed weights loaded; leaving the
# call as the last expression displays the model's repr as the cell output.
train(model_, args, setting)

0 0 -> 8640
df_raw.shape=(17420, 8) data.shape=(17420, 7)


data.shape= (17420, 7) 
self.data_x.shape= (8640, 7)
train 8449
1 8544 -> 11520
df_raw.shape=(17420, 8) data.shape=(17420, 7)
data.shape= (17420, 7) 
self.data_x.shape= (2976, 7)
val 2785
2 11424 -> 14400
df_raw.shape=(17420, 8) data.shape=(17420, 7)
data.shape= (17420, 7) 
self.data_x.shape= (2976, 7)
test 2785
	iters: 100, epoch: 1 | loss: 0.5475962
	speed: 0.0449s/iter; left time: 114.0182s
	iters: 200, epoch: 1 | loss: 0.4669370
	speed: 0.0319s/iter; left time: 77.7779s
Epoch: 1 cost time: 9.265677452087402
Epoch: 1, Steps: 264 | Train Loss: 0.4885100 Vali Loss: 0.8533527 Test Loss: 0.4471019
Validation loss decreased (inf --> 0.853353).  Saving model ...
Updating learning rate to 0.0001
	iters: 100, epoch: 2 | loss: 0.3565480
	speed: 0.2424s/iter; left time: 551.9603s
	iters: 200, epoch: 2 | loss: 0.3541487
	speed: 0.0297s/iter; left time: 64.7493s
Epoch: 2 cost time: 7.9857823848724365
Epoch: 2, Steps: 264 | Train Loss: 0.4009555 Vali Loss: 0.7987773 Test Loss: 0.4295301
Validati

Model(
  (model): ModuleList(
    (0-1): 2 x TimesBlock(
      (conv): Sequential(
        (0): Inception_Block_V1(
          (kernels): ModuleList(
            (0): Conv2d(16, 32, kernel_size=(1, 1), stride=(1, 1))
            (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
            (3): Conv2d(16, 32, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
            (4): Conv2d(16, 32, kernel_size=(9, 9), stride=(1, 1), padding=(4, 4))
            (5): Conv2d(16, 32, kernel_size=(11, 11), stride=(1, 1), padding=(5, 5))
          )
        )
        (1): GELU(approximate='none')
        (2): Inception_Block_V1(
          (kernels): ModuleList(
            (0): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1))
            (1): Conv2d(32, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
            (2): Conv2d(32, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        