In [1]:
from minerva.models.ssl.tfc import TFC_Model
from minerva.models.nets.tfc import TFC_Backbone
from minerva.models.nets.tnc import TSEncoder
import numpy as np
import pandas as pd
import os
import warnings
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler
from utils.timefeatures import time_features
import warnings
import lightning as L
from torch.utils.data import DataLoader
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger
from torch.nn import TransformerEncoder, TransformerEncoderLayer


# Silence every Python warning for the remainder of the session.
warnings.simplefilter("ignore")


# Uncomment to restrict which CUDA device the process can see:
# os.environ["CUDA_VISIBLE_DEVICES"] = "1"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Experiment configuration shared by the model and the data loaders.
batch_size = 128
input_channels = 1
TS_length = 1024
num_classes = 1  # not used by the model below, which is built with num_classes=None


def _make_transformer_encoder():
    """Build one 2-layer Transformer encoder whose model width equals ``TS_length``."""
    encoder_layer = TransformerEncoderLayer(
        d_model=TS_length, nhead=2, dim_feedforward=2 * 128
    )
    return TransformerEncoder(encoder_layer, num_layers=2)


# Earlier variants of this cell (kept here only as a note):
#   * TFC_Model with its default backbone:
#       TFC_Model(input_channels=input_channels, TS_length=TS_length,
#                 batch_1_correction=True, batch_size=batch_size)
#   * the same TFC_Backbone with, for both encoders,
#       TSEncoder(input_dims=input_channels, output_dims=64, hidden_dims=64,
#                 depth=10, permute=True)

# The time encoder is created first, then the frequency encoder, so parameter
# initialisation happens in the same order as when both were built inline.
time_encoder = _make_transformer_encoder()
frequency_encoder = _make_transformer_encoder()

backbone = TFC_Backbone(
    input_channels=input_channels,
    TS_length=TS_length,
    time_encoder=time_encoder,
    frequency_encoder=frequency_encoder,
)

model = TFC_Model(
    input_channels=input_channels,
    batch_size=batch_size,
    TS_length=TS_length,
    num_classes=None,
    batch_1_correction=True,
    backbone=backbone,
)

model

TFC_Model(
  (backbone): TFC_Backbone(
    (time_encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=1024, out_features=1024, bias=True)
          )
          (linear1): Linear(in_features=1024, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=256, out_features=1024, bias=True)
          (norm1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (frequency_encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLine

In [3]:
class Dataset_pretrain(Dataset):
    """Sliding-window pretraining dataset built from several CSV time series.

    Every name in ``pt_data`` (separated by ``_``) is read from
    ``<root_path>/<name>.csv``. Each file is cut to the split selected by
    ``flag``, optionally standardised, flattened to a single column and
    appended to the others. ``__getitem__`` then returns two ``seq_len``-long
    windows of that concatenated column, the second one shifted forward by
    ``stride`` rows.

    Parameters
    ----------
    pt_data : str
        Underscore-separated dataset names; each must exist as
        ``<name>.csv`` inside ``root_path`` and contain a ``date`` column.
    patch_len : int
        Used only to size the dataset (see ``__len__``).
    stride : int
        Row offset between the input window and the target window.
    root_path : str
        Directory holding the CSV files.
    flag : {"train", "val", "test"}
        Which split to load.
    size : sequence of int
        ``[seq_len, label_len, pred_len]``. Only ``seq_len`` is used by the
        code in this class; the other two are stored as attributes.
    features : {"M", "MS", "S"}
        ``"M"``/``"MS"`` use every column after the first one; ``"S"`` uses
        only the ``target`` column.
    data_path : str
        Stored as an attribute but not read by this class.
    target : str
        Column used when ``features == "S"``.
    scale : bool
        If true, standardise each dataset with statistics of its training rows.
    timeenc : {0, 1}
        ``0`` builds month/day/weekday/hour stamps with pandas, ``1`` calls
        ``time_features``.
    freq : str
        Forwarded to ``time_features`` when ``timeenc == 1``.
    percent : int
        Percentage of the training windows to keep (training split only).

    Raises
    ------
    NotImplementedError
        If ``size`` is ``None``.
    ValueError
        If ``features`` or ``timeenc`` is not one of the supported values.
    """

    def __init__(
        self,
        pt_data="ETTh1_ETTm1_ETTh2_ETTm2_weather_traffic_electricity_illness",
        patch_len=16,
        stride=16,
        root_path="/workspaces/HIAAC-KR-Dev-Container/workspace/aLLM4TS/dataset/",
        flag="train",
        size=[1024, 0, 1024],
        features="M",
        data_path="ETTh1.csv",
        target="OT",
        scale=True,
        timeenc=1,
        freq="h",
        percent=100,
    ):
        if size is None:
            raise NotImplementedError(
                "`size` must be given as [seq_len, label_len, pred_len]"
            )
        # Fail early with a clear message instead of a NameError deep inside
        # __read_data__ when an unsupported mode is requested.
        if features not in ("M", "MS", "S"):
            raise ValueError(
                f"features must be 'M', 'MS' or 'S', got {features!r}"
            )
        if timeenc not in (0, 1):
            raise ValueError(f"timeenc must be 0 or 1, got {timeenc!r}")
        assert flag in ["train", "test", "val"]

        self.pt_data = pt_data
        self.patch_len = patch_len
        self.stride = stride
        self.seq_len = size[0]
        self.label_len = size[1]
        self.pred_len = size[2]

        type_map = {"train": 0, "val": 1, "test": 2}
        self.set_type = type_map[flag]

        self.features = features
        self.target = target
        self.scale = scale
        self.timeenc = timeenc
        self.freq = freq
        self.percent = percent

        self.root_path = root_path
        self.data_path = data_path
        self.__read_data__()

    def _split_borders(self, pt_dataset, dataset_len):
        """Return ``(border1s, border2s)``: start/end rows of train, val, test.

        Datasets whose name contains ``"ETTh"`` or ``"ETTm"`` use fixed
        12/4/4 blocks; every other dataset is split 70 % / 10 % / 20 %.
        Validation and test start ``seq_len`` rows early so their first
        window ends right after the previous split.
        """
        if "ETTh" in pt_dataset:
            block = 30 * 24  # presumably one 30-day month of hourly rows
        elif "ETTm" in pt_dataset:
            block = 30 * 24 * 4  # presumably one 30-day month of 15-min rows
        else:
            block = None

        if block is not None:
            train_end = 12 * block
            val_end = train_end + 4 * block
            test_end = train_end + 8 * block
        else:
            train_end = int(dataset_len * 0.7)
            num_test = int(dataset_len * 0.2)
            val_end = dataset_len - num_test
            test_end = dataset_len

        border1s = [0, train_end - self.seq_len, val_end - self.seq_len]
        border2s = [train_end, val_end, test_end]
        return border1s, border2s

    def __read_data__(self):
        """Load every dataset in ``pt_data`` into ``self.data`` / ``self.data_stamp``."""
        # NOTE: the scaler is refit for each dataset in the loop below, so
        # after loading it only holds the statistics of the last one.
        self.scaler = StandardScaler()

        data_list = []
        data_stamp_list = []
        for pt_dataset in self.pt_data.split("_"):
            df_raw = pd.read_csv(
                os.path.join(self.root_path, f"{pt_dataset}.csv")
            )
            border1s, border2s = self._split_borders(pt_dataset, len(df_raw))
            border1 = border1s[self.set_type]
            border2 = border2s[self.set_type]
            if self.set_type == 0:
                # Keep only `percent` % of the training rows beyond the
                # first full window.
                border2 = (
                    border2 - self.seq_len
                ) * self.percent // 100 + self.seq_len

            if self.features == "S":
                # Previously `cols_data` was left undefined in this mode and
                # the reshape below raised NameError.
                cols_data = [self.target]
            else:
                # "M" / "MS": every column after the first one
                # (assumed to be the date column - TODO confirm).
                cols_data = df_raw.columns[1:]
            n_cols = len(cols_data)
            values = df_raw[cols_data].values

            if self.scale:
                # Fit on the training rows only, then scale the whole file.
                self.scaler.fit(values[border1s[0] : border2s[0]])
                data = self.scaler.transform(values)
            else:
                data = values

            data = data[border1:border2]
            # NOTE(review): a row-major reshape of a (rows, columns) array
            # interleaves the columns row by row, whereas the stamps below
            # are laid out as whole copies one after another. Behaviour is
            # kept as-is; confirm whether a transpose was intended.
            data = data.reshape((len(data) * n_cols, 1))

            dates = pd.to_datetime(df_raw["date"][border1:border2])
            if self.timeenc == 0:
                data_stamp = pd.DataFrame(
                    {
                        "month": dates.dt.month,
                        "day": dates.dt.day,
                        "weekday": dates.dt.weekday,
                        "hour": dates.dt.hour,
                    }
                ).values
            else:
                # time_features output is transposed so that rows come first
                # (assumes it returns (n_features, n_rows) - TODO confirm).
                data_stamp = time_features(
                    pd.to_datetime(dates.values), freq=self.freq
                )
                data_stamp = data_stamp.transpose(1, 0)

            data_list.append(data)
            # One full copy of the stamps per data column, stacked along
            # axis 0; the stamp width is taken from the array instead of
            # being hard-coded to 4.
            data_stamp_list.append(np.tile(data_stamp, (n_cols, 1)))

        self.data = np.concatenate(data_list, axis=0)
        self.data_stamp = np.concatenate(data_stamp_list, axis=0)

    def __getitem__(self, index):
        """Return ``(seq_x, seq_y)``, each of shape ``(1, seq_len)``.

        ``seq_x`` starts at row ``index``; ``seq_y`` is the same-length
        window starting ``stride`` rows later. Time stamps are stored in
        ``self.data_stamp`` but are not returned.
        """
        s_begin = index
        s_end = s_begin + self.seq_len
        r_begin = s_begin + self.stride
        r_end = s_end + self.stride

        seq_x = self.data[s_begin:s_end].swapaxes(0, 1)
        seq_y = self.data[r_begin:r_end].swapaxes(0, 1)
        return seq_x, seq_y

    def __len__(self):
        """Number of start indices for which both windows are full length.

        The target window ends at ``index + seq_len + stride``, so the
        offset must cover ``stride`` as well as ``patch_len``; otherwise the
        last targets are truncated when ``stride > patch_len``. Never
        negative.
        """
        offset = max(self.patch_len, self.stride)
        return max(0, len(self.data) - self.seq_len - offset + 1)

    def inverse_transform(self, data):
        """Undo the standardisation using ``self.scaler``.

        The scaler is refit for every dataset while loading, so this applies
        the statistics of the last dataset listed in ``pt_data``.
        """
        return self.scaler.inverse_transform(data)

In [4]:
train_dataset = Dataset_pretrain(flag="train")
val_dataset = Dataset_pretrain(flag="val")

# Settings shared by both loaders. The trailing partial batch is dropped so
# every batch holds exactly `batch_size` samples.
loader_kwargs = dict(batch_size=batch_size, num_workers=0, drop_last=True)

# Consecutive dataset indices are windows that differ by a single row, so an
# unshuffled batch would contain `batch_size` almost identical samples.
# Shuffling the training set avoids feeding such batches to the in-batch
# contrastive loss; validation order is left untouched.
train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)

val_dataloader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [5]:
# Checkpoint files are named after the epoch; the checkpoint callback runs every epoch.
checkpoint_callback = ModelCheckpoint(
    filename="tfc-{epoch:02d}", every_n_epochs=1
)
# Stop training once "val_loss" has not improved for 50 validation checks.
early_stopping_callback = EarlyStopping(
    monitor="val_loss", patience=50, verbose=True
)
callbacks = [checkpoint_callback, early_stopping_callback]

# Metrics are written as CSV under logs/tfc-transformer/.
logger = CSVLogger(save_dir="logs", name="tfc-transformer")

trainer_options = dict(
    max_epochs=100,
    accelerator="gpu",
    devices=1,
    callbacks=callbacks,
    logger=logger,
)
trainer = L.Trainer(**trainer_options)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
# Start pretraining. To inspect the shape of a single batch beforehand, pull
# one with `next(iter(train_dataloader))`.
trainer.fit(
    model=model,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type            | Params | Mode 
-----------------------------------------------------
0 | backbone | TFC_Backbone    | 19.5 M | train
1 | loss_fn  | NTXentLoss_poly | 0      | train
-----------------------------------------------------
19.5 M    Trainable params
0         Non-trainable params
19.5 M    Total params
78.015    Total estimated model params size (MB)
63        Modules in train mode
0         Modules in eval mode


Epoch 0:   0%|          | 583/139677 [01:30<6:01:01,  6.42it/s, v_num=4]   

: 