In [1]:
from minerva.models.ssl.tfc import TFC_Model
from minerva.models.nets.tfc import TFC_Backbone
import warnings
import warnings
import lightning as L
from torch.utils.data import DataLoader, ConcatDataset
from lightning.pytorch.callbacks import ModelCheckpoint
from lightning.pytorch.loggers import CSVLogger
from minerva.data.datasets.series_dataset import MultiModalSeriesCSVDataset
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
input_channels = 6
batch_size = 128
TS_length = 60

In [2]:
model = TFC_Model(
    input_channels=input_channels,
    batch_size=batch_size,
    TS_length=TS_length,
    num_classes=None,
    batch_1_correction=True,
    backbone=TFC_Backbone(
        input_channels=input_channels,
        TS_length=TS_length,
        time_encoder=TransformerEncoder(
            TransformerEncoderLayer(
                d_model=TS_length, dim_feedforward=2 * 128, nhead=2
            ),
            num_layers=2,
        ),
        frequency_encoder=TransformerEncoder(
            TransformerEncoderLayer(
                d_model=TS_length, dim_feedforward=2 * 128, nhead=2
            ),
            num_layers=2,
        ),
    ),
)

model



TFC_Model(
  (backbone): TFC_Backbone(
    (time_encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=60, out_features=60, bias=True)
          )
          (linear1): Linear(in_features=60, out_features=256, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=256, out_features=60, bias=True)
          (norm1): LayerNorm((60,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((60,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (frequency_encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_featur

In [3]:
root_dir = Path("/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/")
datasets = []


for i, f in enumerate(root_dir.glob("es_full.*.csv")):   
    train_dataset = MultiModalSeriesCSVDataset(
        f,
        feature_prefixes=[
            "accel-x",
            "accel-y",
            "accel-z",
            "gyro-x",
            "gyro-y",
            "gyro-z",
        ],
        label="standard activity code",
        features_as_channels=True,
    )
    
    datasets.append(train_dataset)
    print(f"Dataset {i} has {len(train_dataset)} samples! ({f})")
    
train_dataset = ConcatDataset(datasets)
train_dataset

Dataset 0 has 250000 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.160000.csv)
Dataset 1 has 250000 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.340000.csv)
Dataset 2 has 160950 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.10000.csv)
Dataset 3 has 250000 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.320000.csv)
Dataset 4 has 238525 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.300000.csv)
Dataset 5 has 119575 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.180000.csv)
Dataset 6 has 250000 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.90000.csv)
Dataset 7 has 55925 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.110000.csv)
Dataset 8 has 250000 samples! (/workspaces/HIAAC-KR-Dev-Container/some_datasets/ES_Raw/es_full.200000.csv)
Dataset 9 has 250000 samples! (/workspac

<torch.utils.data.dataset.ConcatDataset at 0x7d3615b2d210>

In [9]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x7d341ab3c760>

In [10]:
batch_x, batch_y = next(iter(train_dataloader))
batch_x.shape, batch_y.shape

(torch.Size([128, 6, 60]), torch.Size([128]))

In [11]:
callbacks = [
    ModelCheckpoint(
        filename="tfc-{epoch:02d}",
        every_n_epochs=1,
    ),
]

logger = CSVLogger(save_dir="./checkpoints/tfc-transformer-encoder/aLLM4TS-E/ES", name="tfc-transformer-encoder", version="final")

trainer = L.Trainer(
    max_epochs=100,
    accelerator="gpu",
    devices=1,
    callbacks=callbacks,
    logger=logger,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
# batch_x, batch_y = next(iter(train_dataloader))
# batch_x.shape, batch_y.shape

trainer.fit(
    model, train_dataloaders=train_dataloader
)

/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type            | Params | Mode 
-----------------------------------------------------
0 | backbone | TFC_Backbone    | 435 K  | train
1 | loss_fn  | NTXentLoss_poly | 0      | train
-----------------------------------------------------
435 K     Trainable params
0         Non-trainable params
435 K     Total params
1.741     Total estimated model params size (MB)
63        Modules in train mode
0         Modules in eval mode
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=23` in the `DataLoader` to improve performance.


Epoch 0:   0%|          | 0/16210 [00:00<?, ?it/s] 

Epoch 0:   5%|▌         | 839/16210 [01:45<32:18,  7.93it/s, v_num=3] 

In [None]:
print("Done!")