In [2]:
# Shell escape: runs `sbatch eval_simmtm.job` from the notebook's working directory
# (presumably submits the SimMTM evaluation job to a Slurm scheduler — TODO confirm).
!sbatch eval_simmtm.job

In [2]:
import pandas as pd

from utils.path_utils import project_root
import os

import torch

# Both fine-tuning text files sit in the same sub-directory of the project root.
_FINETUNE_PARTS = ('data', 'tl_datasets', 'finetune')

lengths_path = os.path.join(project_root(), *_FINETUNE_PARTS, 'lengths.txt')
sepsis_path = os.path.join(project_root(), *_FINETUNE_PARTS, 'is_sepsis.txt')


In [3]:
# Load the saved fine-tuning file and keep only the entry stored under 'samples'.
finetune_pt_path = os.path.join(project_root(), 'data', 'tl_datasets', 'finetune', 'finetune.pt')
ft_files = torch.load(finetune_pt_path)['samples']


In [5]:
# Sanity check: show the container type and the shape of the loaded 'samples' entry.
type(ft_files), ft_files.shape

In [9]:
# Peek at the header-less file at `sepsis_path` as a squeezed NumPy array.
pd.read_csv(sepsis_path, header=None).to_numpy().squeeze()

In [8]:
# Peek at the header-less file at `lengths_path` as a squeezed NumPy array.
pd.read_csv(lengths_path, header=None).to_numpy().squeeze()

In [15]:
from torch.utils.data import Dataset


class FinetuneDataset(Dataset):
    """Map-style dataset that pairs each sample with its label.

    Args:
        data_tensor: Container of samples that exposes ``.shape`` and integer
            indexing along its first dimension (e.g. a ``torch.Tensor``).
        labels: One label per sample. May be a scalar, a sequence, a NumPy
            array or a tensor; it is copied into a ``torch.float32`` tensor.

    Raises:
        ValueError: If the number of labels differs from the number of samples.
    """

    def __init__(self, data_tensor, labels):
        self.data = data_tensor

        if isinstance(labels, torch.Tensor):
            # torch.tensor(<tensor>) emits a copy-construct UserWarning; make the
            # same detached copy explicitly instead.
            label_tensor = labels.detach().clone().to(torch.float32)
        else:
            label_tensor = torch.tensor(labels, dtype=torch.float32)

        # `.squeeze()` on a one-row label file yields a 0-d value, which cannot be
        # indexed in __getitem__; promote it to a length-1 vector.
        self.labels = torch.atleast_1d(label_tensor)

        n_samples, n_labels = self.data.shape[0], self.labels.shape[0]
        if n_samples != n_labels:
            # Fail at construction time rather than with an IndexError (or silently
            # ignored labels) in the middle of an epoch.
            raise ValueError(
                f"Number of labels ({n_labels}) does not match number of samples ({n_samples})"
            )

    def __len__(self):
        """Return the number of samples (size of the first data dimension)."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        sample = self.data[idx]
        label = self.labels[idx]
        return sample, label
    

In [27]:
from torch.utils.data import DataLoader

# Labels come from the header-less file at `sepsis_path`, squeezed to drop size-1 axes.
sepsis_labels = pd.read_csv(sepsis_path, header=None).values.squeeze()

# Pair the loaded samples with those labels ...
dataset = FinetuneDataset(data_tensor=ft_files, labels=sepsis_labels)

# ... and serve them in shuffled mini-batches of 32.
dataloader = DataLoader(dataset=dataset, batch_size=32, shuffle=True)


In [28]:
# Pull a single batch for inspection: `i` holds the samples and `j` the labels,
# in the order FinetuneDataset.__getitem__ returns them.
# next(iter(...)) raises StopIteration on an empty loader, whereas a
# `for ...: break` loop would silently leave `i`/`j` unbound or stale from an
# earlier run of this cell.
i, j = next(iter(dataloader))

In [29]:
# Display the second element of the batch unpacked from `dataloader`
# (the labels, given that FinetuneDataset.__getitem__ returns `(sample, label)`).
j

In [37]:
import tqdm

# Pretraining artifacts live directly under <project_root>/data/tl_datasets.
_TL_PARTS = ('data', 'tl_datasets')

# NOTE: unpickling can execute arbitrary code; only load pickles from a trusted source.
pt_pickle = pd.read_pickle(os.path.join(project_root(), *_TL_PARTS, 'final_dataset_pretrain_A.pickle'))

# Materialise every item yielded by iterating the unpickled object, with a progress bar.
pt_files = list(tqdm.tqdm(pt_pickle, desc='Preparing pretraining dataset', total=len(pt_pickle)))

# Header-less text files, kept as 2-D NumPy arrays (no squeeze here).
pt_lengths = pd.read_csv(os.path.join(project_root(), *_TL_PARTS, 'lengths_pretrain_A.txt'), header=None).to_numpy()
pt_sepsis = pd.read_csv(os.path.join(project_root(), *_TL_PARTS, 'is_sepsis_pretrain_A.txt'), header=None).to_numpy()


In [39]:
# Show the item counts of the pretraining samples, lengths and sepsis labels side by side
# (presumably these should all be equal — TODO confirm and turn into an assert).
len(pt_files), len(pt_lengths), len(pt_sepsis)

In [44]:
import torch

def _finetune_path(filename):
    """Return the path of `filename` under <project_root>/data/tl_datasets/finetune."""
    return os.path.join(project_root(), 'data', 'tl_datasets', 'finetune', filename)


# (Re)load the fine-tuning samples; the two text files are kept as raw DataFrames here.
ft_files = torch.load(_finetune_path('finetune.pt'))['samples']
ft_lengths = pd.read_csv(_finetune_path('lengths.txt'), header=None)
ft_sepsis = pd.read_csv(_finetune_path('is_sepsis.txt'), header=None)


In [46]:
# Shape of the first fine-tuning sample.
ft_files[0].shape

In [49]:
# Peek at the header-less pretraining lengths file as a squeezed NumPy array.
pretrain_lengths_path = os.path.join(project_root(), 'data', 'tl_datasets', 'lengths_pretrain_A.txt')
pd.read_csv(pretrain_lengths_path, header=None).to_numpy().squeeze()

# SimMTM GTN Backbone Summary

In [2]:
from models.simmtm.model import TFC
from models.simmtm.config import Config
from utils.pretrain_utils.get_args import get_args
from torchinfo import summary 

# Gathering args and configs
# get_args() returns a pair; only the first element is used below
# (the second is presumably the list of unrecognised arguments — TODO confirm in get_args).
args, unknown = get_args()
# `Config` is the one imported from models.simmtm.config at the top of this cell.
config = Config()
# Build the SimMTM TFC model from that config and the parsed args.
model = TFC(configs=config, args=args)

# Last expression of the cell: show the torchinfo summary of the model.
summary(model)


# GTN Backbone Summary

In [5]:
from models.gtn.config import Config
from models.gtn.transformer import Transformer

# NOTE: `Config` here is the GTN config imported at the top of this cell; it rebinds the
# name previously imported from models.simmtm.config. `summary` comes from the
# torchinfo import in the SimMTM cell, so that cell must have been run first.
config = Config()

# Transformer keyword arguments, read from the config in the constructor's order.
_TRANSFORMER_FIELDS = (
    'd_model', 'd_input', 'd_channel', 'd_output', 'd_hidden',
    'q', 'v', 'h', 'N', 'device', 'dropout', 'pe', 'mask',
)
transformer_kwargs = {field: getattr(config, field) for field in _TRANSFORMER_FIELDS}

# Model
model = Transformer(**transformer_kwargs)

summary(model)
