In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import os
import numpy as np

import torch.fft as fft

from utils.path_utils import project_root
import os

from models.tfc.config import Config
from models.tfc.dataloader import data_generator



In [2]:

# Instantiate the TFC configuration; it is handed to data_generator below.
configs = Config()

# Gather datasets: both serialized sets live under <project_root>/data/tl_datasets,
# one sub-folder per split.
tl_datasets = os.path.join(project_root(), 'data', 'tl_datasets')
# NOTE(review): depending on the torch version / weights_only setting, torch.load
# may unpickle arbitrary objects — only load .pt files from a trusted source.
pretrain = torch.load(os.path.join(tl_datasets, 'pretrain', 'pretrain.pt'))
finetune = torch.load(os.path.join(tl_datasets, 'finetune', 'finetune.pt'))

# Build the pre-training and fine-tuning loaders from the two loaded datasets.
# Presumably these are torch DataLoader objects — confirm in models/tfc/dataloader.py.
train_loader, finetune_loader = data_generator(pretrain, finetune, configs)


In [4]:
from torchinfo import summary 
from models.tfc.model import TFC

# Create a fresh configuration object (rebinds the notebook-level name `configs`).
configs = Config()


# Build a TFC model from the configuration and show torchinfo's per-layer
# parameter-count table as the cell output.
summary(TFC(configs))



Layer (type:depth-idx)                                            Param #
TFC                                                               --
├─TransformerEncoder: 1-1                                         --
│    └─ModuleList: 2-1                                            --
│    │    └─TransformerEncoderLayer: 3-1                          906,864
│    │    └─TransformerEncoderLayer: 3-2                          906,864
├─Sequential: 1-2                                                 --
│    └─Linear: 2-2                                                86,272
│    └─BatchNorm1d: 2-3                                           512
│    └─ReLU: 2-4                                                  --
│    └─Linear: 2-5                                                32,896
├─TransformerEncoder: 1-3                                         --
│    └─ModuleList: 2-6                                            --
│    │    └─TransformerEncoderLayer: 3-3                          906,864
│    

In [10]:
import numpy as np
# def T(x, y):
#     return (100) / (pow(x+1, 2) + pow(y+1, 2))

# def T(x, y):
#     return np.log(pow(x, 2) + pow(y, 2))

def T(x, y):
    """Evaluate the test surface ``sin(x + 3*y)``.

    Args:
        x: First coordinate; any value that supports ``x + 3*y`` and is
            accepted by ``np.sin`` (for example a float or a NumPy array).
        y: Second coordinate, same requirements as ``x``.

    Returns:
        The result of ``np.sin`` applied to ``x + 3*y``.
    """
    phase = x + 3*y
    return np.sin(phase)


In [2]:
# from matplotlib import pyplot as plt
# 
# import random
# 
# x = [random.uniform(0, 1) for _ in range(1000)]
# y = [random.uniform(0, 1) for _ in range(1000)]
# 
# ans = [T(i, j) for i, j in zip(x, y)]
# 
# # Create a scatter plot
# plt.figure(figsize=(10, 6))
# plt.scatter(x, y, c=ans, cmap='viridis', alpha=0.5)
# 
# plt.colorbar(label='T(x, y)')
# plt.title('Scatter plot of T(x, y) values')
# plt.xlabel('x values')
# plt.ylabel('y values')
# 
# plt.grid(True)
# plt.show()


In [1]:
# import random
# from matplotlib import pyplot as plt
# 
# # Define the function T
# def T(x, y):
#     return np.sin(x + 3*y)
# 
# # Generate random x and y values
# x = [random.uniform(0, 1) for _ in range(1000)]
# y = [random.uniform(0, 1) for _ in range(1000)]
# 
# # Calculate T values for the scatter plot
# ans = [T(i, j) for i, j in zip(x, y)]
# 
# # Create a scatter plot
# plt.figure(figsize=(10, 6))
# plt.scatter(x, y, c=ans, cmap='viridis', alpha=0.5)
# 
# plt.colorbar(label='T(x, y)')
# plt.title('Scatter Plot of T(x, y) Values')
# plt.xlabel('x values')
# plt.ylabel('y values')
# plt.grid(True)
# plt.show()


In [2]:
from models.tfc.config import Config
from models.tfc.model import TFC

# Instantiate the TFC model from a default-constructed Config and keep it in the
# notebook-level name `model` so that other cells can inspect it.
model = TFC(Config())


In [3]:
# Bare expression: the cell output is the model's repr (its nested module tree).
model

TFC(
  (encoder_list_1_t): ModuleList(
    (0-7): 8 x Encoder(
      (MHA): MultiHeadAttention(
        (W_q): Linear(in_features=512, out_features=64, bias=True)
        (W_k): Linear(in_features=512, out_features=64, bias=True)
        (W_v): Linear(in_features=512, out_features=64, bias=True)
        (W_o): Linear(in_features=64, out_features=512, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
      )
      (feedforward): FeedForward(
        (linear_1): Linear(in_features=512, out_features=1024, bias=True)
        (linear_2): Linear(in_features=1024, out_features=512, bias=True)
      )
      (dropout): Dropout(p=0.2, inplace=False)
      (layerNormal_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (layerNormal_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    )
  )
  (encoder_list_1_f): ModuleList(
    (0-7): 8 x Encoder(
      (MHA): MultiHeadAttention(
        (W_q): Linear(in_features=512, out_features=64, bias=True)
        (W_k): Linea

In [8]:
from models.tfc.dataloader import Load_Dataset
from models.tfc.config import Config
import torch

from torch.utils.data import DataLoader
import os

from utils.path_utils import project_root

# Locate the dataset folder and load the serialized pre-training set.
tl_datasets = os.path.join(project_root(), 'data', 'tl_datasets')
# NOTE(review): depending on the torch version / weights_only setting, torch.load
# may unpickle arbitrary objects — only load .pt files from a trusted source.
pretrain = torch.load(os.path.join(tl_datasets, 'pretrain', 'pretrain.pt')) 

# Wrap the loaded data in the project's Dataset class in 'pre_train' mode.
configs = Config()
dataset = Load_Dataset(pretrain, configs, training_mode='pre_train')
# Shuffled loader; batch size and drop_last come from the config, and loading
# uses 4 worker processes.
loader = DataLoader(dataset=dataset, batch_size=configs.batch_size, shuffle=True,
                          drop_last=configs.drop_last, num_workers=4)


In [7]:

import tqdm


def csv_to_pt(patient_files, desc, labels=None, max_time_step=336):
    """Stack per-patient time series into one zero-padded float32 tensor.

    Each record is converted to a 2-D float32 array of shape
    ``(time_steps, features)``, zero-padded at the end of the time axis up to
    ``max_time_step`` rows, and stacked along a new leading axis.

    Records with more than ``max_time_step`` rows are skipped, because
    ``np.pad`` cannot shrink an array.

    Args:
        patient_files: Sized sequence of 2-D array-likes, one per patient
            (for example NumPy arrays or all-numeric pandas DataFrames).
            All records must share the same number of columns.
        desc: Text shown as the progress-bar description.
        labels: Optional sequence aligned with ``patient_files`` holding one
            label per patient. Labels of skipped records are dropped too.
            When omitted, the returned ``'labels'`` tensor is empty.
        max_time_step: Number of rows every kept record is padded to.

    Returns:
        dict with two keys:
            ``'samples'``: tensor of shape ``(kept, max_time_step, features)``,
            or an empty tensor when no record was kept.
            ``'labels'``: tensor with one entry per kept record, or an empty
            tensor when ``labels`` is ``None`` or no record was kept.

    Raises:
        ValueError: If ``labels`` is given but its length differs from
            ``patient_files``, or if a record is not 2-D (raised by ``np.pad``).
    """
    # NOTE(review): when whole DataFrames are passed, every column (including
    # any index or label column) becomes a feature — confirm whether callers
    # should split such columns off first.
    if labels is not None and len(labels) != len(patient_files):
        raise ValueError(
            f"labels has {len(labels)} entries but patient_files has {len(patient_files)}"
        )

    # The progress bar is cosmetic, so fall back to plain iteration without tqdm.
    try:
        from tqdm import tqdm as progress_bar
    except ImportError:
        progress_bar = None

    # Iterate over the records themselves; wrapping them in zip() would yield
    # 1-tuples whose length is always 1.
    records = enumerate(patient_files)
    if progress_bar is not None:
        records = progress_bar(records, desc=f"{desc}", total=len(patient_files))

    samples = []
    kept_labels = []
    for idx, file in records:
        record = np.asarray(file, dtype=np.float32)

        # A negative pad width is rejected by np.pad, so over-long records
        # are filtered out before padding.
        if len(record) > max_time_step:
            continue

        pad_width = ((0, max_time_step - len(record)), (0, 0))
        record = np.pad(record, pad_width=pad_width, mode='constant')

        samples.append(torch.from_numpy(record).unsqueeze(0))
        if labels is not None:
            kept_labels.append(torch.as_tensor(labels[idx]).unsqueeze(0))

    # torch.cat rejects an empty list, so return empty tensors in that case.
    stacked_samples = torch.cat(samples, dim=0) if samples else torch.empty(0)
    stacked_labels = torch.cat(kept_labels, dim=0) if kept_labels else torch.empty(0)

    return {'samples': stacked_samples, 'labels': stacked_labels}


In [12]:
import pandas as pd

# Preview one patient file from the test set. The file is pipe-separated
# (hence sep='|'), and the resulting DataFrame is the cell output.
pd.read_csv(os.path.join(project_root(), 'data', 'tl_datasets', 'test', 'psv_files', 'p102016.psv'),
            sep='|')


Unnamed: 0,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,EtCO2,BaseExcess,HCO3,...,WBC,Fibrinogen,Platelets,Age,Gender,Unit1,Unit2,HospAdmTime,ICULOS,SepsisLabel
0,,,,,,,,,,,...,,,,65,1,0,1,-0.01,1,0
1,88.0,90.0,,100.0,74.0,60.0,16.0,,,,...,17.9,452.0,242.0,65,1,0,1,-0.01,2,0
2,86.0,96.0,,93.0,69.0,52.0,32.0,,,,...,,,,65,1,0,1,-0.01,3,0
3,104.0,99.0,,94.0,86.0,77.0,28.0,,,,...,,,,65,1,0,1,-0.01,4,0
4,100.0,100.0,,120.0,77.0,61.0,22.0,,,,...,,,,65,1,0,1,-0.01,5,0
5,77.0,100.0,,84.0,81.0,78.0,25.0,,,,...,,,,65,1,0,1,-0.01,6,0
6,85.5,96.5,35.3,123.0,82.0,63.0,26.0,,,,...,,,,65,1,0,1,-0.01,7,0
7,95.0,96.0,,117.0,80.0,63.0,24.0,,,,...,,,,65,1,0,1,-0.01,8,0
8,93.0,96.0,,125.0,77.0,59.0,24.0,,,,...,,,,65,1,0,1,-0.01,9,0
9,100.0,95.0,36.7,118.0,70.0,50.0,30.0,,,,...,,,,65,1,0,1,-0.01,10,0


In [14]:
# Folder holding the pipe-separated (.psv) patient files of the test set.
# Build the path once instead of twice per loop iteration.
psv_dir = os.path.join(project_root(), 'data', 'tl_datasets', 'test', 'psv_files')

# One DataFrame per patient file.
test_files = []

# os.listdir returns entries in arbitrary order, so sort them to make the order
# of test_files reproducible between runs and machines.
for fileName in sorted(os.listdir(psv_dir)):
    # Skip anything that is not a patient file (hidden files, checkpoint
    # folders, ...), since pd.read_csv would fail on it or parse it as garbage.
    if not fileName.endswith('.psv'):
        continue
    test_files.append(pd.read_csv(os.path.join(psv_dir, fileName), sep='|'))
    