In [1]:
import sys
# Make the parent directory importable; the project-local packages imported further
# down (`preprocess`, `data`, `models`) presumably live there -- TODO confirm.
sys.path.append('..')

import os
# An empty CUDA_VISIBLE_DEVICES hides every GPU from CUDA. It is set before torch is
# imported; all Trainers in this notebook also request accelerator='cpu'.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

import time

import torch
# Select the 'file_system' sharing strategy for torch.multiprocessing.
# NOTE(review): presumably to avoid file-descriptor limits in DataLoader workers -- confirm.
torch.multiprocessing.set_sharing_strategy('file_system')


In [2]:
# NOTE(review): `dataframe` is not referenced again in the visible notebook; it looks
# like a leftover -- confirm before removing.
dataframe = []

# Poincare

## Poincare PreProcessing

In [3]:
from preprocess.cinc2020 import process_cinc2020_poincare_diagram

In [4]:
!rm -rf /tmp/cinc2020/processed

In [5]:
class args:
    """Argparse-style settings object passed to `process_cinc2020_poincare_diagram`."""

    # Directory holding the raw input records.
    data_path = '/home/huypham/Projects/ecg/dataset/cinc2020/raw'
    # Output directory; it is wiped and re-created by the neighbouring cells.
    out_dir = '/tmp/cinc2020/processed'
    seed = 42
    mode = 'poincare'

os.makedirs(args.out_dir, exist_ok=True)

In [6]:
%matplotlib agg

In [7]:
# Poincare generation: time one full preprocessing pass over the raw records.
# time.perf_counter() is a monotonic, high-resolution clock, so this long-running
# interval cannot be skewed by wall-clock adjustments the way time.time() can.
start = time.perf_counter()

process_cinc2020_poincare_diagram(args)

stop = time.perf_counter()

# print('Processing time:', stop - start)

Processing:   0%|          | 0/43101 [00:00<?, ?it/s]

In [8]:
# Number of records processed, as reported by the progress bar of the cell above.
N_POINCARE_RECORDS = 43101

# The total and the per-record time are computed in one cell and kept under separate
# names. Previously a later cell divided `poincare_process_time` by the record count
# in place, so re-running that cell silently divided the value again.
poincare_total_time = stop - start
print(poincare_total_time)

# Average preprocessing time per record, in seconds.
poincare_process_time = poincare_total_time / N_POINCARE_RECORDS
print(poincare_process_time)

0.22723213886899912


In [9]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint


from data.images.images import PtbXlDataModule
from models.images.images import ImageClassifier

Global seed set to 42
Global seed set to 42


In [10]:
class args:
    """Argparse-style settings for evaluating the image-based classifiers."""

    # Dataset root; the 'processed' sub-directory is resolved from it in the next cell.
    data_path = '/home/huypham/Projects/ecg/dataset/cinc2020/'
    # One sample per batch.
    batch_size = 1
    model_barebone = 'resnet50'
    learning_rate = 1e-5
    max_epochs = 50
    log_dir = '/tmp/logs'
    resume_from_checkpoint = None
    seed = 42

In [11]:
# All three splits read from the same 'processed' directory; only the label CSV
# differs per split.
processed_dir = os.path.join(args.data_path, 'processed')
train_dir = val_dir = test_dir = processed_dir

train_label, val_label, test_label = (
    os.path.join(processed_dir, f'y_{split}.csv')
    for split in ('train', 'val', 'test')
)

datamodule = PtbXlDataModule(
    batch_size=args.batch_size,
    train_dir=train_dir, train_label=train_label,
    val_dir=val_dir, val_label=val_label,
    test_dir=test_dir, test_label=test_label,
)

# The label columns of the training set serve as the class list for the models below.
classes = datamodule.train_dataset.labels.columns
print('Train data lenghth:', len(datamodule.train_dataset))

Global seed set to 42
Global seed set to 42


Train data lenghth: 25860


## ResNet50

In [12]:
# Time one full CPU pass over the test split with the trained ResNet50 checkpoint.
# time.perf_counter() is monotonic and high-resolution, which makes it the appropriate
# clock for measuring an interval (time.time() follows the adjustable wall clock).
# NOTE(review): the timed region also covers model construction, Trainer set-up and
# checkpoint loading, so the per-image figure derived from it includes that overhead.
start = time.perf_counter()

model = ImageClassifier(
    classes=classes,
    barebone='resnet50',
    learning_rate=args.learning_rate,
    loss_type='bce'
)

trainer = pl.Trainer(
    accelerator='cpu',
    deterministic=False,
)

trainer.test(
    model=model,
    datamodule=datamodule,
    ckpt_path='/home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/poincare/resnet50/lightning_logs/version_0/ckpt/best-epoch=3-val_loss=0.15-val_f1=0.46.ckpt',
)

stop = time.perf_counter()


Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/poincare/resnet50/lightning_logs/version_0/ckpt/best-epoch=3-val_loss=0.15-val_f1=0.46.ckpt
Loaded model weights from checkpoint at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/poincare/resnet50/lightning_logs/version_0/ckpt/best-epoch=3-val_loss=0.15-val_f1=0.46.ckpt
Global seed set to 42
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         test_f1            0.4504656493663788
        test_loss           0.15440258383750916
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [13]:
# Average time per test sample (seconds) for the ResNet50 run timed in the cell above.
n_test_images = len(datamodule.test_dataset)
resnet50 = (stop - start) / n_test_images
print('Per image:', resnet50)

Per image: 0.035986953151708634


## DenseNet121

In [14]:
# Time one full CPU pass over the test split with the trained DenseNet121 checkpoint.
# time.perf_counter() is monotonic and high-resolution, which makes it the appropriate
# clock for measuring an interval (time.time() follows the adjustable wall clock).
# NOTE(review): the timed region also covers model construction, Trainer set-up and
# checkpoint loading, so the per-image figure derived from it includes that overhead.
start = time.perf_counter()

model = ImageClassifier(
    classes=classes,
    barebone='densenet121',
    learning_rate=args.learning_rate,
    loss_type='bce'
)

trainer = pl.Trainer(
    accelerator='cpu',
    deterministic=False,
)

trainer.test(
    model=model,
    datamodule=datamodule,
    ckpt_path='/home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/poincare/densenet121/lightning_logs/version_0/ckpt/best-epoch=9-val_loss=0.15-val_f1=0.50.ckpt',
)

stop = time.perf_counter()

Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/poincare/densenet121/lightning_logs/version_0/ckpt/best-epoch=9-val_loss=0.15-val_f1=0.50.ckpt
Loaded model weights from checkpoint at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/poincare/densenet121/lightning_logs/version_0/ckpt/best-epoch=9-val_loss=0.15-val_f1=0.50.ckpt
Global seed set to 42
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         test_f1            0.4962764084339142
        test_loss           0.15268494188785553
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [15]:
# Average time per test sample (seconds) for the DenseNet121 run timed in the cell above.
n_test_images = len(datamodule.test_dataset)
densenet121 = (stop - start) / n_test_images
print('Per image:', densenet121)

Per image: 0.03631659785302661


# 1D CNN

In [16]:
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint

from data.timeseries.timeseries import TimeSeriesDataModule
from models.timeseries.cnn import Net1DLightningModule

import warnings
warnings.filterwarnings(action='ignore')

In [17]:
class args:
    """Argparse-style settings for the 1D CNN evaluation on raw time series."""

    # Raw records and the directory holding the per-split label CSVs.
    data_path = '/home/huypham/Projects/ecg/dataset/cinc2020/raw/'
    csv_path = '/home/huypham/Projects/ecg/dataset/cinc2020/processed'
    # One sample per batch.
    batch_size = 1
    learning_rate = 1e-5
    max_epochs = 100
    log_dir = '/tmp/logs/cnn1d'
    resume_from_checkpoint = None
    seed = 42

In [18]:
def set_seed(seed=0):
    """Seed the `random`, NumPy and PyTorch random number generators.

    Args:
        seed: Seed value applied to all three generators (default 0).
    """
    import random

    import numpy
    import torch

    random.seed(seed)
    numpy.random.seed(seed)
    torch.random.manual_seed(seed)

In [20]:
# Time the set-up of the time-series test pipeline: seeding, data-module construction
# and creation of the test dataloader.
# time.perf_counter() is monotonic and high-resolution, which makes it the appropriate
# clock for measuring an interval (time.time() follows the adjustable wall clock).
start = time.perf_counter()

set_seed(args.seed)
train_dir = args.data_path
val_dir = train_dir
test_dir = train_dir

train_label = os.path.join(args.csv_path, 'y_train.csv')
val_label = os.path.join(args.csv_path, 'y_val.csv')
test_label = os.path.join(args.csv_path, 'y_test.csv')

# Only the test split is evaluated here, so the train/val inputs are passed as None.
data_module = TimeSeriesDataModule(
    train_dir=None,
    train_label=None,
    val_dir=None,
    val_label=None,
    test_dir=test_dir,
    test_label=test_label,
    batch_size=args.batch_size
)

test_dataloader = data_module.test_dataloader()
# Class names and weights are read from the test dataset because no train dataset
# is built in this cell.
classes = data_module.test_dataset.classes
class_weights = data_module.test_dataset.class_weights

stop = time.perf_counter()

Global seed set to 42
Global seed set to 42


  0%|          | 0/8621 [00:00<?, ?it/s]

In [21]:
# Average set-up time per test record (seconds) for the time-series pipeline timed above.
n_test_records = len(data_module.test_dataset)
ts_processing_time = (stop - start) / n_test_records
print('Per image:', ts_processing_time)

Per image: 0.14429691127966926


In [22]:
# Time one full CPU pass over the test dataloader with the trained 1D CNN checkpoint.
# time.perf_counter() is monotonic and high-resolution, which makes it the appropriate
# clock for measuring an interval (time.time() follows the adjustable wall clock).
# NOTE(review): the timed region also covers model construction, Trainer set-up and
# checkpoint loading, so the per-record figure derived from it includes that overhead.
start = time.perf_counter()

model = Net1DLightningModule(classes=classes, class_weights=class_weights)

trainer = pl.Trainer(
    accelerator='cpu'
)

trainer.test(
    model,
    dataloaders=test_dataloader,
    ckpt_path='/home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/cnn1d/lightning_logs/version_0/ckpt/best-epoch=98-val_loss=0.01-val_f1=0.69.ckpt',
)

stop = time.perf_counter()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/cnn1d/lightning_logs/version_0/ckpt/best-epoch=98-val_loss=0.01-val_f1=0.69.ckpt
Loaded model weights from checkpoint at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/cnn1d/lightning_logs/version_0/ckpt/best-epoch=98-val_loss=0.01-val_f1=0.69.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         test_f1            0.6897772550582886
        test_loss          0.014760381542146206
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [23]:
# Average time per test record (seconds) for the 1D CNN run timed in the cell above.
n_test_records = len(data_module.test_dataset)
cnn1d = (stop - start) / n_test_records
print('Per image:', cnn1d)

Per image: 0.02979877340504609


# 1D ResNet

In [24]:
class args:
    """Argparse-style settings for the 1D ResNet evaluation."""

    data_path = '/home/huypham/Projects/ecg/dataset/cinc2020/raw'
    csv_path = '/home/huypham/Projects/ecg/dataset/cinc2020/processed'
    # NOTE(review): the 1D ResNet test cell reuses the `test_dataloader` built earlier,
    # so this batch size does not appear to influence the timed run -- confirm.
    batch_size = 32
    learning_rate = 1e-4
    max_epochs = 500
    log_dir = '/tmp/logs/resnet1d'
    resume_from_checkpoint = None
    seed = 42

In [25]:
from models.timeseries.resnet1d import ResNet1DLightningModule

# Time one full CPU pass over the test dataloader with the trained 1D ResNet checkpoint.
# time.perf_counter() is monotonic and high-resolution, which makes it the appropriate
# clock for measuring an interval (time.time() follows the adjustable wall clock).
# NOTE(review): the timed region also covers model construction, Trainer set-up and
# checkpoint loading, so the per-record figure derived from it includes that overhead.
start = time.perf_counter()

model = ResNet1DLightningModule(
    classes=classes,
    class_weights=class_weights,
    learning_rate=args.learning_rate,
)

trainer = pl.Trainer(
    accelerator='cpu'
)

trainer.test(
    model,
    dataloaders=test_dataloader,
    ckpt_path='/home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/resnet1d/lightning_logs/version_0/ckpt/best-epoch=99-val_loss=0.01-val_f1=0.71.ckpt',
)

stop = time.perf_counter()

Global seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Restoring states from the checkpoint path at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/resnet1d/lightning_logs/version_0/ckpt/best-epoch=99-val_loss=0.01-val_f1=0.71.ckpt
Loaded model weights from checkpoint at /home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/resnet1d/lightning_logs/version_0/ckpt/best-epoch=99-val_loss=0.01-val_f1=0.71.ckpt


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
         test_f1             0.712304413318634
        test_loss          0.013201124966144562
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


In [26]:
# Average time per test record (seconds) for the 1D ResNet run timed in the cell above.
n_test_records = len(data_module.test_dataset)
resnet1d = (stop - start) / n_test_records
print('Per image:', resnet1d)

Per image: 0.0215992074100701


# XGBoost

In [27]:
import pandas as pd

from preprocess.cinc2017 import process_cinc2017_timeseries
from preprocess.utils import *

In [28]:
import joblib

# Restore the saved search object for the tabular pipeline; later cells read its
# `best_estimator_` and call `predict` on it.
search_pkl_path = '/home/huypham/Projects/ecg/logs_with_eco2ai/logs_cinc2020/tabular/search.pkl'
search = joblib.load(search_pkl_path)



In [29]:
class args:
    """Argparse-style settings for the tabular (XGBoost) feature extraction."""

    # Directory holding the raw input records.
    data_path = '/home/huypham/Projects/ecg/dataset/cinc2020/raw/'
    # Directory the freshly extracted feature CSV is written to.
    output_dir = '/tmp/cinc2020/processed'
    sampling_rate = 300

In [30]:
os.makedirs(args.output_dir, exist_ok=True)

In [31]:
labels_dir = '/home/huypham/Projects/ecg/dataset/cinc2020/processed'


def _record_paths(label_table):
    """Return one raw-record path per row of `label_table`, built from its 'idx' column."""
    return [
        os.path.join(args.data_path, record_id)
        for record_id in label_table['idx'].to_list()
    ]


# Per-split label tables.
train_df = pd.read_csv(os.path.join(labels_dir, 'y_train.csv'))
val_df = pd.read_csv(os.path.join(labels_dir, 'y_val.csv'))
test_df = pd.read_csv(os.path.join(labels_dir, 'y_test.csv'))

train_files = _record_paths(train_df)
val_files = _record_paths(val_df)
test_files = _record_paths(test_df)

# Previously extracted test features, plus the test labels with the id column removed.
X_test = pd.read_csv(os.path.join(labels_dir, 'test_features.csv'))
y_test = test_df.drop(columns=['idx']).to_numpy()

# train_features = extract_ts_features(train_files, verbose=True)
# train_features.to_csv(os.path.join(args.output_dir, 'train_features.csv'), index=False)

# val_features = extract_ts_features(val_files, verbose=True)
# val_features.to_csv(os.path.join(args.output_dir, 'val_features.csv'), index=False)

In [32]:
# Boolean support mask taken from step [0][3] of the best pipeline.
# NOTE(review): assumes that step is the feature selector and that its mask lines up
# one-to-one with X_test.columns (zip() would silently truncate otherwise) -- confirm.
mask = search.best_estimator_[0][3].get_support()
selected_columns = [c for m, c in zip(mask, X_test.columns) if m]

# Build the default parameter table once instead of once per selected column.
efficient_fc_parameters = EfficientFCParameters()

# The second '__'-separated field of each selected column name is used as the key into
# the parameter table; every key that occurs keeps its complete default parameter list.
tsfresh_settings = {
    calculator: efficient_fc_parameters[calculator]
    for calculator in (col.split('__')[1] for col in selected_columns)
}

In [33]:
# Time the extraction of the selected features for every test record, including
# writing the resulting table to disk.
# time.perf_counter() is monotonic and high-resolution, which makes it the appropriate
# clock for measuring an interval (time.time() follows the adjustable wall clock).
start = time.perf_counter()

test_features = extract_ts_features(test_files, settings=tsfresh_settings, verbose=True)
test_features.to_csv(os.path.join(args.output_dir, 'test_features.csv'), index=False)

stop = time.perf_counter()

  0%|          | 0/8621 [00:00<?, ?it/s]

In [None]:
# (Removed an exact duplicate of the feature-extraction cell above: running it again
# repeats the whole extraction, rewrites the same CSV and resets `start`/`stop`.)

In [34]:
# Average feature-extraction time per test record (seconds). The divisor is the number
# of files actually passed to the extractor rather than a hard-coded record count.
tabular_processing_time = (stop - start) / len(test_files)
print('Per image:',  tabular_processing_time)

Per image: 0.3826583266161445


In [35]:
# Set `gpu_id` to -1 on the last step of the best pipeline before predicting.
# NOTE(review): presumably this forces the final estimator onto the CPU so its timing is
# comparable with the CPU-only runs above -- confirm for the installed XGBoost version.
search.best_estimator_[-1].gpu_id=-1

In [50]:
features_csv = os.path.join('/home/huypham/Projects/ecg/dataset/cinc2020/processed', 'test_features.csv')
all_test_features = pd.read_csv(features_csv)

# Keep only the columns whose name starts with 'x__'.
colnames = [name for name in all_test_features.columns if name.startswith('x__')]
X_test = all_test_features[colnames]

In [53]:
# Time a single predict() call over the whole test feature table.
# This interval is very short, so the high-resolution, monotonic time.perf_counter()
# is used instead of the wall-clock time.time().
start = time.perf_counter()

y_pred = search.predict(X_test)

stop = time.perf_counter()

In [54]:
# Average prediction time per test record (seconds). The divisor is the number of rows
# that were actually predicted rather than a hard-coded record count.
xgb = (stop - start) / len(X_test)
print('Per image:',  xgb)

Per image: 3.762762890297912e-05


# Summary

In [55]:
import seaborn as sns

In [63]:
# One row per model: per-sample preprocessing time and per-sample prediction time.
# Models that share an input representation share the same preprocessing time.
report = [
    {'model': model_name, 'processing': processing_time, 'prediction': prediction_time}
    for model_name, processing_time, prediction_time in (
        ('ResNet50', poincare_process_time, resnet50),
        ('DenseNet121', poincare_process_time, densenet121),
        ('1D CNN', ts_processing_time, cnn1d),
        ('1D ResNet', ts_processing_time, resnet1d),
        ('XGBoost', tabular_processing_time, xgb),
    )
]

In [64]:
print(report)
# """[{'model': 'ResNet50', 'processing': 0.033698169559967225, 'prediction': 0.037894168604440455}, {'model': 'DenseNet121', 'processing': 0.033698169559967225, 'prediction': 0.03815144628880312}, {'model': '1D CNN', 'processing': 0.013436714379469852, 'prediction': 0.027547516968158037}, {'model': '1D ResNet', 'processing': 0.013436714379469852, 'prediction': 0.01880842635947403}, {'model': 'XGBoost', 'processing': 1.7176005028336716, 'prediction': 0.00022670560257655656}]"""

[{'model': 'ResNet50', 'processing': 0.22723213886899912, 'prediction': 0.035986953151708634}, {'model': 'DenseNet121', 'processing': 0.22723213886899912, 'prediction': 0.03631659785302661}, {'model': '1D CNN', 'processing': 0.14429691127966926, 'prediction': 0.02979877340504609}, {'model': '1D ResNet', 'processing': 0.14429691127966926, 'prediction': 0.0215992074100701}, {'model': 'XGBoost', 'processing': 0.3826583266161445, 'prediction': 3.762762890297912e-05}]


In [65]:
# Frozen copy of previously measured timings (seconds per sample). It is only a fallback
# so the summary cells can run on a fresh kernel without repeating the hours-long
# measurements. When `report` already exists in this session it is kept, instead of
# being silently overwritten by these stored numbers.
if 'report' not in globals():
    report = [
        {'model': 'ResNet50', 'processing': 0.22723213886899912, 'prediction': 0.035986953151708634},
        {'model': 'DenseNet121', 'processing': 0.22723213886899912, 'prediction': 0.03631659785302661},
        {'model': '1D CNN', 'processing': 0.14429691127966926, 'prediction': 0.02979877340504609},
        {'model': '1D ResNet', 'processing': 0.14429691127966926, 'prediction': 0.0215992074100701},
        {'model': 'XGBoost', 'processing': 0.3826583266161445, 'prediction': 3.762762890297912e-05},
    ]

In [66]:
# Scale every numeric timing column by 1000 (seconds -> milliseconds), then index the
# table by model name.
timings = pd.DataFrame(report)
numeric_columns = timings.select_dtypes(include=['number']).columns
timings[numeric_columns] = timings[numeric_columns] * 1000
df = timings.set_index('model')

In [67]:
# Total per-sample latency is preprocessing plus prediction.
df['total'] = df['processing'].add(df['prediction'])

timing_table = df.round(1).to_markdown()
print(timing_table)

| model       |   processing |   prediction |   total |
|:------------|-------------:|-------------:|--------:|
| ResNet50    |        227.2 |         36   |   263.2 |
| DenseNet121 |        227.2 |         36.3 |   263.5 |
| 1D CNN      |        144.3 |         29.8 |   174.1 |
| 1D ResNet   |        144.3 |         21.6 |   165.9 |
| XGBoost     |        382.7 |          0   |   382.7 |


In [77]:
df

Unnamed: 0_level_0,processing,prediction,total
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ResNet50,227.232139,35.986953,263.219092
DenseNet121,227.232139,36.316598,263.548737
1D CNN,144.296911,29.798773,174.095685
1D ResNet,144.296911,21.599207,165.896119
XGBoost,382.658327,0.037628,382.695954


In [76]:
# `plt` is used throughout this cell but is not imported explicitly in any visible
# cell; import it here so the cell also runs on a fresh kernel.
import matplotlib.pyplot as plt

# Two stacked panels sharing the x axis, with the facing spines hidden (the layout of a
# broken-axis bar chart).
# NOTE(review): no y-limits are set, so both panels currently show the same full-range
# data -- set per-panel limits if an actual axis break is intended.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(6, 4))
ax1.spines['bottom'].set_visible(False)
ax1.tick_params(axis='x', which='both', bottom=False)
ax2.spines['top'].set_visible(False)

# Stack processing and prediction time per model; 'total' is excluded from the bars.
stacked_times = df.drop(columns=['total'])
for axis in (ax1, ax2):
    stacked_times.plot(ax=axis, kind='bar', stacked=True)
for tick in ax2.get_xticklabels():
    tick.set_rotation(0)

# Keep a single legend below the lower panel and give its entries readable labels.
ax1.get_legend().remove()
ax2.legend(
    loc='lower center', ncols=2, bbox_to_anchor=(0.5, -0.5, 0, 0)
)
legend_texts = ax2.get_legend().get_texts()
legend_texts[0].set_text('Processing Time')
legend_texts[1].set_text('Prediction Time')

plt.xlabel('')
ax2.set_ylabel('Time (ms)', y=1)
plt.savefig('inference_time_2020.pdf', bbox_inches='tight', dpi=100)

In [None]:
# sns.barplot(data=df, x='model', y='total')
# plt.yscale('log')
# df.drop(columns=['total']).plot(kind='bar', stacked=True)
# plt.yscale('log')