In [9]:
%load_ext autoreload
%autoreload 2

import numpy as np

from msr.utils import ordered_dict_to_dict
from msr.data.download.ptbxl import RAW_TENSORS_DATA_PATH
import torch
from msr.data.measurements import PtbXLMeasurement
from msr.signals.representation_extractor import PeriodicRepresentationExtractor

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [85]:
# Sampling frequency (Hz) of the loaded tensors. It was previously referenced
# without being defined in this notebook, so a fresh kernel failed with a
# NameError. 100 Hz matches the recorded outputs: 1000 samples per record and
# 400-sample windows for win_len_s=4.
# NOTE(review): if the project already exports this constant, import it instead.
FS = 100

# Load the validation split as a NumPy array. map_location="cpu" keeps the
# `.numpy()` call valid even if the tensor was saved from a GPU device.
data = torch.load(RAW_TENSORS_DATA_PATH / "val.pt", map_location="cpu").numpy()
print(data.shape)

# Take one record and transpose it; in the recorded run this turns
# (1000, 12) into (12, 1000).
idx = 0
sample = data[idx].T
print(sample.shape)

measurement = PtbXLMeasurement(sample, FS)

# Parameters reused later by the representation extractor cell.
beats_params = dict(source_channel="ecg_2")
windows_params = dict(win_len_s=4, step_s=2)

# Prepare beats, windows and the aggregated beat before any of the
# `get_*` calls in the cells below.
measurement.set_beats(**beats_params)
measurement.set_windows(**windows_params)
measurement.set_agg_beat()

(1642, 1000, 12)
(12, 1000)


---
# **Whole Signal**

## Waveforms

In [44]:
# Whole-signal waveforms returned as a single array (return_arr=True).
# The recorded run shows shape (1000, 12) — presumably (samples, channels); TODO confirm.
feats = measurement.get_whole_signal_waveforms(return_arr=True)
feats.shape

(1000, 12)

In [45]:
# Same waveforms with return_arr=False, passed through ordered_dict_to_dict
# for display. The recorded output is a mapping keyed by channel name ('ecg_1', ...).
feats = measurement.get_whole_signal_waveforms(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': array([-4.100e-02, -3.900e-02, -1.800e-02, -1.300e-02, -2.000e-02,
        -1.000e-02, -3.400e-02, -4.700e-02, -1.900e-02, -3.000e-02,
        -4.600e-02, -2.800e-02, -1.900e-02, -5.000e-02, -3.700e-02,
        -2.200e-02, -2.800e-02, -2.000e-02, -5.400e-02, -7.200e-02,
        -6.900e-02, -7.200e-02, -7.700e-02, -7.300e-02, -6.800e-02,
        -4.300e-02, -1.400e-02, -2.400e-02,  7.000e-03,  3.600e-02,
         5.100e-02,  4.800e-02,  3.800e-02,  3.400e-02,  0.000e+00,
        -3.900e-02, -6.500e-02, -5.900e-02, -6.800e-02, -7.000e-02,
        -8.300e-02, -9.000e-02, -1.000e-01, -1.080e-01, -1.150e-01,
         4.400e-02,  8.040e-01,  1.038e+00,  2.800e-01, -8.100e-02,
        -3.500e-02, -1.200e-02, -9.000e-03, -2.400e-02,  0.000e+00,
         1.000e-03,  8.000e-03,  2.000e-03,  6.000e-03,  1.600e-02,
         1.700e-02,  3.500e-02,  4.000e-02,  4.600e-02,  7.700e-02,
         1.240e-01,  1.430e-01,  1.550e-01,  2.180e-01,  2.430e-01,
         2.360e-01,  2.650e-01,  2.790e

## Features

In [46]:
# Whole-signal features as a flat array (return_arr=True).
# The recorded run shows shape (697,).
feats = measurement.get_whole_signal_features(return_arr=True)
feats.shape

(697,)

In [47]:
# Whole-signal features with return_arr=False, passed through
# ordered_dict_to_dict for display. The recorded output maps each channel
# name to a dict of named feature values.
feats = measurement.get_whole_signal_features(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': {'ecg_1_basic_features__mean': 0.0023210000000000014,
  'ecg_1_basic_features__std': 0.16871967270890492,
  'ecg_1_basic_features__median': -0.048,
  'ecg_1_basic_features__skewness': 3.781523372461828,
  'ecg_1_basic_features__kurtosis': 17.815038523607146,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_basic_features__mean': 0.014818211688089404,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_basic_features__std': 0.1526245518832135,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_basic_features__median': -0.03855840264855802,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_basic_features__skewness': 2.5506198250147727,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_basic_features__kurtosis': 7.617773572088446,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_crit_points__p_onset_loc': 12,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_crit_points__p_onset_time': 0.080784,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_crit_points__p_onset_val': -0.070751280997554,
  'ecg_1_agg_beat_features__ecg_1_agg_beat_c

---
# **Windows**

## Waveforms

In [12]:
# Windowed waveforms as a single array (return_arr=True).
# The recorded run shows shape (4, 400, 12) for win_len_s=4, step_s=2 —
# presumably (windows, samples, channels); TODO confirm.
feats = measurement.get_windows_waveforms(return_arr=True)
feats.shape

(4, 400, 12)

In [49]:
# Windowed waveforms with return_arr=False, passed through ordered_dict_to_dict
# for display. The recorded output is keyed by channel, then by 'window_<i>'.
# NOTE(review): the recorded output still shows an inner OrderedDict under each
# channel — confirm whether ordered_dict_to_dict is meant to convert nested levels.
feats = measurement.get_windows_waveforms(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': OrderedDict([('window_0',
               array([-4.100e-02, -3.900e-02, -1.800e-02, -1.300e-02, -2.000e-02,
                      -1.000e-02, -3.400e-02, -4.700e-02, -1.900e-02, -3.000e-02,
                      -4.600e-02, -2.800e-02, -1.900e-02, -5.000e-02, -3.700e-02,
                      -2.200e-02, -2.800e-02, -2.000e-02, -5.400e-02, -7.200e-02,
                      -6.900e-02, -7.200e-02, -7.700e-02, -7.300e-02, -6.800e-02,
                      -4.300e-02, -1.400e-02, -2.400e-02,  7.000e-03,  3.600e-02,
                       5.100e-02,  4.800e-02,  3.800e-02,  3.400e-02,  0.000e+00,
                      -3.900e-02, -6.500e-02, -5.900e-02, -6.800e-02, -7.000e-02,
                      -8.300e-02, -9.000e-02, -1.000e-01, -1.080e-01, -1.150e-01,
                       4.400e-02,  8.040e-01,  1.038e+00,  2.800e-01, -8.100e-02,
                      -3.500e-02, -1.200e-02, -9.000e-03, -2.400e-02,  0.000e+00,
                       1.000e-03,  8.000e-03,  2.000e-03,  6.0

## Features

In [23]:
# Per-window features as a 2-D array (return_arr=True).
# The recorded run shows shape (4, 696), one row per window.
feats = measurement.get_windows_features(return_arr=True)
feats.shape

(4, 696)

In [14]:
# Per-window features with return_arr=False, passed through ordered_dict_to_dict
# for display. The recorded output nests channel -> 'window_<i>' -> named feature values.
feats = measurement.get_windows_features(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': {'window_0': {'window_0_basic_features__mean': 0.0182625,
   'window_0_basic_features__std': 0.16966634490596536,
   'window_0_basic_features__median': -0.034,
   'window_0_basic_features__skewness': 3.788877566567407,
   'window_0_basic_features__kurtosis': 17.72528550856611,
   'window_0_agg_beat_features__window_0_agg_beat_basic_features__mean': 0.01669092868006964,
   'window_0_agg_beat_features__window_0_agg_beat_basic_features__std': 0.14874292864775582,
   'window_0_agg_beat_features__window_0_agg_beat_basic_features__median': -0.030468801446316553,
   'window_0_agg_beat_features__window_0_agg_beat_basic_features__skewness': 2.4809067634439836,
   'window_0_agg_beat_features__window_0_agg_beat_basic_features__kurtosis': 7.20221689339988,
   'window_0_agg_beat_features__window_0_agg_beat_crit_points__p_onset_loc': 5,
   'window_0_agg_beat_features__window_0_agg_beat_crit_points__p_onset_time': 0.033536250000000004,
   'window_0_agg_beat_features__window_0_agg_beat_crit_

---
# **Beats**

## Waveforms

In [52]:
# Per-beat waveforms as a single array (return_arr=True).
# The recorded run shows shape (12, 100, 12) — presumably
# (beats, samples per beat, channels); TODO confirm the axis order.
feats = measurement.get_beats_waveforms(return_arr=True)
feats.shape


(12, 100, 12)

In [53]:
# Per-beat waveforms with return_arr=False, passed through ordered_dict_to_dict
# for display. The recorded output is keyed by channel, then by 'beat_<i>'.
# NOTE(review): the recorded output still shows an inner OrderedDict under each
# channel — confirm whether ordered_dict_to_dict is meant to convert nested levels.
feats = measurement.get_beats_waveforms(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': OrderedDict([('beat_0',
               array([-0.0633699 , -0.06652158, -0.06871391, -0.07037988, -0.07271498,
                      -0.07301787, -0.07177795, -0.06648476, -0.05637457, -0.04173122,
                      -0.0290799 , -0.02104746, -0.0147369 , -0.00209416,  0.01426636,
                       0.0306749 ,  0.04161666,  0.04304616,  0.04222445,  0.03761691,
                       0.03061948,  0.02006048,  0.00139022, -0.02156171, -0.04413863,
                      -0.06027232, -0.06708242, -0.07053596, -0.07308512, -0.07683803,
                      -0.08205061, -0.08830108, -0.09482615, -0.10136222, -0.1076036 ,
                      -0.11456437, -0.11780026, -0.09634181,  0.00183331,  0.23566349,
                       0.59754415,  0.73837697,  0.7618565 ,  0.45275828,  0.16025514,
                      -0.00813318, -0.05239793, -0.04425855, -0.03079721, -0.02867989,
                      -0.02917785, -0.02935153, -0.02317503, -0.01710941, -0.01340264,
         

## Features

In [54]:
# Per-beat features as a 2-D array (return_arr=True).
# The recorded run shows shape (12, 600), one row per beat.
feats = measurement.get_beats_features(return_arr=True)
feats.shape

(12, 600)

In [55]:
# Per-beat features with return_arr=False, passed through ordered_dict_to_dict
# for display. The recorded output nests channel -> 'beat_<i>' -> named feature values.
feats = measurement.get_beats_features(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': {'beat_0': {'beat_0_resampleWithInterp(100)_basic_features__mean': 0.03208884907986025,
   'beat_0_resampleWithInterp(100)_basic_features__std': 0.1557077030332044,
   'beat_0_resampleWithInterp(100)_basic_features__median': -0.012693017425480901,
   'beat_0_resampleWithInterp(100)_basic_features__skewness': 2.7068971816948033,
   'beat_0_resampleWithInterp(100)_basic_features__kurtosis': 8.656331593698802,
   'beat_0_resampleWithInterp(100)_crit_points__p_onset_loc': 13,
   'beat_0_resampleWithInterp(100)_crit_points__p_onset_time': 0.0871,
   'beat_0_resampleWithInterp(100)_crit_points__p_onset_val': -0.00209415727124833,
   'beat_0_resampleWithInterp(100)_crit_points__p_loc': 17,
   'beat_0_resampleWithInterp(100)_crit_points__p_time': 0.11389999999999999,
   'beat_0_resampleWithInterp(100)_crit_points__p_val': 0.043046164601311936,
   'beat_0_resampleWithInterp(100)_crit_points__p_offset_loc': 18,
   'beat_0_resampleWithInterp(100)_crit_points__p_offset_time': 0.1206,
   

---
# **Aggregated beat**

## Waveforms

In [56]:
# Aggregated-beat waveform as a single array (return_arr=True).
# The recorded run shows shape (100, 12) — presumably (samples, channels); TODO confirm.
feats = measurement.get_agg_beat_waveforms(return_arr=True)
feats.shape

(100, 12)

In [57]:
# Aggregated-beat waveform with return_arr=False, passed through
# ordered_dict_to_dict for display. The recorded output maps each channel
# name to its waveform array.
feats = measurement.get_agg_beat_waveforms(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': array([-0.08687695, -0.08933035, -0.09181556, -0.09433741, -0.09751211,
        -0.09711799, -0.09400826, -0.0867401 , -0.08041367, -0.0769153 ,
        -0.07629643, -0.07451368, -0.07075128, -0.06168526, -0.05073375,
        -0.03895574, -0.02696845, -0.01732213, -0.01059819, -0.00565659,
        -0.00364349, -0.00652906, -0.01427308, -0.02831746, -0.04695719,
        -0.06803735, -0.08514367, -0.09273226, -0.09675504, -0.09734435,
        -0.09803487, -0.09955882, -0.10322709, -0.10533625, -0.10655334,
        -0.10726309, -0.11215636, -0.1178325 , -0.1163573 , -0.05926279,
         0.10083433,  0.4169002 ,  0.67325898,  0.73541252,  0.52855023,
         0.231064  ,  0.01840374, -0.07212907, -0.06710832, -0.05514374,
        -0.04279711, -0.03944566, -0.03994205, -0.04104276, -0.04022952,
        -0.03816106, -0.03477994, -0.03016557, -0.0272117 , -0.02389888,
        -0.01989826, -0.01679391, -0.01303011, -0.00804347,  0.00224668,
         0.01658213,  0.03269955,  0.04497

## Features

In [58]:
# Aggregated-beat features as a flat array (return_arr=True).
# The recorded run shows shape (600,).
feats = measurement.get_agg_beat_features(return_arr=True)
feats.shape

(600,)

In [59]:
# Aggregated-beat features with return_arr=False, passed through
# ordered_dict_to_dict for display. The recorded output maps each channel
# name to a dict of named feature values.
feats = measurement.get_agg_beat_features(return_arr=False)
ordered_dict_to_dict(feats)

{'ecg_1': {'ecg_1_agg_beat_basic_features__mean': 0.014818211688089404,
  'ecg_1_agg_beat_basic_features__std': 0.1526245518832135,
  'ecg_1_agg_beat_basic_features__median': -0.03855840264855802,
  'ecg_1_agg_beat_basic_features__skewness': 2.5506198250147727,
  'ecg_1_agg_beat_basic_features__kurtosis': 7.617773572088446,
  'ecg_1_agg_beat_crit_points__p_onset_loc': 12,
  'ecg_1_agg_beat_crit_points__p_onset_time': 0.080784,
  'ecg_1_agg_beat_crit_points__p_onset_val': -0.070751280997554,
  'ecg_1_agg_beat_crit_points__p_loc': 20,
  'ecg_1_agg_beat_crit_points__p_time': 0.13463999999999998,
  'ecg_1_agg_beat_crit_points__p_val': -0.003643485679305214,
  'ecg_1_agg_beat_crit_points__p_offset_loc': 26,
  'ecg_1_agg_beat_crit_points__p_offset_time': 0.175032,
  'ecg_1_agg_beat_crit_points__p_offset_val': -0.08514367405884357,
  'ecg_1_agg_beat_crit_points__q_loc': 37,
  'ecg_1_agg_beat_crit_points__q_time': 0.249084,
  'ecg_1_agg_beat_crit_points__q_val': -0.1178325021312301,
  'ecg_1_a

---
# **RepresentationExtractor**

In [61]:
# Wrap the prepared measurement in a representation extractor and configure it
# with the same beat / window parameters that were applied to the measurement.
rep_extractor = PeriodicRepresentationExtractor(measurement)
rep_extractor.set_beats(**beats_params)
rep_extractor.set_windows(**windows_params)
rep_extractor.set_agg_beat()

# Request every representation type in one call, as arrays.
reps = rep_extractor.get_representations(
    return_arr=True,
    representation_types="all",
)

# Print one line per representation: its name followed by its array shape.
for name in reps:
    rep = reps[name]
    print(name, rep.shape)

  for colname, column in epochs[i].select_dtypes(include=["object"]).iteritems():
  for colname, column in epochs[i].select_dtypes(include=["object"]).iteritems():


whole_signal_waveforms (1000, 12)
whole_signal_features (697,)
windows_waveforms (4, 400, 12)
windows_features (4, 696)
beats_waveforms (12, 100, 12)
beats_features (12, 600)
agg_beat_waveforms (100, 12)
agg_beat_features (600,)


# TODO:
* Create representation datasets!

In [75]:
# Print the shape of the whole-signal feature array and of its feature-name
# array (both (697,) in the recorded run), then display the names.
print(measurement.get_whole_signal_features().shape)
print(measurement.get_whole_signal_feature_names().shape)
measurement.get_whole_signal_feature_names()

(697,)
(697,)


array(['ecg_1__basic__mean', 'ecg_1__basic__std', 'ecg_1__basic__median',
       'ecg_1__basic__skewness', 'ecg_1__basic__kurtosis',
       'ecg_1__agg_beat__basic__mean', 'ecg_1__agg_beat__basic__std',
       'ecg_1__agg_beat__basic__median',
       'ecg_1__agg_beat__basic__skewness',
       'ecg_1__agg_beat__basic__kurtosis',
       'ecg_1__agg_beat__crit_points__p_onset_loc',
       'ecg_1__agg_beat__crit_points__p_onset_time',
       'ecg_1__agg_beat__crit_points__p_onset_val',
       'ecg_1__agg_beat__crit_points__p_loc',
       'ecg_1__agg_beat__crit_points__p_time',
       'ecg_1__agg_beat__crit_points__p_val',
       'ecg_1__agg_beat__crit_points__p_offset_loc',
       'ecg_1__agg_beat__crit_points__p_offset_time',
       'ecg_1__agg_beat__crit_points__p_offset_val',
       'ecg_1__agg_beat__crit_points__q_loc',
       'ecg_1__agg_beat__crit_points__q_time',
       'ecg_1__agg_beat__crit_points__q_val',
       'ecg_1__agg_beat__crit_points__r_onset_loc',
       'ecg_1__agg_beat

In [80]:
# Print the shape of the per-window feature array and of its feature-name
# array (both (4, 696) in the recorded run), then display the names for the
# first row (window_0 in the recorded output).
print(measurement.get_windows_features().shape)
print(measurement.get_windows_feature_names().shape)
measurement.get_windows_feature_names()[0]

(4, 696)
(4, 696)


array(['ecg_1_window_0__basic__mean', 'ecg_1_window_0__basic__std',
       'ecg_1_window_0__basic__median', 'ecg_1_window_0__basic__skewness',
       'ecg_1_window_0__basic__kurtosis',
       'ecg_1_window_0__agg_beat__basic__mean',
       'ecg_1_window_0__agg_beat__basic__std',
       'ecg_1_window_0__agg_beat__basic__median',
       'ecg_1_window_0__agg_beat__basic__skewness',
       'ecg_1_window_0__agg_beat__basic__kurtosis',
       'ecg_1_window_0__agg_beat__crit_points__p_onset_loc',
       'ecg_1_window_0__agg_beat__crit_points__p_onset_time',
       'ecg_1_window_0__agg_beat__crit_points__p_onset_val',
       'ecg_1_window_0__agg_beat__crit_points__p_loc',
       'ecg_1_window_0__agg_beat__crit_points__p_time',
       'ecg_1_window_0__agg_beat__crit_points__p_val',
       'ecg_1_window_0__agg_beat__crit_points__p_offset_loc',
       'ecg_1_window_0__agg_beat__crit_points__p_offset_time',
       'ecg_1_window_0__agg_beat__crit_points__p_offset_val',
       'ecg_1_window_0__agg_b

In [86]:
# Print the shape of the per-beat feature array and of its feature-name
# array (both (12, 600) in the recorded run), then display the names for the
# first row (beat_0 in the recorded output).
print(measurement.get_beats_features().shape)
print(measurement.get_beats_feature_names().shape)
measurement.get_beats_feature_names()[0]

(12, 600)
(12, 600)


array(['ecg_1_beat_0_interp(100)__basic__mean',
       'ecg_1_beat_0_interp(100)__basic__std',
       'ecg_1_beat_0_interp(100)__basic__median',
       'ecg_1_beat_0_interp(100)__basic__skewness',
       'ecg_1_beat_0_interp(100)__basic__kurtosis',
       'ecg_1_beat_0_interp(100)__crit_points__p_onset_loc',
       'ecg_1_beat_0_interp(100)__crit_points__p_onset_time',
       'ecg_1_beat_0_interp(100)__crit_points__p_onset_val',
       'ecg_1_beat_0_interp(100)__crit_points__p_loc',
       'ecg_1_beat_0_interp(100)__crit_points__p_time',
       'ecg_1_beat_0_interp(100)__crit_points__p_val',
       'ecg_1_beat_0_interp(100)__crit_points__p_offset_loc',
       'ecg_1_beat_0_interp(100)__crit_points__p_offset_time',
       'ecg_1_beat_0_interp(100)__crit_points__p_offset_val',
       'ecg_1_beat_0_interp(100)__crit_points__q_loc',
       'ecg_1_beat_0_interp(100)__crit_points__q_time',
       'ecg_1_beat_0_interp(100)__crit_points__q_val',
       'ecg_1_beat_0_interp(100)__crit_points__r

In [89]:
# Print the shape of the aggregated-beat feature array and of its feature-name
# array (both (600,) in the recorded run), then display the names.
print(measurement.get_agg_beat_features().shape)
print(measurement.get_agg_beat_feature_names().shape)
measurement.get_agg_beat_feature_names()

(600,)
(600,)


array(['ecg_1_agg_beat__basic__mean', 'ecg_1_agg_beat__basic__std',
       'ecg_1_agg_beat__basic__median', 'ecg_1_agg_beat__basic__skewness',
       'ecg_1_agg_beat__basic__kurtosis',
       'ecg_1_agg_beat__crit_points__p_onset_loc',
       'ecg_1_agg_beat__crit_points__p_onset_time',
       'ecg_1_agg_beat__crit_points__p_onset_val',
       'ecg_1_agg_beat__crit_points__p_loc',
       'ecg_1_agg_beat__crit_points__p_time',
       'ecg_1_agg_beat__crit_points__p_val',
       'ecg_1_agg_beat__crit_points__p_offset_loc',
       'ecg_1_agg_beat__crit_points__p_offset_time',
       'ecg_1_agg_beat__crit_points__p_offset_val',
       'ecg_1_agg_beat__crit_points__q_loc',
       'ecg_1_agg_beat__crit_points__q_time',
       'ecg_1_agg_beat__crit_points__q_val',
       'ecg_1_agg_beat__crit_points__r_onset_loc',
       'ecg_1_agg_beat__crit_points__r_onset_time',
       'ecg_1_agg_beat__crit_points__r_onset_val',
       'ecg_1_agg_beat__crit_points__r_loc',
       'ecg_1_agg_beat__crit_poin