##### Explore ECG waveform characteristics and relationships with EHR measurements in MIMIC-IV cohort with emergency attendance and hospitalisation

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from matplotlib.patches import Patch
from matplotlib.dates import DateFormatter
from datetime import timedelta, datetime
from tqdm import tqdm
import numpy as np
from scipy import stats, special
from tableone import TableOne

import os
import json
import pprint
import missingno as msno
from statannotations.Annotator import Annotator
import warnings

pd.set_option('display.max_rows', None)

#### Load ECG machine measurements and MIMIC-IV-ED cohort

In [2]:
mimic_path = '../../data/MIMIC-IV/'
extracted_data_path = '../outputs/ext_data/'

ed_pts = pd.read_csv(os.path.join(extracted_data_path, 'ehr_static.csv'))
lab_measures = pd.read_csv(os.path.join(extracted_data_path, 'events_ts.csv'))
ecg_measures = pd.read_csv(os.path.join(mimic_path, 'mimic-iv-ecg/1.0/machine_measurements.csv'))
ecg_dd = pd.read_csv(os.path.join(mimic_path, 'mimic-iv-ecg/1.0/machine_measurements_data_dictionary.csv'))

  ecg_measures = pd.read_csv(os.path.join(mimic_path, 'mimic-iv-ecg/1.0/machine_measurements.csv'))


In [3]:
ecg_measures.dtypes

subject_id      int64
study_id        int64
cart_id         int64
ecg_time       object
report_0       object
report_1       object
report_2       object
report_3       object
report_4       object
report_5       object
report_6       object
report_7       object
report_8       object
report_9       object
report_10      object
report_11      object
report_12      object
report_13      object
report_14      object
report_15      object
report_16      object
report_17      object
bandwidth      object
filtering      object
rr_interval     int64
p_onset         int64
p_end           int64
qrs_onset       int64
qrs_end         int64
t_end           int64
p_axis          int64
qrs_axis        int64
t_axis          int64
dtype: object

In [5]:
ed_pts.columns

Index(['subject_id', 'gender', 'dod', 'anchor_age', 'yob', 'hadm_id',
       'admittime', 'dischtime', 'deathtime', 'admission_location',
       ...
       'n_presc_vancomycin', 'n_presc_vitamin_d', 'total_proc_count',
       'pon_nutrition', 'pon_respiratory', 'pon_radiology', 'pon_cardiology',
       'pon_tpn', 'pon_hemodialysis', 'pon_neurology'],
      dtype='object', length=211)

In [8]:
print(ed_pts.subject_id.nunique(), ed_pts.shape)
print(lab_measures.subject_id.nunique(), lab_measures.shape)

48531 (48531, 211)
48531 (6683839, 7)


In [15]:
ed_pts.isnull().sum()

subject_id                                 0
gender                                     0
dod                                    31579
anchor_age                                 0
yob                                        0
hadm_id                                    0
admittime                                  0
dischtime                                  0
deathtime                              44517
admission_location                         0
discharge_location                         0
insurance                                  0
marital_status                             0
race                                       0
edregtime                                  0
edouttime                                  0
los_days                                   0
ext_stay_7                                 0
in_hosp_death                              0
non_home_discharge                         0
race_group                                 0
intime                                 38138
outtime   

In [17]:
ecg_pts = ed_pts.merge(ecg_measures, on=['subject_id'], how='left')
ecg_pts['ecg_time'] = pd.to_datetime(ecg_pts['ecg_time'])
ecg_pts['edregtime'] = pd.to_datetime(ecg_pts['edregtime'])
ecg_pts['dischtime'] = pd.to_datetime(ecg_pts['dischtime'])
ecg_pts = ecg_pts.sort_values(by=['subject_id', 'ecg_time'])
ecg_pts = ecg_pts[ecg_pts['ecg_time'].notna()]
ecg_pts = ecg_pts[(ecg_pts['ecg_time'] >= ecg_pts['edregtime'])&(ecg_pts['ecg_time'] <= ecg_pts['dischtime'])]
ecg_pts = ecg_pts.drop_duplicates(subset=['subject_id'], keep='first')
print(ecg_pts.subject_id.nunique(), ecg_pts.shape)

20851 (20851, 243)


In [21]:
ecg_dd

Unnamed: 0,Variable,Description
0,subject_id,An identifier for the subject which aligns acr...
1,study_id,An identifier for the study which the diagnost...
2,cart_id,An identifier specific to the ECG cart used to...
3,ecg_time,The date that the diagnostic ECG was collected on
4,report_#,A text based cardiology report generated by th...
5,bandwidth,The bandwidth of the ECG machine
6,filtering,An indicator for the filter setting
7,rr_interval,Time between successive R-waves (msec)
8,p_onset,Time at the onset of the P-wave (msec)
9,p_end,Time at the end of the P-wave (msec)


In [23]:
ecg_measures.bandwidth.value_counts()

bandwidth
0.005-150 Hz     637197
0.0005-150 Hz    111330
0.05-150 Hz       51508
Name: count, dtype: int64

In [25]:
ecg_measures.filtering.value_counts()

filtering
60 Hz notch Baseline filter    677415
<not specified>                107398
Baseline filter                 12505
50 Hz notch Baseline filter      2717
Name: count, dtype: int64

In [33]:
ecg_measures.report_0.value_counts()

report_0
Sinus rhythm                                                                                                                               317278
Sinus rhythm.                                                                                                                               59739
Sinus bradycardia                                                                                                                           58924
Sinus tachycardia                                                                                                                           40528
Atrial fibrillation                                                                                                                         31961
Sinus tachycardia.                                                                                                                          13252
Sinus rhythm with borderline 1st degree A-V block                                                                  

In [32]:
ecg_measures.report_5.value_counts()

report_5
Abnormal ECG                                                                                                         80439
Borderline ECG                                                                                                       10640
Low QRS voltages in precordial leads                                                                                  3135
Low QRS voltages in limb leads                                                                                        1972
Lateral ST-T changes may be due to myocardial ischemia                                                                1758
Lateral ST-T changes may be due to hypertrophy and/or ischemia                                                        1484
Lateral ST-T changes are probably due to ventricular hypertrophy                                                      1140
Inferior/lateral ST-T changes may be due to hypertrophy and/or ischemia                                                911
Left ve