#### source
- https://physionet.org/content/mitdb/1.0.0/

#### AAMI standard: 
- https://www.researchgate.net/figure/Mapping-the-MIT-BIH-Arrhythmia-types-to-the-AAMI-Classes_tbl1_267411759

In [50]:
import os
import wfdb as wf
import numpy as np
import pandas as pd
from glob import glob
from scipy import signal as ss
from matplotlib import pyplot as plt
from biosppy.signals import ecg

In [51]:
def get_records():
    """ Get paths for data in data/mit/ directory """
    # There are 3 files for each record
    # *.atr is one of them
    path_source = 'C:/Users/mbl-mavoly/py/ECGsignal/mitdb/*.atr'
    paths = glob(path_source)

    # Get rid of the extension
    paths = [path[:-4] for path in paths]
    paths.sort()

    return paths

In [52]:
# Check number of patients
records = np.transpose(get_records())
for i,val in enumerate(records): 
    print("No."+str(i+1)+" => "+val)
    
print('** Total record :'+str(len(records))+" **")

No.1 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\100
No.2 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\101
No.3 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\102
No.4 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\103
No.5 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\104
No.6 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\105
No.7 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\106
No.8 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\107
No.9 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\108
No.10 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\109
No.11 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\111
No.12 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\112
No.13 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\113
No.14 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\114
No.15 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\115
No.16 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\116
No.17 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\117
No.18 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\118
No.19 => C:/Users/mbl-mavoly/py/ECGsignal/mitdb\119
No.20 => C:/Users/mbl

In [53]:
# Show Patient meta data
"""
fs = Sampling rate
sig_len = total number of samples
n_sig: total number of channel
base_date: additional info
base_time: additional info
units: Channel units
sig_name: Channel name
comments: additional info
"""
for path in records[:]:
    print('Loading file:', path)

    # Read in the data
    record = wf.rdsamp(path)
    
    # patient ECG data samples are in record[0]
    # patient meta data are in record[1]
    print(record[1]) # print all meta data
    for key, value in record[1].items() :
        print (key+": "+str(value))
    print()

Loading file: C:/Users/mbl-mavoly/py/ECGsignal/mitdb\100
{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['MLII', 'V5'], 'comments': ['69 M 1085 1629 x1', 'Aldomet, Inderal']}
fs: 360
sig_len: 650000
n_sig: 2
base_date: None
base_time: None
units: ['mV', 'mV']
sig_name: ['MLII', 'V5']
comments: ['69 M 1085 1629 x1', 'Aldomet, Inderal']

Loading file: C:/Users/mbl-mavoly/py/ECGsignal/mitdb\101
{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['MLII', 'V1'], 'comments': ['75 F 1011 654 x1', 'Diapres']}
fs: 360
sig_len: 650000
n_sig: 2
base_date: None
base_time: None
units: ['mV', 'mV']
sig_name: ['MLII', 'V1']
comments: ['75 F 1011 654 x1', 'Diapres']

Loading file: C:/Users/mbl-mavoly/py/ECGsignal/mitdb\102
{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['V5', 'V2'], 'comments': ['84 F 

{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['MLII', 'V2'], 'comments': ['69 M 950 654 x2', 'None']}
fs: 360
sig_len: 650000
n_sig: 2
base_date: None
base_time: None
units: ['mV', 'mV']
sig_name: ['MLII', 'V2']
comments: ['69 M 950 654 x2', 'None']

Loading file: C:/Users/mbl-mavoly/py/ECGsignal/mitdb\118
{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['MLII', 'V1'], 'comments': ['69 M 1456 653 x2', 'Digoxin, Norpace', 'The PVCs are multiform.']}
fs: 360
sig_len: 650000
n_sig: 2
base_date: None
base_time: None
units: ['mV', 'mV']
sig_name: ['MLII', 'V1']
comments: ['69 M 1456 653 x2', 'Digoxin, Norpace', 'The PVCs are multiform.']

Loading file: C:/Users/mbl-mavoly/py/ECGsignal/mitdb\119
{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['MLII', 'V1'], 'comments': ['51 F 1129 654 x2

{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['MLII', 'V1'], 'comments': ['61 M 2817 1629 x1', 'Digoxin', 'The PVCs are multiform and usually late-cycle, frequently resulting in', 'fusion PVCs.  The morphology of the fusion PVCs varies from almost normal', 'to almost identical to that of the PVCs.']}
fs: 360
sig_len: 650000
n_sig: 2
base_date: None
base_time: None
units: ['mV', 'mV']
sig_name: ['MLII', 'V1']
comments: ['61 M 2817 1629 x1', 'Digoxin', 'The PVCs are multiform and usually late-cycle, frequently resulting in', 'fusion PVCs.  The morphology of the fusion PVCs varies from almost normal', 'to almost identical to that of the PVCs.']

Loading file: C:/Users/mbl-mavoly/py/ECGsignal/mitdb\214
{'fs': 360, 'sig_len': 650000, 'n_sig': 2, 'base_date': None, 'base_time': None, 'units': ['mV', 'mV'], 'sig_name': ['MLII', 'V1'], 'comments': ['53 M 3189 1629 x1', 'Digoxin, Dilantin', 'The PVCs are multiform.  There ar

In [54]:
# MALE-FEMALE PATIENT VIEW AND AGE

ptn_id_male = list()
ptn_id_female = list()
ptn_age_val = []
for path in records[:]:
    ptn_id = path[-3:]
    record = wf.rdsamp(path)
    gender = str(record[1]['comments']).split(" ")[1]
    age = int(str(record[1]['comments']).split(" ")[0][2:])
    
    #print(gender)
    #print(age)
    
    if(gender == 'M'):
        ptn_id_male.append(ptn_id)
    elif(gender == 'F'):
        ptn_id_female.append(ptn_id)
    
    ptn_age_val.append(age)

print("Total Male patient: {}".format(len(ptn_id_male)))
print("Male patient id: {}".format(ptn_id_male))

print("")
print("Total Female patient: {}".format(len(ptn_id_female)))
print("female patient id: {}".format(ptn_id_female))

print("")
arr_age = np.array(ptn_age_val)
arr_age = arr_age[arr_age != -1] # remove unknown patient age
#print(arr_age)
print("Oldest Patient age: {}".format(np.max(arr_age)))
print("Youngest Patient age: {}".format(np.min(arr_age)))
print("Average Patient age: {}".format(np.mean(arr_age)))

Total Male patient: 26
Male patient id: ['100', '103', '107', '109', '112', '116', '117', '118', '122', '124', '200', '201', '202', '203', '205', '209', '210', '213', '214', '215', '217', '219', '221', '223', '230', '233']

Total female patient: 22
female patient id: ['101', '102', '104', '105', '106', '108', '111', '113', '114', '115', '119', '121', '123', '207', '208', '212', '220', '222', '228', '231', '232', '234']

Oldest Patient age: 89
Youngest Patient age: 23
Average Patient age: 63.67391304347826


In [55]:
# CHANNEL LIST
# Extract Channel name
temp_chname = list()
for path in records[:]:
    record = wf.rdsamp(path)
    for i in range(0,len(record[1]['sig_name'])):
        temp_chname.append(record[1]['sig_name'][1])
    
temp_chname = list(set(temp_chname)) #distinct
print("Channel List: ")
print(temp_chname)

Channel List: 
['V4', 'V5', 'V1', 'V2', 'MLII']


In [56]:
# Categorize patient based on ECG channel

channel_patient = list()
for i in range(0,len(temp_chname)):
    channel_patient.append(list())
    

totalChan = len(temp_chname);
for path in records[:]:
    record = wf.rdsamp(path)
    for i in range(0,totalChan):
        #print(str(record[1]['sig_name'][0])+' '+temp_chname[i])
        if(temp_chname[i] == record[1]['sig_name'][0]):
            channel_patient[i].append(path[-3:])
        if(temp_chname[i] == record[1]['sig_name'][1]):
            channel_patient[i].append(path[-3:])

total_check = 0;
for i in range(0,totalChan):
    print(temp_chname[i])
    print(channel_patient[i])
    print("Total Patient that have channel {}: {}".format(temp_chname[i],str(len(channel_patient[i]))))
    total_check += len(channel_patient[i])
    print()

print("Total patient :"+str(total_check/2)) # 1 patient have 2 channel
print("Total patient * Total Channel :"+str(total_check)) # 1 patient have 2 channel

V4
['124']
Total Patient that have channel V4: 1

V5
['100', '102', '104', '114', '123']
Total Patient that have channel V5: 5

V1
['101', '105', '106', '107', '108', '109', '111', '112', '113', '115', '116', '118', '119', '121', '122', '200', '201', '202', '203', '205', '207', '208', '209', '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230', '231', '232', '233', '234']
Total Patient that have channel V1: 40

V2
['102', '103', '104', '117']
Total Patient that have channel V2: 4

MLII
['100', '101', '103', '105', '106', '107', '108', '109', '111', '112', '113', '114', '115', '116', '117', '118', '119', '121', '122', '123', '124', '200', '201', '202', '203', '205', '207', '208', '209', '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230', '231', '232', '233', '234']
Total Patient that have channel MLII: 46

Total patient :48.0
Total patient * Total Channel :96


In [57]:
# categorize patient with channel divide between patient 100 and 200
channel_patient_100 = list() # patient with code 100-199
for i in range(0,len(temp_chname)):
    channel_patient_100 .append(list())
channel_patient_200 = list() # patient with code 200-299
for i in range(0,len(temp_chname)):
    channel_patient_200.append(list())
    

totalChan = len(temp_chname);
for path in records[:]:
    record = wf.rdsamp(path)
    for i in range(0,totalChan):
        if(int(path[-3:]) >= 200):
                        #print(str(record[1]['sig_name'][0])+' '+temp_chname[i])
            if(temp_chname[i] == record[1]['sig_name'][0]):
                channel_patient_200[i].append(path[-3:])
            if(temp_chname[i] == record[1]['sig_name'][1]):
                channel_patient_200[i].append(path[-3:])
        else:  
            #print(str(record[1]['sig_name'][0])+' '+temp_chname[i])
            if(temp_chname[i] == record[1]['sig_name'][0]):
                channel_patient_100[i].append(path[-3:])
            if(temp_chname[i] == record[1]['sig_name'][1]):
                channel_patient_100[i].append(path[-3:])

total_check = 0;
for i in range(0,totalChan):
    print("#### "+temp_chname[i])
    print("= id 100 = | Count patient in this Channel: "+str(len(channel_patient_100[i])))
    print(channel_patient_100[i])
    print()
    total_check += len(channel_patient_100[i])
    print("= id 200 = | Count patient in this Channel: "+str(len(channel_patient_200[i])))
    print(channel_patient_200[i])
    
    total_check += len(channel_patient_200[i])
    print()
print("Total patient : "+str(total_check/2)) # 1 patient have 2 channel
print("Total patient x Total Channel :"+str(total_check)) # 1 patient have 2 channel


#### V4
= id 100 = | Count patient in this Channel: 1
['124']

= id 200 = | Count patient in this Channel: 0
[]

#### V5
= id 100 = | Count patient in this Channel: 5
['100', '102', '104', '114', '123']

= id 200 = | Count patient in this Channel: 0
[]

#### V1
= id 100 = | Count patient in this Channel: 15
['101', '105', '106', '107', '108', '109', '111', '112', '113', '115', '116', '118', '119', '121', '122']

= id 200 = | Count patient in this Channel: 25
['200', '201', '202', '203', '205', '207', '208', '209', '210', '212', '213', '214', '215', '217', '219', '220', '221', '222', '223', '228', '230', '231', '232', '233', '234']

#### V2
= id 100 = | Count patient in this Channel: 4
['102', '103', '104', '117']

= id 200 = | Count patient in this Channel: 0
[]

#### MLII
= id 100 = | Count patient in this Channel: 21
['100', '101', '103', '105', '106', '107', '108', '109', '111', '112', '113', '114', '115', '116', '117', '118', '119', '121', '122', '123', '124']

= id 200 = | Count p