In [1]:
import numpy as np
import pandas as pd
import sys
import wfdb
import matplotlib.pyplot as plt
import scipy
from collections import Counter
%matplotlib notebook

## challenge data

In [19]:
fname = 'A00001'

In [20]:
record = wfdb.rdsamp('data/training2017/{}'.format(fname))

In [13]:
record

(array([[ 0.056],
        [ 0.073],
        [ 0.085],
        ...,
        [-0.064],
        [-0.036],
        [-0.02 ]]),
 {'comments': [],
  'fs': 300,
  'n_sig': 1,
  'sig_len': 18000,
  'sig_name': ['ECG'],
  'units': ['mV']})

In [27]:
record[1]

{'comments': [],
 'fs': 300,
 'n_sig': 1,
 'sig_len': 2714,
 'sig_name': ['ECG'],
 'units': ['mV']}

In [11]:
record[0]

array([[-0.127],
       [-0.162],
       [-0.197],
       ...,
       [-0.018],
       [-0.022],
       [-0.021]])

In [14]:
plt.plot(record[0]);

<IPython.core.display.Javascript object>

In [24]:
# Load the same record as a wfdb Record object (rdrecord) so that its digital
# samples can be compared with the physical signal returned by rdsamp.
# The path uses the same 'data/training2017' directory as the rdsamp call.
record2 = wfdb.rdrecord('data/training2017/{}'.format(fname))

In [39]:
# Element-wise check that the digital samples, scaled by 1/1000, reproduce the
# physical signal from rdsamp (1000 is presumably the ADC gain -- TODO confirm).
# np.isclose replaces exact == so floating-point rounding cannot produce
# spurious mismatches; the result is still a boolean array shaped like record[0].
idx = np.isclose(record2.adc() / 1000, record[0])

In [43]:
# Sample indices 0..N-1, sized from the comparison mask rather than a
# hard-coded 9000 so the check also works for records of other lengths.
foo = np.arange(idx.size)

In [50]:
# Select the entries of foo where the flattened mask is False, i.e. where the
# two signals disagree; an empty result means they agree everywhere.
foo[~idx.reshape(-1)]

array([], dtype=int32)

### sanity check

In [33]:
# Sanity-check every training record listed in RECORDS: print any header field
# that deviates from the expected value and collect the signal lengths in lenLst.
lenLst = []
with open('data/training2017/RECORDS', 'r') as f:
    for fName in f:
        fName = fName.strip()
        if not fName:
            # Skip blank lines (e.g. a trailing newline) instead of asking wfdb
            # to read a record with an empty name.
            continue
        # Unpack into dedicated names so the notebook-level `record` is not
        # overwritten and no name is bound first to a tuple and then to a dict.
        samples, fields = wfdb.rdsamp('data/training2017/{}'.format(fName))
        l = len(samples)
        if fields['fs'] != 300:
            print(fields['fs'])
        if fields['n_sig'] != 1:
            print(fields['n_sig'])
        # The header's declared length must match the number of samples read.
        if fields['sig_len'] != l:
            print(fields['sig_len'], l)
        lenLst.append(fields['sig_len'])
        if fields['sig_name'] != ['ECG']:
            print(fields['sig_name'])
        if fields['units'] != ['mV']:
            print(fields['units'])

In [34]:
len(lenLst)

8528

In [35]:
min(lenLst)

2714

In [36]:
max(lenLst)

18286

In [50]:
# Histogram of the record lengths collected in varLst.
# NOTE(review): varLst is only built in a cell further down the notebook, so on
# a fresh kernel this cell must run after that one.
plt.hist(varLst);

<IPython.core.display.Javascript object>

In [51]:
varLst

[5570,
 6102,
 11520,
 8694,
 14936,
 17800,
 6542,
 6982,
 14054,
 8141,
 17574,
 3002,
 18170,
 5048,
 7028,
 3760,
 5144,
 17020,
 3028,
 6166,
 3238,
 5866,
 4568,
 6174,
 3306,
 8802,
 5140,
 4072,
 5288,
 13110,
 7240,
 5878,
 5288,
 6054,
 17990,
 12696,
 3742,
 8606,
 8158,
 6878,
 6206,
 4562,
 5478,
 8722,
 6986,
 6016,
 3106,
 5826,
 4414,
 6688,
 17130,
 3898,
 4038,
 3020,
 6344,
 6300,
 15492,
 8162,
 8686,
 7064,
 4492,
 3844,
 3264,
 8326,
 3972,
 12636,
 4622,
 5264,
 5012,
 5544,
 7586,
 7256,
 6230,
 4198,
 3998,
 3846,
 12310,
 9592,
 7800,
 8636,
 8806,
 17390,
 5322,
 3776,
 3044,
 5056,
 2996,
 4220,
 3536,
 5718,
 8172,
 4702,
 5472,
 4130,
 7682,
 6150,
 3858,
 17992,
 5810,
 5520,
 11648,
 3494,
 7934,
 4972,
 9006,
 6716,
 5946,
 15266,
 3964,
 5912,
 4692,
 9508,
 7056,
 17862,
 6140,
 9020,
 3978,
 8288,
 4468,
 10420,
 4028,
 6114,
 4580,
 3256,
 5074,
 7634,
 3560,
 3690,
 5702,
 3682,
 2868,
 10052,
 3038,
 7528,
 4116,
 16714,
 4448,
 5466,
 3846,
 3178

In [46]:
# Tally the two most common record lengths and keep every other length in varLst.
# At the 300 Hz rate checked in the sanity loop, 9000 and 18000 samples
# correspond to 30 s and 60 s recordings.
ctr = lenLst.count(9000)
ctr2 = lenLst.count(18000)
varLst = [n for n in lenLst if n != 9000 and n != 18000]

In [49]:
(ctr,ctr2)

(5977, 809)

In [52]:
len(varLst)

1742

## MIT vent data

In [34]:
record = wfdb.rdsamp('data/418')

In [5]:
record

(array([[-0.64 , -0.07 ],
        [-0.91 , -0.065],
        [-0.935, -0.06 ],
        ...,
        [-0.495, -0.245],
        [-0.58 , -0.295],
        [-0.68 , -0.325]]),
 {'comments': [],
  'fs': 250,
  'n_sig': 2,
  'sig_len': 525000,
  'sig_name': ['ECG', 'ECG'],
  'units': ['mV', 'mV']})

In [35]:
# Plot the first 2000 samples of every channel on one wide figure.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(record[0][:2000, :]);

<IPython.core.display.Javascript object>

In [41]:
ann = wfdb.rdann('data/418','atr')

In [42]:
vars(ann)

{'ann_len': 121,
 'aux_note': ['(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00',
  '(VFL\x00',
  '(N\x00

In [26]:
len(ann.symbol)

121

In [11]:
wfdb.plot_items(record[0])

<IPython.core.display.Javascript object>

In [15]:
record = wfdb.rdrecord('data/418')

In [16]:
wfdb.plot_wfdb(record,ann,figsize=(10,4))

<IPython.core.display.Javascript object>

In [14]:
ann.sample

array([    18,  99624, 101499, 133092, 134038, 135775, 136628, 153057,
       154115, 154942, 156291, 159442, 160516, 169192, 169807, 173054,
       173673, 174788, 175403, 176259, 177868, 190080, 191249, 191807,
       192695, 195631, 196794, 200211, 200634, 216788, 219038, 219961,
       224019, 225355, 226057, 227211, 229269, 231310, 232724, 234499,
       235538, 254230, 255365, 256019, 256884, 257249, 257980, 259557,
       261903, 262749, 263519, 269307, 270999, 271326, 271596, 272057,
       272384, 273673, 275846, 279576, 281384, 289384, 291211, 300480,
       301076, 301525, 301711, 302230, 302538, 302871, 303346, 303641,
       304634, 311442, 311807, 312096, 313480, 313826, 314173, 314493,
       316807, 317237, 317480, 318682, 319769, 327480, 329038, 329365,
       329749, 333826, 334211, 339769, 340019, 346634, 347038, 347403,
       347730, 357634, 358403, 362711, 363115, 367749, 368134, 368583,
       369211, 370153, 370557, 370971, 371653, 372041, 372692, 373076,
      

In [18]:
ann.num

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [26]:
ann.chan

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## MIT base data

In [44]:
fLst = wfdb.get_record_list('mitdb')

In [70]:
# Accumulate how often each annotation symbol and each aux note occurs across
# the 'atr' annotations of every record in fLst.
symCtr, auxCtr = Counter(), Counter()
for fName in fLst:
    ann = wfdb.rdann('data/mit/' + fName,'atr')
    symCtr.update(ann.symbol)
    # Strip NUL characters from both ends of each note before counting.
    auxCtr.update(note.strip('\x00') for note in ann.aux_note)
print(symCtr, '\n', auxCtr)

Counter({'N': 75052, 'L': 8075, 'R': 7259, 'V': 7130, '/': 7028, 'A': 2546, '+': 1291, 'f': 982, 'F': 803, '~': 616, '!': 472, '"': 437, 'j': 229, 'x': 193, 'a': 150, '|': 132, 'E': 106, 'J': 83, 'Q': 33, 'e': 16, '[': 6, ']': 6, 'S': 2}) 
 Counter({'': 110919, '(N': 530, 'MISSB': 428, '(B': 221, '(AFIB': 107, '(PREX': 103, '(T': 83, '(VT': 61, '(P': 60, '(AFL': 45, '(NOD': 36, '(SVTA': 26, '(VFL': 6, 'TS': 6, '(BII': 5, '(IVR': 4, 'PSE': 3, '(AB': 3, '(SBR': 1})


In [69]:
'(AFIB\x00'.strip('\x00')

'(AFIB'

In [52]:
sum(symCtr.values())

112647

In [53]:
sum(auxCtr.values())

112647

In [67]:
# Sanity-check every record in fLst: print any header field that deviates from
# the expected value, then summarise signal lengths and lead-name combinations.
lenLst = []
sigLst = []
for fName in fLst:
    # Unpack into dedicated names so the notebook-level `record` is not
    # overwritten and no name is bound first to a tuple and then to a dict.
    samples, fields = wfdb.rdsamp('data/mit/' + fName)
    l = len(samples)
    if fields['fs'] != 360:
        print(fields['fs'])
    if fields['n_sig'] != 2:
        print(fields['n_sig'])
    # The header's declared length must match the number of samples read.
    if fields['sig_len'] != l:
        print(fields['sig_len'], l)
    lenLst.append(fields['sig_len'])
    # Lists are unhashable, so the lead names are stringified to serve as Counter keys.
    sigLst.append(str(fields['sig_name']))
    if fields['units'] != ['mV', 'mV']:
        print(fields['units'])
lenCtr = Counter(lenLst)
sigCtr = Counter(sigLst)
print(lenCtr, '\n', sigCtr)

Counter({650000: 48}) 
 Counter({"['MLII', 'V1']": 40, "['MLII', 'V5']": 2, "['V5', 'V2']": 2, "['MLII', 'V2']": 2, "['V5', 'MLII']": 1, "['MLII', 'V4']": 1})


In [65]:
str(['MLII', 'V5'])

"['MLII', 'V5']"

In [62]:
# NOTE(review): this cell raises AttributeError -- per the recorded output, foo
# was a Counter here and Counter has no .add() method. To count an element use
# foo.update([1]) or foo[1] += 1 instead.
foo.add(1)

AttributeError: 'Counter' object has no attribute 'add'

# downloading

In [28]:
wfdb.get_dbs()

[['adfecgdb', 'Abdominal and Direct Fetal ECG Database'],
 ['aftdb', 'AF Termination Challenge Database'],
 ['ahadb', 'AHA Database [sample excluded record]'],
 ['aami-ec13', 'ANSI/AAMI EC13 Test Waveforms'],
 ['apnea-ecg', 'Apnea-ECG Database'],
 ['chfdb', 'BIDMC Congestive Heart Failure Database'],
 ['bpssrat', 'Blood Pressure in Salt-Sensitive Dahl Rats'],
 ['capslpdb', 'CAP Sleep Database'],
 ['crisdb', 'CAST RR Interval Sub-Study Database'],
 ['challenge/2009/test-set-a', 'Challenge 2009 Test Set A'],
 ['challenge/2009/test-set-b', 'Challenge 2009 Test Set B'],
 ['challenge/2010/set-a', 'Challenge 2010 Training Set A'],
 ['challenge/2010/set-b', 'Challenge 2010 Test Set B'],
 ['challenge/2010/set-c', 'Challenge 2010 Test Set C'],
 ['challenge/2011/set-a', 'Challenge 2011 Training Set A'],
 ['challenge/2011/set-b', 'Challenge 2011 Test Set B'],
 ['challenge/2011/sim', 'Challenge 2011 Pilot Set'],
 ['challenge/2013/set-a', 'Challenge 2013 Training Set A'],
 ['challenge/2013/set-b', 

In [75]:
recLst=wfdb.get_record_list('afdb')
recLst

['00735',
 '03665',
 '04015',
 '04043',
 '04048',
 '04126',
 '04746',
 '04908',
 '04936',
 '05091',
 '05121',
 '05261',
 '06426',
 '06453',
 '06995',
 '07162',
 '07859',
 '07879',
 '07910',
 '08215',
 '08219',
 '08378',
 '08405',
 '08434',
 '08455']

In [76]:
recLst[:5]

['00735', '03665', '04015', '04043', '04048']

In [86]:
# Download a subset of the afdb records into the project-relative directory
# that the following cells read from ('data/mitaf'), instead of a
# machine-specific absolute path that points at a different folder.
wfdb.dl_database('afdb', 'data/mitaf', records=recLst[2:6])

Downloading files...
Finished downloading files


In [34]:
pwd

'C:\\Users\\pierrecurie\\Documents\\galvanize\\capstone'

In [87]:
# Read record 04015 together with its 'atr' annotations and plot them in one figure.
afPath = 'data/mitaf/04015'
record = wfdb.rdrecord(afPath)
ann = wfdb.rdann(afPath, 'atr')
wfdb.plot_wfdb(record, ann, figsize=(10, 4))

<IPython.core.display.Javascript object>

In [88]:
vars(ann)

{'ann_len': 15,
 'aux_note': ['(N',
  '(AFIB',
  '(N',
  '(AFIB',
  '(N',
  '(AFIB',
  '(N',
  '(AFIB',
  '(N',
  '(AFIB',
  '(N',
  '(AFIB',
  '(N',
  '(AFIB',
  '(N'],
 'chan': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'contained_labels': None,
 'custom_labels': None,
 'description': None,
 'extension': 'atr',
 'fs': 250,
 'label_store': None,
 'num': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'record_name': '04015',
 'sample': array([     30,  102584,  119604,  121773,  122194,  133348,  166857,
        1096245, 1098054, 1135296, 1139595, 1422436, 1423548, 1459277,
        1460416]),
 'subtype': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
 'symbol': ['+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+',
  '+']}

In [11]:
len(ann.sample)

2274

In [12]:
(ann.subtype!=0).sum()

1

In [33]:
# Show symbol, aux note and sample position of every annotation with a non-zero subtype.
mask = ann.subtype != 0
symbols = np.array(ann.symbol)
notes = np.array(ann.aux_note)
print(symbols[mask], notes[mask], ann.sample[mask])

['V'] [''] [546792]


In [31]:
ctr = Counter(ann.symbol)
ctr

Counter({'+': 1, 'A': 33, 'N': 2239, 'V': 1})

In [32]:
ctr2 = Counter(ann.aux_note)
ctr2

Counter({'': 2273, '(N\x00': 1})

In [47]:
# Start from an empty Counter and fold the aux-note counts of ctr2 into it.
ctr3 = Counter()
ctr3.update(ctr2)
ctr3

Counter({'': 9092, '(N\x00': 4})

### xqrs

In [15]:
from wfdb import processing

In [21]:
# Set up an XQRS detector on channel 0 of `record`, taking the sampling rate
# from the 'fs' entry of its header dict.
# NOTE(review): record[0] / record[1] indexing needs the (signal, fields) tuple
# returned by wfdb.rdsamp, but the nearest preceding assignment binds `record`
# to wfdb.rdrecord(...). Confirm which record is intended and reload it with
# rdsamp before this cell so the notebook runs top to bottom.
xqrs = processing.XQRS(record[0][:,0], record[1]['fs'])

In [22]:
xqrs.detect()

Learning initial signal parameters...
Failed to find 8 beats during learning.
Initializing using default parameters
Running QRS detection...
QRS detection complete.


In [23]:
wfdb.plot_items(record[0][:,0], ann_samp=[xqrs.qrs_inds])

<IPython.core.display.Javascript object>

In [42]:
# Plot channel 0 with dummy annotation markers every 10 samples over [0, 200).
wfdb.plot_items(record[0][:,0], ann_samp=[np.arange(0, 200, 10)])

<IPython.core.display.Javascript object>

In [52]:
wfdb.plot_items(record[0][:,0], ann_samp=[xqrs.peak_inds_i])

<IPython.core.display.Javascript object>

In [55]:
xqrs.qrs_inds

array([ 127,  342,  560,  797, 1040, 1271, 1510, 1754, 1995, 2229, 2470,
       2713, 2951, 3189, 3432, 3679, 3914, 4139, 4370, 4598, 4826, 5044,
       5259, 5484, 5711, 5942, 6164, 6385, 6607, 6827, 7039, 7258, 7481,
       7697, 7909, 8128, 8355, 8586, 8810])

In [57]:
# Spacing, in samples, between consecutive detected QRS locations.
idx = xqrs.qrs_inds
idx2 = np.diff(idx)
idx2

array([215, 218, 237, 243, 231, 239, 244, 241, 234, 241, 243, 238, 238,
       243, 247, 235, 225, 231, 228, 228, 218, 215, 225, 227, 231, 222,
       221, 222, 220, 212, 219, 223, 216, 212, 219, 227, 231, 224])

In [58]:
idx2.mean()

228.5

In [61]:
record[0][idx].reshape(-1)

array([0.593, 0.865, 0.735, 0.93 , 0.881, 0.79 , 0.933, 0.925, 0.874,
       0.464, 0.693, 0.901, 0.851, 0.844, 0.843, 0.869, 0.781, 0.738,
       0.773, 0.803, 0.882, 0.762, 0.805, 0.812, 0.946, 0.813, 0.764,
       0.861, 0.883, 0.8  , 0.811, 0.907, 0.867, 0.783, 0.805, 0.864,
       0.829, 0.942, 0.79 ])

In [66]:
# Repeat the detection on the sign-flipped signal and keep the resulting QRS
# indices in idx3 for comparison with idx from the original polarity.
xqrs = processing.XQRS(-record[0][:,0], record[1]['fs'])
xqrs.detect()
idx3 = xqrs.qrs_inds

Learning initial signal parameters...
Failed to find 8 beats during learning.
Initializing using default parameters
Running QRS detection...
QRS detection complete.


In [68]:
idx-idx3

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
# XQRS detection is insensitive to signal polarity: negating the signal yields identical QRS indices (idx - idx3 is all zeros).

In [70]:
processing.xqrs_detect(record[0][:,0], record[1]['fs'])-idx

Learning initial signal parameters...
Failed to find 8 beats during learning.
Initializing using default parameters
Running QRS detection...
QRS detection complete.


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

# Misc

In [62]:
from scipy.signal import stft, istft

In [63]:
# Seed NumPy's global RNG so the toy signal -- and every STFT result derived
# from it in the cells below -- is identical on each run.
np.random.seed(0)
# 16 random integers drawn from [0, 10).
sig = np.random.randint(0,10,16)

In [70]:
plt.plot(sig)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1f72db2def0>]

### IO

In [225]:
# 20000 x 2000 array of random integers in [0, 10); it is the payload written
# and read back by the save/load cells that follow.
foo=np.random.randint(0,10,[20000,2000])

In [226]:
foo

array([[2, 9, 2, ..., 7, 2, 8],
       [8, 1, 2, ..., 1, 8, 9],
       [8, 9, 4, ..., 8, 8, 6],
       ...,
       [9, 6, 1, ..., 4, 9, 6],
       [9, 0, 9, ..., 7, 3, 9],
       [8, 5, 6, ..., 4, 3, 3]])

In [227]:
# Write the same array twice with np.save, once with allow_pickle disabled and
# once enabled, so the two resulting files can be compared.
# NOTE(review): np.save appends '.npy' when the name lacks it -- confirm the
# on-disk file names before loading them elsewhere.
np.save('np no pck', foo, allow_pickle=False, fix_imports=False)
np.save('np pck', foo, allow_pickle=True, fix_imports=False)

In [219]:
import pickle

In [228]:
# Third variant: serialise the array with the standard pickle module into a
# file opened in binary write mode.
with open('pure pck', 'wb') as f:
    pickle.dump(foo,f)

In [230]:
# 'pure pck' was written with pickle.dump, so read it back with pickle.load.
# np.load rejects pickled payloads unless allow_pickle=True is passed, which
# makes the original call fail on current NumPy defaults.
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
with open('pure pck', 'rb') as f:
    bar = pickle.load(f)

In [234]:
np.abs(foo-bar).sum()

0

### STFT

In [196]:
# Short-time Fourier transform of the toy signal with an 8-sample boxcar
# (rectangular) window, 4 samples of overlap and boundary=None; only the
# transform itself is kept, the two axis arrays are discarded.
_, _, fft = stft(sig, window='boxcar', noverlap=4, nperseg=8,boundary=None)

In [197]:
fft

array([[ 3.125     +0.j        ,  2.875     +0.j        ,
         3.75      +0.j        ],
       [ 0.21338835-0.24371843j, -0.1982233 -0.21966991j,
        -0.76516504+0.10983496j],
       [-0.75      +0.625j     , -0.125     +1.j        ,
         1.125     +0.875j     ],
       [ 0.03661165-0.99371843j, -0.5517767 +1.28033009j,
        -0.23483496-0.64016504j],
       [-0.125     +0.j        , -0.125     +0.j        ,
         0.        +0.j        ]])

In [198]:
plt.plot(np.abs(fft));

<IPython.core.display.Javascript object>

In [205]:
# Invert the transform with the same window/overlap/segment settings used for
# the forward STFT; sig2 should reproduce sig up to floating-point error.
_, sig2=istft(fft, window='boxcar', noverlap=4, nperseg=8,boundary=None)

In [206]:
sig2

array([2.00000000e+00, 4.00000000e+00, 3.00000000e+00, 6.00000000e+00,
       1.00000000e+00, 5.55111512e-17, 6.00000000e+00, 3.00000000e+00,
       4.00000000e+00, 2.00000000e+00, 0.00000000e+00, 7.00000000e+00,
       8.00000000e+00, 2.00000000e+00, 3.00000000e+00, 4.00000000e+00])

In [207]:
sig

array([2, 4, 3, 6, 1, 0, 6, 3, 4, 2, 0, 7, 8, 2, 3, 4])

In [208]:
sig-sig2

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00, -5.55111512e-17,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00])

In [1]:
import sklearn

In [2]:
# NOTE(review): this cell raises AttributeError (see output) -- a bare
# `import sklearn` did not expose the linear_model submodule here; it has to be
# imported explicitly with `from sklearn import linear_model`.
sklearn.linear_model

AttributeError: module 'sklearn' has no attribute 'linear_model'

In [3]:
from sklearn import linear_model

In [4]:
linear_model

<module 'sklearn.linear_model' from 'C:\\Utility, writing software\\anaconda\\lib\\site-packages\\sklearn\\linear_model\\__init__.py'>