In [172]:
import vcg
import tools
import perturb_ecg
import numpy as np
from matplotlib import pyplot as plt
from mpl_toolkits import mplot3d
import tqdm
import pandas as pd
import random
import os
import multiprocessing as mp

In [173]:
# Show how many CPUs the OS reports; the same count sizes the worker pool
# that generates the ECG files further down in this notebook.
print(mp.cpu_count())

80


In [174]:
import seaborn as sns
sns.set()

# Three font tiers shared by every figure in this notebook.
SMALL_SIZE = 15
MEDIUM_SIZE = 20
BIGGER_SIZE = 25

# Each entry is (rc group, options) and is forwarded unchanged to plt.rc.
for rc_group, rc_options in (
    ('font', {'size': SMALL_SIZE}),           # default text size
    ('axes', {'titlesize': SMALL_SIZE}),      # axes title
    ('axes', {'labelsize': MEDIUM_SIZE}),     # x and y axis labels
    ('xtick', {'labelsize': SMALL_SIZE}),     # x tick labels
    ('ytick', {'labelsize': SMALL_SIZE}),     # y tick labels
    ('legend', {'fontsize': SMALL_SIZE}),     # legend text
    ('figure', {'titlesize': BIGGER_SIZE}),   # figure-level title
):
    plt.rc(rc_group, **rc_options)

In [175]:
import copy

In [176]:
dataset = 'ecg_large'
# Create every output folder. exist_ok makes the cell safe to re-run and also
# repairs a dataset directory that exists but is missing some sub-folders.
for subdir in ('train', 'val/train', 'val/test'):
    os.makedirs(f'./{dataset}/{subdir}', exist_ok=True)

fs = 496 # frequency
duration = 100 # seconds to solve
save_duration = 10 # seconds to save
dur = save_duration*fs # number of samples to save
upper_hr = 220
lower_hr = 20
hr_step = 0.1
draw_num = 2 # windows drawn per class
types = 8 # number of classes (original + 7 perturbations)

# One row per heart-rate step. round() instead of int() so that floating-point
# error in the division can never drop a row.
n_rates = round((upper_hr-lower_hr)/hr_step)

# Random start offsets (in samples) of the windows to save; the upper bound
# keeps start + save_duration*fs inside the solved signal.
keys = pd.DataFrame(
    np.random.randint(0, (duration-save_duration)*fs, size=(n_rates, draw_num*types)),
    columns=[f'ind{i}' for i in range(draw_num*types)],
)

# Heart rate stored as an integer in tenths (make_ecgs divides it by 10 again).
keys['hr'] = range(lower_hr*10, upper_hr*10)

# Hold out 20% of the rows (train == 0) ...
keysc = keys.sample(frac = 0.2)
keysc['train'] = 0
# ... and mark 20% of the held-out rows with val_train == 0.
val_keys = keysc.sample(frac = 0.2)
val_keys['val_train'] = 0

# Outer merges on the shared columns attach the flags to the full table;
# rows that were never sampled get NaN, which becomes flag value 1.
key = keysc.merge(val_keys, how = 'outer')
df_key = keys.merge(key, how = 'outer')
df_key = df_key.fillna(1)
df_key['train'] = df_key['train'].astype(int)
df_key['val_train'] = df_key['val_train'].astype(int)

# The merges must neither drop nor duplicate a heart rate.
assert len(df_key) == len(keys), 'merge changed the number of key rows'
df_key

Unnamed: 0,ind0,ind1,ind2,ind3,ind4,ind5,ind6,ind7,ind8,ind9,ind10,ind11,ind12,ind13,ind14,ind15
0,25323,24085,34270,28429,2967,20641,23961,30107,41482,1758,18719,34507,28645,5624,15902,30113
1,9688,25201,23687,14666,15383,12927,6473,6736,38124,10670,40728,4615,32546,37326,43054,28053
2,8318,20795,17043,22710,37093,43266,41554,4091,35002,4194,30538,4811,17768,2893,24929,8963
3,8402,30925,1331,18485,592,1242,18949,29625,26285,2890,37951,37474,8210,40064,13405,6145
4,23193,5627,40659,32412,967,39046,6195,42834,11163,30437,15928,17296,28652,11020,18611,32082
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,36894,20898,19968,37546,24655,37307,11349,3123,9254,24462,41195,12619,864,20725,33984,20825
1996,37367,20348,34958,33070,24363,8951,4764,17624,38164,30628,21226,7331,32959,34628,5009,14287
1997,11800,43752,30478,19281,11955,5261,26133,8700,28727,21918,37809,12410,15579,21635,39364,44357
1998,1422,4978,16397,37394,4927,36438,2077,2588,13541,32640,17076,13905,11527,21643,32070,42941


In [181]:
def amp_qrs(vcg_ode_original, scale=1.5) :
    """Return a copy of the model with alpha components 3-5 multiplied by `scale`.

    The input object is deep-copied and never modified. Entries 3, 4 and 5 of
    `alpha_x`, `alpha_y` and `alpha_z` are scaled (presumably the QRS terms of
    the model -- confirm against vcg.VCG). `scale` may be below 1, in which
    case the components shrink.

    Parameters
    ----------
    vcg_ode_original : object exposing indexable `alpha_x`, `alpha_y`, `alpha_z`
    scale : multiplicative factor applied to the selected entries

    Returns
    -------
    The modified deep copy.
    """
    vcg_ode = copy.deepcopy(vcg_ode_original)

    for axis in ('alpha_x', 'alpha_y', 'alpha_z'):
        alpha = getattr(vcg_ode, axis)
        for idx in (3, 4, 5):
            alpha[idx] *= scale
        # Write back explicitly so this also works if the attribute is a
        # property that hands out a copy.
        setattr(vcg_ode, axis, alpha)

    return vcg_ode

In [182]:
def amp_pwave1(vcg_ode_original, scale=25) :
    """Return a copy of the model with `alpha_x[0]` multiplied by `scale`.

    The input object is deep-copied and never modified. Only the first entry
    of `alpha_x` changes (presumably a P-wave term of the model -- confirm
    against vcg.VCG); `alpha_y` and `alpha_z` are left as they are.

    Parameters
    ----------
    vcg_ode_original : object exposing an indexable `alpha_x`
    scale : multiplicative factor applied to `alpha_x[0]`

    Returns
    -------
    The modified deep copy.
    """
    vcg_ode = copy.deepcopy(vcg_ode_original)

    alpha_x = vcg_ode.alpha_x
    alpha_x[0] *= scale
    # Write back explicitly so this also works if the attribute is a property
    # that hands out a copy.
    vcg_ode.alpha_x = alpha_x

    return vcg_ode

In [183]:
def amp_pwave2(vcg_ode_original, scale=25) :
    """Return a copy of the model with entry 0 of every alpha array scaled.

    The input object is deep-copied and never modified. `alpha_x[0]`,
    `alpha_y[0]` and `alpha_z[0]` are each multiplied by `scale` (presumably
    the P-wave term on all three axes -- confirm against vcg.VCG).

    Parameters
    ----------
    vcg_ode_original : object exposing indexable `alpha_x`, `alpha_y`, `alpha_z`
    scale : multiplicative factor applied to entry 0 of each array

    Returns
    -------
    The modified deep copy.
    """
    vcg_ode = copy.deepcopy(vcg_ode_original)

    for axis in ('alpha_x', 'alpha_y', 'alpha_z'):
        alpha = getattr(vcg_ode, axis)
        alpha[0] *= scale
        # Write back explicitly so this also works if the attribute is a
        # property that hands out a copy.
        setattr(vcg_ode, axis, alpha)

    return vcg_ode

In [184]:
def amp_twave(vcg_ode_original, scale=5) :
    """Return a copy of the model with `alpha_x[8]` multiplied by `scale`.

    The input object is deep-copied and never modified. Only entry 8 of
    `alpha_x` changes (presumably a T-wave term of the model -- confirm
    against vcg.VCG); `alpha_y` and `alpha_z` are left as they are.

    Parameters
    ----------
    vcg_ode_original : object exposing an indexable `alpha_x`
    scale : multiplicative factor applied to `alpha_x[8]`

    Returns
    -------
    The modified deep copy.
    """
    vcg_ode = copy.deepcopy(vcg_ode_original)

    alpha_x = vcg_ode.alpha_x
    alpha_x[8] *= scale
    # Write back explicitly so this also works if the attribute is a property
    # that hands out a copy.
    vcg_ode.alpha_x = alpha_x

    return vcg_ode

In [185]:
def st_change(vcg_ode_original, scale=6) :
    """Return a copy of the model with entry 6 of `alpha_x` and `alpha_y` scaled.

    The input object is deep-copied and never modified. `alpha_x[6]` and
    `alpha_y[6]` are multiplied by `scale` (presumably the term that shapes
    the ST segment -- confirm against vcg.VCG); `alpha_z` is left as it is.

    Parameters
    ----------
    vcg_ode_original : object exposing indexable `alpha_x` and `alpha_y`
    scale : multiplicative factor applied to entry 6 of both arrays

    Returns
    -------
    The modified deep copy.
    """
    vcg_ode = copy.deepcopy(vcg_ode_original)

    for axis in ('alpha_x', 'alpha_y'):
        alpha = getattr(vcg_ode, axis)
        alpha[6] *= scale
        # Write back explicitly so this also works if the attribute is a
        # property that hands out a copy.
        setattr(vcg_ode, axis, alpha)

    return vcg_ode

In [186]:
def make_ecgs(row):
    """Solve and save all ECG classes for one row of the key table.

    For the heart rate in `row`, the base model and seven perturbed variants
    are solved, and two windows of `save_duration*fs` samples are cut from
    each solution and written as header-less CSV files (signal transposed).

    Parameters
    ----------
    row : sequence of ints
        row[0:16]  start offsets (samples); class ``i`` uses row[2*i] and
                   row[2*i + 1]
        row[16]    heart-rate key, passed to vcg.VCG as ``row[16] / 10``
        row[17]    1 -> files go to ``train``
        row[18]    1 -> files go to ``val/train`` (only checked when
                   row[17] != 1); otherwise ``val/test``

    Returns
    -------
    list
        ``[row[16], path]`` where `path` is the split folder that was used.

    Relies on the notebook globals `dataset`, `duration`, `fs` and
    `save_duration`.
    """
    hr_key = row[16]

    # find folder for the ecgs to go in
    if row[17] == 1:
        path = f'./{dataset}/train'
    elif row[18] == 1:
        path = f'./{dataset}/val/train'
    else:
        path = f'./{dataset}/val/test/'

    # make the base ode object
    vcg_ode = vcg.VCG(hr_key/10)

    # One factory per class label (the position in the tuple is the label).
    # The random parameters are drawn lazily, right before each solve.
    perturbations = (
        # 0: orig
        lambda base: base,
        # 1: qt_elong
        lambda base: perturb_ecg.qt_elongation(
            base, ms_forward=np.random.randint(50, 250)),
        # 2: wide_qrs
        lambda base: perturb_ecg.wide_qrs(
            base, percent_widened=np.random.randint(200, 1000),
            scaledown=(np.random.randint(10, 100)/100)),
        # 3: amp_qrs
        lambda base: amp_qrs(base, scale=(np.random.randint(8, 30)/10)),
        # 4: amp_pwave1
        lambda base: amp_pwave1(base, scale=np.random.randint(2, 50)),
        # 5: amp_pwave2
        lambda base: amp_pwave2(base, scale=np.random.randint(2, 50)),
        # 6: amp_twave
        lambda base: amp_twave(base, scale=np.random.randint(2, 10)),
        # 7: st_change
        lambda base: st_change(base, scale=np.random.randint(2, 10)),
    )

    window = save_duration*fs   # number of samples per saved file
    draws_per_class = 2         # two start offsets per class in `row`

    for label, perturb in enumerate(perturbations):
        out_dir = f'{path}/{hr_key}_{label}'
        # os.makedirs with exist_ok avoids both the missing-directory failure
        # and the check-then-create race between parallel workers.
        os.makedirs(out_dir, exist_ok=True)

        _, signal = tools.solve_vcg_object(perturb(vcg_ode), duration=duration, fs=fs)

        first = label*draws_per_class
        for start in row[first:first + draws_per_class]:
            pd.DataFrame(signal[start:start + window, :].T).to_csv(
                f'{out_dir}/{hr_key}_{label}_{start}.csv', header = False, index= False)

    print(hr_key)
    return [hr_key, path]
    
    

In [187]:
# Convert the key table into a plain list of rows (one list per row) so that
# each row can be handed to a pool worker. make_ecgs reads the values by
# position: row[0..15] as window offsets and row[16], row[17], row[18] as
# heart-rate key and split flags, so the column order of df_key matters here.
key_list = df_key.values.tolist()


In [188]:
def _reseed_worker():
    """Give a pool worker its own NumPy random state.

    Forked workers inherit the parent's global NumPy RNG state, so without
    this every worker would draw the same sequence of perturbation parameters.
    """
    np.random.seed()


# The context manager guarantees the pool is cleaned up even if a worker
# raises; pool.map only returns once every row has been processed.
with mp.Pool(mp.cpu_count(), initializer=_reseed_worker) as pool:
    results = pool.map(make_ecgs, key_list)
print('Done!')

200
207
221
214
228
249
235
242
263
256
284
270
277
298
291
319
305
326
312
333
354
368
347
340
361
375
389
382
410
403
424417

431
396
438
445
201
452
494
208
459
480
487501

508
473
515
466
222
522
215
550
564
236
536
557
543
229
529
250
243
257
571
578
271
592
599
627
264
278
585
285
620
634
648
655
613
662
676
299
669
606641

683
697
306
292
704
711
718
313
320
739
690
202
209
725
327
732
334
348
746
753355

223
216
341
362
404
376
369
237
383
390411

230
418
258
244
251
432
425
397
439
446
265
279
286
272
495
481
453
307300

460
210
203
509502

523
516
293
474
488
467
314
224
217
551
321
544
565558

328
349
238
537231

259
335
530
572
252
245
342
356
363
600
579
593
628377
586

211
266405

370
635
607412

614
273
384
656
649419

391
204
621
287
280
677
642
433663

426
308
398
670
684
301
225
218
440
698
705
447
719
712
294232

239
322
691
740
482260

253
246
315
726
496
212
454
329
461
350
510
205
733
336
747
517
503
267
475
754
489
468
357
343
274
524
281
219288

364
226
552
378


1788
1802
2057
2147
2175
2050
1995
1967
1809
2043
2009
1885
2085
2016
1851
2002
1988
1795
1912
1919
2071
2078
1926
1892
2189
1940
1947
1933
1844
1954
1872
2099
2092
2196
1823
2141
2106
1906
1865
1961
2113
2155
1837
2120
2030
2023
1858
2162
2127
2134
2169
1982
1899
2058
1879
1886
1968
2037
1975
2010
2065
1913
1996
2183
2051
1989
1941
2176
1920
2148
1927
2003
2079
2044
2072
2190
1948
2017
1955
2086
2142
1962
1893
2156
1907
2024
2093
2128
2197
1900
2121
2031
1969
1983
1934
2011
2135
2107
2038
2100
2163
1990
2170
1997
1921
1976
2059
1914
2052
1942
2066
2114
2080
2004
1928
1956
2045
1949
2149
2177
2184
2073
2018
2143
1963
2191
2157
2025
2094
2087
2198
1935
1984
2039
2012
1970
2129
1991
2032
2122
2136
2101
2171
2164
2108
2053
1998
1977
2067
2060
2115
2081
2005
2046
2074
2019
2150
2178
2144
2185
2192
2158
2026
2095
2088
2040
2199
2130
2137
2033
2102
2123
2172
2109
2165
2054
2061
2116
2068
2047
2082
2151
2186
2075
2145
2179
2193
2159
2089
2096
2131
2138
2103
2124
2110
2173
2166
2117
2194
2187


In [190]:
# Store the key table (window offsets, 'hr' and the train / val_train flags)
# inside the dataset folder; the DataFrame index is written as the first column.
df_key.to_csv(f'./{dataset}/ecg_keys.csv')