In [1]:
import numpy as np
import pandas as pd
import gzip

In [2]:
# compress numpy array file and export
f = gzip.GzipFile("compressed_npy.gz", "w")
ecg_arr = np.load('ecgeq-500hzsrfava.npy')
np.save(file=f, arr=ecg_arr)
f.close()

In [3]:
# read in the compressed numpy file
f = gzip.GzipFile('compressed_npy.gz', "r")
X = np.load(f)

# get 6428 layers, 700 rows and 12 columns
X = X[:,:700,:]
X.shape

(6428, 700, 12)

In [4]:
# convert 3d array to 2d array and convert it to a dataframe
m,n,r = X.shape
print(m, n, r)

6428 700 12


In [5]:
out_arr = np.column_stack((np.repeat(np.arange(m),n),X.reshape(m*n,-1)))

In [6]:
out_df = pd.DataFrame(out_arr)
out_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,0.0,-0.005,0.135,0.14,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,-0.145,-0.080
1,0.0,-0.005,0.135,0.14,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,-0.145,-0.080
2,0.0,-0.005,0.135,0.14,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,-0.145,-0.080
3,0.0,-0.005,0.135,0.14,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,-0.145,-0.080
4,0.0,-0.005,0.135,0.14,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.211,-0.146,-0.080
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4499595,6427.0,0.010,0.170,0.16,-0.090,-0.075,0.165,0.155,0.365,0.230,0.030,-0.065,-0.060
4499596,6427.0,0.014,0.174,0.16,-0.094,-0.073,0.167,0.155,0.368,0.245,0.029,-0.057,-0.056
4499597,6427.0,0.016,0.176,0.16,-0.096,-0.073,0.167,0.155,0.383,0.261,0.040,-0.052,-0.055
4499598,6427.0,0.014,0.174,0.16,-0.094,-0.073,0.167,0.155,0.406,0.282,0.059,-0.046,-0.053


In [7]:
# rename columns and drop duplicates
out_df.columns= ['index', 'I', 'II', 'III', 'aVF', 'aVR', 'aVL', 'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
out_df['index'] = out_df['index'].astype('int32')
out_df = out_df.drop_duplicates()
out_df

Unnamed: 0,index,I,II,III,aVF,aVR,aVL,V1,V2,V3,V4,V5,V6
0,0,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,-0.145,-0.080
4,0,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.211,-0.146,-0.080
6,0,-0.005,0.131,0.136,-0.063,-0.070,0.133,-0.125,-0.082,-0.102,-0.190,-0.129,-0.072
7,0,-0.005,0.130,0.135,-0.063,-0.070,0.132,-0.122,-0.077,-0.094,-0.172,-0.116,-0.067
8,0,-0.005,0.128,0.133,-0.062,-0.069,0.130,-0.119,-0.071,-0.084,-0.157,-0.102,-0.061
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4499595,6427,0.010,0.170,0.160,-0.090,-0.075,0.165,0.155,0.365,0.230,0.030,-0.065,-0.060
4499596,6427,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.368,0.245,0.029,-0.057,-0.056
4499597,6427,0.016,0.176,0.160,-0.096,-0.073,0.167,0.155,0.383,0.261,0.040,-0.052,-0.055
4499598,6427,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.406,0.282,0.059,-0.046,-0.053


In [8]:
# write to csv file
ecg_df = out_df.drop(columns=['index'])
ecg_df.to_csv('ecg-data.csv', index=False)

In [9]:
# read in csv file
df = pd.read_csv('training_13_features.csv')
label_df = df.copy()
label_df['unique_id'] = np.arange(label_df.shape[0])
label_df

Unnamed: 0,ritmi,age,sex,height,weight,nurse,site,device,heart_axis,validated_by,second_opinion,validated_by_human,pacemaker,strat_fold,unique_id
0,2,54.0,0,166.796356,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6,0
1,1,54.0,0,166.796356,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6,1
2,0,55.0,0,166.796356,69.841845,1.0,2.0,1,1.0,1.0,0,1,0.0,10,2
3,2,29.0,1,164.000000,56.000000,7.0,1.0,10,0.0,0.0,0,1,0.0,1,3
4,2,57.0,0,166.796356,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,1,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6423,2,81.0,0,178.000000,70.000000,11.0,1.0,4,1.0,0.0,0,1,0.0,4,6423
6424,2,88.0,0,152.000000,45.000000,11.0,1.0,4,0.0,0.0,0,1,0.0,10,6424
6425,0,83.0,1,166.796356,69.841845,1.0,2.0,1,1.0,1.0,0,1,0.0,5,6425
6426,1,75.0,1,177.000000,80.000000,0.0,34.0,2,0.0,2.0,0,1,0.0,7,6426


In [10]:
# merge out_df and label_df 
merged_df = pd.merge(out_df, label_df, how='inner', left_on='index', right_on='unique_id')
merged_df = merged_df.drop(columns=['index', 'unique_id'])
merged_df

Unnamed: 0,I,II,III,aVF,aVR,aVL,V1,V2,V3,V4,...,weight,nurse,site,device,heart_axis,validated_by,second_opinion,validated_by_human,pacemaker,strat_fold
0,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
1,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.211,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
2,-0.005,0.131,0.136,-0.063,-0.070,0.133,-0.125,-0.082,-0.102,-0.190,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
3,-0.005,0.130,0.135,-0.063,-0.070,0.132,-0.122,-0.077,-0.094,-0.172,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
4,-0.005,0.128,0.133,-0.062,-0.069,0.130,-0.119,-0.071,-0.084,-0.157,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4361838,0.010,0.170,0.160,-0.090,-0.075,0.165,0.155,0.365,0.230,0.030,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4361839,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.368,0.245,0.029,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4361840,0.016,0.176,0.160,-0.096,-0.073,0.167,0.155,0.383,0.261,0.040,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4361841,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.406,0.282,0.059,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8


In [11]:
# dropna and reset index
new_mdf = merged_df.dropna()
new_mdf = new_mdf.reset_index(drop=True)
new_mdf

Unnamed: 0,I,II,III,aVF,aVR,aVL,V1,V2,V3,V4,...,weight,nurse,site,device,heart_axis,validated_by,second_opinion,validated_by_human,pacemaker,strat_fold
0,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.210,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
1,-0.005,0.135,0.140,-0.065,-0.073,0.137,-0.125,-0.090,-0.110,-0.211,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
2,-0.005,0.131,0.136,-0.063,-0.070,0.133,-0.125,-0.082,-0.102,-0.190,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
3,-0.005,0.130,0.135,-0.063,-0.070,0.132,-0.122,-0.077,-0.094,-0.172,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
4,-0.005,0.128,0.133,-0.062,-0.069,0.130,-0.119,-0.071,-0.084,-0.157,...,69.841845,0.0,0.0,0,3.0,0.0,0,0,0.0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4319171,0.010,0.170,0.160,-0.090,-0.075,0.165,0.155,0.365,0.230,0.030,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4319172,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.368,0.245,0.029,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4319173,0.016,0.176,0.160,-0.096,-0.073,0.167,0.155,0.383,0.261,0.040,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8
4319174,0.014,0.174,0.160,-0.094,-0.073,0.167,0.155,0.406,0.282,0.059,...,69.841845,1.0,2.0,1,3.0,1.0,0,1,0.0,8


In [12]:
# write to csv file
new_mdf = new_mdf[['I','II','III','aVF','aVR','aVL','V1','V2','V3','V4','V5','V6','age','sex','height','weight','ritmi']]
new_mdf.to_csv('training_17_features.csv', index=False)