In [None]:
# imports

import h5py
import csv
import numpy as np
import os
import pandas as pd
import scipy


In [None]:
# Build a single table holding every exam together with its label

# Path of the first HDF5 part (per the original note: exam_id and tracings)
filename = "./data/exams_part0.hdf5"

# Columns present in both CSV tables; they form the join key
lista = ['exam_id', 'patient_id']

# Label table (per the original note: exam_id, patient_id and label)
df_sample1 = pd.read_csv("./data/code15_chagas_labels.csv")

# Main exams table
df_sample2 = pd.read_csv("./data/exams.csv")

# Left join on the shared key: every row of exams.csv is kept, and the
# columns of the label table are attached wherever the key matches
df_master = df_sample2.merge(df_sample1, how='left', on=lista)

# Persist the merged table (without the pandas index column)
df_master.to_csv('data/exams_labels.csv', index=False)

In [4]:
#Saving important information in a dictionary

import h5py
import csv
import numpy as np
import os
import pandas as pd
import scipy

# Load the merged exam/label table (a CSV file, not HDF5).
df_master = pd.read_csv('data/exams_labels.csv')


def _parse_chagas_label(raw):
    """Convert the textual ``chagas`` CSV field into a bool.

    ``csv.DictReader`` yields every field as a string, and ``bool()`` of any
    non-empty string (including ``"False"``) is ``True``, so the text has to
    be interpreted explicitly.

    Args:
        raw: the field value as read from the CSV (a string, or None).

    Returns:
        True or False for a recognised token, None when the field is empty
        (no label available).

    Raises:
        ValueError: if the field holds any other non-empty value.
    """
    token = (raw or "").strip().lower()
    if token in ("true", "1", "1.0"):
        return True
    if token in ("false", "0", "0.0"):
        return False
    if token in ("", "nan"):
        return None
    raise ValueError(f"Unrecognised chagas label: {raw!r}")


# Populating the dictionary exam_ids_to_chagas: exam_id (int) -> label (bool)
exam_ids_to_chagas = dict()
unlabelled_exams = 0

# NOTE(review): the labels are read from data/merged.csv, while the merge cell
# writes data/exams_labels.csv (loaded into df_master above) -- confirm which
# file is the intended source.
# newline="" is what the csv module documents for files handed to a reader.
with open("data/merged.csv", newline="") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        label = _parse_chagas_label(row['chagas'])
        if label is None:
            # No label for this exam: leave it out rather than store a
            # made-up negative.
            unlabelled_exams += 1
            continue
        exam_id = int(row['exam_id'])
        exam_ids_to_chagas[exam_id] = label

if unlabelled_exams:
    print("Skipped exams without a chagas label: ", unlabelled_exams)

# Candidate HDF5 parts to read; parts missing on disk are skipped below
files = [f"./data/exams_part{i}.hdf5" for i in range(0, 18)]

# Length expected along the first axis of every tracing
EXPECTED_NUM_SAMPLES = 4096

# Populating the dictionary exam_ids_and_signals: exam_id (int) -> tracing
exam_ids_and_signals = dict()

# Becomes True once the first matching exam has been printed as a sample
done = False

for filename in files:
    # If the file does not exist, skip it
    if not os.path.exists(filename):
        continue

    print("Reading file: ", filename)

    with h5py.File(filename, "r") as ecgs:
        # Read all ids with one slice instead of iterating the dataset
        # element by element.
        exam_ids = ecgs['exam_id'][:]
        tracings = ecgs['tracings']

        for i, exam_id in enumerate(exam_ids):
            # Plain int keys, the same type used by exam_ids_to_chagas
            exam_id = int(exam_id)

            if exam_id not in exam_ids_to_chagas:
                continue

            # Every index into an h5py dataset is a separate read, so fetch
            # the tracing once and reuse the in-memory array.
            signal = tracings[i]
            exam_ids_and_signals[exam_id] = signal

            if signal.shape[0] != EXPECTED_NUM_SAMPLES:
                print(
                    f"Warning: exam {exam_id} has {signal.shape[0]} samples, "
                    f"expected {EXPECTED_NUM_SAMPLES}"
                )
            if not done:
                print("Exam id: ", exam_id)
                print("Signal: ", signal)
                done = True

# At this point:
# exam_ids_to_chagas maps the exam id to the Chagas disease label
# exam_ids_and_signals maps the exam id to the ECG signal


Reading file:  ./data/exams_part0.hdf5
Exam id:  590673
Signal:  [[-4.8781034e-01 -2.6677129e-01  2.2103906e-01 ... -1.1737937e+00
  -5.5640870e-01 -4.8781034e-01]
 [-4.8106578e-01 -2.6019633e-01  2.2086945e-01 ... -1.1674997e+00
  -5.5151391e-01 -4.8232922e-01]
 [-4.7979322e-01 -2.5791711e-01  2.2187607e-01 ... -1.1599243e+00
  -5.4229915e-01 -4.7729760e-01]
 ...
 [-1.3724924e+00 -1.2811700e+00  9.1322273e-02 ... -8.7824506e-01
  -5.0396240e-01  8.5938675e-04]
 [-1.3667058e+00 -1.2758980e+00  9.0807825e-02 ... -8.6861408e-01
  -4.9709535e-01  1.6935735e-03]
 [-1.3614640e+00 -1.2694675e+00  9.1996796e-02 ... -8.5737538e-01
  -4.8235169e-01  1.0566236e-02]]
Reading file:  ./data/exams_part1.hdf5
Reading file:  ./data/exams_part2.hdf5


In [6]:
# Bandpass filter

def butter_bandpass_filter(data, lowcut, highcut, fs, order, axis=0):
    """Apply a Butterworth band-pass filter forward and backward.

    The filter is designed with ``scipy.signal.butter`` and applied with
    ``scipy.signal.filtfilt``, i.e. once in each direction.

    Args:
        data: array-like signal(s) to filter.
        lowcut: lower cutoff frequency, in the same units as ``fs``.
        highcut: upper cutoff frequency, in the same units as ``fs``.
        fs: sampling frequency of ``data``.
        order: order passed to ``scipy.signal.butter``.
        axis: axis of ``data`` along which to filter (default 0, the axis
            this function has always used).

    Returns:
        The filtered signal, with the same shape as ``data``.

    Raises:
        ValueError: if the cutoffs do not satisfy
            ``0 < lowcut < highcut < fs / 2``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not load
    # ``scipy.signal`` on every SciPy release.
    from scipy import signal as sp_signal

    nyquist_freq = 0.5 * fs
    if not 0 < lowcut < highcut < nyquist_freq:
        raise ValueError(
            "cutoff frequencies must satisfy 0 < lowcut < highcut < fs/2 "
            f"(got lowcut={lowcut}, highcut={highcut}, fs={fs})"
        )

    # butter() expects the cutoffs normalised by the Nyquist frequency
    low = lowcut / nyquist_freq
    high = highcut / nyquist_freq

    b, a = sp_signal.butter(order, [low, high], btype='band')

    return sp_signal.filtfilt(b, a, data, axis=axis)

# Band-pass settings shared by every tracing.
# NOTE(review): presumably all three values are in Hz -- confirm against the
# dataset documentation.
fs = 400
lowcut = 0.5
highcut = 40

# exam_id -> filtered tracing
filtered_signals = {}
# Flips to True after the first original/filtered pair has been printed
done = False
for exam_id in exam_ids_and_signals:
    signal = exam_ids_and_signals[exam_id]
    filtered_signal = butter_bandpass_filter(signal, lowcut, highcut, fs,3)
    filtered_signals[exam_id] = filtered_signal
    if done:
        continue
    # Show one before/after pair as a sanity check
    print("Original signal: ", signal)
    print("Filtered signal: ", filtered_signal)
    done = True

print("Filtering done")
print(len(filtered_signals))

Original signal:  [[-4.8781034e-01 -2.6677129e-01  2.2103906e-01 ... -1.1737937e+00
  -5.5640870e-01 -4.8781034e-01]
 [-4.8106578e-01 -2.6019633e-01  2.2086945e-01 ... -1.1674997e+00
  -5.5151391e-01 -4.8232922e-01]
 [-4.7979322e-01 -2.5791711e-01  2.2187607e-01 ... -1.1599243e+00
  -5.4229915e-01 -4.7729760e-01]
 ...
 [-1.3724924e+00 -1.2811700e+00  9.1322273e-02 ... -8.7824506e-01
  -5.0396240e-01  8.5938675e-04]
 [-1.3667058e+00 -1.2758980e+00  9.0807825e-02 ... -8.6861408e-01
  -4.9709535e-01  1.6935735e-03]
 [-1.3614640e+00 -1.2694675e+00  9.1996796e-02 ... -8.5737538e-01
  -4.8235169e-01  1.0566236e-02]]
Filtered signal:  [[-0.05895127 -0.05173474  0.00721654 ... -0.01504389 -0.02809456
  -0.03090328]
 [-0.05610953 -0.05003294  0.0060766  ... -0.01019733 -0.02361475
  -0.02823902]
 [-0.05325384 -0.04921234  0.00404151 ... -0.00642652 -0.0201346
  -0.02633387]
 ...
 [-0.02281117 -0.10181606 -0.07900522 ... -0.0950885  -0.1037142
  -0.11374026]
 [-0.02243046 -0.09437048 -0.07194026