In [1]:
# List the contents of the current working directory (IPython automagic for the shell `ls`).
ls

[0m[01;34mevaluation-2022[0m/  hmd.ipynb   iy_model_test.ipynb
[01;34mhmd[0m/              hmd2.ipynb  [01;34mpython-classifier-2022[0m/


In [1]:
import sys
# Extend the module search path so that modules stored in this directory can be imported.
# Presumably this is where `helper_code` and `get_feature` live — TODO confirm.
# NOTE(review): absolute, machine-specific path; must be adjusted when run on another host.
sys.path.append('/home/jh20/narin/physionet/hmd/iy_classifier')

In [13]:
import glob
import os
import pandas as pd
import numpy as np
import scipy as sp
import scipy.stats
import librosa
import librosa.display
import math
import tensorflow as tf
from tensorflow import keras

In [3]:
from helper_code import *
from get_feature import *

In [4]:
def get_murmur(data):
    """Return the murmur label stored in a patient's header text.

    Every line of ``data`` that starts with ``#Murmur:`` is examined and the
    text following the ``': '`` separator is taken as the label. Surrounding
    whitespace (for example a trailing ``\\r`` from CRLF files) is removed so
    the result can be compared directly against label strings. If several
    matching lines exist, the last one that carries a value wins.

    Args:
        data: Full patient header as a single newline-separated string.

    Returns:
        The murmur label as a string.

    Raises:
        ValueError: If no ``#Murmur:`` line with a value is present.
    """
    murmur = None
    for line in data.split('\n'):
        if not line.startswith('#Murmur:'):
            continue
        fields = line.split(': ')
        # A bare "#Murmur:" line has no value field; ignore it instead of
        # hiding every possible error behind a blanket ``except``.
        if len(fields) > 1:
            murmur = fields[1].strip()
    if murmur is None:
        raise ValueError('No murmur available. Is your code trying to load labels from the hidden data?')
    return murmur

In [5]:
def get_outcome(data):
    """Return the clinical outcome label stored in a patient's header text.

    Every line of ``data`` that starts with ``#Outcome:`` is examined and the
    text following the ``': '`` separator is taken as the label. Surrounding
    whitespace (for example a trailing ``\\r`` from CRLF files) is removed so
    the result can be compared directly against label strings. If several
    matching lines exist, the last one that carries a value wins.

    Args:
        data: Full patient header as a single newline-separated string.

    Returns:
        The outcome label as a string.

    Raises:
        ValueError: If no ``#Outcome:`` line with a value is present.
    """
    outcome = None
    for line in data.split('\n'):
        if not line.startswith('#Outcome:'):
            continue
        fields = line.split(': ')
        # A bare "#Outcome:" line has no value field; ignore it instead of
        # hiding every possible error behind a blanket ``except``.
        if len(fields) > 1:
            outcome = fields[1].strip()
    if outcome is None:
        raise ValueError('No outcome available. Is your code trying to load labels from the hidden data?')
    return outcome

In [6]:
def get_features_3lb(patient_files_trn, data_dir=None):
    """Build per-recording features and soft labels from patient header files.

    One sample is produced for every recording location listed in each
    patient header, so a patient with four recordings contributes four rows.

    Args:
        patient_files_trn: Sequence of paths to patient header (``.txt``) files.
        data_dir: Directory that contains the recording files named in the
            headers. When omitted, the notebook-level ``data_folder`` variable
            is used, which is what the original implementation always did.

    Returns:
        A tuple ``(features, mm_labels, out_labels)``:

        * ``features`` - dict of NumPy arrays, one row per recording:
          ``'id'`` (prefix of the recording file name before the first ``_``),
          ``'age'`` (one-hot of length 6; the last slot marks an age group
          outside the five known classes), ``'sex'`` (``[female, male]``),
          ``'hw'`` (``[height, weight]`` with fixed fallback values for NaN),
          ``'preg'`` (pregnancy status), ``'loc'`` (one-hot over
          ``AV, MV, PV, TV, PhC``) and ``'mel1'`` (mel spectrogram with a
          trailing channel axis).
        * ``mm_labels`` - murmur targets ordered ``[present, unknown, absent]``.
          Soft targets are used when a murmur exists but was not reported at
          this recording's location, or when no murmur location is given.
        * ``out_labels`` - outcome targets ordered ``[abnormal, normal]``.

        For an empty ``patient_files_trn`` every returned array is empty.
    """
    if data_dir is None:
        # Backward-compatible default: fall back to the notebook-level global.
        data_dir = data_folder

    age_classes = ['Neonate', 'Infant', 'Child', 'Adolescent', 'Young Adult']
    recording_locations = ['AV', 'MV', 'PV', 'TV', 'PhC']

    # Values substituted for a missing height / weight (simple imputation).
    default_height = 110.846
    default_weight = 23.767

    features = {key: [] for key in ('id', 'age', 'sex', 'hw', 'preg', 'loc', 'mel1')}
    mm_labels = []
    out_labels = []

    for patient_file in patient_files_trn:
        # Load the current patient data and the lines describing its recordings.
        current_patient_data = load_patient_data(patient_file)
        num_locations = get_num_locations(current_patient_data)
        recording_information = current_patient_data.split('\n')[1:num_locations + 1]

        for rec_idx in range(num_locations):
            entries = recording_information[rec_idx].split(' ')
            location = entries[0]
            recording_file = entries[2]
            filename = os.path.join(data_dir, recording_file)

            # Patient id is the part of the recording name before the first '_'.
            features['id'].append(recording_file.split('_')[0])

            # Mel spectrogram of the recording.
            features['mel1'].append(feature_extract_melspec(filename)[0])

            # Age group: one-hot over the known classes, last slot = anything else.
            age_group = get_age(current_patient_data)
            current_age_group = np.zeros(6, dtype=int)
            if age_group in age_classes:
                current_age_group[age_classes.index(age_group)] = 1
            else:
                current_age_group[5] = 1
            features['age'].append(current_age_group)

            # Sex: [female, male]; both stay zero when neither matches.
            sex = get_sex(current_patient_data)
            sex_features = np.zeros(2, dtype=int)
            if compare_strings(sex, 'Female'):
                sex_features[0] = 1
            elif compare_strings(sex, 'Male'):
                sex_features[1] = 1
            features['sex'].append(sex_features)

            # Height and weight with simple imputation of missing values.
            height = get_height(current_patient_data)
            weight = get_weight(current_patient_data)
            if math.isnan(height):
                height = default_height
            if math.isnan(weight):
                weight = default_weight
            features['hw'].append(np.array([height, weight]))

            # Pregnancy status.
            features['preg'].append(get_pregnancy_status(current_patient_data))

            # Recording location: one-hot, all zeros for an unlisted location.
            loc_features = np.zeros(len(recording_locations))
            if location in recording_locations:
                loc_features[recording_locations.index(location)] = 1
            features['loc'].append(loc_features)

            # Murmur label. Compared case-insensitively so that a capitalised
            # 'Unknown' is not mistaken for a present murmur.
            murmur_key = get_murmur(current_patient_data).strip().lower()
            if murmur_key == 'absent':
                current_mm_labels = np.array([0, 0, 1])
            elif murmur_key == 'unknown':
                current_mm_labels = np.array([0, 1, 0])
            else:
                mm_loc = get_murmur_loc(current_patient_data)
                if mm_loc == 'nan':
                    # Murmur present but no location given.
                    current_mm_labels = np.array([0.9, 0.05, 0.05])
                elif location in mm_loc.split('+'):
                    # Murmur reported at this very location.
                    current_mm_labels = np.array([1, 0, 0])
                else:
                    # Murmur reported, but at other locations only.
                    current_mm_labels = np.array([0.7, 0.2, 0.1])

            # Outcome label: anything other than 'Normal' counts as abnormal.
            outcome_key = get_outcome(current_patient_data).strip().lower()
            if outcome_key == 'normal':
                current_out_labels = np.array([0, 1])
            else:
                current_out_labels = np.array([1, 0])

            mm_labels.append(current_mm_labels)
            out_labels.append(current_out_labels)

    # Add a trailing channel axis. The target shape is taken from the first
    # spectrogram rather than from a leftover loop index, and the step is
    # skipped when nothing was extracted.
    if features['mel1']:
        n_rows, n_cols = features['mel1'][0].shape
        features['mel1'] = [mel.reshape(n_rows, n_cols, 1) for mel in features['mel1']]

    features = {key: np.array(values) for key, values in features.items()}

    return features, np.array(mm_labels), np.array(out_labels)

In [7]:
# Directory of the training split. It is globbed below for patient `.txt` files and is
# also read inside get_features_3lb to locate the recording files.
# NOTE(review): absolute, machine-specific path; adjust when running on another host.
data_folder = '/home/jh20/Data/nr_data/ECG/Physionet2022/physionet.org/files/circor-heart-sound/1.0.3/train/'

In [8]:
# Collect every patient header (.txt) in the data folder. os.path.join keeps the
# pattern correct whether or not `data_folder` ends with a path separator.
all_file = glob.glob(os.path.join(data_folder, '*.txt'))

In [17]:
# Take the first two patient header files as a small sample for a quick check.
# `all_wav_file` is never defined in this notebook; the list built above is `all_file`.
smp_file = all_file[:2]

In [18]:
# Show which files ended up in the sample.
smp_file

['/home/jh20/Data/nr_data/ECG/Physionet2022/physionet.org/files/circor-heart-sound/1.0.3/train/50103.txt',
 '/home/jh20/Data/nr_data/ECG/Physionet2022/physionet.org/files/circor-heart-sound/1.0.3/train/50077.txt']

In [33]:
# Inspect the raw header text of the first sampled patient file.
load_patient_data(smp_file[0])

'50103 4 4000\nAV 50103_AV.hea 50103_AV.wav 50103_AV.tsv\nPV 50103_PV.hea 50103_PV.wav 50103_PV.tsv\nTV 50103_TV.hea 50103_TV.wav 50103_TV.tsv\nMV 50103_MV.hea 50103_MV.wav 50103_MV.tsv\n#Age: Child\n#Sex: Male\n#Height: 128.0\n#Weight: 25.1\n#Pregnancy status: False\n#Murmur: Absent\n#Murmur locations: nan\n#Most audible location: nan\n#Systolic murmur timing: nan\n#Systolic murmur shape: nan\n#Systolic murmur grading: nan\n#Systolic murmur pitch: nan\n#Systolic murmur quality: nan\n#Diastolic murmur timing: nan\n#Diastolic murmur shape: nan\n#Diastolic murmur grading: nan\n#Diastolic murmur pitch: nan\n#Diastolic murmur quality: nan\n#Outcome: Normal\n#Campaign: CC2014\n#Additional ID: nan\n'

In [27]:
# Run the feature/label extraction on the small sample.
features, mm_labels, out_labels = get_features_3lb(smp_file) 

In [36]:
# Display the extracted feature dict.
# NOTE(review): this prints every array in full, including the mel spectrograms;
# showing only the shapes would keep the output readable.
features

{'id': array(['50103', '50103', '50103', '50103', '50077'], dtype='<U5'),
 'age': array([[0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0, 0]]),
 'sex': array([[0, 1],
        [0, 1],
        [0, 1],
        [0, 1],
        [1, 0]]),
 'hw': array([[128. ,  25.1],
        [128. ,  25.1],
        [128. ,  25.1],
        [128. ,  25.1],
        [ 89. ,  11.8]]),
 'preg': array([False, False, False, False, False]),
 'loc': array([[1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0.],
        [0., 1., 0., 0., 0.]]),
 'mel1': array([[[[ -7.55777   ],
          [ -8.246252  ],
          [-14.502152  ],
          ...,
          [-15.170034  ],
          [-23.670837  ],
          [-21.480988  ]],
 
         [[ -8.962356  ],
          [-12.39101   ],
          [-16.921469  ],
          ...,
          [-14.599627  ],
          [-15.644245  ],
          [-23.628342

In [15]:
# Symbolic model inputs, one per feature group returned by get_features_3lb.
# The vector sizes mirror the encodings built there: 6 age slots, 2 sex slots,
# height + weight, a single pregnancy flag and 5 recording locations.
age = keras.Input(name='age_cat', shape=(6,))
sex = keras.Input(name='sex_cat', shape=(2,))
hw = keras.Input(name='height_weight', shape=(2,))
preg = keras.Input(name='is_preg', shape=(1,))
loc = keras.Input(name='loc', shape=(5,))
# Spectrogram input with a trailing channel axis.
# Assumes feature_extract_melspec yields 100 x 313 arrays — TODO confirm.
mel1 = keras.Input(name='mel', shape=(100, 313, 1))

In [16]:
# Inspect the symbolic tensor created for the age input.
age

<KerasTensor: shape=(None, 6) dtype=float32 (created by layer 'age_cat')>