In [1]:
import os
import pickle
from glob import iglob
import numpy as np
import librosa
import csv

In [2]:
# Input location of the raw WAV recordings and the output tree for the
# converted CSV files (one sub-folder per split).
DATA_AUDIO_DIR = './heartbeat_data'
OUTPUT_DIR = './output'
OUTPUT_DIR_TRAIN, OUTPUT_DIR_TEST = (
    os.path.join(OUTPUT_DIR, split) for split in ('train', 'test')
)

# Audio parameters: sample rate (Hz) requested when loading a recording, and
# the number of samples every clip is cut or padded to.
TARGET_SR = 8000
AUDIO_LENGTH = 5000

In [3]:
# Integer class id for each label keyword that can appear in a recording's
# file name. The keys must match the keywords extract_class_id searches for.
class_ids = {
    'normal': 0,
    'murmur': 1,  # must be spelled 'murmur': that is the key looked up for murmur recordings
    'extrahls': 2,
    'artifact': 3,
    'unlabelled': 4,
}

In [4]:
def extract_class_id(wav_filename):
    """Return the integer class id for a recording, derived from its path.

    The label keywords 'normal', 'murmur', 'extrahls', 'artifact' and
    'unlabelled' are searched for (as substrings, in that order) in
    ``wav_filename``; the first one found selects the entry of ``class_ids``
    that is returned. Paths containing none of the keywords are treated as
    'unlabelled'.

    NOTE(review): names such as 'extrastole__...' match no keyword and are
    therefore reported as 'unlabelled' -- confirm that this is intended.
    """
    fallback = 'unlabelled'
    label = next(
        (keyword
         for keyword in ('normal', 'murmur', 'extrahls', 'artifact', fallback)
         if keyword in wav_filename),
        fallback,
    )
    if label == 'murmur':
        # The label table may spell this key 'mumur'; accept either spelling
        # so murmur recordings never come back as None.
        return class_ids.get('murmur', class_ids.get('mumur'))
    return class_ids.get(label)

In [5]:
def read_audio_from_filename(filename, target_sr):
    """Load an audio file as a single-column array of samples.

    The file is read with ``librosa.load`` using ``sr=target_sr`` and
    ``mono=True``; the sample rate that librosa returns alongside the samples
    is discarded.

    Args:
        filename: path of the audio file to load.
        target_sr: sample rate handed to ``librosa.load``.

    Returns:
        The loaded samples reshaped to ``(n_samples, 1)``.
    """
    samples = librosa.load(filename, sr=target_sr, mono=True)[0]
    return samples.reshape(-1, 1)

In [6]:
def convert_data():
    """Convert every WAV file under DATA_AUDIO_DIR into a fixed-length CSV file.

    For each ``.wav`` file found recursively below DATA_AUDIO_DIR:

    1. the audio is loaded with ``read_audio_from_filename`` at TARGET_SR;
    2. the whole clip is normalized to zero mean and unit standard deviation
       (a clip with zero deviation is only mean-centred, never divided by 0);
    3. the clip is truncated, or zero-padded at the end, to AUDIO_LENGTH rows;
    4. the result is written as a one-column CSV named ``<index>.csv``, where
       ``index`` is the file's position in the glob iteration.

    Files with index 0-49 are written to OUTPUT_DIR_TEST, all others to
    OUTPUT_DIR_TRAIN. Both directories are created if they do not exist.
    One progress line is printed per file, plus one line when a clip is
    padded or cut.

    NOTE(review): the class label encoded in the file name is not stored in
    the output (neither in the CSV nor in its name) -- confirm that labels
    are recovered some other way downstream.
    """
    # open() does not create missing directories, so make sure both exist.
    for folder in (OUTPUT_DIR_TRAIN, OUTPUT_DIR_TEST):
        os.makedirs(folder, exist_ok=True)

    pattern = os.path.join(DATA_AUDIO_DIR, '**/**.wav')
    for i, wav_filename in enumerate(iglob(pattern, recursive=True)):
        audio_buf = read_audio_from_filename(wav_filename, target_sr=TARGET_SR)

        # normalize to mean 0, variance 1
        mean = np.mean(audio_buf)
        std = np.std(audio_buf)
        audio_buf = audio_buf - mean
        # A constant (e.g. silent) clip has std == 0; dividing would fill the
        # CSV with NaN/inf, so such clips are left mean-centred only.
        if std > 0:
            audio_buf = audio_buf / std

        original_length = len(audio_buf)
        print(i, wav_filename, original_length, np.round(np.mean(audio_buf), 4), np.std(audio_buf))
        if original_length < AUDIO_LENGTH:
            # Pad with zeros of the clip's own dtype so padded and unpadded
            # clips are written with the same precision.
            padding = np.zeros(shape=(AUDIO_LENGTH - original_length, 1), dtype=audio_buf.dtype)
            audio_buf = np.concatenate((audio_buf, padding))
            print('PAD New Length = ', len(audio_buf))
        elif original_length > AUDIO_LENGTH:
            audio_buf = audio_buf[0:AUDIO_LENGTH]
            print('CUT New length = ', len(audio_buf))

        # NOTE(review): this holds out only the first 50 files in glob order.
        # If a periodic hold-out (every 50th file) was intended, the
        # condition should be `i % 50 == 0` -- confirm.
        output_folder = OUTPUT_DIR_TEST if i // 50 == 0 else OUTPUT_DIR_TRAIN

        output_filename = os.path.join(output_folder, str(i) + ".csv")
        # newline='' lets the csv module control line endings itself; without
        # it, platforms that translate '\n' emit an extra '\r' per row.
        with open(output_filename, 'w', newline='') as w:
            mywriter = csv.writer(w, delimiter=",")
            mywriter.writerows(audio_buf)

In [7]:
# Run the conversion over the whole dataset; it prints one progress line per file.
convert_data()

0 ./heartbeat_data/set_a/murmur__201101180902.wav 64276 0.0 1.0
CUT New length =  5000
1 ./heartbeat_data/set_a/Aunlabelledtest__201101091156.wav 72000 0.0 1.0
CUT New length =  5000
2 ./heartbeat_data/set_a/normal_noisynormal_110_1305655332337_B.wav 68256 -0.0 0.99999994
CUT New length =  5000
3 ./heartbeat_data/set_a/normal__175_1307987962616_B1.wav 13828 -0.0 1.0
CUT New length =  5000
4 ./heartbeat_data/set_a/Aunlabelledtest__201105011546.wav 72000 -0.0 1.0
CUT New length =  5000
5 ./heartbeat_data/set_a/Aunlabelledtest__201103201314.wav 14040 -0.0 1.0
CUT New length =  5000
6 ./heartbeat_data/set_a/normal_noisynormal_176_1307988171173_D.wav 26590 0.0 1.0
CUT New length =  5000
7 ./heartbeat_data/set_a/normal__261_1309353556003_B.wav 23818 0.0 1.0
CUT New length =  5000
8 ./heartbeat_data/set_a/Bunlabelledtest__103_1305031931979_A.wav 57798 -0.0 1.0000001
CUT New length =  5000
9 ./heartbeat_data/set_a/Bunlabelledtest__145_1307987561278_A.wav 107044 0.0 0.99999994
CUT New length = 

84 ./heartbeat_data/set_a/normal_noisynormal_117_1306262456650_C.wav 83120 -0.0 0.99999994
CUT New length =  5000
85 ./heartbeat_data/set_a/normal__167_1307111318050_C.wav 15834 0.0 1.0
CUT New length =  5000
86 ./heartbeat_data/set_a/artifact__201106212112.wav 72000 -0.0 1.0
CUT New length =  5000
87 ./heartbeat_data/set_a/murmur_noisymurmur_135_1306428972976_C.wav 95228 0.0 1.0
CUT New length =  5000
88 ./heartbeat_data/set_a/Bunlabelledtest__170_1307970562729_D.wav 162858 -0.0 1.0
CUT New length =  5000
89 ./heartbeat_data/set_a/Aunlabelledtest__201108222241.wav 63485 -0.0 1.0
CUT New length =  5000
90 ./heartbeat_data/set_a/normal__106_1306776721273_C1.wav 25526 0.0 0.9999999
CUT New length =  5000
91 ./heartbeat_data/set_a/artifact__201106171003.wav 72000 0.0 1.0
CUT New length =  5000
92 ./heartbeat_data/set_a/murmur__201108222221.wav 63485 -0.0 1.0000001
CUT New length =  5000
93 ./heartbeat_data/set_a/normal__201106141148.wav 45995 -0.0 1.0
CUT New length =  5000
94 ./heartbeat

167 ./heartbeat_data/set_a/normal__201108011118.wav 63485 0.0 0.99999994
CUT New length =  5000
168 ./heartbeat_data/set_a/normal__216_1308245839516_C.wav 25074 0.0 0.99999994
CUT New length =  5000
169 ./heartbeat_data/set_a/normal__145_1307987561278_C.wav 96818 -0.0 1.0
CUT New length =  5000
170 ./heartbeat_data/set_a/normal_noisynormal_136_1306429977501_B.wav 45682 -0.0 1.0
CUT New length =  5000
171 ./heartbeat_data/set_a/Aunlabelledtest__201108222247.wav 63485 0.0 1.0
CUT New length =  5000
172 ./heartbeat_data/set_a/Bunlabelledtest__136_1306429977501_D.wav 22126 0.0 1.0
CUT New length =  5000
173 ./heartbeat_data/set_a/extrastole__153_1306848820671_C.wav 51712 -0.0 0.9999999
CUT New length =  5000
174 ./heartbeat_data/set_a/normal__215_1308245664733_C1.wav 14944 0.0 1.0
CUT New length =  5000
175 ./heartbeat_data/set_a/extrastole__253_1309350256198_B.wav 23342 0.0 0.99999994
CUT New length =  5000
176 ./heartbeat_data/set_a/murmur__203_1308162026258_C1.wav 34754 -0.0 1.0
CUT New

252 ./heartbeat_data/set_a/normal__201108011115.wav 63485 0.0 0.99999994
CUT New length =  5000
253 ./heartbeat_data/set_a/normal_noisynormal_194_1308139824187_C.wav 17430 0.0 1.0
CUT New length =  5000
254 ./heartbeat_data/set_a/normal__194_1308139824187_A1.wav 34352 0.0 0.99999994
CUT New length =  5000
255 ./heartbeat_data/set_a/Bunlabelledtest__109_1305653646620_B.wav 102936 0.0 1.0
CUT New length =  5000
256 ./heartbeat_data/set_a/Bunlabelledtest__148_1306768801551_C.wav 23552 0.0 1.0
CUT New length =  5000
257 ./heartbeat_data/set_a/normal__154_1306935608852_D.wav 14004 0.0 1.0
CUT New length =  5000
258 ./heartbeat_data/set_a/Bunlabelledtest__103_1305031931979_C.wav 67432 0.0 1.0
CUT New length =  5000
259 ./heartbeat_data/set_a/normal__134_1306428161797_D.wav 9814 -0.0 1.0
CUT New length =  5000
260 ./heartbeat_data/set_a/Bunlabelledtest__148_1306768801551_A.wav 63410 -0.0 0.99999994
CUT New length =  5000
261 ./heartbeat_data/set_a/Bunlabelledtest__137_1306764999211_B1.wav 331

335 ./heartbeat_data/set_a/artifact__201106221254.wav 72000 -0.0 1.0
CUT New length =  5000
336 ./heartbeat_data/set_a/artifact__201106131834.wav 72000 0.0 0.99999994
CUT New length =  5000
337 ./heartbeat_data/set_a/extrastole__138_1306762146980_B.wav 64830 -0.0 1.0
CUT New length =  5000
338 ./heartbeat_data/set_a/artifact__201106010602.wav 72000 0.0 1.0
CUT New length =  5000
339 ./heartbeat_data/set_a/Bunlabelledtest__130_1306347376079_A.wav 164588 0.0 1.0
CUT New length =  5000
340 ./heartbeat_data/set_a/normal_noisynormal_216_1308245839516_D.wav 132444 -0.0 0.9999999
CUT New length =  5000
341 ./heartbeat_data/set_a/Bunlabelledtest__217_1308246111629_A.wav 82840 0.0 0.99999994
CUT New length =  5000
342 ./heartbeat_data/set_a/normal__217_1308246111629_C.wav 36646 0.0 1.0
CUT New length =  5000
343 ./heartbeat_data/set_a/Bunlabelledtest__139_1306519274653_C.wav 61364 0.0 0.99999994
CUT New length =  5000
344 ./heartbeat_data/set_a/Bunlabelledtest__160_1307100683334_C.wav 61338 -0.

417 ./heartbeat_data/set_a/murmur__161_1307101199321_A.wav 138068 -0.0 1.0
CUT New length =  5000
418 ./heartbeat_data/set_a/Aunlabelledtest__201103241336.wav 63376 0.0 0.99999994
CUT New length =  5000
419 ./heartbeat_data/set_a/normal__169_1307970398039_C.wav 24558 -0.0 1.0
CUT New length =  5000
420 ./heartbeat_data/set_a/normal_noisynormal_115_1306259437619_B.wav 72662 0.0 1.0
CUT New length =  5000
421 ./heartbeat_data/set_a/Aunlabelledtest__201102081033.wav 57026 -0.0 0.99999994
CUT New length =  5000
422 ./heartbeat_data/set_a/Bunlabelledtest__209_1308162216750_B.wav 113672 -0.0 1.0
CUT New length =  5000
423 ./heartbeat_data/set_a/normal__204_1308159229275_C.wav 37618 0.0 0.99999994
CUT New length =  5000
424 ./heartbeat_data/set_a/normal__202_1308145175747_C.wav 40188 -0.0 1.0
CUT New length =  5000
425 ./heartbeat_data/set_a/Bunlabelledtest__127_1306764300147_D.wav 29090 0.0 1.0
CUT New length =  5000
426 ./heartbeat_data/set_a/Aunlabelledtest__201106010807.wav 72000 0.0 1.0


502 ./heartbeat_data/set_a/murmur__201108222248.wav 63485 0.0 1.0
CUT New length =  5000
503 ./heartbeat_data/set_a/artifact__201105041959.wav 72000 -0.0 0.99999994
CUT New length =  5000
504 ./heartbeat_data/set_a/Bunlabelledtest__116_1306258689913_B.wav 93440 0.0 1.0
CUT New length =  5000
505 ./heartbeat_data/set_a/extrastole__191_1308077299430_A.wav 83270 0.0 1.0
CUT New length =  5000
506 ./heartbeat_data/set_a/artifact__201106121242.wav 72000 0.0 1.0
CUT New length =  5000
507 ./heartbeat_data/set_a/Aunlabelledtest__201104012144.wav 45693 0.0 1.0
CUT New length =  5000
508 ./heartbeat_data/set_a/extrahls__201101070953.wav 71073 -0.0 1.0
CUT New length =  5000
509 ./heartbeat_data/set_a/normal__152_1306779561195_C1.wav 11600 0.0 1.0
CUT New length =  5000
510 ./heartbeat_data/set_a/normal__175_1307987962616_D.wav 38220 -0.0 1.0
CUT New length =  5000
511 ./heartbeat_data/set_a/normal__103_1305031931979_B.wav 49678 -0.0 0.99999994
CUT New length =  5000
512 ./heartbeat_data/set_a/n

586 ./heartbeat_data/set_a/Bunlabelledtest__156_1306936373241_D.wav 46254 0.0 1.0
CUT New length =  5000
587 ./heartbeat_data/set_a/artifact__201106021541.wav 72000 -0.0 0.99999994
CUT New length =  5000
588 ./heartbeat_data/set_a/normal__170_1307970562729_B.wav 61022 0.0 0.99999994
CUT New length =  5000
589 ./heartbeat_data/set_a/Bunlabelledtest__162_1307101835989_C.wav 71592 -0.0 1.0
CUT New length =  5000
590 ./heartbeat_data/set_a/murmur_noisymurmur_135_1306428972976_A.wav 72112 0.0 1.0
CUT New length =  5000
591 ./heartbeat_data/set_a/Bunlabelledtest__237_1308750231222_C1.wav 19656 0.0 1.0000001
CUT New length =  5000
592 ./heartbeat_data/set_a/normal__294_1311681084248_B.wav 37940 0.0 1.0
CUT New length =  5000
593 ./heartbeat_data/set_a/normal_noisynormal_137_1306764999211_D1.wav 78862 -0.0 1.0
CUT New length =  5000
594 ./heartbeat_data/set_a/Bunlabelledtest__227_1308594233667_D.wav 195590 -0.0 1.0
CUT New length =  5000
595 ./heartbeat_data/set_a/Bunlabelledtest__206_13081596

670 ./heartbeat_data/set_a/normal__146_1306778707532_D1.wav 117440 0.0 0.99999994
CUT New length =  5000
671 ./heartbeat_data/set_a/normal__149_1306776016110_B.wav 37610 -0.0 0.99999994
CUT New length =  5000
672 ./heartbeat_data/set_a/normal__184_1308073010307_D.wav 117380 0.0 1.0
CUT New length =  5000
673 ./heartbeat_data/set_a/murmur_noisymurmur_200_1308144251434_D.wav 60254 0.0 0.99999994
CUT New length =  5000
674 ./heartbeat_data/set_a/artifact__201106141701.wav 72000 0.0 0.99999994
CUT New length =  5000
675 ./heartbeat_data/set_a/Bunlabelledtest__167_1307111318050_C1.wav 30788 -0.0 1.0
CUT New length =  5000
676 ./heartbeat_data/set_a/murmur__254_1309350589009_A.wav 19006 -0.0 0.99999994
CUT New length =  5000
677 ./heartbeat_data/set_a/murmur__201108222226.wav 63485 -0.0 1.0
CUT New length =  5000
678 ./heartbeat_data/set_a/Bunlabelledtest__268_1309368960960_A.wav 86422 -0.0 1.0000001
CUT New length =  5000
679 ./heartbeat_data/set_a/Bunlabelledtest__260_1309353164458_A.wav 3

753 ./heartbeat_data/set_a/normal_noisynormal_123_1306331925797_B.wav 106238 -0.0 1.0
CUT New length =  5000
754 ./heartbeat_data/set_a/artifact__201106040947.wav 72000 0.0 1.0
CUT New length =  5000
755 ./heartbeat_data/set_a/normal__146_1306778707532_D3.wav 12504 -0.0 1.0
CUT New length =  5000
756 ./heartbeat_data/set_a/normal__146_1306778707532_B.wav 78206 -0.0 1.0
CUT New length =  5000
757 ./heartbeat_data/set_a/Bunlabelledtest__163_1307104470471_B.wav 20056 0.0 1.0
CUT New length =  5000
758 ./heartbeat_data/set_a/extrastole__144_1306522408528_B1.wav 26792 0.0 1.0
CUT New length =  5000
759 ./heartbeat_data/set_a/murmur__195_1308140095331_C1.wav 33904 0.0 0.99999994
CUT New length =  5000
760 ./heartbeat_data/set_a/normal__137_1306764999211_C.wav 48810 0.0 1.0
CUT New length =  5000
761 ./heartbeat_data/set_a/Aunlabelledtest__201012172010.wav 72000 0.0 1.0
CUT New length =  5000
762 ./heartbeat_data/set_a/Bunlabelledtest__163_1307104470471_D.wav 20816 0.0 1.0
CUT New length =  5