In [28]:
import librosa
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
from tqdm import tqdm
from PIL import Image

In [29]:
directory = 'archive/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files'
audio_file_names = [file for file in os.listdir(directory) if file.endswith('.wav')]
print(audio_file_names)

['101_1b1_Al_sc_Meditron.wav', '101_1b1_Pr_sc_Meditron.wav', '102_1b1_Ar_sc_Meditron.wav', '103_2b2_Ar_mc_LittC2SE.wav', '104_1b1_Al_sc_Litt3200.wav', '104_1b1_Ar_sc_Litt3200.wav', '104_1b1_Ll_sc_Litt3200.wav', '104_1b1_Lr_sc_Litt3200.wav', '104_1b1_Pl_sc_Litt3200.wav', '104_1b1_Pr_sc_Litt3200.wav', '105_1b1_Tc_sc_Meditron.wav', '106_2b1_Pl_mc_LittC2SE.wav', '106_2b1_Pr_mc_LittC2SE.wav', '107_2b3_Al_mc_AKGC417L.wav', '107_2b3_Ar_mc_AKGC417L.wav', '107_2b3_Ll_mc_AKGC417L.wav', '107_2b3_Lr_mc_AKGC417L.wav', '107_2b3_Pl_mc_AKGC417L.wav', '107_2b3_Pr_mc_AKGC417L.wav', '107_2b3_Tc_mc_AKGC417L.wav', '107_2b4_Al_mc_AKGC417L.wav', '107_2b4_Ar_mc_AKGC417L.wav', '107_2b4_Ll_mc_AKGC417L.wav', '107_2b4_Lr_mc_AKGC417L.wav', '107_2b4_Pl_mc_AKGC417L.wav', '107_2b4_Pr_mc_AKGC417L.wav', '107_2b4_Tc_mc_AKGC417L.wav', '107_2b5_Al_mc_AKGC417L.wav', '107_2b5_Ar_mc_AKGC417L.wav', '107_2b5_Ll_mc_AKGC417L.wav', '107_2b5_Lr_mc_AKGC417L.wav', '107_2b5_Pl_mc_AKGC417L.wav', '107_2b5_Pr_mc_AKGC417L.wav', '107_2b5_

In [30]:
audio_files = {}
audio_files_sr = {}
for file_name in audio_file_names:
    audio_files[file_name], audio_files_sr[file_name] = librosa.load(f'archive/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files/{file_name}')

In [31]:
target_duration = 20  
audio_padded_base = {}

for file_name in audio_files:
    audio_padded_base[file_name] = librosa.util.fix_length(audio_files[file_name], size=target_duration*audio_files_sr[file_name])

In [32]:
def generate_mel_features(audio_padded_base, audio_files_sr, audio_file_names):
    spectrograms_base = {}
    spectrograms_dB_base = {}
    mfcc_base = {}
    for file_name in tqdm(audio_file_names):
        spectrograms_base[file_name] = librosa.feature.melspectrogram(y=audio_padded_base[file_name], sr=audio_files_sr[file_name], n_mels=24)
        spectrograms_dB_base[file_name] = librosa.power_to_db(spectrograms_base[file_name], ref=np.max)
        mfcc_base[file_name] = librosa.feature.mfcc(S=spectrograms_dB_base[file_name], n_mfcc=24)
    
    return spectrograms_base, spectrograms_dB_base, mfcc_base

In [33]:
def generate_chroma_features(audio_padded_base, audio_files_sr, audio_file_names):
    chromagrams_base = {}
    cens_base = {}
    for file_name in tqdm(audio_file_names):
        chromagrams_base[file_name] = librosa.feature.chroma_stft(y=audio_padded_base[file_name], sr=audio_files_sr[file_name])
        cens_base[file_name] = librosa.feature.chroma_cens(C=chromagrams_base[file_name])
    
    return chromagrams_base, cens_base

In [34]:
def save_figures(data, audio_file_names, y_axis, save_name):
    #y_axis = 'mel' or 'chroma'
    for file_name in tqdm(audio_file_names):
        librosa.display.specshow(data[file_name], x_axis='time', y_axis=y_axis)
        plt.axis('off')  # Turn off axes
        plt.xticks([])  # Remove x-axis ticks
        plt.yticks([])  # Remove y-axis ticks
        plt.savefig(f'{save_name}/{file_name[:-4]}.png', bbox_inches='tight', pad_inches=0)
        plt.close()

In [35]:
def gen_save_pipeline(audio_padded_base, audio_files_sr, audio_file_names, category):
    print(f'Generating features...')
    spectrograms_base, spectrograms_dB_base, mfcc_base = generate_mel_features(audio_padded_base, audio_files_sr, audio_file_names)
    chromagrams_base, cens_base = generate_chroma_features(audio_padded_base, audio_files_sr, audio_file_names)

    print(f'Features generated. Saving figures...')
    save_figures(spectrograms_dB_base, audio_file_names, 'mel', f'{category}_spectrograms')
    save_figures(mfcc_base, audio_file_names, 'mel', f'{category}_mfcc')
    save_figures(chromagrams_base, audio_file_names, 'chroma', f'{category}_chroma')
    save_figures(cens_base, audio_file_names, 'chroma', f'{category}_cens')
    print(f'Done!')

In [36]:
def generate_noisy_audio(audio_padded_base, audio_files_sr, audio_file_names, noise_factor=0.01):
    #random_factor = (0, 0.1)
    audio_padded_noisy = {}
    for file_name in tqdm(audio_files):
        audio_padded_noisy[file_name] = audio_padded_base[file_name] + np.random.normal(0, noise_factor, audio_padded_base[file_name].shape)
    
    return audio_padded_noisy

In [37]:
def generate_timeshifted_audio(audio_padded_base, audio_files_sr, audio_file_names, random_factor=0.1):
    #random_factor = (0, 0.5]
    audio_timeshifted = {}
    for file_name in tqdm(audio_files):
        audio_timeshifted[file_name] = np.roll(audio_padded_base[file_name], int(audio_files_sr[file_name]*np.random.normal(0, 2*random_factor)*np.random.choice([-1, 1])))
    
    return audio_timeshifted

In [38]:
def generate_timestretched_audio(audio_padded_base, audio_files_sr, audio_file_names, random_factor=0.6):
    #random_factor = (0, 1)
    audio_timestretched = {}
    for file_name in tqdm(audio_files):
        audio_timestretched[file_name] = librosa.util.fix_length(librosa.effects.time_stretch(audio_padded_base[file_name], rate=abs(np.random.normal(1, 2*random_factor))), size=20*audio_files_sr[file_name])
    
    return audio_timestretched

In [39]:
def generate_pitchshifted_audio(audio_padded_base, audio_files_sr, audio_file_names, random_factor=4):
    # random_factor = [0,12]
    audio_pitchshifted = {}
    for file_name in tqdm(audio_files):
        audio_pitchshifted[file_name] = librosa.effects.pitch_shift(audio_padded_base[file_name], sr=audio_files_sr[file_name], n_steps=np.random.normal(0, 2*random_factor))
    
    return audio_pitchshifted

In [40]:
spectrograms_base, spectrograms_dB_base, mfcc_base = generate_mel_features(audio_padded_base, audio_files_sr, audio_file_names)
chromagrams_base, cens_base = generate_chroma_features(audio_padded_base, audio_files_sr, audio_file_names)

100%|██████████| 920/920 [00:25<00:00, 35.81it/s]
  return pitch_tuning(
100%|██████████| 920/920 [00:53<00:00, 17.19it/s]


In [41]:
spectrograms_dB_base_flattened = {}
mfcc_base_flattened = {}
chromagrams_base_flattened = {}
cens_base_flattened = {}

for file_name in audio_file_names:
    spectrograms_dB_base_flattened[file_name] = spectrograms_dB_base[file_name].reshape(-1,)
    mfcc_base_flattened[file_name] = mfcc_base[file_name].reshape(-1,)
    chromagrams_base_flattened[file_name] = chromagrams_base[file_name].reshape(-1,)
    cens_base_flattened[file_name] = cens_base[file_name].reshape(-1,)

In [42]:
spectrograms_dB_base_flattened_df = pd.DataFrame(spectrograms_dB_base_flattened).T
mfcc_base_flattened_df = pd.DataFrame(mfcc_base_flattened).T
chromagrams_base_flattened_df = pd.DataFrame(chromagrams_base_flattened).T
cens_base_flattened_df = pd.DataFrame(cens_base_flattened).T

In [43]:
spectrograms_dB_base_flattened_df.shape

(920, 20688)

In [44]:
# spectrograms_dB_base_flattened_df = (spectrograms_dB_base_flattened_df - spectrograms_dB_base_flattened_df.mean())/spectrograms_dB_base_flattened_df.std()
# mfcc_base_flattened_df = (mfcc_base_flattened_df - mfcc_base_flattened_df.mean())/mfcc_base_flattened_df.std()
# chromagrams_base_flattened_df = (chromagrams_base_flattened_df - chromagrams_base_flattened_df.mean())/chromagrams_base_flattened_df.std()
# cens_base_flattened_df = (cens_base_flattened_df - cens_base_flattened_df.mean())/cens_base_flattened_df.std()

In [45]:
# filenames = []
# for file_name in audio_file_names:
#     filenames.append(file_name[:-4])

In [46]:
# spectrograms_dB_base_flattened_df['filenames'] = filenames
# mfcc_base_flattened_df['filenames'] = filenames
# chromagrams_base_flattened_df['filenames'] = filenames
# cens_base_flattened_df['filenames'] = filenames

In [47]:
targets = pd.read_csv('pno_filename_to_diagnoses.csv')['diagnosis']
targets = list(targets)
targets

In [48]:
spectrograms_dB_base_flattened_df['diagnosis'] = targets
mfcc_base_flattened_df['diagnosis'] = targets
chromagrams_base_flattened_df['diagnosis'] = targets
cens_base_flattened_df['diagnosis'] = targets

In [49]:
# cens_base_flattened_df = cens_base_flattened_df.dropna(axis=1)

In [50]:
# spectrograms_dB_base_flattened_df.to_csv('spectrograms_dB_base_flattened.csv', index=False)
# mfcc_base_flattened_df.to_csv('mfcc_base_flattened.csv', index=False)
# chromagrams_base_flattened_df.to_csv('chromagrams_base_flattened.csv', index=False)
# cens_base_flattened_df.to_csv('cens_base_flattened.csv', index=False)

In [51]:
spectrograms_dB_aug_flattened_df = spectrograms_dB_base_flattened_df.copy()
mfcc_aug_flattened_df = mfcc_base_flattened_df.copy()
chromagrams_aug_flattened_df = chromagrams_base_flattened_df.copy()
cens_aug_flattened_df = cens_base_flattened_df.copy()

In [52]:
categories_all = {'noisy':generate_noisy_audio, 'timeshifted':generate_timeshifted_audio, 'generate_timestretched_audio':generate_timestretched_audio, 'pitchshifted':generate_pitchshifted_audio}
categories = {'noisy':generate_noisy_audio, 'timeshifted':generate_timeshifted_audio, 'pitchshifted':generate_pitchshifted_audio}

for category in categories.keys():
    print(f'Generating {category} data...')
    data = categories[category](audio_padded_base, audio_files_sr, audio_file_names)
    print(f'Data generated.')
    
    spectrograms_aug, spectrograms_dB_aug, mfcc_aug = generate_mel_features(data, audio_files_sr, audio_file_names)
    chromagrams_aug, cens_aug = generate_chroma_features(data, audio_files_sr, audio_file_names)

    spectrograms_dB_aug_flattened = {}
    mfcc_aug_flattened = {}
    chromagrams_aug_flattened = {}
    cens_aug_flattened = {}

    for file_name in audio_file_names:
        spectrograms_dB_aug_flattened[file_name] = spectrograms_dB_aug[file_name].reshape(-1,)
        mfcc_aug_flattened[file_name] = mfcc_aug[file_name].reshape(-1,)
        chromagrams_aug_flattened[file_name] = chromagrams_aug[file_name].reshape(-1,)
        cens_aug_flattened[file_name] = cens_aug[file_name].reshape(-1,)
    
    spectrograms_dB_aug_flattened_df_cur = pd.DataFrame(spectrograms_dB_aug_flattened).T
    mfcc_aug_flattened_df_cur = pd.DataFrame(mfcc_aug_flattened).T
    chromagrams_aug_flattened_df_cur = pd.DataFrame(chromagrams_aug_flattened).T
    cens_aug_flattened_df_cur = pd.DataFrame(cens_aug_flattened).T

    spectrograms_dB_aug_flattened_df_cur['diagnosis'] = targets
    mfcc_aug_flattened_df_cur['diagnosis'] = targets
    chromagrams_aug_flattened_df_cur['diagnosis'] = targets
    cens_aug_flattened_df_cur['diagnosis'] = targets

    spectrograms_dB_aug_flattened_df = pd.concat([spectrograms_dB_aug_flattened_df, spectrograms_dB_aug_flattened_df_cur], axis=0)
    mfcc_aug_flattened_df = pd.concat([mfcc_aug_flattened_df, mfcc_aug_flattened_df_cur], axis=0)
    chromagrams_aug_flattened_df = pd.concat([chromagrams_aug_flattened_df, chromagrams_aug_flattened_df_cur], axis=0)
    cens_aug_flattened_df = pd.concat([cens_aug_flattened_df, cens_aug_flattened_df_cur], axis=0)

Generating noisy data...


100%|██████████| 920/920 [00:11<00:00, 78.73it/s]


Data generated.


100%|██████████| 920/920 [00:31<00:00, 29.37it/s]
  return pitch_tuning(
100%|██████████| 920/920 [01:17<00:00, 11.88it/s]


Generating timeshifted data...


100%|██████████| 920/920 [00:00<00:00, 1478.97it/s]


Data generated.


100%|██████████| 920/920 [00:31<00:00, 29.34it/s]
  return pitch_tuning(
100%|██████████| 920/920 [01:05<00:00, 14.10it/s]


Generating pitchshifted data...


100%|██████████| 920/920 [03:23<00:00,  4.52it/s]


Data generated.


100%|██████████| 920/920 [00:27<00:00, 33.73it/s]
  return pitch_tuning(
100%|██████████| 920/920 [00:58<00:00, 15.74it/s]


In [53]:
spectrograms_dB_aug_flattened_df.shape

(3680, 20689)

In [55]:
spectrograms_dB_aug_flattened_df = (spectrograms_dB_aug_flattened_df -spectrograms_dB_aug_flattened_df.mean())/spectrograms_dB_aug_flattened_df.std()
mfcc_aug_flattened_df = (mfcc_aug_flattened_df -mfcc_aug_flattened_df.mean())/mfcc_aug_flattened_df.std()
chromagrams_aug_flattened_df = (chromagrams_aug_flattened_df -chromagrams_aug_flattened_df.mean())/chromagrams_aug_flattened_df.std()
cens_aug_flattened_df = (cens_aug_flattened_df -cens_aug_flattened_df.mean())/cens_aug_flattened_df.std()

  spectrograms_dB_aug_flattened_df = (spectrograms_dB_aug_flattened_df -spectrograms_dB_aug_flattened_df.mean())/spectrograms_dB_aug_flattened_df.std()
  spectrograms_dB_aug_flattened_df = (spectrograms_dB_aug_flattened_df -spectrograms_dB_aug_flattened_df.mean())/spectrograms_dB_aug_flattened_df.std()
  mfcc_aug_flattened_df = (mfcc_aug_flattened_df -mfcc_aug_flattened_df.mean())/mfcc_aug_flattened_df.std()
  mfcc_aug_flattened_df = (mfcc_aug_flattened_df -mfcc_aug_flattened_df.mean())/mfcc_aug_flattened_df.std()
  chromagrams_aug_flattened_df = (chromagrams_aug_flattened_df -chromagrams_aug_flattened_df.mean())/chromagrams_aug_flattened_df.std()
  chromagrams_aug_flattened_df = (chromagrams_aug_flattened_df -chromagrams_aug_flattened_df.mean())/chromagrams_aug_flattened_df.std()
  cens_aug_flattened_df = (cens_aug_flattened_df -cens_aug_flattened_df.mean())/cens_aug_flattened_df.std()
  cens_aug_flattened_df = (cens_aug_flattened_df -cens_aug_flattened_df.mean())/cens_aug_flattened_d

In [60]:
spectrograms_dB_aug_flattened_df = spectrograms_dB_aug_flattened_df.dropna(axis=1)
mfcc_aug_flattened_df = mfcc_aug_flattened_df.dropna(axis=1)
chromagrams_aug_flattened_df = chromagrams_aug_flattened_df.dropna(axis=1)
cens_aug_flattened_df = cens_aug_flattened_df.dropna(axis=1)

In [64]:
targets4 = targets + targets + targets + targets

In [65]:
spectrograms_dB_aug_flattened_df['diagnosis'] = targets4
mfcc_aug_flattened_df['diagnosis'] = targets4
chromagrams_aug_flattened_df['diagnosis'] = targets4
cens_aug_flattened_df['diagnosis'] = targets4

In [66]:
cens_aug_flattened_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4301,4302,4303,4304,4305,4306,4307,4308,4309,diagnosis
101_1b1_Al_sc_Meditron.wav,-0.644952,-0.681464,-0.71455,-0.74419,-0.770331,-0.79267,-0.811053,-0.825705,-0.836818,-0.844299,...,-0.549495,-0.542072,-0.528636,-0.511734,-0.490573,-0.469935,-0.445536,-0.421808,-0.394536,URTI
101_1b1_Pr_sc_Meditron.wav,-0.057968,-0.070735,-0.081269,-0.090314,-0.098492,-0.106362,-0.114396,-0.122967,-0.132374,-0.142513,...,-0.332613,-0.319726,-0.304935,-0.288412,-0.268456,-0.248629,-0.225605,-0.202658,-0.176072,URTI
102_1b1_Ar_sc_Meditron.wav,-0.157594,-0.16324,-0.169198,-0.175774,-0.183228,-0.191821,-0.201769,-0.213131,-0.225932,-0.240281,...,0.119933,0.155839,0.189829,0.221491,0.250889,0.276517,0.298781,0.31797,0.337209,Healthy
103_2b2_Ar_mc_LittC2SE.wav,-0.485933,-0.508909,-0.530397,-0.550472,-0.569241,-0.586844,-0.603305,-0.61851,-0.632439,-0.64517,...,-1.533696,-1.548045,-1.561627,-1.575201,-1.586271,-1.607632,-1.629972,-1.661775,-1.692703,Asthma
104_1b1_Al_sc_Litt3200.wav,0.213622,0.235877,0.2573,0.278909,0.299577,0.318335,0.334682,0.34691,0.351904,0.348482,...,-4.297328,-4.280075,-4.259767,-4.236438,-4.198872,-4.169642,-4.127113,-4.093511,-4.052101,COPD


In [67]:
spectrograms_dB_aug_flattened_df.to_csv('spectrograms_dB_aug_flattened.csv', index=False)
mfcc_aug_flattened_df.to_csv('mfcc_aug_flattened.csv', index=False)
chromagrams_aug_flattened_df.to_csv('chromagrams_aug_flattened.csv', index=False)
cens_aug_flattened_df.to_csv('cens_aug_flattened.csv', index=False)

In [68]:
df_targets = pd.DataFrame(targets)