## Coverting Mel Spectogram back to Audio Signal and calculating SNR
- 19/08/2019
- By Karl Michel Koerich

In [36]:
#Imports

import librosa
import math
import librosa.display
import IPython.display
import numpy as np
import os

In [37]:

nb_classes = 10
sr = 22050
frame_size = 110250


In [68]:
#Indicate folds
train_fold = [1, 2]
test_fold = [3]

In [69]:
# Put all filenames into a list

test_filename_list  = []
base_path = "gtzan_3f_Win_110250_75/"

j  = 0 
ii = 0
for ii in test_fold:

    path = base_path+"fold"+str(ii)

    for path, subdirs, files in os.walk( path ):
        for name in files:
            test_filename_list.append( os.path.join(path, name) )
            j += 1
    print("Test files processed: "+"fold"+str(ii)+" "+str(j) )

Test files processed: fold3 6929


In [70]:
# Load 5 second Original Mel Spec

X_valid         = np.load( "folds_mf/2_GTzan_Xs_test_fold"+str(test_fold[0])+"_110250_75_frozen.npy")
X_sample = X_valid[9:10]  #Arbitrary value


In [71]:

X_sample = np.squeeze(X_sample)
print(X_sample.shape)

len_X = X_valid.shape[0]
print(len_X)


(64, 431)
6929


In [72]:
#Load MF spectrogram wave paths

mf_list  = []
base_path = "gtzan_reconstruct/mf/"

j  = 0 
ii = 0
for ii in test_fold:

    path = base_path+"fold"+str(ii)

    for path, subdirs, files in os.walk( path ):
        for name in files:
            mf_list.append(os.path.join(path, name))
            j += 1
    print("Reconstructed waves processed: "+"fold "+str(ii)+" - "+str(j) )

Reconstructed waves processed: fold 3 - 6929


In [73]:
#Load MF BIM spectrogram wave paths

mf_bim_list  = []
base_path = "gtzan_reconstruct/mf_BIM/"

j  = 0 
ii = 0
for ii in test_fold:

    path = base_path+"fold"+str(ii)

    for path, subdirs, files in os.walk( path ):
        for name in files:
            mf_bim_list.append(os.path.join(path, name))
            j += 1
    print("Reconstructed waves processed: "+"fold "+str(ii)+" - "+str(j) )

Reconstructed waves processed: fold 3 - 6929


In [74]:
#Load MF FGSM spectrogram wave paths

mf_fgsm_list  = []
base_path = "gtzan_reconstruct/mf_FGSM/"

j  = 0 
ii = 0
for ii in test_fold:

    path = base_path+"fold"+str(ii)

    for path, subdirs, files in os.walk( path ):
        for name in files:
            mf_fgsm_list.append(os.path.join(path, name))
            j += 1
    print("Reconstructed waves processed: "+"fold "+str(ii)+" - "+str(j) )

Reconstructed waves processed: fold 3 - 6929


In [75]:
#Power function

def power(v):
    n = 0
    s = 0
    for vn in v:
        square = vn*vn
        s += square
        n += 1
    return math.sqrt(s/n)

In [76]:
#Reconstructing original mel and attacked mel specs

snr_wave_mf   = np.ones((len_X, 1), dtype=float)
snr_mf_bim    = np.ones((len_X, 1), dtype=float)
snr_mf_fgsm   = np.ones((len_X, 1), dtype=float)
snr_wave_bim  = np.ones((len_X, 1), dtype=float)
snr_wave_fgsm = np.ones((len_X, 1), dtype=float)

for i in range(0, len_X):
    
    #POWER ORIGINAL
    file_wave, sr = librosa.load(test_filename_list[i])
    power_wave = power(file_wave[:110080])
        
    #POWER MF
    audio_rec_mf, sr = librosa.load(mf_list[i])
    power_mf = power(audio_rec_mf)
    
    #POWER MF_BIM
    audio_rec_bim, sr = librosa.load(mf_bim_list[i])
    power_mf_bim = power(audio_rec_bim)
    
    #POWER MF_FGSM
    audio_rec_fgsm, sr = librosa.load(mf_fgsm_list[i])
    power_mf_fgsm = power(audio_rec_fgsm)
    
    #POWER NOISE ORIGINAL-MF
    power_wave_m_mf = abs(power_wave-power_mf)
    quo = power_wave/power_wave_m_mf    
    snr = 20*math.log(quo, 10)
    snr_wave_mf[i] = snr
    
    #POWER NOISE MF-BIM
    power_mf_m_bim = abs(power_mf-power_mf_bim)
    quo = power_mf/power_mf_m_bim    
    snr = 20*math.log(quo, 10)
    snr_mf_bim[i] = snr
    
    #POWER NOISE MF-FGSM
    power_mf_m_fgsm = abs(power_mf-power_mf_fgsm)
    quo = power_mf/power_mf_m_fgsm   
    snr = 20*math.log(quo, 10)
    snr_mf_fgsm[i] = snr
    
    #POWER NOISE ORIGINAL-BIM
    power_wave_m_bim = abs(power_wave-power_mf_bim)
    quo = power_wave/power_wave_m_bim    
    snr = 20*math.log(quo, 10)
    snr_wave_bim[i] = snr
    
    #POWER NOISE ORIGINAL-FGSM
    power_wave_m_fgsm = abs(power_wave-power_mf_fgsm)
    quo = power_wave/power_wave_m_fgsm   
    snr = 20*math.log(quo, 10)
    snr_wave_fgsm[i] = snr
    
    if (i%50 == 0.000):
        print("Files read: " + str(i) ) 
        print("SNR WAVE-MF:", snr_wave_mf[i])
        print("SNR MF-BIM :", snr_mf_bim[i])
        print("SNR MF-FGSM:", snr_mf_fgsm[i])
        print("SNR WAVE-BIM :", snr_wave_bim[i])
        print("SNR WAVE-FGSM:", snr_wave_fgsm[i])


Files read: 0
SNR WAVE-MF: [11.84858773]
SNR MF-BIM : [39.21224621]
SNR MF-FGSM: [37.45661833]
SNR WAVE-BIM : [12.13007092]
SNR WAVE-FGSM: [12.19439351]
Files read: 50
SNR WAVE-MF: [35.34789873]
SNR MF-BIM : [40.74938575]
SNR MF-FGSM: [39.24684604]
SNR WAVE-BIM : [41.86476208]
SNR WAVE-FGSM: [43.92385862]
Files read: 100
SNR WAVE-MF: [10.87503057]
SNR MF-BIM : [55.70334667]
SNR MF-FGSM: [51.04883435]
SNR WAVE-BIM : [10.83952831]
SNR WAVE-FGSM: [10.93603954]
Files read: 150
SNR WAVE-MF: [2.85915282]
SNR MF-BIM : [47.34989578]
SNR MF-FGSM: [44.09846705]
SNR WAVE-BIM : [2.94867285]
SNR WAVE-FGSM: [2.73061286]
Files read: 200
SNR WAVE-MF: [9.65110672]
SNR MF-BIM : [40.97541694]
SNR MF-FGSM: [35.47548849]
SNR WAVE-BIM : [9.81076248]
SNR WAVE-FGSM: [9.95432411]
Files read: 250
SNR WAVE-MF: [30.83693234]
SNR MF-BIM : [43.29957284]
SNR MF-FGSM: [48.94056565]
SNR WAVE-BIM : [33.27798167]
SNR WAVE-FGSM: [32.02632284]
Files read: 300
SNR WAVE-MF: [10.98149658]
SNR MF-BIM : [36.15170005]
SNR MF-FG

Files read: 2700
SNR WAVE-MF: [5.701724]
SNR MF-BIM : [38.0396079]
SNR MF-FGSM: [30.58040881]
SNR WAVE-BIM : [5.80331986]
SNR WAVE-FGSM: [5.94345244]
Files read: 2750
SNR WAVE-MF: [5.78714396]
SNR MF-BIM : [35.84036638]
SNR MF-FGSM: [35.34609359]
SNR WAVE-BIM : [5.92094786]
SNR WAVE-FGSM: [5.92884706]
Files read: 2800
SNR WAVE-MF: [13.00211428]
SNR MF-BIM : [60.23599713]
SNR MF-FGSM: [45.47788447]
SNR WAVE-BIM : [13.03147845]
SNR WAVE-FGSM: [13.163933]
Files read: 2850
SNR WAVE-MF: [26.0837201]
SNR MF-BIM : [45.3075554]
SNR MF-FGSM: [43.0292553]
SNR WAVE-BIM : [27.03678018]
SNR WAVE-FGSM: [27.34426773]
Files read: 2900
SNR WAVE-MF: [1.33063791]
SNR MF-BIM : [42.16292455]
SNR MF-FGSM: [38.67105255]
SNR WAVE-BIM : [1.18522717]
SNR WAVE-FGSM: [1.11416313]
Files read: 2950
SNR WAVE-MF: [6.6040119]
SNR MF-BIM : [45.76982521]
SNR MF-FGSM: [33.99294557]
SNR WAVE-BIM : [6.65507457]
SNR WAVE-FGSM: [6.80384055]
Files read: 3000
SNR WAVE-MF: [17.09637927]
SNR MF-BIM : [50.67168397]
SNR MF-FGSM: [

Files read: 5400
SNR WAVE-MF: [5.56600464]
SNR MF-BIM : [43.08167155]
SNR MF-FGSM: [41.15498381]
SNR WAVE-BIM : [5.39123931]
SNR WAVE-FGSM: [5.34837699]
Files read: 5450
SNR WAVE-MF: [8.39966429]
SNR MF-BIM : [37.30415789]
SNR MF-FGSM: [60.2745898]
SNR WAVE-BIM : [8.59496858]
SNR WAVE-FGSM: [8.41339395]
Files read: 5500
SNR WAVE-MF: [16.12914654]
SNR MF-BIM : [48.61941354]
SNR MF-FGSM: [42.34921896]
SNR WAVE-BIM : [16.30491988]
SNR WAVE-FGSM: [16.49489925]
Files read: 5550
SNR WAVE-MF: [18.39351351]
SNR MF-BIM : [42.26317135]
SNR MF-FGSM: [48.43845469]
SNR WAVE-BIM : [18.89723462]
SNR WAVE-FGSM: [18.63727984]
Files read: 5600
SNR WAVE-MF: [0.29256592]
SNR MF-BIM : [40.55085281]
SNR MF-FGSM: [36.98179907]
SNR WAVE-BIM : [0.1282937]
SNR WAVE-FGSM: [0.04598975]
Files read: 5650
SNR WAVE-MF: [9.39468475]
SNR MF-BIM : [41.84109776]
SNR MF-FGSM: [44.24151447]
SNR WAVE-BIM : [9.12150806]
SNR WAVE-FGSM: [9.18668645]
Files read: 5700
SNR WAVE-MF: [9.10671098]
SNR MF-BIM : [50.70447738]
SNR MF-F

In [77]:
# SNR WAVE/MF

print("Fold: " + str(test_fold[0]))
print("Mean SNR: " + str(np.mean(snr_wave_mf)))
print("Stnd dev: " + str(np.std(snr_wave_mf)))

Fold: 3
Mean SNR: 8.420988704047799
Stnd dev: 11.242556100784293


In [78]:
# SNR MF/BIM

print("Fold: " + str(test_fold[0]))
print("Mean SNR: " + str(np.mean(snr_mf_bim)))
print("Stnd dev: " + str(np.std(snr_mf_bim)))

Fold: 3
Mean SNR: 44.47626363974939
Stnd dev: 9.634049154741913


In [79]:
# SNR MF/FGSM

print("Fold: " + str(test_fold[0]))
print("Mean SNR: " + str(np.mean(snr_mf_fgsm)))
print("Stnd dev: " + str(np.std(snr_mf_fgsm)))

Fold: 3
Mean SNR: 43.046974037856955
Stnd dev: 9.157862517892855


In [80]:
# SNR WAVE/BIM

print("Fold: " + str(test_fold[0]))
print("Mean SNR: " + str(np.mean(snr_wave_bim)))
print("Stnd dev: " + str(np.std(snr_wave_bim)))

Fold: 3
Mean SNR: 8.365710494981252
Stnd dev: 11.219738212407641


In [81]:
# SNR WAVE/FGSM

print("Fold: " + str(test_fold[0]))
print("Mean SNR: " + str(np.mean(snr_wave_fgsm)))
print("Stnd dev: " + str(np.std(snr_wave_fgsm)))

Fold: 3
Mean SNR: 8.354530837127609
Stnd dev: 11.288328700484334


In [None]:
#Play sound from original reconstructed signal

IPython.display.Audio(data=audio_rec_original, rate=sr)


In [None]:
#Play sound from attacked reconstructed signal

IPython.display.Audio(data=audio_rec_attacked_bim, rate=sr)

In [None]:
#Difference

IPython.display.Audio(data=audio_rec_original-audio_rec_attacked_bim, rate=sr)