# In [None]:
import sys
import numpy as np
import matplotlib
import librosa
from matplotlib import pyplot as plt
import librosa.display
from scipy import signal
from scipy.fft import fftshift
import IPython
import winsound
import soundfile


# Analysis timing constants in seconds; the script converts them to sample
# counts (Ns, Nf) once the sample rate is known.
Ts = 0.01  # 10 ms -- presumably the frame shift; confirm against the lab notes
Tf = 0.02  # 20 ms -- presumably the frame length; confirm against the lab notes
# Matplotlib's "bone_r" colormap; not referenced by the code visible in this file.
cmap_plot = plt.cm.bone_r

def getFIRFFT(H, order):
    """Design a real, symmetric FIR filter from a sampled frequency response.

    ``H`` is treated as a one-sided response: ``len(H) == NFFT/2 + 1`` samples
    covering bins ``0 .. NFFT/2``.  It is mirrored into a full ``NFFT``-point
    spectrum, inverse-transformed, cut down to the taps centred on lag 0 and
    tapered with a symmetric Hamming window.

    Args:
        H: 1-D sequence with at least two samples of the desired one-sided
            frequency response.
        order: Requested filter order (non-negative).  Values larger than
            ``NFFT - 1`` are clamped to ``NFFT - 1``.

    Returns:
        Real ``numpy.ndarray`` holding ``2 * (order // 2) + 1`` coefficients,
        symmetric about the centre tap whenever ``H`` is real.

    Raises:
        ValueError: If ``H`` has fewer than two samples or ``order`` is
            negative.
    """
    H = np.asarray(H)
    if H.ndim != 1 or len(H) < 2:
        raise ValueError("H must be a 1-D sequence with at least two samples")
    if order < 0:
        raise ValueError("order must be non-negative")

    NFFT = (len(H) - 1) * 2
    # Mirror bins NFFT/2-1 .. 1 behind the one-sided response so that the
    # spectrum has NFFT points and its inverse transform is real for real H.
    full_spectrum = np.concatenate((H, H[-2:0:-1])).astype(complex)
    # Transform all NFFT points; a shorter length would crop the mirrored
    # half that was just appended.
    h = np.fft.ifft(full_spectrum, NFFT)

    order = min(order, NFFT - 1)
    half = order // 2
    # Keep lags -half .. +half.  Negative lags live at the end of the IFFT
    # output; ``len(h) - half`` (rather than ``-half``) keeps the slice empty
    # when half == 0.
    h = np.concatenate((h[len(h) - half:], h[:half + 1]))

    # Symmetric (non-periodic) Hamming window to reduce truncation ripple.
    win = signal.get_window('hamming', len(h), fftbins=False)
    return (h * win).real

def drawFIRFFT(h, NFFT):
    """Plot filter coefficients next to their FFT magnitude.

    The left panel shows ``h`` itself; the right panel shows the absolute
    value of its ``NFFT``-point FFT (all ``NFFT`` bins).  The figure is
    displayed with ``plt.show()``.

    Args:
        h: Filter coefficients to plot.
        NFFT: FFT length used for the magnitude panel.

    Returns:
        ``h``, unchanged.
    """
    magnitude = np.absolute(np.fft.fft(h, NFFT))
    for panel, curve in enumerate((h, magnitude), start=1):
        plt.subplot(1, 2, panel)
        plt.plot(curve)
    plt.show()
    return h

# Load the audio file (resampled to 16 kHz) and derive the frame sizes.
wavefile = 'D:/YYN/audio_/digitrec_ye/segmented/YouYeNa/3/kdigits0-3.wav'
x, sr = librosa.load(wavefile, sr=16000)
Ns, Nf = int(sr * Ts), int(sr * Tf)  # number of samples in 10 ms and in 20 ms

# Clean speech and car-noise recordings, both resampled to 16 kHz.
# speechfile is the same path as wavefile, so `speech` holds the same samples as `x`.
noisefile = 'D:/YYN/audio_/lab04_car.wav'
speechfile = 'D:/YYN/audio_/digitrec_ye/segmented/YouYeNa/3/kdigits0-3.wav'
speech, sr_s = librosa.load(speechfile, sr=16000)
noise, sr_n = librosa.load(noisefile, sr=16000)

# Time-domain waveforms: speech in the top panel, car noise below it.
for _row, (_wave, _caption) in enumerate(((speech, 'speech'), (noise, 'car noise')), start=1):
    plt.subplot(2, 1, _row)
    plt.plot(_wave)
    plt.title(_caption)

# Mel power spectrograms of both recordings (1024-point FFT, hop of 10 samples).
speech_f = librosa.feature.melspectrogram(y=speech, sr=16000, n_fft=1024, hop_length=10)
noise_f = librosa.feature.melspectrogram(y=noise, sr=16000, n_fft=1024, hop_length=10)

# New figure with the two spectrograms in dB relative to each one's own peak.
plt.figure()
for _row, _mel in enumerate((speech_f, noise_f), start=1):
    plt.subplot(2, 1, _row)
    librosa.display.specshow(
        librosa.power_to_db(_mel, ref=np.max),
        sr=sr,
        hop_length=10,
        x_axis='time',
        y_axis='mel',
    )

# Build the noisy mixture: add car noise to the speech sample by sample.
# Both signals are cut to their common length instead of a hard-coded sample
# count, so the addition works for recordings of any duration.
n_mix = min(len(speech), len(noise))
noise2 = noise[:n_mix]
mix = speech[:n_mix] + noise2

# Mel spectrogram of the mixture itself (it was previously computed from the
# noise alone, so the lower panel did not show the mixed signal).
mix_f = librosa.feature.melspectrogram(y=mix, sr=16000, hop_length=10, n_fft=1024)

# Mixture waveform on top, its mel spectrogram (dB re. peak) below.
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(mix)
plt.subplot(2, 1, 2)
librosa.display.specshow(librosa.power_to_db(mix_f, ref=np.max), y_axis='mel', sr=16000, hop_length=10, x_axis='time')

# --- Initial FIR coefficients ------------------------------------------------
# Nf-sample response with ones in the first and last n bins, zeros in between.
# NOTE(review): H is laid out like a two-sided spectrum, while getFIRFFT
# mirrors its argument as a one-sided response -- confirm the intended design.
# The result is only the starting point of the adaptation below.
n = int(sr / 200)
H = np.array(([1.0] * n) + ([0.0] * int(Nf - 2 * n)) + ([1.0] * n))
fir_taps = getFIRFFT(H, order=30)  # named fir_taps so the builtin filter() is not shadowed
fl = len(fir_taps)

# --- Noise reduction with a normalised LMS (NLMS) adaptive filter -------------
# Per sample k, with x_k the fl most recent mixture samples (newest first):
#     y_k = w . x_k                      filtered output
#     e_k = speech[k] - y_k              error against the clean reference
#     w  += mu / (delta + x_k . x_k) * x_k * e_k
# The products are inner products (scalars).  The filter output y_k is what is
# collected for plotting; earlier the unfiltered mixture was appended instead.
mu = 0.3         # adaptation step size
delta = 1.0e-6   # regulariser that keeps the normalisation away from 0/0

# Process the first half second, clamped so short recordings do not overrun.
n_adapt = min(int(sr / 2), len(mix), len(speech))
# Prepend fl-1 zeros so every sample has a full history window.
padded = np.concatenate((np.zeros(fl - 1), mix[:n_adapt]))
mix2 = np.zeros(n_adapt)
for k in range(n_adapt):
    x_vec = padded[k:k + fl][::-1]       # mix[k], mix[k-1], ..., mix[k-fl+1]
    y = fir_taps @ x_vec
    err = speech[k] - y
    fir_taps = fir_taps + (mu / (delta + x_vec @ x_vec)) * x_vec * err
    mix2[k] = y

mix_end_f = librosa.feature.melspectrogram(y=mix2, sr=16000, hop_length=10, n_fft=1024)

# Filtered waveform on top, its mel spectrogram (dB re. peak) below.
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(mix2)
plt.title('noise remove')
plt.subplot(2, 1, 2)
librosa.display.specshow(librosa.power_to_db(mix_end_f, ref=np.max), y_axis='mel', sr=16000, hop_length=10, x_axis='time')
plt.title('noise remove')
plt.show()
