In [None]:
import asyncio
import functools
import time

import enlighten
import matplotlib.pyplot as plt
import numpy as np
import pyaudio
from numpy.typing import NDArray
from scipy import signal
from scipy.io import wavfile

# Dedicated asyncio event loop, installed as the current loop; `background`
# uses it to hand work to the loop's default executor.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Default size (width, height) for every figure drawn in this notebook.
plt.rcParams.update({"figure.figsize": (12, 9)})


def background(f):
    """Decorator that runs ``f`` in the event loop's default executor.

    Calling the decorated function does not execute ``f`` directly; the call
    is submitted to the default executor of the module-level ``loop`` and the
    resulting future is returned immediately.

    Args:
        f: The callable to run in the executor.

    Returns:
        A wrapper with the same call signature as ``f`` that returns the
        future produced by ``loop.run_in_executor``.
    """

    @functools.wraps(f)
    def wrapped(*args, **kwargs):
        # run_in_executor() only forwards positional arguments, so bind
        # everything (including keyword arguments) with functools.partial.
        return loop.run_in_executor(None, functools.partial(f, *args, **kwargs))

    return wrapped


def next_power_of_2(x):
    """Return the smallest power of two >= ``x`` for a non-negative integer.

    ``x == 0`` is special-cased to 1.
    """
    if x == 0:
        return 1
    # (x - 1).bit_length() is the exponent of the next power of two.
    exponent = (x - 1).bit_length()
    return 1 << exponent


def spectral_subtraction(rate: int, noisy: NDArray):
    """Denoise a 1-D signal with magnitude spectral subtraction.

    The signal is cut into 20 ms Hamming-windowed frames with 50 % overlap.
    An initial noise magnitude spectrum is averaged over the first (up to)
    five non-overlapping frames and subtracted from the magnitude spectrum of
    every frame; negative results are floored at zero.  Frames are
    resynthesised with the noisy phase and recombined by overlap-add.  Frames
    whose segmental SNR is below 3 dB are treated as noise and blended into
    the running noise estimate.

    Args:
        rate: Sample rate in Hz; determines the frame length.
        noisy: 1-D array of samples.  The leading frames seed the noise
            estimate, so they are assumed to contain noise only.

    Returns:
        An array with the same shape and dtype as ``noisy``.  Samples after
        the last complete half-frame are left at zero.  An input shorter than
        one frame is returned as an unmodified copy.  For integer dtypes the
        result is clipped to the dtype's range before casting.

    Raises:
        ValueError: If ``rate`` is too low to yield a frame of at least two
            samples.
    """
    # --- framing parameters --- #
    frame_len = 20 * rate // 1000  # 20 ms frame, in samples
    # The overlap-add below assumes an exact 50 % overlap, which needs an even
    # frame length (e.g. 22050 Hz would otherwise give 441 samples).
    frame_len -= frame_len % 2
    if frame_len < 2:
        raise ValueError(f"sample rate {rate} is too low for a 20 ms frame")
    hop = frame_len // 2  # overlap length == frame advance

    n_samples = noisy.shape[0]
    if n_samples < frame_len:
        # Not even one full frame: nothing to process.
        return noisy.copy()

    # --- algorithm parameters --- #
    Thres = 3  # VAD threshold in dB SNRseg
    Expnt = 1.0  # spectral exponent (1.0 = magnitude subtraction)
    G = 0.9  # smoothing factor for the noise update

    win = np.hamming(frame_len)
    # normalization gain for overlap+add with 50% overlap
    winGain = hop / win.sum()

    nFFT = 2 * next_power_of_2(frame_len)

    # --- initial noise estimate from the leading frames --- #
    n_noise_frames = min(5, n_samples // frame_len)  # >= 1 at this point
    noise_mu = np.zeros(nFFT)
    for i in range(n_noise_frames):
        first = i * frame_len
        noise_mu += np.abs(np.fft.fft(win * noisy[first : first + frame_len], nFFT))
    noise_mu /= n_noise_frames

    # --- frame-by-frame processing --- #
    n_frames = n_samples // hop - 1
    x_old = np.zeros(hop)
    xfinal = np.zeros(n_samples)

    for n in range(n_frames):
        first = n * hop
        spec = np.fft.fft(win * noisy[first : first + frame_len], nFFT)
        sig = np.abs(spec)  # magnitude
        theta = np.angle(spec)  # noisy phase, reused for resynthesis

        # Segmental SNR.  Silent frames / an all-zero noise estimate give
        # inf or nan here; both compare False against the threshold, so just
        # silence the floating-point warnings.
        with np.errstate(divide="ignore", invalid="ignore"):
            SNRseg = 10 * np.log10(
                np.linalg.norm(sig, 2) ** 2 / np.linalg.norm(noise_mu, 2) ** 2
            )

        # spectral subtraction with half-wave rectification
        sub_speech = np.maximum(sig**Expnt - noise_mu**Expnt, 0.0)

        # simple VAD: a low-SNR frame updates the smoothed noise spectrum
        # (applied after the subtraction, so it affects the next frame)
        if SNRseg < Thres:
            noise_temp = G * noise_mu**Expnt + (1 - G) * sig**Expnt
            noise_mu = noise_temp ** (1 / Expnt)

        # restore the phase and go back to the time domain
        xi = np.fft.ifft(sub_speech ** (1 / Expnt) * np.exp(1j * theta)).real

        # overlap and add
        xfinal[first : first + hop] = x_old + xi[:hop]
        x_old = xi[hop:frame_len]

    # flush the second half of the last frame
    tail = n_frames * hop
    xfinal[tail : tail + hop] = x_old

    out = winGain * xfinal
    if np.issubdtype(noisy.dtype, np.integer):
        # Avoid integer wrap-around when casting back (e.g. to int16).
        limits = np.iinfo(noisy.dtype)
        out = np.clip(out, limits.min, limits.max)
    return out.astype(noisy.dtype)


# --- playback / processing configuration --- #
chunk_seconds = 5  # length of one processing chunk, in seconds

p = pyaudio.PyAudio()

# Load the recording and drop leading zero samples.
rate, data = wavfile.read("./Noise2_mono.wav")
data = np.trim_zeros(data, "f")
nsamples = len(data)
chunk = chunk_seconds * rate  # chunk length in samples
data_seconds = nsamples / rate

# Output buffer filled by `process` and read by the audio callback.
result = np.zeros(data.shape, dtype=np.int16)
current_frame = 0  # next sample to be played
next_chunk_frame = 0  # start of the next chunk to be processed

# Accumulators for the (currently disabled) spectrogram display.
spec_t_offset = 0
spec_t = np.array([])
spec_sxx = np.zeros((129, 0))

# --- progress bars --- #
# Both counters use the same layout; the two names are kept for existing users.
bar_format1 = (
    "{desc}{desc_pad}{percentage:3.0f}%|{bar}| "
    "{count:{len_total}d} "
    "[{rate:.2f}{unit_pad}{unit}/s]"
)
manager = enlighten.get_manager()
processed = manager.counter(
    total=nsamples,
    desc="Processing",
    unit="frames",
    color="green",
    bar_format=bar_format1,
)
bar_format2 = bar_format1
playing = manager.counter(
    total=nsamples,
    desc="Playing",
    unit="frames",
    color="white",
    bar_format=bar_format2,
)


@background
def process(rate, data, start=None):
    """Denoise one chunk of ``data`` and store it in the global ``result``.

    Because of the ``background`` decorator, a call returns a future and the
    body runs later in an executor thread.

    Args:
        rate: Sample rate, forwarded to ``spectral_subtraction``.
        data: The full input signal.
        start: Index of the first sample of the chunk to process.  When
            omitted, the value of the global ``current_frame`` at the time
            the body runs is used (the previous behaviour).
    """
    # The chunk must be fixed when the job is scheduled: by the time this
    # body runs in the worker thread, ``current_frame`` has usually moved on.
    if start is None:
        start = current_frame
    stop = start + chunk

    segment = data[start:stop].astype(np.int16)
    if len(segment) == 0:
        return  # scheduled past the end of the recording
    result[start:stop] = spectral_subtraction(rate, segment)
    # Count what was actually processed so the bar does not overshoot on the
    # final, shorter chunk.
    processed.update(len(segment))

    # Disabled live spectrogram of the processed audio (kept for reference;
    # needs `global spec_t_offset, spec_t, spec_sxx` when re-enabled):
    # f, t, sxx = signal.spectrogram(result[start:stop], fs=rate)
    # spec_t = np.append(spec_t, t + spec_t_offset)
    # spec_t_offset += chunk_seconds
    # spec_sxx = np.append(spec_sxx, sxx, axis=1)
    # plt.pcolormesh(spec_t, f, 10 * np.log10(spec_sxx), shading="gouraud")
    # plt.xlim((0, 60))
    # display(plt.gcf())
    # plt.close("all")


def callback(in_data, frame_count, time_info, status):
    """PyAudio output callback: schedule denoising ahead and hand out audio.

    Args:
        in_data: Unused (output-only stream).
        frame_count: Number of frames PyAudio asks for.
        time_info: Unused.
        status: Unused.

    Returns:
        A ``(bytes, flag)`` tuple with the next ``frame_count`` frames of
        ``result`` (fewer at the end of the recording) and ``paContinue``, or
        ``paComplete`` once the end of the recording has been reached.
    """
    global current_frame, next_chunk_frame

    # Keep processing at least one chunk ahead of the playback position.
    # The chunk start is passed positionally and explicitly so every worker
    # handles its own chunk; stop scheduling once the recording is covered.
    while next_chunk_frame < nsamples and current_frame + chunk >= next_chunk_frame:
        process(rate, data, next_chunk_frame)
        next_chunk_frame += chunk

    start = current_frame
    stop = min(start + frame_count, nsamples)
    current_frame = start + frame_count

    playing.update(max(stop - start, 0))

    flag = pyaudio.paContinue if stop < nsamples else pyaudio.paComplete
    # PyAudio documents the output as a byte string.
    return result[start:stop].tobytes(), flag


# Open a mono 16-bit output stream that pulls its audio from `callback`.
try:
    player = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=rate,
        output=True,
        stream_callback=callback,
    )
except Exception:
    # Do not leave PortAudio initialised if the stream cannot be opened.
    p.terminate()
    raise

try:
    # Audio is delivered through `callback`; just wait until the stream ends.
    while player.is_active():
        time.sleep(0.1)
finally:
    # Release the audio device even when the wait is interrupted
    # (e.g. KeyboardInterrupt / "interrupt kernel").
    player.close()
    p.terminate()

In [90]:

# Repeats the stream close / PyAudio terminate calls that end the previous cell.
# NOTE(review): presumably kept for manual cleanup after interrupting the
# playback loop above — confirm, or delete this cell.
player.close()
p.terminate()