In [4]:
from datetime import datetime

import numpy as np
import noisereduce as nr
from scipy.signal import istft, stft
import sounddevice as sd

### Default Devices

In [5]:
# List every audio device PortAudio can see; in the listing, '>' marks the
# current default input and '<' the current default output.
print(sd.query_devices())
# Show the default device pair that sounddevice will use for new streams.
print(f'\nDefault Devices for SD library:\n{sd.default.device}')

  0 RTK UHD HDR, Core Audio (0 in, 2 out)
  1 External Headphones, Core Audio (0 in, 2 out)
  2 MacBook Pro Microphone, Core Audio (1 in, 0 out)
< 3 MacBook Pro Speakers, Core Audio (0 in, 2 out)
> 4 krisp microphone, Core Audio (1 in, 0 out)
  5 krisp speaker, Core Audio (0 in, 1 out)

Default Devices for SD library:
[4, 3]


In [6]:
# [input, output] device indices taken from the listing above:
# 4 = krisp microphone, 1 = External Headphones.
# NOTE: these indices are specific to this machine and can change when
# devices are plugged/unplugged; re-check the listing before running.
sd.default.device = [4, 1]

### Set Defaults

In [7]:
# Defaults picked up by every later sd.rec / sd.Stream call that does not
# override them explicitly.
sd.default.samplerate = 44100  # samples per second
sd.default.channels = 1  # single channel (mono)
sd.default.blocksize = 0  # 0 is the first row of the latency table below

### Latency Analysis

Changing the blocksize of the stream directly affects the latency. I ran a few simulations with different blocksizes, and it's clear that the blocksize needs to be 512 or smaller when each chunk is processed on the fly.

|blocksize| block processing time |
|:---:|:---------:|
|  0  |     0.997 |
| 512 |    11.587 |
|1024|    23.184 |
|2048|    46.188 |

### Collect frequencies of Background Noise

In [28]:
def get_noise(duration=5):
    """Record a background-noise sample and return it as a flat 1-D array.

    Parameters
    ----------
    duration : float
        Length of the recording in seconds.

    Returns
    -------
    numpy.ndarray
        The recorded samples, flattened with ``reshape(-1)``.

    Notes
    -----
    Uses the sounddevice defaults (sample rate, channels, device) and blocks
    until the recording has finished.
    """
    n_frames = int(duration * sd.default.samplerate)
    recording = sd.rec(n_frames, blocking=True)
    return recording.reshape(-1)

def calculate_noise_spectrum(noise=None, duration=5, fs=None, nperseg=512, noverlap=256):
    """Estimate the time-averaged magnitude spectrum of a noise sample.

    Parameters
    ----------
    noise : array-like, optional
        Noise samples. Any shape is accepted and flattened to 1-D, so the
        ``(frames, 1)`` array returned by ``sd.rec`` can be passed directly.
        When ``None``, a fresh sample is recorded via ``get_noise(duration)``.
    duration : float
        Seconds to record when ``noise`` is ``None``; ignored otherwise.
    fs : float, optional
        Sampling rate passed to the STFT. Defaults to
        ``sd.default.samplerate``.
    nperseg, noverlap : int
        STFT segment length and overlap. They must match the values used
        wherever this spectrum is later subtracted, otherwise the number of
        frequency bins will not line up.

    Returns
    -------
    numpy.ndarray
        1-D array holding the mean STFT magnitude of each frequency bin
        (``nperseg // 2 + 1`` bins for real input).
    """
    if noise is None:
        # Reuse the shared recorder instead of duplicating the sd.rec call.
        noise = get_noise(duration)
    if fs is None:
        fs = sd.default.samplerate
    # Flatten so column-shaped recordings are transformed along time rather
    # than along a length-1 channel axis.
    samples = np.asarray(noise).reshape(-1)
    _, _, Zxx = stft(samples, fs=fs, nperseg=nperseg, noverlap=noverlap)
    # Average the magnitudes over the time frames (axis 1 of Zxx).
    return np.mean(np.abs(Zxx), axis=1)

# Record a background-noise sample with the default duration; stay quiet while
# this runs so that only ambient noise is captured.
noise = get_noise()
# Per-frequency-bin average magnitude of that sample, consumed by the
# spectral-subtraction callback further down.
noise_spectrum = calculate_noise_spectrum(noise=noise)

# Remove Background Noise - Spectral Subtraction Algorithm

In [11]:
def spectral_subtract(block, noise_mag, over_subtraction=5., fs=1.0, nperseg=512, noverlap=256):
    """Apply magnitude spectral subtraction to one block of audio.

    Parameters
    ----------
    block : array-like
        Audio samples; flattened to 1-D before processing.
    noise_mag : array-like
        Noise magnitude per STFT frequency bin (length ``nperseg // 2 + 1``
        for real input).
    over_subtraction : float
        Factor the noise magnitude is multiplied by before it is subtracted.
    fs : float
        Sampling rate handed to ``stft``/``istft``.
    nperseg, noverlap : int
        STFT segment length and overlap; must match those used to build
        ``noise_mag``.

    Returns
    -------
    numpy.ndarray
        1-D array with exactly as many samples as ``block``.

    Raises
    ------
    ValueError
        If ``noise_mag`` does not have one entry per STFT frequency bin.
    """
    samples = np.asarray(block, dtype=float).reshape(-1)
    noise_mag = np.asarray(noise_mag, dtype=float).reshape(-1)
    _, _, Zxx = stft(samples, fs=fs, nperseg=nperseg, noverlap=noverlap)
    if noise_mag.shape[0] != Zxx.shape[0]:
        raise ValueError(
            f'noise spectrum has {noise_mag.shape[0]} bins, STFT has {Zxx.shape[0]}'
        )
    # Subtract the scaled noise floor and clamp at zero so magnitudes stay valid.
    magnitude = np.maximum(0, np.abs(Zxx) - over_subtraction * noise_mag[:, np.newaxis])
    # Re-attach the phase of the input: inverting a magnitude-only spectrum
    # would not reproduce the original waveform.
    enhanced = magnitude * np.exp(1j * np.angle(Zxx))
    _, cleaned = istft(enhanced, fs=fs, nperseg=nperseg, noverlap=noverlap)
    # The inverse transform can come back longer than the input when the STFT
    # padded the block; copy only as many samples as the input had.
    result = np.zeros(samples.shape[0], dtype=cleaned.dtype)
    n_valid = min(samples.shape[0], cleaned.shape[0])
    result[:n_valid] = cleaned[:n_valid]
    return result


def callback(indata, outdata, frames, time, status):
    """Stream callback: write a spectrally-subtracted copy of the input block.

    Reads the module-level ``noise_spectrum`` (it must be computed before the
    stream is started) and fills ``outdata`` in place. The signature is the
    one ``sd.Stream`` expects from its ``callback`` argument.
    """
    if status:
        # Surface overflow/underflow flags reported by the stream.
        print(status)
    cleaned = spectral_subtract(indata, noise_spectrum, fs=sd.default.samplerate)
    outdata[:] = cleaned[:frames].reshape(-1, 1)

# Run the callback above on a live duplex stream for 5 seconds.
# latency='low' is already requested here; if blocks get dropped
# ("input overflow"), 'high' is the alternative to try.
with sd.Stream(blocksize=1024, latency='low', callback=callback):
    sd.sleep(5 * 1000)

Exception ignored from cffi callback <function _StreamBase.__init__.<locals>.callback_ptr at 0x168d4b740>:
Traceback (most recent call last):
  File "/Users/scottcronin/.pyenv/versions/3.11.2/envs/noise_reduction/lib/python3.11/site-packages/sounddevice.py", line 886, in callback_ptr
    return _wrap_callback(
           ^^^^^^^^^^^^^^^
  File "/Users/scottcronin/.pyenv/versions/3.11.2/envs/noise_reduction/lib/python3.11/site-packages/sounddevice.py", line 2687, in _wrap_callback
    callback(*args)
  File "/var/folders/38/31kdf54n52s1xyf2vrlp12qh0000gn/T/ipykernel_71893/43209452.py", line 6, in callback
NameError: name 'noise_spectrum' is not defined


input overflow


# Remove Background Noise - NoiseReduce library with active noise reduction

This approach cannot be run with low latency.

In [None]:
def callback(indata, outdata, frames, time, status):
    """Stream callback: play back the noisereduce-processed input block.

    Flattens the incoming block, runs ``nr.reduce_noise`` on it with the
    default sample rate and ``n_fft=512``, and writes the result to
    ``outdata`` in place. The signature is the one ``sd.Stream`` expects from
    its ``callback`` argument.
    """
    if status:
        # Surface overflow/underflow flags reported by the stream.
        print(status)
    reduced = nr.reduce_noise(indata.reshape(-1), sr=sd.default.samplerate, n_fft=512)
    # Write the processed signal; previously the unprocessed input was copied
    # to the output, so the noise reduction result was never heard.
    outdata[:] = reduced[:frames].reshape(-1, 1)

# Define the run length in this cell: `duration` is otherwise first assigned
# in a later cell, so running the notebook top to bottom raised NameError here.
duration = 5.
# latency='low' is already requested; 'high' is the alternative to try if
# blocks are dropped.
with sd.Stream(callback=callback, latency='low', blocksize=2048):
    sd.sleep(int(duration * 1000))

# Using a noise reduction app

In [12]:
# Re-list the devices to confirm the indices used in the cells below
# ('>' marks the default input, '<' the default output).
print(sd.query_devices())

  0 RTK UHD HDR, Core Audio (0 in, 2 out)
< 1 External Headphones, Core Audio (0 in, 2 out)
  2 MacBook Pro Microphone, Core Audio (1 in, 0 out)
  3 MacBook Pro Speakers, Core Audio (0 in, 2 out)
> 4 krisp microphone, Core Audio (1 in, 0 out)
  5 krisp speaker, Core Audio (0 in, 1 out)


In [15]:
def callback(indata, outdata, frames, time, status):
    """Stream callback: copy the input block to the output unchanged.

    Any status flag raised by the stream is printed, matching the other
    callbacks in this notebook, so dropped blocks do not go unnoticed.
    """
    if status:
        print(status)
    outdata[:] = indata

# Pass-through from device 4 (krisp microphone) to device 1 (External
# Headphones), per the listing above, for `duration` seconds.
duration = 10.
with sd.Stream(device=[4,1], blocksize=2048, latency='low', callback=callback):
    sd.sleep(int(duration * 1000))

In [14]:
def callback(indata, outdata, frames, time, status):
    """Stream callback: copy the input block to the output unchanged.

    Any status flag raised by the stream is printed, matching the other
    callbacks in this notebook, so dropped blocks do not go unnoticed.
    """
    if status:
        print(status)
    outdata[:] = indata

# Same pass-through, but from device 2 (MacBook Pro Microphone) to device 1
# (External Headphones), as a baseline without the noise-reduction app.
duration = 10.
with sd.Stream(device=[2,1], blocksize=2048, latency='low', callback=callback):
    sd.sleep(int(duration * 1000))