In [None]:
# automatically reload imported modules (e.g. lib_augmentation) whenever their
# source is edited while the notebook is running
%load_ext autoreload
%autoreload 2

%matplotlib widget


import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
import sounddevice as sd

from lib_augmentation import *

# Root folder of the exercise data. The path is a Windows location that is
# completed with the $USERNAME environment variable; adjust it on other setups.
data_path = os.path.expandvars('C:/Users/$USERNAME/Desktop/data/06_data_augmentation/')

path_clean_speech = os.path.join(data_path, 'test/clean/')
path_rirs = os.path.join(data_path, 'test/rir/')

# Sampling rate (Hz) that every audio file below is loaded with. Kept as one
# integer constant instead of repeating a float literal in each load call.
SAMPLE_RATE = 16000

# Example utterance, with its mean subtracted.
clean_speech, _ = librosa.load(os.path.join(path_clean_speech, 'p227_024.wav'), sr=SAMPLE_RATE)
clean_speech -= np.mean(clean_speech)

# Two noise recordings joined back to back, mean-free, and cut to at most the
# length of the utterance (nothing is padded if the noise is shorter).
noise1, _ = librosa.load(os.path.join(data_path, 'test/noise/1-137-A-32.wav'), sr=SAMPLE_RATE)
noise2, _ = librosa.load(os.path.join(data_path, 'test/noise/1-17092-A-27.wav'), sr=SAMPLE_RATE)
noise = np.concatenate((noise1, noise2))
noise -= np.mean(noise)
noise = noise[:len(clean_speech)]

# Two room impulse responses loaded from the RIR folder.
rir1, _ = librosa.load(os.path.join(path_rirs, '1221.wav'), sr=SAMPLE_RATE)
rir2, _ = librosa.load(os.path.join(path_rirs, '1201.wav'), sr=SAMPLE_RATE)

## 6.1 Voice Activity Detection

##### Plot Histogram of Log Power

In [None]:
# YOUR CODE HERE

##### Fit GMM and Show Means & Variances

In [None]:
# YOUR CODE HERE

##### Plot Gaussians in Histogram

In [None]:
def gaussian(x, m, cov):
    """ Evaluate the density of a one-dimensional gaussian / normal
        distribution at the value(s) *x*.

        *m* is the mean and *cov* is the variance of the distribution; the
        standard deviation is obtained inside as the square root of *cov*.
        The result has the same shape as *x*.
    """
    sigma = np.sqrt(cov)
    # deviation from the mean, measured in standard deviations
    z = (x - m) / sigma
    density = np.exp(-(z ** 2) / 2)
    # normalisation so that the density integrates to one
    density = density / np.sqrt(2 * np.pi)
    return density / sigma

# YOUR CODE HERE

##### Test Function `vad_extraction`

In [None]:
# Run the voice activity detection on the example utterance.
vad = vad_extraction(clean_speech)

_, axis = plt.subplots(1)

axis.plot(clean_speech)

# Each VAD value is drawn 160 samples after the previous one.
# NOTE(review): 160 is presumably the frame hop used inside vad_extraction
# (10 ms at 16 kHz) - confirm against the implementation.
xs = np.arange(len(vad)) * 160
# Stretch the VAD track to the peak amplitude of the speech signal so that both
# curves share a comparable range in the same plot.
axis.plot(xs, vad * np.max(np.abs(clean_speech)))

## 6.2 Test 'Mix Noise and Speech'

In [None]:
# Mix the utterance with the noise, passing both room impulse responses.
# NOTE(review): the argument 5 is presumably the target SNR in dB - confirm
# against the signature of `mix`.
clean_speech_changed, noisy_speech = mix(clean_speech, noise, 5, [rir1, rir2])

# Overlay both returned signals; the noisy one is drawn first so the clean one
# stays visible on top. Labels make the two curves distinguishable.
_, axis = plt.subplots(1)
axis.plot(noisy_speech, label='noisy speech')
axis.plot(clean_speech_changed, label='clean speech (changed)')
axis.set_xlabel('sample index')
axis.legend()

In [None]:
# Play the original utterance immediately followed by its noisy version.
sr = 16000
playback = np.concatenate([clean_speech, noisy_speech])
sd.play(playback, samplerate=sr)

## 6.3 Test 'Feature Extraction'

In [None]:
# Extract the features of the noisy mixture, then print their shape followed by
# the values themselves.
features = feature_extraction(noisy_speech)
report = (features.shape, '\n\n', features)
print(*report)

## 6.4 Test 'Generators'

In [None]:
# construct the generator from the folder of clean speech files
gen_speech = GenSpeech(path_clean_speech)

# draw two items to exercise __next__()
clean_speech_1 = next(gen_speech)
clean_speech_2 = next(gen_speech)

# show both retrieved signals side by side
_, axes = plt.subplots(1, 2)
for panel, signal in zip(axes, (clean_speech_1, clean_speech_2)):
    panel.plot(signal)

# exercise __len__()
print(len(gen_speech))

# consume the remaining items to check that the iteration terminates
for speech in gen_speech:
    pass

In [None]:
# test init, i.e. creating generator
gen_rirs = create_gen_rir(path_rirs)

# test __next__() two times
# Stored under their own names so that the `rir1` / `rir2` loaded in the setup
# cell are not overwritten; re-running the mix cell afterwards then still uses
# the same impulse responses.
gen_rir_1 = next(gen_rirs)
gen_rir_2 = next(gen_rirs)

# plot retrieved example rirs
_, axes = plt.subplots(1, 2)
axes[0].plot(gen_rir_1)
axes[1].plot(gen_rir_2)

## 6.5 Run Augmentation

In [None]:
# Run the augmentation once per dataset split folder below data_path.
splits = ('train', 'val', 'test')
for split in splits:
    split_dir = os.path.join(data_path, split)
    run_augmentation(split_dir, debug=True)