In [39]:
import os
import scipy.io.wavfile as sciwav
import numpy as np
import random
from scipy.fftpack import dct, idct
from numpy.fft import fft, ifft

import matplotlib
import matplotlib.pyplot as plt

# for reproducibility
np.random.seed(1337)
random.seed(1337)

In [40]:
# external custom code I wrote
from load_data import *
from windowing import *
from utility import *
from pesq import *
from consts import *

In [41]:
# number of speech files for train, val, and test
TRAIN_SIZE = 50
VAL_SIZE = 50
TEST_SIZE = 50

In [42]:
[train_paths, val_paths, test_paths], \
[train_waveforms, val_waveforms, test_waveforms], \
[train_procwave, val_procwave, test_procwave], \
[train_wparams, val_wparams, test_wparams], \
[train_windows, val_windows, test_windows] = load_data(TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

0: /home/sri/Desktop/timit/TIMIT/TRAIN/DR1/FSJK1/SI1025.WAV1: /home/sri/Desktop/timit/TIMIT/TRAIN/DR2/MPPC0/SX152.WAV2: /home/sri/Desktop/timit/TIMIT/TRAIN/DR3/FLAC0/SI1339.WAV3: /home/sri/Desktop/timit/TIMIT/TRAIN/DR4/MLBC0/SX339.WAV4: /home/sri/Desktop/timit/TIMIT/TRAIN/DR5/FLOD0/SX117.WAV5: /home/sri/Desktop/timit/TIMIT/TRAIN/DR6/FSDJ0/SA1.WAV6: /home/sri/Desktop/timit/TIMIT/TRAIN/DR7/FCJS0/SA1.WAV7: /home/sri/Desktop/timit/TIMIT/TRAIN/DR8/MCXM0/SI721.WAV8: /home/sri/Desktop/timit/TIMIT/TRAIN/DR1/FDML0/SA1.WAV9: /home/sri/Desktop/timit/TIMIT/TRAIN/DR2/FLMC0/SI1372.WAV10: /home/sri/Desktop/timit/TIMIT/TRAIN/DR3/MAKR0/SX272.WAV11: /home/sri/Desktop/timit/TIMIT/TRAIN/DR4/FJWB1/SI2055.WAV12: /home/sri/Desktop/timit/TIMIT/TRAIN/DR5/FLOD0/SA1.WAV13: /home/sri/Desktop/timit/TIMIT/TRAIN/DR6/FSDJ0/SX305.WAV14: /home/sri/Desktop/timit/TIMIT/TRAIN/DR7/FLEH0/SI2311.WAV15: /home/sri/Desktop/timit/TIMIT/TRAIN/DR8/MKRG0/SX411.WAV16: /home/sri/Desktop/timit/TIMIT/TRAIN/DR1/MMRP0/SX5

In [43]:
# different types of noise
def identity(window, param):
    return window

def additive_noise(window, param):
    corrupted = np.copy(window)
    corrupted += np.random.uniform(-param, param, corrupted.shape)
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted

def mult_noise(window, param):
    corrupted = np.copy(window)
    corrupted *= np.random.normal(1.0, param, corrupted.shape)
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted

def freq_additive_noise(window, param):
    corrupted = np.copy(window)
    corrupted = dct(corrupted, norm = 'ortho')
    corrupted += np.random.uniform(-param, param, corrupted.shape)
    corrupted = idct(corrupted, norm = 'ortho')
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted
    
def high_freq_additive_noise(window, param):
    crange = WINDOW_SIZE / 2
    
    corrupted = np.copy(window)
    corrupted = dct(corrupted, norm = 'ortho')
    corrupted[:, crange:] += np.random.uniform(-param, param, (crange,))
    corrupted = idct(corrupted, norm = 'ortho')
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted

def low_freq_additive_noise(window, param):
    crange = WINDOW_SIZE / 2
    
    corrupted = np.copy(window)
    corrupted = dct(corrupted, norm = 'ortho')
    corrupted[:, :crange] += np.random.uniform(-param, param, (crange,))
    corrupted = idct(corrupted, norm = 'ortho')
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted

def freq_mult_noise(window, param):
    corrupted = np.copy(window)
    corrupted = fft(corrupted)
    corrupted *= np.random.normal(1.0, param, corrupted.shape)
    corrupted = np.real(ifft(corrupted))
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted

def low_freq_mult_noise(window, param):
    crange = WINDOW_SIZE / 2
    
    corrupted = np.copy(window)
    corrupted = fft(corrupted)
    corrupted[:, :crange] *= np.random.normal(1.0, param, (crange,))
    corrupted = np.real(ifft(corrupted))
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted

def high_freq_mult_noise(window, param):
    crange = WINDOW_SIZE / 2
    
    corrupted = np.copy(window)
    corrupted = fft(corrupted)
    corrupted[:, crange:] *= np.random.normal(1.0, param, (crange,))
    corrupted = np.real(ifft(corrupted))
    corrupted = np.clip(corrupted, -1.0, 1.0)
    return corrupted

# list of noise functions, and parameters for each
noise_types = [
               (identity,
                   [None]),
               (additive_noise,
                   [1.0 / 1024, 1.0 / 256, 1.0 / 64]),
               (freq_additive_noise,
                   [1.0 / 512, 1.0 / 128, 1.0 / 32]),
               (high_freq_additive_noise,
                   [1.0 / 512, 1.0 / 128, 1.0 / 32]),
               (low_freq_additive_noise,
                   [1.0 / 512, 1.0 / 128, 1.0 / 32]),
               (mult_noise,
                   [1.0 / 64, 1.0 / 16, 1.0 / 8]),
               (freq_mult_noise,
                   [1.0 / 8, 1.0 / 4, 1.0]),
               (high_freq_mult_noise,
                   [1.0 / 4, 1.0 / 2, 1.0]),
               (low_freq_mult_noise,
                   [1.0 / 4, 1.0 / 2, 1.0])
              ]

In [44]:
waveform = train_windows[0]
corrupted = low_freq_mult_noise(waveform, 1.0 / 4)

print run_pesq_windows(waveform, corrupted, train_wparams[0], train_wparams[0])

clean = reconstruct_from_windows(waveform, OVERLAP_SIZE, OVERLAP_FUNC)
clean = unpreprocess_waveform(clean, train_wparams[0])
clean = np.clip(clean, -32767, 32767)

dirty = reconstruct_from_windows(corrupted, OVERLAP_SIZE, OVERLAP_FUNC)
dirty = unpreprocess_waveform(dirty, train_wparams[0])
dirty = np.clip(dirty, -32767, 32767)

sciwav.write("./pesq_clean.wav", SAMPLE_RATE, clean.astype(np.int16))
sciwav.write("./pesq_dirty.wav", SAMPLE_RATE, dirty.astype(np.int16))

3.875


In [45]:
def corrupt_waveforms(dataset, wparams, keras_format = True):
    if (not keras_format):
        X = []
        y = []
    else:
        X = [[], []]
        y = []
    
    for tup in noise_types:
        func = tup[0]
        params = tup[1]
        
        print "---", "Corrupting with", func.__name__, "---"
        
        for param in params:
            avg_pesq = 0.0
            avg_l1 = 0.0
            
            for i in xrange(0, len(dataset)):
                waveform = dataset[i]
                corrupted = func(waveform, param)
                
                pesq = run_pesq_windows(waveform, corrupted, wparams[i], wparams[i])
                avg_pesq += pesq
                avg_l1 += (avgErr(waveform, corrupted) * 32768.0)
                               
                # add the corrupted waveform
                if (not keras_format):
                    X.append([waveform, corrupted])
                    y.append(pesq)
                else:
                    for j in xrange(0, waveform.shape[0]):
                        X[0].append(waveform[j])
                        X[1].append(corrupted[j])
                        y.append(pesq)
                
            avg_pesq /= len(dataset)
            avg_l1 /= len(dataset)
            print "Avg. PESQ / L1 for param", param, ":", avg_pesq, avg_l1
        
        print ""
        
    X = np.array(X)
    y = np.array(y)
    
    return X, y

In [46]:
print "!!! TRAIN !!!"
train_X, train_y = corrupt_waveforms(train_windows, train_wparams)
np.save('./pesq_dataset/train_X.npy', train_X)
np.save('./pesq_dataset/train_y.npy', train_y)
print ""

print "!!! VAL !!!"
val_X, val_y = corrupt_waveforms(val_windows, val_wparams)
np.save('./pesq_dataset/val_X.npy', val_X)
np.save('./pesq_dataset/val_y.npy', val_y)
print ""

print "!!! TEST !!!"
test_X, test_y = corrupt_waveforms(test_windows, test_wparams)
np.save('./pesq_dataset/test_X.npy', test_X)
np.save('./pesq_dataset/test_y.npy', test_y)
print ""

!!! TRAIN !!!
--- Corrupting with identity ---
Avg. PESQ / L1 for param None : 4.644 0.0

--- Corrupting with additive_noise ---
Avg. PESQ / L1 for param 0.0009765625 : 4.45274 16.0015501595
Avg. PESQ / L1 for param 0.00390625 : 3.47538 63.9846630859
Avg. PESQ / L1 for param 0.015625 : 2.00672 255.904379272

--- Corrupting with freq_additive_noise ---
Avg. PESQ / L1 for param 0.001953125 : 4.07186 29.4797626495
Avg. PESQ / L1 for param 0.0078125 : 2.7266 117.883444672
Avg. PESQ / L1 for param 0.03125 : 1.47004 471.83914856

--- Corrupting with high_freq_additive_noise ---
Avg. PESQ / L1 for param 0.001953125 : 4.26224 20.8383267593
Avg. PESQ / L1 for param 0.0078125 : 3.0001 83.3107820129
Avg. PESQ / L1 for param 0.03125 : 1.73766 330.804644775

--- Corrupting with low_freq_additive_noise ---
Avg. PESQ / L1 for param 0.001953125 : 4.22702 20.9196828079
Avg. PESQ / L1 for param 0.0078125 : 3.26016 83.4740370178
Avg. PESQ / L1 for param 0.03125 : 1.94638 334.136337891

--- Corrupting wit