In [1]:
import os
import scipy.io.wavfile as sciwav
import numpy as np
import random

# For reproducibility: seed both NumPy's global generator and the stdlib
# `random` module from one shared constant so the two can never drift apart.
RANDOM_SEED = 1337
np.random.seed(RANDOM_SEED)
random.seed(RANDOM_SEED)

In [2]:
# external custom code I wrote
from load_data import *
from windowing import *
from utility import *
from pesq import *
from noise import *
from consts import *

In [3]:
# How many speech files are requested for each split (train, val, test).
TRAIN_SIZE, VAL_SIZE, TEST_SIZE = 100, 100, 100

In [4]:
# load_data hands back five groups, each holding one entry per split in
# (train, val, test) order; unpack them all in a single statement.
(
    (train_paths, val_paths, test_paths),
    (train_waveforms, val_waveforms, test_waveforms),
    (train_procwave, val_procwave, test_procwave),
    (train_wparams, val_wparams, test_wparams),
    (train_windows, val_windows, test_windows),
) = load_data(TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

0: /home/sri/Desktop/timit/TIMIT/TRAIN/DR1/FSJK1/SI1025.WAV1: /home/sri/Desktop/timit/TIMIT/TRAIN/DR2/MPPC0/SX152.WAV2: /home/sri/Desktop/timit/TIMIT/TRAIN/DR3/FLAC0/SI1339.WAV3: /home/sri/Desktop/timit/TIMIT/TRAIN/DR4/MLBC0/SX339.WAV4: /home/sri/Desktop/timit/TIMIT/TRAIN/DR5/FLOD0/SX117.WAV5: /home/sri/Desktop/timit/TIMIT/TRAIN/DR6/FSDJ0/SA1.WAV6: /home/sri/Desktop/timit/TIMIT/TRAIN/DR7/FCJS0/SA1.WAV7: /home/sri/Desktop/timit/TIMIT/TRAIN/DR8/MCXM0/SI721.WAV8: /home/sri/Desktop/timit/TIMIT/TRAIN/DR1/FDML0/SA1.WAV9: /home/sri/Desktop/timit/TIMIT/TRAIN/DR2/FLMC0/SI1372.WAV10: /home/sri/Desktop/timit/TIMIT/TRAIN/DR3/MAKR0/SX272.WAV11: /home/sri/Desktop/timit/TIMIT/TRAIN/DR4/FJWB1/SI2055.WAV12: /home/sri/Desktop/timit/TIMIT/TRAIN/DR5/FLOD0/SA1.WAV13: /home/sri/Desktop/timit/TIMIT/TRAIN/DR6/FSDJ0/SX305.WAV14: /home/sri/Desktop/timit/TIMIT/TRAIN/DR7/FLEH0/SI2311.WAV15: /home/sri/Desktop/timit/TIMIT/TRAIN/DR8/MKRG0/SX411.WAV16: /home/sri/Desktop/timit/TIMIT/TRAIN/DR1/MMRP0/SX5

In [5]:
def corrupt_waveforms(dataset, wparams, keras_format = True):
    """Corrupt every item of `dataset` with each noise setting and score the result.

    Iterates over the module-level `noise_types` (entries are indexed as
    `(func, params)`). For each `param` in `params`, every item of `dataset`
    is passed through `func(item, param)`, the clean/corrupted pair is scored
    with `run_pesq_windows`, and the per-parameter average score and average
    scaled `avgErr` are printed.

    Args:
        dataset: indexable, sized collection of items to corrupt. When
            `keras_format` is true each item must expose `.shape[0]` and be
            indexable along its first axis (presumably a 2-D array of
            windows -- confirm against load_data).
        wparams: collection parallel to `dataset`; `wparams[i]` is passed to
            `run_pesq_windows` for both the clean and the corrupted item.
        keras_format: selects the layout of the returned data (see Returns).

    Returns:
        A pair `(X, y)`.
        * keras_format true: `X` is `np.array([clean_rows, corrupted_rows])`
          and `y` is a NumPy array with one score per row (each row gets the
          score of the item it came from).
        * keras_format false: `X` is a list of `[clean, corrupted]` pairs and
          `y` a list with one score per pair.
    """
    X = [[], []] if keras_format else []
    y = []
    n_items = len(dataset)

    for tup in noise_types:
        func, params = tup[0], tup[1]

        # Single pre-formatted string: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("--- Corrupting with %s ---" % func.__name__)

        for param in params:
            total_pesq = 0.0
            total_l1 = 0.0

            for i, waveform in enumerate(dataset):
                corrupted = func(waveform, param)

                score = run_pesq_windows(waveform, corrupted, wparams[i], wparams[i])
                total_pesq += score
                # NOTE(review): 32768.0 presumably rescales the error to the
                # 16-bit sample range -- confirm against avgErr.
                total_l1 += avgErr(waveform, corrupted) * 32768.0

                if keras_format:
                    # One training example per row; every row inherits the
                    # score computed for the whole item.
                    n_rows = waveform.shape[0]
                    X[0].extend(waveform[j] for j in range(n_rows))
                    X[1].extend(corrupted[j] for j in range(n_rows))
                    y.extend([score] * n_rows)
                else:
                    X.append([waveform, corrupted])
                    y.append(score)

            if n_items:
                avg_pesq = total_pesq / n_items
                avg_l1 = total_l1 / n_items
            else:
                # An empty dataset has no meaningful average; report NaN
                # instead of raising ZeroDivisionError.
                avg_pesq = avg_l1 = float('nan')
            print("Avg. PESQ / L1 for param %s : %s %s" % (param, avg_pesq, avg_l1))

        print("")

    if keras_format:
        X = np.array(X)
        y = np.array(y)

    return X, y

In [6]:
# Build the corrupted-waveform dataset for each split and store the arrays
# as .npy files under PESQ_DATASET_DIR.
PESQ_DATASET_DIR = './pesq_dataset'

# np.save does not create missing parent directories; make sure the target
# exists up front so a long corruption pass is not lost to an I/O error.
if not os.path.isdir(PESQ_DATASET_DIR):
    os.makedirs(PESQ_DATASET_DIR)


def _corrupt_and_save(split, windows, wparams):
    """Run corrupt_waveforms for one split and save its X / y arrays.

    Args:
        split: lower-case split name ('train', 'val' or 'test'); used for the
            banner line and the '<split>_X.npy' / '<split>_y.npy' file names.
        windows: dataset handed to corrupt_waveforms.
        wparams: matching parameters handed to corrupt_waveforms.

    Returns:
        The `(X, y)` pair returned by corrupt_waveforms.
    """
    print("!!! %s !!!" % split.upper())
    X, y = corrupt_waveforms(windows, wparams)
    np.save(os.path.join(PESQ_DATASET_DIR, '%s_X.npy' % split), X)
    np.save(os.path.join(PESQ_DATASET_DIR, '%s_y.npy' % split), y)
    print("")
    return X, y


train_X, train_y = _corrupt_and_save('train', train_windows, train_wparams)
val_X, val_y = _corrupt_and_save('val', val_windows, val_wparams)
test_X, test_y = _corrupt_and_save('test', test_windows, test_wparams)

!!! TRAIN !!!
--- Corrupting with identity ---
Avg. PESQ / L1 for param None : 4.644 0.0

--- Corrupting with additive_noise ---
Avg. PESQ / L1 for param 0.0009765625 : 4.45991 15.9981290722
Avg. PESQ / L1 for param 0.00390625 : 3.44371 63.9812351227
Avg. PESQ / L1 for param 0.015625 : 1.95306 255.885167084

--- Corrupting with mult_noise ---
Avg. PESQ / L1 for param 0.015625 : 4.32926 21.9295038557
Avg. PESQ / L1 for param 0.0625 : 2.93722 87.668964653
Avg. PESQ / L1 for param 0.125 : 2.0655 175.064230881

--- Corrupting with high_freq_additive_noise ---
Avg. PESQ / L1 for param 0.001953125 : 4.26096 20.8718095589
Avg. PESQ / L1 for param 0.0078125 : 2.9279 83.6501049805
Avg. PESQ / L1 for param 0.03125 : 1.69413 332.707108459

--- Corrupting with low_freq_additive_noise ---
Avg. PESQ / L1 for param 0.001953125 : 4.23907 20.8307619667
Avg. PESQ / L1 for param 0.0078125 : 3.23888 83.3919275665
Avg. PESQ / L1 for param 0.03125 : 1.89552 334.146542358


!!! VAL !!!
--- Corrupting with id

In [7]:
# Show the dimensions of the stacked train and val input arrays.
print(train_X.shape)
print(val_X.shape)

(2, 129597, 512)
(2, 135616, 512)
