In [1]:
# Intended to select Theano's fast_compile optimizer and a CUDA device.
# NOTE(review): a `!` line is executed in a separate shell, so this assignment most likely
# never reaches the kernel process that imports Theano below, and the unquoted `*` is
# subject to shell globbing. Consider `%env THEANO_FLAGS=...` before the imports — TODO confirm.
!THEANO_FLAGS=optimizer=fast_compile,device=cuda*

In [2]:
import sys
import logging
import optparse
import json
import os
import models
import datasets
import util
import denoise

%load_ext autoreload
%autoreload 2

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using Theano backend.
  return f(*args, **kwds)


In [3]:
def set_system_settings():
    """Apply the interpreter-wide settings used by the rest of the notebook.

    Sets the root logger's level to INFO and raises Python's recursion limit to 50000.
    """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    sys.setrecursionlimit(50000)


# Apply the settings as soon as this cell runs.
set_system_settings()

In [4]:
# Helper functions
def load_config(config_filepath):
    """Load a JSON configuration file.

    Args:
        config_filepath: Path of the JSON config file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        SystemExit: With exit status 1 when the file cannot be opened; the
            problem is logged before exiting.
    """
    try:
        config_file = open(config_filepath, 'r')
    except IOError:
        # Logging args (instead of string concatenation) keep this working when the
        # path is not a str, e.g. a pathlib.Path.
        logging.error('No readable config file at path: %s', config_filepath)
        # sys.exit(1) rather than the bare interactive `exit()` helper: it does not rely
        # on the `site`/IPython-provided name and reports failure instead of status 0.
        sys.exit(1)
    else:
        # Only errors from opening the file are handled above; parse errors propagate.
        with config_file:
            return json.load(config_file)

def get_valid_output_folder_path(outputs_folder_path):
    """Create and return the first unused ``samples_<n>`` folder.

    Candidate names ``samples_1``, ``samples_2``, ... are tried in order under
    ``outputs_folder_path``; the first one that does not exist yet is created.

    Args:
        outputs_folder_path: Folder that holds the numbered sample folders. It is
            created (including missing parents) if it does not exist.

    Returns:
        Path of the newly created ``samples_<n>`` folder.
    """
    # A fresh session has no samples folder yet; os.mkdir alone would fail on it.
    os.makedirs(outputs_folder_path, exist_ok=True)

    j = 1
    while True:
        output_folder_path = os.path.join(outputs_folder_path, 'samples_%d' % j)
        try:
            # Attempt the creation directly instead of checking isdir() first, so a
            # name taken by a file (or created concurrently) is skipped, not fatal.
            os.mkdir(output_folder_path)
        except FileExistsError:
            j += 1
        else:
            return output_folder_path

First, replicate the inference mode. The example invocation passes the following parameters to the program:

THEANO_FLAGS=optimizer=fast_compile,device=gpu 
python main.py 
--mode inference 
--config sessions/001/config.json 
--noisy_input_path data/NSDTSEA/noisy_testset_wav 
--clean_input_path data/NSDTSEA/clean_testset_wav

In [None]:
# Parameters

class Params():
    """Notebook substitute for the command-line options of the main.py example, preset for inference."""

    def __init__(self):
        # What to run and which session configuration to use
        self.mode = 'inference'
        self.config = 'sessions/001/config.json'
        self.load_checkpoint = None  # e.g. 'sessions/002/checkpoints/checkpoint.00144.hdf5'

        # Noisy recording to denoise and its clean counterpart.
        # Other pairs tried earlier (under /home/david/data/CSTR_VCTK_Corpus/):
        #   {clean,noisy}_trainset_28spk_wav/p226_001.wav
        #   {clean,noisy}_testset_wav/p232_104.wav
        #   {clean,noisy}_testset_28spk_wav/p232_001.wav
        self.noisy_input_path = 'data/CSTR_VCTK_Corpus/noisy_testset_wav/p232_104.wav'
        self.clean_input_path = 'data/CSTR_VCTK_Corpus/clean_testset_wav/p232_104.wav'

        # Optional overrides; None leaves the decision to later cells
        self.batch_size = None
        self.target_field_length = None

        # Remaining inference options
        self.condition_value = 0
        self.one_shot = False
        self.print_model_summary = False


params = Params()

In [None]:
# Load config: parse the JSON file named by params.config (load_config logs an error
# and exits if the file cannot be opened)
config = load_config(params.config)

In [None]:
# Resolve params and config
# An explicit params.batch_size takes precedence; otherwise use the training batch size
# stored in the config.
batch_size = (int(params.batch_size) if params.batch_size is not None
              else config['training']['batch_size'])

# Normalise an explicitly given target field length to int.
if params.target_field_length is not None:
    params.target_field_length = int(params.target_field_length)

if params.one_shot:
    # One-shot mode: no model is built here; the inference loop creates one per input file.
    print('Performing one-shot inference..')
else:
    model = models.DenoisingWavenet(config,
                                    target_field_length=params.target_field_length,
                                    load_checkpoint=params.load_checkpoint,
                                    print_model_summary=params.print_model_summary)
    print('Performing inference..')
        

In [None]:
# Create a fresh output folder under the session's samples directory and work out
# which wav files to denoise.
samples_folder_path = os.path.join(config['training']['path'], 'samples')
output_folder_path = get_valid_output_folder_path(samples_folder_path)

if params.noisy_input_path.endswith('.wav'):
    # A single wav file was given: process just that file and keep only its folder
    # in params. os.path.split also copes with a bare filename that has no folder part.
    noisy_folder, noisy_filename = os.path.split(params.noisy_input_path)
    filenames = [noisy_filename]
    # os.path.join(folder, '') appends a trailing separator ('' stays '' for the cwd).
    params.noisy_input_path = os.path.join(noisy_folder, '')
    # Strip the filename from the clean path only when it actually names a wav file;
    # a clean path that is already a folder is left untouched.
    if params.clean_input_path is not None and params.clean_input_path.endswith('.wav'):
        params.clean_input_path = os.path.join(os.path.dirname(params.clean_input_path), '')
else:
    # A folder was given: process every wav file directly inside it, in sorted order
    # so that runs are reproducible (os.listdir order is arbitrary).
    params.noisy_input_path = os.path.join(params.noisy_input_path, '')
    filenames = sorted(filename for filename in os.listdir(params.noisy_input_path)
                       if filename.endswith('.wav'))

# Stays None for every file when no clean reference path is configured.
clean_input = None


In [None]:
# Show the resolved folders and the files queued for denoising, one per line.
print(samples_folder_path, output_folder_path, filenames, sep='\n')

In [None]:
# Perform the inference
sample_rate = config['dataset']['sample_rate']

# The condition vector depends only on params and config, so it is built once.
# NOTE(review): 29 is hard-coded — presumably the number of condition classes; confirm
# against the session config.
encode_condition = (util.one_hot_encode if config['model']['condition_encoding'] == 'one_hot'
                    else util.binary_encode)
condition_input = encode_condition(int(params.condition_value), 29)[0]

for filename in filenames:
    # os.path.join tolerates folder paths with or without a trailing separator.
    noisy_input = util.load_wav(os.path.join(params.noisy_input_path, filename), sample_rate)
    if params.clean_input_path is not None:
        # The clean reference is looked up under the same filename as the noisy file.
        clean_input = util.load_wav(os.path.join(params.clean_input_path, filename), sample_rate)

    # Named `sample` rather than `input` to avoid shadowing the builtin.
    sample = {'noisy': noisy_input, 'clean': clean_input}

    output_filename_prefix = os.path.splitext(filename)[0] + '_'

    if params.one_shot:
        if len(sample['noisy']) % 2 == 0:  # If input length is even, remove one sample
            sample['noisy'] = sample['noisy'][:-1]
            if sample['clean'] is not None:
                sample['clean'] = sample['clean'][:-1]
        # One-shot mode builds a model sized to this file's length.
        model = models.DenoisingWavenet(config,
                                        load_checkpoint=params.load_checkpoint,
                                        input_length=len(sample['noisy']),
                                        print_model_summary=params.print_model_summary)

    print("Denoising: ",filename)
    denoise.denoise_sample(model, sample, condition_input, batch_size, output_filename_prefix,
                           sample_rate, output_folder_path)


In [None]:
# Report the model's field sizes in samples and in milliseconds. The sample rate is read
# from the config rather than hard-coded as 16000, so the ms values follow the dataset.
sample_rate = config['dataset']['sample_rate']
print("receptive field length: ",model.receptive_field_length, " samples, ",model.receptive_field_length*1000/sample_rate, " ms")
print("target field length: ",model.target_field_length, " samples, ",model.target_field_length*1000/sample_rate, " ms")
print("input length: ",model.input_length, " samples, ", model.input_length*1000/sample_rate," ms")
print("target padding: ",model.target_padding, "samples, ")

## Model Training

In [5]:
class Params():
    """Notebook substitute for command-line options, preset for a training run."""

    def __init__(self):
        # NOTE(review): this attribute used to be assigned 4 and then overwritten with
        # None a few lines later; the effective value (None) is kept. The training cell
        # takes its batch size from the config. Set a number here if 4 was intended.
        self.batch_size = None
        self.config = 'sessions/003/config.json'
        self.mode = 'training'
        self.load_checkpoint = None  # None: no checkpoint file is passed to the model
        self.condition_value = 0
        self.one_shot = False
        # Input wav paths are left unset for the training run
        self.clean_input_path = None
        self.noisy_input_path = None
        self.print_model_summary = False
        self.target_field_length = None


In [6]:
# Load params and then config (dependency): the config path comes from params.config,
# so the Params instance has to exist before the JSON file can be read.
params = Params()
config = load_config(params.config)

In [7]:
# Instantiate Model: build a DenoisingWavenet from the loaded config, passing the
# checkpoint path and the summary-printing flag from params.
model = models.DenoisingWavenet(config, 
                                load_checkpoint=params.load_checkpoint, 
                                print_model_summary=params.print_model_summary)

Building new model...


In [8]:
# Report the model's field sizes in samples and in milliseconds. The sample rate is read
# from the config rather than hard-coded as 16000, so the ms values follow the dataset.
sample_rate = config['dataset']['sample_rate']
print("receptive field length: ",model.receptive_field_length, " samples, ",model.receptive_field_length*1000/sample_rate, " ms")
print("target field length: ",model.target_field_length, " samples, ",model.target_field_length*1000/sample_rate, " ms")
print("input length: ",model.input_length, " samples, ", model.input_length*1000/sample_rate," ms")
print("target padding: ",model.target_padding, "samples, ")
print("dilations: ", model.dilations)

receptive field length:  3067  samples,  191.6875  ms
target field length:  1601  samples,  100.0625  ms
input length:  4667  samples,  291.6875  ms
target padding:  1 samples, 
dilations:  [1, 2, 4, 8, 16, 32, 64, 128, 256]


In [9]:
def get_dataset(config, model):
    """Build and load the dataset selected by ``config['dataset']['type']``.

    Args:
        config: Session configuration; ``config['dataset']['type']`` must be
            ``'vctk+demand'`` or ``'nsdtsea'``.
        model: Model object handed to the dataset constructor.

    Returns:
        Whatever the chosen dataset's ``load_dataset()`` returns.

    Raises:
        ValueError: If the configured dataset type is not one of the supported values.
    """
    dataset_type = config['dataset']['type']

    if dataset_type == 'vctk+demand':
        return datasets.VCTKAndDEMANDDataset(config, model).load_dataset()
    if dataset_type == 'nsdtsea':
        return datasets.NSDTSEADataset(config, model).load_dataset()

    # Fail here with a clear message instead of returning None and breaking later.
    raise ValueError("Unsupported dataset type %r; expected 'vctk+demand' or 'nsdtsea'"
                     % (dataset_type,))

# Load the dataset selected by the config for the model built above.
dataset = get_dataset(config, model)
# NOTE(review): the output recorded for this cell is an AttributeError naming the *module*
# `datasets`, which does not match the `dataset.` expression below — the output looks stale
# or from a different version of this line. Re-run and confirm the attribute exists.
print("in memory percentage: ", dataset.in_memory_percentage)

Loading NSDTSEA dataset...


  return y[keep]


AttributeError: module 'datasets' has no attribute 'in_memory_percentage'

In [None]:
# Perform Training
training_config = config['training']
batch_size = training_config['batch_size']
num_train_samples = training_config['num_train_samples']
num_test_samples = training_config['num_test_samples']

# Number of whole batches per epoch for training and for validation
steps_per_epoch = num_train_samples // batch_size
validation_steps = num_test_samples // batch_size

# Random batch generators over the 'train' and 'test' sets
train_set_generator = dataset.get_random_batch_generator('train')
test_set_generator = dataset.get_random_batch_generator('test')

model.fit_model(train_set_generator, steps_per_epoch,
                test_set_generator, validation_steps,
                training_config['num_epochs'])

Fitting model with  1000  training samples and  100  test samples...


  initial_epoch=self.epoch_num)
  initial_epoch=self.epoch_num)


Epoch 1/250


  return y[keep]


