In [1]:
import tensorflow as tf
import wave, os, glob
from IPython.display import display, Audio
from scipy.io.wavfile import read
import numpy as np

from utils import *

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

In [2]:
# Length of every training clip: two 16384-sample halves
# (the notebook text describes the resulting clips as 2 seconds long).
AUDIO_LENGTH = 2 * 16384

# Directory holding the source .wav recordings; the trailing slash matters
# because file names are appended to it by plain string concatenation.
PATH_DATA = './test_data/'

# Creating a dataset to train on

In [3]:
# Load the three word recordings ("tokens") the dataset is built from.
# read() returns (sample_rate, data); only the data array is kept, then
# passed through normalize() from utils.
token_files = ('water_40.wav', 'greasy_2.wav', 'suit_16.wav')
water, greasy, suit = [
    normalize(np.array(read(PATH_DATA + file_name)[1]))
    for file_name in token_files
]
tokens = [water, greasy, suit]

# Play each token back as a quick audible sanity check.
for token in tokens:
    play(token)

In [4]:
# Demo: build a single half-length (1 second, per the notebook's convention)
# clip of 'water' with random padding, then listen to it.
water_pad = pad_audio_random(audio=water, desired_length=AUDIO_LENGTH // 2)
play(water_pad)

# Index of the first non-zero sample -- presumably where the word begins,
# assuming the padding is zeros (confirm in utils.pad_audio_random).
first_nonzero_index = np.nonzero(water_pad)[0][0]
print('audio starts at index: ', first_nonzero_index)

audio starts at index:  4111


The dataset will consist of various 2-second samples of:

'water', 'greasy', 'suit' by themselves  
'water' then 'greasy' together  
'greasy' then 'water' together  
'water' then 'suit' together  
'suit' then 'water' together  

Crucially, the network will never see 'greasy' and 'suit' together in any order. The goal is to test whether the ciwGAN architecture will learn to concatenate two words that it has never seen concatenated together before, by manipulating the learned categorical variables in some way (addition, for example).

In [5]:
# Each entry pairs a dictionary key with the audio argument handed to
# generate_data: a single token, or an ordered (first, second) pair of tokens.
# No entry combines 'greasy' with 'suit'.
train_specs = [
    ('train_data_water', water),
    ('train_data_greasy', greasy),
    ('train_data_suit', suit),
    ('train_data_water_greasy', (water, greasy)),
    ('train_data_greasy_water', (greasy, water)),
    ('train_data_water_suit', (water, suit)),
    ('train_data_suit_water', (suit, water)),
]

# The third argument (100) is presumably the number of clips generated per
# category -- confirm against utils.generate_data.
train_dict = {
    key: generate_data(audio, AUDIO_LENGTH, 100)
    for key, audio in train_specs
}

In [6]:
# Print each category name, then play its first ten entries for a listening check.
for key, samples in train_dict.items():
    print(key)
    for sample in samples[0:10]:
        play(sample)

train_data_water


train_data_greasy


train_data_suit


train_data_water_greasy


train_data_greasy_water


train_data_water_suit


train_data_suit_water
