In [60]:
# python vocalsynthesis 
# python vocalsynthesis 20

import sys
import matplotlib.pyplot as plt
import scipy.io.wavfile as wave
import cPickle as pickle
import numpy as np
import theano
import theano.tensor as tensor

from blocks.model import Model
from blocks.bricks import Linear, Tanh, Logistic #@list: changed from Sigmoid
from blocks.bricks.cost import SquaredError
from blocks.initialization import IsotropicGaussian, Constant
from fuel.datasets import IterableDataset
from fuel.streams import DataStream
from fuel.transformers import Transformer
from blocks.algorithms import (GradientDescent, Scale, StepClipping, CompositeRule)
from blocks.extensions.monitoring import TrainingDataMonitoring
from blocks.main_loop import MainLoop
from blocks.extensions import FinishAfter, Printing
from blocks.bricks.recurrent import LSTM
from blocks.graph import ComputationGraph


# =========================================
# SETTING UP DATA
# =========================================

# ==============
# things that should be argparsed maybe
# ==============
# Path of the WAV file that the loading cell reads.
datafile = 'song.wav'
# Percentages of the track assigned to the train, test and validation splits, in that order.
train_test_val_split = [80,10,10]
# Slice bounds (in samples) used to trim the start and end of the raw audio.
data_start = 2000
data_end = -150000
# NOTE(review): not referenced in the visible cells; presumably the hop between windows -- confirm.
window_shift = 1000
# Number of samples in one chunk ("x") when the data is sliced into examples.
x_length = 8000
# Number of consecutive chunks grouped into one sequence.
seq_length = 25
# NOTE(review): the three settings below are not used in the visible cells;
# their meaning is only suggested by their names -- confirm before relying on them.
batch_size = 100
truncated_BPTT_length = 100
secs_to_generate = 60


In [61]:

# =============
# load data and cut it up
# =============
print('\nLoading data...\n')

# wave.read returns a (sample_rate, samples) pair.
data = wave.read(datafile)
sec = data[0]
minute = sec*60
raw = data[1]
# Trim the configured number of samples off the start and end of the recording.
track = raw[data_start:data_end]
num_frames = len(track)
# Explicit floor division keeps the whole-number durations under any division mode.
length_in_seconds = num_frames // sec
length_in_minutes = length_in_seconds // 60
length_in_hours = length_in_minutes // 60

print(datafile)
print(str(sec) + ' fps, '+ str(num_frames) + ' frames')
print(str(length_in_seconds) +' seconds (' + str(length_in_minutes) + ' minutes)')

print('\n\nPreprocessing...\n')

# Split boundaries are expressed as indices into the trimmed track.
test_start = int(num_frames*(train_test_val_split[0]/100.0))
val_start = int(num_frames*(train_test_val_split[1]/100.0)+ test_start)

# Slice `track`, not `raw`: the indices above were computed from len(track), so
# slicing `raw` would ignore the data_start/data_end trim and would leak the
# trimmed tail into the validation split.
train_data = np.asarray(track[0:test_start], dtype="float32")
test_data = np.asarray(track[test_start:val_start], dtype="float32")
val_data = np.asarray(track[val_start: ], dtype="float32")

print('cut up in ' + ':'.join([str(x) for x in train_test_val_split]) +' train:test:val split')



Loading data...

song.wav
16000 fps, 174077537 frames
10879 seconds (181 minutes)


Preprocessing...

cut up in 80:10:10 train:test:val split


In [62]:
# =============
# compute per-split statistics and min-max rescale (the mean is computed but not subtracted)
# =============
def _stats_covering_train(split, label, ref_min, ref_max):
    """Return (mean, min, max, range) of `split`, widened to cover [ref_min, ref_max].

    When the split's own minimum is above `ref_min` (or its maximum is below
    `ref_max`), the reference value is used instead and a message prefixed
    with `label` is printed.
    """
    lo = np.min(split)
    hi = np.max(split)
    if lo > ref_min:
        lo = ref_min
        print(label + " min greater than train min; using train min to normalize")
    if hi < ref_max:
        hi = ref_max
        print(label + " max less than train max; using train max to normalize")
    return np.mean(split), lo, hi, hi - lo


# Training statistics are taken as-is and serve as the reference extrema.
train_mean, train_min, train_max = (
    np.mean(train_data), np.min(train_data), np.max(train_data))
train_range = train_max-train_min

# Test and validation statistics are widened so they never fall inside the training range.
test_mean, test_min, test_max, test_range = _stats_covering_train(
    test_data, "test", train_min, train_max)
val_mean, val_min, val_max, val_range = _stats_covering_train(
    val_data, "val", train_min, train_max)

#print '\nTrain'
#print train_mean 
#print train_min  
#print train_max
#print train_range

#print '\nTest'
#print test_mean
#print test_min 
#print test_max 
#print test_range

#print '\nVal'
#print val_mean
#print val_min 
#print val_max
#print val_range

#http://stackoverflow.com/questions/5294955/how-to-scale-down-a-range-of-numbers-with-a-known-min-and-max-value
def rescale(unscaled_x, min_allowed, max_allowed, data_min, data_max):
    """Linearly map values from [data_min, data_max] onto [min_allowed, max_allowed].

    Parameters:
        unscaled_x: scalar or numpy array of values to rescale.
        min_allowed, max_allowed: bounds of the target interval.
        data_min, data_max: scalar bounds of the source interval.

    Returns:
        The rescaled value(s); arrays keep their shape.

    Raises:
        ZeroDivisionError: if data_max equals data_min, since the source
            interval then has zero width and no linear mapping exists.
    """
    # Force a float divisor so all-integer arguments are not truncated by
    # Python 2 integer division.
    span = float(data_max - data_min)
    if span == 0.0:
        raise ZeroDivisionError("cannot rescale: data_max equals data_min")
    return (max_allowed-min_allowed)*(unscaled_x-data_min)/span + min_allowed

# Every split is scaled with the training extrema so all three share one mapping.
# The calls are independent; they are evaluated in train, val, test order.
processed_train_data, processed_val_data, processed_test_data = tuple(
    rescale(split_samples, -1, 1, train_min, train_max)
    for split_samples in (train_data, val_data, test_data)
)

# NOTE(review): the message mentions mean subtraction, but rescale() only
# performs min-max scaling.
print("subtracted mean and normalized data to [-1,1]")

test min greater than train min; using train min to normalize
test max less than train max; using train max to normalize
val min greater than train min; using train min to normalize
val max less than train max; using train max to normalize
subtracted mean and normalized data to [-1,1]


In [68]:
train_test_val = [processed_train_data, processed_test_data, processed_val_data]
# =============
# make examples: consecutive (non-overlapping) x_length-sample chunks,
# grouped seq_length at a time
# =============

print('\n\nSlicing into examples...\n')
split_names = ('train', 'test', 'val')
full_dataset = []
for what_dataset, dataset in enumerate(train_test_val):
    if what_dataset >= len(split_names):
        print('well something went wrong')
        break
    wha = split_names[what_dataset]
    print('slicing ' + wha)
    # Keep chunking only while more than (seq_length + 1) chunks' worth of
    # samples remain past the start of the current chunk.
    limit = len(dataset) - (x_length + seq_length*x_length)
    examples = []
    window = []
    chunk_idx = 0
    while chunk_idx*x_length < limit:
        chunk = np.asarray(dataset[chunk_idx*x_length:(chunk_idx + 1)*x_length])
        window.append(chunk)
        if len(window) == seq_length:
            # Each example pairs the window's last chunk with the whole window.
            examples.append([chunk, window])
            window = []
        chunk_idx += 1
    full_dataset.append(examples)
    print('examples generated')

# Persist all three example lists in one pickle file.
with open('dataset.pkl', "wb") as f:
    pickle.dump(full_dataset, f, pickle.HIGHEST_PROTOCOL)
    print('pickle dumped')



Slicing into examples...

slicing train
examples generated
slicing test
examples generated
slicing val
examples generated
pickle dumped


In [55]:
# =============
# make theano tensors and cut into minibatches
# =============
# full_dataset is a plain Python list (one example list per split), so it has
# no array-style attributes to inspect; nothing is evaluated on it here.

# Symbolic input variable. theano.tensor is a module and cannot be called, so
# the variable is created with a tensor constructor instead.
# NOTE(review): rank 3 is assumed from the "T x batches x F" layout note in the
# network-setup cell -- confirm the intended rank and dtype.
train = tensor.tensor3('train')
print("made batches, wished for cookies")

dataset = [processed_train_data, processed_test_data, processed_val_data]


Loading data...

song.wav
16000 fps, 174229537 frames
10889 seconds (181 minutes)


Preprocessing...

cut up in 80:10:10 train:test:val split
test min greater than train min; using train min to normalize
test max less than train max; using train max to normalize
val min greater than train min; using train min to normalize
val max less than train max; using train max to normalize
subtracted mean and normalized data to [-1,1]


In [49]:
def scaleBetween(unscaledNum, minAllowed, maxAllowed, min, max):
    """Linearly map a value from [min, max] onto [minAllowed, maxAllowed].

    Parameters:
        unscaledNum: scalar or numpy array of values to rescale.
        minAllowed, maxAllowed: bounds of the target interval.
        min, max: scalar bounds of the source interval. These names shadow the
            builtins inside this function; they are kept so existing keyword
            callers continue to work.

    Returns:
        The rescaled value(s).

    Raises:
        ZeroDivisionError: if max equals min (zero-width source interval).
    """
    # Force a float divisor so all-integer arguments are not truncated by
    # Python 2 integer division.
    span = float(max - min)
    if span == 0.0:
        raise ZeroDivisionError("cannot rescale: max equals min")
    return (maxAllowed-minAllowed)*(unscaledNum-min)/span + minAllowed

In [51]:
scaleBetween(59.0,-1,1,0,1000)

-0.882

In [6]:

print('\n\nSetting up network...')

print('\nHyperparameters:')

# Epoch count; in this cell it is only reported, not yet consumed.
n_epochs = 10
print("    Epochs: %d" % n_epochs)


# T x batches x F


25.000000 seconds


In [None]:
# Fractions of the (possibly truncated) track at which the validation and test
# splits begin.
# NOTE(review): `test_start` is also bound to an integer index by the loading
# cell earlier in this notebook; running this cell rebinds it.
test_start = 0.9
valid_start = 0.8

#load data
# wave.read returns a (sample_rate, samples) pair.
track = wave.read('song.wav')
fs = track[0]
raw_data = track[1]

# Use the samples loaded in this cell rather than variables left over from
# other cells.
num_frames = len(raw_data)
data = np.asarray(raw_data, dtype="float32")

# NOTE(review): the sys.argv reads below expect command-line arguments
# (x_length, seq_length, seconds to keep, output path).

# if x_length == 0.5, then
# x_length*fs = 16000*0.5 = 8000
# so an x^i represents 0.5 of a second
x_length = float(sys.argv[1])
print("each x will represent %f samples (%f seconds)" % (x_length*fs, x_length))
# a sequence is x_length*fs*seq_length long
seq_length = int(sys.argv[2])
print("each sequence will represent %f seconds" % (x_length*seq_length))

how_many_seconds = int(sys.argv[3]) # 60*20 = 20 minutes

if how_many_seconds > 0:
    sys.stderr.write("truncating data to %i seconds\n" % how_many_seconds)
    data = data[0: how_many_seconds*fs]

# Slice bounds must be integers; len(data)*fraction is a float.
valid_idx = int(len(data)*valid_start)
test_idx = int(len(data)*test_start)

# e.g. 0 -> 0.8
train_data = data[:valid_idx]
# e.g. 0.8 -> 0.9
valid_data = data[valid_idx:test_idx]
# e.g. 0.9 ::
test_data = data[test_idx:]

# Statistics are taken over the whole track, not just the training split.
mean_ = np.mean(data)
min_ = np.min(data)
max_ = np.max(data)

print("min and max calculated: %i, %i" % (min_, max_))

train_data = (train_data - mean_) / (max_ - min_)
valid_data = (valid_data - mean_) / (max_ - min_)
test_data = (test_data - mean_) / (max_ - min_)

dd = [train_data, valid_data, test_data]

x_size = int(x_length*fs)
if x_size < 1 or seq_length < 1:
    # A zero-sample chunk would never advance through the data, and a
    # non-positive sequence length could never complete a sequence.
    raise ValueError("need at least one sample per x and one x per sequence "
                     "(x_size=%r, seq_length=%r)" % (x_size, seq_length))

for i in range(0, len(dd)):
    # Only whole chunks and whole sequences are kept; a trailing partial chunk
    # would otherwise produce a ragged array.
    n_seq = dd[i].shape[0] // (x_size*seq_length)
    usable = dd[i][:n_seq*seq_length*x_size]
    # Result is (sequences, chunks per sequence, samples per chunk), followed
    # by any trailing axes of the input.
    dd[i] = np.array(
        usable.reshape((n_seq, seq_length, x_size) + dd[i].shape[1:]),
        dtype="float32")
    print("the shape of this array: %s" % (str(dd[i].shape)))

# The extrema are stored next to the data, presumably to undo the scaling later.
with open(sys.argv[4], "wb") as f:
    pickle.dump( (dd, min_, max_), f, pickle.HIGHEST_PROTOCOL )