In [1]:
import math
import pywt
import inspect
import numpy as np
from keras.preprocessing import sequence
from keras.optimizers import RMSprop
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, LSTM, Activation, Input, Dropout, Flatten
from keras.layers import Convolution2D, MaxPooling2D
# from keras.layers import LSTM
import scipy.io.wavfile as wavfile
from scipy.interpolate import interp1d

Using TensorFlow backend.


# PRE-PROCESSING

In [2]:
# input_rate, input_signal = wavfile.read('data/1clean_Selection.wav')
# output_rate, output_signal = wavfile.read('data/1Selection.wav')
# input_rate, input_signal = wavfile.read('data/beatzzz/Clean Beat 01.wav')
# output_rate, output_signal = wavfile.read('data/beatzzz/Processed Beat 03.wav')
input_rate, input_signal = wavfile.read('data/beat2/Clean Beat 02.wav')
output_rate, output_signal = wavfile.read('data/beat2/Processed Beat 02_02.wav')

## Testing out pywt functions and reconstruction

In [3]:
# some settings:
wavetype = 'db10'
# wavelevel = 15

print("input rate: " + str(input_rate))
sample_length = input_rate//200
print("for 5 ms samples, input sample length is: " + str(sample_length))

w = pywt.Wavelet(wavetype)
wavelevel = pywt.dwt_max_level(data_len=sample_length, filter_len=w.dec_len)

# set a little lower:
# wavelevel = 1
print("Max wave level decomposition: " + str(wavelevel))

# these are floats, original is int16
input_coeffs1 = pywt.wavedec(input_signal[:,0].T, wavetype, level=wavelevel)
input_coeffs2 = pywt.wavedec(input_signal[:,1].T, wavetype, level=wavelevel)
# output_coeffs1 = pywt.wavedec(output_signal[:,0].T, wavetype, level=wavelevel)
# output_coeffs2 = pywt.wavedec(output_signal[:,1].T, wavetype, level=wavelevel)
print("input signal shape: " + str(input_signal.shape))

# reconstruction for left and right channel
recons1 = np.array([pywt.waverec(input_coeffs1, wavetype)]).astype('int16')
recons2 = np.array([pywt.waverec(input_coeffs2, wavetype)]).astype('int16')

# print(recons1.shape)
print("coefficients shapes: " + str(len(input_coeffs1)) + ", " + str([len(j) for j in input_coeffs1]))
print("coefficients type: " + str(type(input_coeffs1[0])))

write_array = np.concatenate((recons1,recons2),axis=0).T
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons.wav', input_rate, write_array)

# get available wavelets
print(pywt.wavelist())

input rate: 44100
for 5 ms samples, input sample length is: 220
Max wave level decomposition: 3
input signal shape: (2419200, 2)
coefficients shapes: 4, [302416, 302416, 604814, 1209609]
coefficients type: <type 'numpy.ndarray'>
reconstruction shape: (2419200, 2)
['bior1.1', 'bior1.3', 'bior1.5', 'bior2.2', 'bior2.4', 'bior2.6', 'bior2.8', 'bior3.1', 'bior3.3', 'bior3.5', 'bior3.7', 'bior3.9', 'bior4.4', 'bior5.5', 'bior6.8', 'cgau1', 'cgau2', 'cgau3', 'cgau4', 'cgau5', 'cgau6', 'cgau7', 'cgau8', 'cmor', 'coif1', 'coif2', 'coif3', 'coif4', 'coif5', 'coif6', 'coif7', 'coif8', 'coif9', 'coif10', 'coif11', 'coif12', 'coif13', 'coif14', 'coif15', 'coif16', 'coif17', 'db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9', 'db10', 'db11', 'db12', 'db13', 'db14', 'db15', 'db16', 'db17', 'db18', 'db19', 'db20', 'db21', 'db22', 'db23', 'db24', 'db25', 'db26', 'db27', 'db28', 'db29', 'db30', 'db31', 'db32', 'db33', 'db34', 'db35', 'db36', 'db37', 'db38', 'dmey', 'fbsp', 'gaus1', 'gaus2', 

In [4]:
print(input_signal[:10,0].T)
print(recons1[0,:10])

[ 0 -1 -1 -2 -2 -3 -2 -2 -1 -1]
[ 0  0 -1 -2 -2 -3 -2 -2 -1 -1]


## prepare data for keras

In [5]:
# functions for pre-processing and reconstruction of training vectors
# first function is over a batch - need smaller size

In [6]:
# DEPRECATED
def wavelet_to_vector_all(input_raw, output_raw, chunk_size, wavelet_level, wavelet_type):
    current_set = 'input'
    
    w = pywt.Wavelet(wavelet_type)
    max_level = pywt.dwt_max_level(data_len=chunk_size, filter_len=w.dec_len)
    if wavelet_level > max_level:
        print('wavelet level too high. set to max level: ' + str(max_level))
        wavelet_level = max_level
    
    # short hacky loop
    while True:
        # select the correct set
        if current_set == 'input':
            data = input_raw
            amount_of_chunks = int(math.floor(data.shape[0]/chunk_size))
            input_list = []
            print('Amount of samples: ' + str(amount_of_chunks))
        else:
            data = output_raw
            amount_of_chunks = int(math.floor(data.shape[0]/chunk_size))
            output_list = []
        
        index_range = (np.arange(amount_of_chunks) * chunk_size)
        indp = chunk_size - 1
        # for all chunks do this:
        for ind in index_range:
#             print(ind)
            sample1 = data[ind:ind+indp,0].T
            sample2 = data[ind:ind+indp,1].T
            coeffs1 = pywt.wavedec(sample1, wavelet_type, level=wavelet_level)
            coeffs2 = pywt.wavedec(sample2, wavelet_type, level=wavelet_level)
            unfolded1 = np.array([item for sublist in coeffs1 for item in sublist])
            unfolded2 = np.array([item for sublist in coeffs2 for item in sublist])
            # POSSIBLE MISTAKE: puts l/r in one big sequence
            # run on mono first
            vector = np.concatenate((unfolded1,unfolded2),axis=0)
            
            if current_set == 'input':
                input_list.append(vector)
            else:
                output_list.append(vector)
#             unf_arr = np.array(unfolded1)
#             print(unf_arr.shape)
#             print(len(unfolded))
            
            # for all coeff levels:
#             for i in range(len(coeffs1)):
#                 print(i)
            
        
        
        if current_set == 'output':
            break
        current_set = 'output'

    # convert lists to arrays
    input_arr = np.array(input_list)
    output_arr = np.array(output_list)
    
    # print stats for converting back to audio
    wavelet_lengths = [len(j) for j in coeffs1]
    ind = 0
    index_list = [0]
    for w_l in wavelet_lengths:
        ind += w_l
        index_list.append(ind)
    print("length of different wavelet bands: " + str(wavelet_lengths))
    print("indexes: " + str(index_list))
    print("more sizes? " + str([len(sublist) for sublist in coeffs1]))
    # return level sizes for reconstruction
    level_sizes = []
    for cf in coeffs1:
        level_sizes.append(cf.shape[0])
    return [input_arr, output_arr, level_sizes]
# input_coeffs1[16].shape

# rows=samples, cols=dim
# [input_matrix, output_matrix, level_sizes] = wavelet_to_vector_all(input_signal, output_signal, sample_length, wavelevel, 'db4')
# print(input_matrix.shape)
# print(level_sizes)

In [7]:
# some info
wav_lengths = [44, 44, 69, 119]
wav_indices = [0]
for length in wav_lengths:
    wav_indices.append(wav_indices[-1]+length)
print("indices where vector should be sliced: " + str(wav_indices))

indices where vector should be sliced: [0, 44, 88, 157, 276]


In [8]:
# for a single audio slice (mono)
def audio_to_vector(input_mono, wavelet_level, wavelet_type):
    coeffs = pywt.wavedec(input_mono, wavelet_type, level=wavelet_level)
#     print([len(co) for co in coeffs])
#     vector = np.array([item for sublist in coeffs for item in sublist])
#     vector = np.array([])
    vector = np.concatenate(coeffs)
#     for band in coeffs:
#         vector.append
    return vector

In [9]:
# for a single output vector (mono)
def vector_to_list(input_vector, indexes=wav_indices):
    # unflatten the vector
#     for i in range(len(indexes)-1):
#         wavelet_list.append(input_vector[indexes[i]:indexes[i+1]])
    # use np.split
    wavelet_list = np.split(input_vector, indexes[1:-1])
    return wavelet_list
#     wavelet_list = [input_vector[]]

In [10]:
# create new (mono) data set
# for both channels, we just handle mono samples:
mono_input = []
mono_output = []
for side in (0,1):
    total_raw_in = input_signal[:,side]
    total_raw_out = output_signal[:,side]
    number_of_chunks = total_raw_in.shape[0]//sample_length
#     print(number_of_chunks)
    for ind in range(number_of_chunks):
        start_ind = ind*sample_length
        end_ind = ind*sample_length + sample_length
        curr_in_sample = total_raw_in[start_ind:end_ind]
        curr_out_sample = total_raw_out[start_ind:end_ind]
        in_vec = audio_to_vector(curr_in_sample, wavelevel, wavetype)
        out_vec = audio_to_vector(curr_out_sample, wavelevel, wavetype)
        mono_input.append(in_vec)
        mono_output.append(out_vec)
#     print(curr_in_sample.shape)
vec_length = in_vec.shape[0]
print("vector length: " +str(vec_length))
mono_input = np.array(mono_input)
mono_output = np.array(mono_output)

print(np.max(mono_input))
print(np.min(mono_input))
print(np.max(mono_output))
print(np.min(mono_output))
# max seems to be about 100000, so scale it to that range:
# not necessary for linear activation i think...
# mono_input = mono_input/100000
# mono_output = mono_output/100000
print(np.max(mono_input))

print(mono_input.shape)

vector length: 276
113236.704732
-112970.705528
85259.6801722
-84449.9845834
113236.704732
(21992, 276)


In [11]:
# now put some of it back into audio form to test the representation's audio quality
reconstruction_array = []
for i in range(mono_output.shape[0]):
    coeffs = vector_to_list(mono_output[i,:])
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

reconstruction_array = np.concatenate(reconstruction_array,axis=1)
print(reconstruction_array.shape)
# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)
print(len(rec_split))

write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_new.wav', input_rate, write_array)
# scientific analysis: sounds fine to me

(1, 4838240)
2
reconstruction shape: (2419120, 2)


# build the first model

In [12]:
model = Sequential()
model.add(Dense(200, activation='relu', input_dim=vec_length))
model.add(Dense(200, activation='relu'))
model.add(Dense(vec_length, activation='linear'))

# For a mean squared error regression problem
model.compile(optimizer='rmsprop', loss='mse')
model.fit(mono_input, mono_output, epochs=170, batch_size=128)

# print(np.max(mono_input))
# print(np.min(mono_input))
# print(np.max(mono_output))
# print(np.min(mono_output))
# print(type(mono_input[0,1]))

# fi64 = np.finfo(np.float64)
# print(fi64.min)
# print(fi64.max)

# test_a = np.array([1,2,3,4])
# test_b = np.array([1,2,3,4,5])+8
# test_list = [test_a, test_b]
# test_vec = np.concatenate(test_list)
# print(test_vec)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/170
Epoch 2/170
Epoch 3/170
Epoch 4/170
Epoch 5/170
Epoch 6/170
Epoch 7/170
Epoch 8/170
Epoch 9/170
Epoch 10/170
Epoch 11/170
Epoch 12/170
Epoch 13/170
Epoch 14/170
Epoch 15/170
Epoch 16/170
Epoch 17/170
Epoch 18/170
Epoch 19/170
Epoch 20/170
Epoch 21/170
Epoch 22/170
Epoch 23/170
Epoch 24/170
Epoch 25/170
Epoch 26/170
Epoch 27/170
Epoch 28/170
Epoch 29/170
Epoch 30/170
Epoch 31/170
Epoch 32/170
Epoch 33/170
Epoch 34/170
Epoch 35/170
Epoch 36/170
Epoch 37/170
Epoch 38/170
Epoch 39/170
Epoch 40/170
Epoch 41/170
Epoch 42/170
Epoch 43/170
Epoch 44/170
Epoch 45/170
Epoch 46/170
Epoch 47/170
Epoch 48/170
Epoch 49/170
Epoch 50/170
Epoch 51/170
Epoch 52/170
Epoch 53/170
Epoch 54/170
Epoch 55/170
Epoch 56/170
Epoch 57/170
Epoch 58/170
Epoch 59/170
Epoch 60/170
Epoch 61/170
Epoch 62/170
Epoch 63/170
Epoch 64/170
Epoch 65/170
Epoch 66/170
Epoch 67/170
Epoch 68/170
Epoch 69/170
Epoch 70/170
Epoch 71/170
Epoch 72/170


Epoch 89/170
Epoch 90/170
Epoch 91/170
Epoch 92/170
Epoch 93/170
Epoch 94/170
Epoch 95/170
Epoch 96/170
Epoch 97/170
Epoch 98/170
Epoch 99/170
Epoch 100/170
Epoch 101/170
Epoch 102/170
Epoch 103/170
Epoch 104/170
Epoch 105/170
Epoch 106/170
Epoch 107/170
Epoch 108/170
Epoch 109/170
Epoch 110/170
Epoch 111/170
Epoch 112/170
Epoch 113/170
Epoch 114/170
Epoch 115/170
Epoch 116/170
Epoch 117/170
Epoch 118/170
Epoch 119/170
Epoch 120/170
Epoch 121/170
Epoch 122/170
Epoch 123/170
Epoch 124/170
Epoch 125/170
Epoch 126/170
Epoch 127/170
Epoch 128/170
Epoch 129/170
Epoch 130/170
Epoch 131/170
Epoch 132/170
Epoch 133/170
Epoch 134/170
Epoch 135/170
Epoch 136/170
Epoch 137/170
Epoch 138/170
Epoch 139/170
Epoch 140/170
Epoch 141/170
Epoch 142/170
Epoch 143/170
Epoch 144/170
Epoch 145/170
Epoch 146/170
Epoch 147/170
Epoch 148/170
Epoch 149/170
Epoch 150/170
Epoch 151/170
Epoch 152/170
Epoch 153/170
Epoch 154/170
Epoch 155/170
Epoch 156/170
Epoch 157/170
Epoch 158/170
Epoch 159/170
Epoch 160/170
Epo

<keras.callbacks.History at 0x7f3558a7da50>

In [13]:
predictions = model.predict(mono_input)
print(predictions.shape)

reconstruction_array = []
for i in range(predictions.shape[0]):
    # don't forget to scale up again
    coeffs = vector_to_list(predictions[i,:]) # *100000
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

reconstruction_array = np.concatenate(reconstruction_array,axis=1)
print(reconstruction_array.shape)
# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)
print("coeffs shape: " + str([len(j) for j in coeffs]))
print("coeffs type: " + str(type(coeffs[0][0])))
print(len(rec_split))

write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_network1.wav', input_rate, write_array)

(21992, 276)
(1, 4838240)
coeffs shape: [44, 44, 69, 119]
coeffs type: <type 'numpy.float32'>
2
reconstruction shape: (2419120, 2)


In [14]:
for i in range(vec_length):
    print(predictions[10,i], mono_output[10,i])

(-12546.151, -21841.895537893095)
(-12300.193, -25994.390528711374)
(-13096.056, -25919.847542833704)
(-11806.011, -21543.315618397464)
(-12554.108, -21505.014979520016)
(-12203.805, -25559.021594466827)
(-13296.866, -26435.884076893097)
(-11588.147, -21556.617028473964)
(-12344.221, -21099.510140276274)
(-12327.898, -25397.922479057059)
(-13052.554, -26409.344135687548)
(-11975.097, -22186.119901553058)
(-12353.179, -21030.754211483098)
(-12393.01, -25005.230420325308)
(-12885.297, -26537.149251561692)
(-12378.774, -22657.534608385726)
(-11328.92, -21891.075191750744)
(-10446.564, -16759.623124997368)
(-9580.1895, -11632.321997361958)
(-9562.8027, -7238.5900644988033)
(-11810.074, -7480.411365728598)
(-10432.397, -7410.5474593182189)
(-11999.791, -7879.4811802494678)
(-12263.876, -5969.4594229938266)
(-12586.295, -2633.9942673266751)
(-10762.395, 578.61539950483518)
(-10507.652, 3993.4295463815024)
(-9639.9551, 6811.2403770892724)
(-8441.5664, 7713.4200542717281)
(-4737.3506, 5994.939

# now for the second (LSTM) model

In [15]:
95000*2

190000

In [16]:
def scale_down(data):
    # -95000, 95000 just fits over the max and min
    return (data/190000.0)+0.5

def scale_up(data):
#     print(np.min(data))
    return (data-0.5)*190000.0

In [17]:
# pre-process into sequences
# first we need to scale (lstm doesn't like huge inputs)
scaled_in = scale_down(mono_input)
scaled_out = scale_down(mono_output)

print(np.min(scaled_out))
print(np.max(scaled_out))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 1
vec_sequences = []
next_vecs = []
for i in range(0, len(scaled_in) - maxlen, step):
#     vec_sequences.append(scaled_in[i: i + maxlen])
#     next_vecs.append(scaled_out[i + maxlen])
    vec_sequences.append(mono_input[i: i + maxlen])
    next_vecs.append(mono_output[i + maxlen])
vec_sequences = np.array(vec_sequences)
next_vecs = np.array(next_vecs)
print('nb sequences:', len(vec_sequences))
# print(next_vecs[0].shape)
# print(vec_sequences[0].shape)
print(next_vecs.shape)
print(vec_sequences.shape)
print(type(next_vecs[0,0]))

# x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
# y = np.zeros((len(sentences), len(chars)), dtype=np.bool)




0.0555263969294
0.948735158801
('nb sequences:', 21952)
(21952, 276)
(21952, 40, 276)
<type 'numpy.float64'>


In [18]:
# # Define an input sequence and process it.
# encoder_inputs = Input(shape=(None, num_encoder_tokens))
# encoder = LSTM(latent_dim, return_state=True)
# encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# # We discard `encoder_outputs` and only keep the states.
# encoder_states = [state_h, state_c]

# # Set up the decoder, using `encoder_states` as initial state.
# decoder_inputs = Input(shape=(None, num_decoder_tokens))
# # We set up our decoder to return full output sequences,
# # and to return internal states as well. We don't use the 
# # return states in the training model, but we will use them in inference.
# decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
#                                      initial_state=encoder_states)
# decoder_dense = Dense(num_decoder_tokens, activation='softmax')
# decoder_outputs = decoder_dense(decoder_outputs)

# # Define the model that will turn
# # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
# model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

lstm_model = Sequential()
lstm_model.add(LSTM(300, input_shape=(maxlen, vec_length)))

lstm_model.add(Dense(200, activation='relu', input_dim=vec_length))
lstm_model.add(Dense(200, activation='relu'))
lstm_model.add(Dense(vec_length, activation='linear'))

# For a mean squared error regression problem
lstm_model.compile(optimizer='rmsprop', loss='mse')

# lstm_model.add(Dense(vec_length))
# # lstm_model.add(Activation('softmax'))
# lstm_model.add(Activation("linear"))


# # lstm_model.add(Activation("sigmoid")) 

# optimizer = RMSprop(lr=0.1)
# lstm_model.compile(loss='mse', optimizer=optimizer)

lstm_model.fit(vec_sequences, next_vecs, batch_size=128, epochs=20)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f34cb859bd0>

In [19]:
e_test = np.array([[1000,-234235,54666,-123,222],[1000,-234235,54666,-123,222]])
low_e = scale_down(e_test)
high_e = scale_up(low_e)
print(high_e)

[[  1.00000000e+03  -2.34235000e+05   5.46660000e+04  -1.23000000e+02
    2.22000000e+02]
 [  1.00000000e+03  -2.34235000e+05   5.46660000e+04  -1.23000000e+02
    2.22000000e+02]]


In [20]:
predictions = lstm_model.predict(vec_sequences)
print("--RAW PREDICTION DATA--")
print(predictions.shape)
print("min, max")
print(np.min(predictions),np.max(predictions))
print("mean")
print(np.mean(predictions))

print("--TARGET DATA--")
print(next_vecs.shape)
print("min, max")
print(np.min(next_vecs),np.max(next_vecs))
print("mean")
print(np.mean(next_vecs))

scaled_up_predictions = scale_up(predictions)
print("--SCALED UP PREDICTION--")
print(scaled_up_predictions.shape)
print("min, max")
print(np.min(scaled_up_predictions),np.max(scaled_up_predictions))

print("--RAW TARGET--")
print(mono_output.shape)
print("min, max")
print(np.min(mono_output),np.max(mono_output))

scaled_up_target = scale_up(next_vecs)
print("--SCALED UP TARGET--")
print(scaled_up_target.shape)
print("min, max")
print(np.min(scaled_up_target),np.max(scaled_up_target))




reconstruction_array = []
for i in range(predictions.shape[0]):
    # don't forget to scale up again
#     coeffs = vector_to_list(np.array(scale_up(predictions[i,:]),dtype=np.float64))
#     coeffs = vector_to_list(scaled_up_predictions[i,:])
    coeffs = vector_to_list(predictions[i,:])
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

print("reconstruction done")
print("coeffs shape: " + str([len(j) for j in coeffs]))
print("coeffs type: " + str(type(coeffs[0][0])))

# scale it up before writing
reconstruction_array = np.concatenate(reconstruction_array,axis=1)
# scaled_up_out = scale_up(reconstruction_array)
# print("max, min for original output")
# print("max, min for scaled up recon")
# print(np.max(coeffs[0]))
# print(np.min(coeffs[0]))
# print(reconstruction_array.shape)
# print("max, min for original output")
# print(np.max(mono_output))
# print(np.min(mono_output))
# print(scaled_up_out.shape)
# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)
print(len(rec_split))

write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_network2.wav', input_rate, write_array)

--RAW PREDICTION DATA--
(21952, 276)
min, max
(-26901.053, 29176.084)
mean
134.324
--TARGET DATA--
(21952, 276)
min, max
(-84449.9845834062, 85259.680172221881)
mean
102.791788898
--SCALED UP PREDICTION--
(21952, 276)
min, max
(-5.111295e+09, 5.543361e+09)
--RAW TARGET--
(21992, 276)
min, max
(-84449.9845834062, 85259.680172221881)
--SCALED UP TARGET--
(21952, 276)
min, max
(-16045592070.847178, 16199244232.722157)
reconstruction done
coeffs shape: [44, 44, 69, 119]
coeffs type: <type 'numpy.float32'>
2
reconstruction shape: (2414720, 2)


In [21]:
up_again_test = scale_up(next_vecs)
for i in range(vec_length):
    print(predictions[0,i], next_vecs[0,i])
#     print(next_vecs[10000,i])
# print(predictions[10000,90:100])


(3604.0366, -7490.6828929353187)
(2507.4595, -5912.2729415973463)
(2594.085, -7516.8136176023463)
(3550.6235, -6028.3686456463065)
(3649.1428, -7459.7528668926698)
(2584.5444, -6136.8458790794875)
(2537.9558, -7198.6403564400789)
(3476.6135, -6424.2201194014115)
(3685.7178, -7596.4223618263886)
(2661.4863, -6092.0151177218213)
(2468.5278, -7316.7494177943527)
(3408.7156, -6248.7613679890674)
(3758.1333, -7179.5513755415159)
(2715.5466, -6529.9556985545269)
(2471.4807, -6612.8048456565784)
(3214.1348, -7512.2625524997884)
(4068.0237, -4481.5947393439701)
(4859.4004, 1346.0467971081464)
(5561.0234, 9071.2642407207986)
(6201.1562, 15294.536346507992)
(6802.6401, 23511.451919276547)
(7583.8374, 30222.238597602904)
(8100.0815, 37505.944793658389)
(8560.4141, 48117.050679664681)
(9152.9619, 57053.184544768454)
(9606.917, 56969.372515066039)
(10164.006, 53179.053714181107)
(10841.852, 45535.228353236977)
(11335.428, 32094.745798399421)
(11951.838, 19543.509182392063)
(12484.518, 8109.53916816

In [22]:
print("predictions shape, max, min, type, example")
print(predictions.shape)
print(np.max(predictions))
print(np.min(predictions))
print(type(predictions[0,0]))
print(predictions[0,0])

print("scaled down output shape, max, min, type, example")
print(next_vecs.shape)
print(np.max(next_vecs))
print(np.min(next_vecs))
print(type(next_vecs[0,0]))
print(next_vecs[0,0])

# reconstruct target output for bug


reconstruction_array = []
for i in range(predictions.shape[0]):
    # don't forget to scale up again
#     coeffs = vector_to_list(scale_up(next_vecs[i,:]))
    coeffs = vector_to_list(next_vecs[i,:])
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

print("reconstruction done")
print("coeffs shape: " + str([len(j) for j in coeffs]))
print("coeffs type: " + str(type(coeffs[0][0])))

# scale it up before writing
reconstruction_array = np.concatenate(reconstruction_array,axis=1)

# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)


write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_target_lstm.wav', input_rate, write_array)

predictions shape, max, min, type, example
(21952, 276)
29176.1
-26901.1
<type 'numpy.float32'>
3604.04
scaled down output shape, max, min, type, example
(21952, 276)
85259.6801722
-84449.9845834
<type 'numpy.float64'>
-7490.68289294
reconstruction done
coeffs shape: [44, 44, 69, 119]
coeffs type: <type 'numpy.float64'>
reconstruction shape: (2414720, 2)


# Convolution time!

In [23]:
# let's try 2D convolutional first
# turn vec_sequences into 2D-'image'
# target is mono_output
print(vec_sequences[1,:].shape)
sequences_as_mat = np.array(vec_sequences)
next_as_mat = np.array(next_vecs)
print(sequences_as_mat.shape)
# conv_in_shape = (40, 276)

seq_added_dim = sequences_as_mat.reshape(sequences_as_mat.shape[0], 40, 276, 1)
# next_added_dim = next_as_mat.reshape(next_as_mat.shape[0], 40, 276, 1)
# X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)

print(seq_added_dim.shape)
# print(next_added_dim.shape)
# (60000, 1, 28, 28)

(40, 276)
(21952, 40, 276)
(21952, 40, 276, 1)


In [27]:


# (OLD) build the model
conv_model = Sequential()
# conv(no.filters, filter_x, filter_y)
conv_model.add(Convolution2D(32, (3, 3), activation='relu', input_shape=(40,276,1)))
conv_model.add(MaxPooling2D(pool_size=(2,2)))
conv_model.add(Convolution2D(20, (7, 7), activation='relu'))
conv_model.add(MaxPooling2D(pool_size=(2,2)))
conv_model.add(Dropout(0.25))
conv_model.add(Flatten())
conv_model.add(Dense(200, activation='relu'))
conv_model.add(Dropout(0.5))
conv_model.add(Dense(200, activation='relu'))
conv_model.add(Dropout(0.5))
conv_model.add(Dense(200, activation='relu'))
conv_model.add(Dropout(0.5))
# conv_model.add(Dense(10, activation='softmax'))
conv_model.add(Dense(vec_length, activation='linear'))

# For a mean squared error regression problem
conv_model.compile(optimizer='rmsprop', loss='mse')


conv_model.fit(seq_added_dim, next_as_mat, batch_size=128, epochs=80)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<keras.callbacks.History at 0x7f34465a4290>

In [29]:
conv_predictions = conv_model.predict(seq_added_dim)

conv_reconstruction_array = []
for i in range(conv_predictions.shape[0]):
    # don't forget to scale up again
#     coeffs = vector_to_list(np.array(scale_up(predictions[i,:]),dtype=np.float64))
#     coeffs = vector_to_list(scaled_up_predictions[i,:])
    coeffs = vector_to_list(conv_predictions[i,:])
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    conv_reconstruction_array.append(rec_arr)

print("reconstruction done")
print("coeffs shape: " + str([len(j) for j in coeffs]))
print("coeffs type: " + str(type(coeffs[0][0])))

# scale it up before writing
conv_reconstruction_array = np.concatenate(conv_reconstruction_array,axis=1)

rec_split = np.split(conv_reconstruction_array.T, 2)
print(len(rec_split))

write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_network3.wav', input_rate, write_array)

reconstruction done
coeffs shape: [44, 44, 69, 119]
coeffs type: <type 'numpy.float32'>
2
reconstruction shape: (2414720, 2)


# experimental model time

In [26]:
# define this without using sequential
# this is necessary for residual connections

# define input (same as conv approach)
exp_input1 = Input(shape=(40,276,1), dtype='int32', name='main_input')
# add convolutions
conv1 = Convolution2D(32, (3, 3), activation='relu', input_shape=(40,276,1))(exp_input1)
asdfsd =

# concatenate original input and cov out, feed into dense layers

SyntaxError: invalid syntax (<ipython-input-26-8d40c74352c9>, line 4)

## LEFTOVERS

In [None]:
# a = []
# a.append('asdsd')
# a.append('adfgbsdfgbdfg')
# a
range(1,6,2)

In [None]:
# DEPRECATED
# reshapes total sequence into batches
def get_batches(batch_size, sequence_length, input_sequence, output_sequence):
    batch_amount = int(math.floor(total_samples/float(batch_size*sequence_length)))
    input_batches = []
    output_batches = []
    print('creating ' + str(batch_amount) + ' batches')
    for batch_index in range(1, batch_amount + 1):
        print(batch_index)
        for sequence_index in range(0,sequence_length*batch_size,sequence_length):
            print(sequence_index)
            sequence = input_sequence[sequence_index:sequence_index+sequence_length,:]
            
    
# https://keras.io/layers/wrappers/

# Consider a batch of 32 samples, where each sample is a sequence of 10 vectors of 16 dimensions. 
# The batch input shape of the layer is then (32, 10, 16), and the input_shape, not including 
# the samples dimension, is (10, 16)

# batch_size = 32
# sequence_length = 20   # no. of vectors in each sequence
# vector_length = input_matrix.shape[1]
# total_samples = input_matrix.shape[0]


# input_batches, output_batches = get_batches(batch_size, sequence_length, input_matrix, output_matrix)

In [None]:

# build model
# model = Sequential()
# model.add(LSTM(1, input_shape=(timesteps, data_dim), return_sequences=True))
# model.add(TimeDistributed(Dense(vector_length), input_shape=(sequence_length, vector_length)))

In [None]:
# def group_list(l, group_size):
#     """
#     :param l:           list
#     :param group_size:  size of each group
#     :return:            Yields successive group-sized lists from l.
#     """
# #     res_arr = 
#     for i in xrange(0, len(l), group_size):
#         yield l[i:i+group_size,:]

# def get_np_batch()

In [None]:
print('leftovers')

# max_features = 2124
# maxlen = 80  # cut texts after this number of words (among top max_features most common words)
# batch_size = 32
# lstm_units = 250

# # output_res = output_matrix #.reshape((-1, 1))

# input_batches = group_list(input_matrix, batch_size)
# output_batches = group_list(output_matrix, batch_size)

# model = Sequential()

# model.add(LSTM(max_features, input_dim=max_features))
# model.compile(loss='mean_squared_error', optimizer='rmsprop')
# model.fit(input_batches,output_batches, nb_epoch=10)



# model.add(Embedding(max_features, lstm_units))
# model.add(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2, input_shape=(max_features, )))
# model.add(Dense(max_features, activation='sigmoid'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='sparse_categorical_crossentropy',
#               optimizer='adam',
#               metrics=['accuracy'])

# print('Train...')
# model.fit(input_matrix, output_res,
# #           batch_size=batch_size,
#           epochs=15,
#           validation_data=(input_matrix, output_res))
# score, acc = model.evaluate(input_matrix, output_res) #, batch_size=batch_size)
# print('Test score:', score)
# print('Test accuracy:', acc)