In [1]:
import math
import pywt
import inspect
import numpy as np
from keras.preprocessing import sequence
from keras.optimizers import RMSprop
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, LSTM, Activation, Input
# from keras.layers import LSTM
import scipy.io.wavfile as wavfile
from scipy.interpolate import interp1d

Using TensorFlow backend.


# PRE-PROCESSING

In [2]:
input_rate, input_signal = wavfile.read('data/1clean_Selection.wav')
output_rate, output_signal = wavfile.read('data/1Selection.wav')

## Testing out pywt functions and reconstruction

In [3]:
# some settings:
wavetype = 'db10'
# wavelevel = 15

print("input rate: " + str(input_rate))
sample_length = input_rate//200
print("for 5 ms samples, input sample length is: " + str(sample_length))

w = pywt.Wavelet(wavetype)
wavelevel = pywt.dwt_max_level(data_len=sample_length, filter_len=w.dec_len)

# set a little lower:
# wavelevel = 1
print("Max wave level decomposition: " + str(wavelevel))

# these are floats, original is int16
input_coeffs1 = pywt.wavedec(input_signal[:,0].T, wavetype, level=wavelevel)
input_coeffs2 = pywt.wavedec(input_signal[:,1].T, wavetype, level=wavelevel)
# output_coeffs1 = pywt.wavedec(output_signal[:,0].T, wavetype, level=wavelevel)
# output_coeffs2 = pywt.wavedec(output_signal[:,1].T, wavetype, level=wavelevel)
print("input signal shape: " + str(input_signal.shape))

# reconstruction for left and right channel
recons1 = np.array([pywt.waverec(input_coeffs1, wavetype)]).astype('int16')
recons2 = np.array([pywt.waverec(input_coeffs2, wavetype)]).astype('int16')

# print(recons1.shape)
print("coefficients shapes: " + str(len(input_coeffs1)) + ", " + str([len(j) for j in input_coeffs1]))
print("coefficients type: " + str(type(input_coeffs1[0])))

write_array = np.concatenate((recons1,recons2),axis=0).T
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons.wav', input_rate, write_array)

# get available wavelets
print(pywt.wavelist())

input rate: 44100
for 5 ms samples, input sample length is: 220
Max wave level decomposition: 3
input signal shape: (1810432, 2)
coefficients shapes: 4, [226320, 226320, 452622, 905225]
coefficients type: <type 'numpy.ndarray'>
reconstruction shape: (1810432, 2)
['bior1.1', 'bior1.3', 'bior1.5', 'bior2.2', 'bior2.4', 'bior2.6', 'bior2.8', 'bior3.1', 'bior3.3', 'bior3.5', 'bior3.7', 'bior3.9', 'bior4.4', 'bior5.5', 'bior6.8', 'cgau1', 'cgau2', 'cgau3', 'cgau4', 'cgau5', 'cgau6', 'cgau7', 'cgau8', 'cmor', 'coif1', 'coif2', 'coif3', 'coif4', 'coif5', 'coif6', 'coif7', 'coif8', 'coif9', 'coif10', 'coif11', 'coif12', 'coif13', 'coif14', 'coif15', 'coif16', 'coif17', 'db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9', 'db10', 'db11', 'db12', 'db13', 'db14', 'db15', 'db16', 'db17', 'db18', 'db19', 'db20', 'db21', 'db22', 'db23', 'db24', 'db25', 'db26', 'db27', 'db28', 'db29', 'db30', 'db31', 'db32', 'db33', 'db34', 'db35', 'db36', 'db37', 'db38', 'dmey', 'fbsp', 'gaus1', 'gaus2', '

In [4]:
print(input_signal[:10,0].T)
print(recons1[0,:10])

[ 0  0  0 -1 -2 -2 -3 -2 -1 -1]
[ 0  0  0 -1 -2 -2 -3 -2 -1 -1]


## prepare data for keras

In [5]:
# functions for pre-processing and reconstruction of training vectors
# first function is over a batch - need smaller size

In [6]:
# DEPRECATED
def wavelet_to_vector_all(input_raw, output_raw, chunk_size, wavelet_level, wavelet_type):
    current_set = 'input'
    
    w = pywt.Wavelet(wavelet_type)
    max_level = pywt.dwt_max_level(data_len=chunk_size, filter_len=w.dec_len)
    if wavelet_level > max_level:
        print('wavelet level too high. set to max level: ' + str(max_level))
        wavelet_level = max_level
    
    # short hacky loop
    while True:
        # select the correct set
        if current_set == 'input':
            data = input_raw
            amount_of_chunks = int(math.floor(data.shape[0]/chunk_size))
            input_list = []
            print('Amount of samples: ' + str(amount_of_chunks))
        else:
            data = output_raw
            amount_of_chunks = int(math.floor(data.shape[0]/chunk_size))
            output_list = []
        
        index_range = (np.arange(amount_of_chunks) * chunk_size)
        indp = chunk_size - 1
        # for all chunks do this:
        for ind in index_range:
#             print(ind)
            sample1 = data[ind:ind+indp,0].T
            sample2 = data[ind:ind+indp,1].T
            coeffs1 = pywt.wavedec(sample1, wavelet_type, level=wavelet_level)
            coeffs2 = pywt.wavedec(sample2, wavelet_type, level=wavelet_level)
            unfolded1 = np.array([item for sublist in coeffs1 for item in sublist])
            unfolded2 = np.array([item for sublist in coeffs2 for item in sublist])
            # POSSIBLE MISTAKE: puts l/r in one big sequence
            # run on mono first
            vector = np.concatenate((unfolded1,unfolded2),axis=0)
            
            if current_set == 'input':
                input_list.append(vector)
            else:
                output_list.append(vector)
#             unf_arr = np.array(unfolded1)
#             print(unf_arr.shape)
#             print(len(unfolded))
            
            # for all coeff levels:
#             for i in range(len(coeffs1)):
#                 print(i)
            
        
        
        if current_set == 'output':
            break
        current_set = 'output'

    # convert lists to arrays
    input_arr = np.array(input_list)
    output_arr = np.array(output_list)
    
    # print stats for converting back to audio
    wavelet_lengths = [len(j) for j in coeffs1]
    ind = 0
    index_list = [0]
    for w_l in wavelet_lengths:
        ind += w_l
        index_list.append(ind)
    print("length of different wavelet bands: " + str(wavelet_lengths))
    print("indexes: " + str(index_list))
    print("more sizes? " + str([len(sublist) for sublist in coeffs1]))
    # return level sizes for reconstruction
    level_sizes = []
    for cf in coeffs1:
        level_sizes.append(cf.shape[0])
    return [input_arr, output_arr, level_sizes]
# input_coeffs1[16].shape

# rows=samples, cols=dim
[input_matrix, output_matrix, level_sizes] = wavelet_to_vector_all(input_signal, output_signal, sample_length, wavelevel, 'db4')
print(input_matrix.shape)
print(level_sizes)

Amount of samples: 8229
length of different wavelet bands: [33, 33, 60, 113]
indexes: [0, 33, 66, 126, 239]
more sizes? [33, 33, 60, 113]
(8229, 478)
[33, 33, 60, 113]


In [7]:
# some info
wav_lengths = [44, 44, 69, 119]
wav_indices = [0]
for length in wav_lengths:
    wav_indices.append(wav_indices[-1]+length)
print("indices where vector should be sliced: " + str(wav_indices))

indices where vector should be sliced: [0, 44, 88, 157, 276]


In [8]:
# for a single audio slice (mono)
def audio_to_vector(input_mono, wavelet_level, wavelet_type):
    coeffs = pywt.wavedec(input_mono, wavelet_type, level=wavelet_level)
#     print([len(co) for co in coeffs])
#     vector = np.array([item for sublist in coeffs for item in sublist])
#     vector = np.array([])
    vector = np.concatenate(coeffs)
#     for band in coeffs:
#         vector.append
    return vector

In [9]:
# for a single output vector (mono)
def vector_to_list(input_vector, indexes=wav_indices):
    # unflatten the vector
#     for i in range(len(indexes)-1):
#         wavelet_list.append(input_vector[indexes[i]:indexes[i+1]])
    # use np.split
    wavelet_list = np.split(input_vector, indexes[1:-1])
    return wavelet_list
#     wavelet_list = [input_vector[]]

In [10]:
# create new (mono) data set
# for both channels, we just handle mono samples:
mono_input = []
mono_output = []
for side in (0,1):
    total_raw_in = input_signal[:,side]
    total_raw_out = output_signal[:,side]
    number_of_chunks = total_raw_in.shape[0]//sample_length
#     print(number_of_chunks)
    for ind in range(number_of_chunks):
        start_ind = ind*sample_length
        end_ind = ind*sample_length + sample_length
        curr_in_sample = total_raw_in[start_ind:end_ind]
        curr_out_sample = total_raw_out[start_ind:end_ind]
        in_vec = audio_to_vector(curr_in_sample, wavelevel, wavetype)
        out_vec = audio_to_vector(curr_out_sample, wavelevel, wavetype)
        mono_input.append(in_vec)
        mono_output.append(out_vec)
#     print(curr_in_sample.shape)
vec_length = in_vec.shape[0]
print("vector length: " +str(vec_length))
mono_input = np.array(mono_input)
mono_output = np.array(mono_output)

print(np.max(mono_input))
print(np.min(mono_input))
print(np.max(mono_output))
print(np.min(mono_output))
# max seems to be about 100000, so scale it to that range:
# not necessary for linear activation i think...
# mono_input = mono_input/100000
# mono_output = mono_output/100000
print(np.max(mono_input))

print(mono_input.shape)

vector length: 276
92415.9154495
-83612.7180552
87949.0011249
-90682.166482
92415.9154495
(16458, 276)


In [11]:
# now put some of it back into audio form to test the representation's audio quality
reconstruction_array = []
for i in range(mono_output.shape[0]):
    coeffs = vector_to_list(mono_output[i,:])
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

reconstruction_array = np.concatenate(reconstruction_array,axis=1)
print(reconstruction_array.shape)
# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)
print(len(rec_split))

write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_new.wav', input_rate, write_array)
# scientific analysis: sounds fine to me

(1, 3620760)
2
reconstruction shape: (1810380, 2)


# build the first model

In [12]:
model = Sequential()
model.add(Dense(200, activation='relu', input_dim=vec_length))
model.add(Dense(200, activation='relu'))
model.add(Dense(vec_length, activation='linear'))

# For a mean squared error regression problem
model.compile(optimizer='rmsprop', loss='mse')
model.fit(mono_input, mono_output, epochs=70, batch_size=128)

# print(np.max(mono_input))
# print(np.min(mono_input))
# print(np.max(mono_output))
# print(np.min(mono_output))
# print(type(mono_input[0,1]))

# fi64 = np.finfo(np.float64)
# print(fi64.min)
# print(fi64.max)

# test_a = np.array([1,2,3,4])
# test_b = np.array([1,2,3,4,5])+8
# test_list = [test_a, test_b]
# test_vec = np.concatenate(test_list)
# print(test_vec)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/70
Epoch 2/70
Epoch 3/70
Epoch 4/70
Epoch 5/70
Epoch 6/70
Epoch 7/70
Epoch 8/70
Epoch 9/70
Epoch 10/70
Epoch 11/70
Epoch 12/70
Epoch 13/70
Epoch 14/70
Epoch 15/70
Epoch 16/70
Epoch 17/70
Epoch 18/70
Epoch 19/70
Epoch 20/70
Epoch 21/70
Epoch 22/70
Epoch 23/70
Epoch 24/70
Epoch 25/70
Epoch 26/70
Epoch 27/70
Epoch 28/70
Epoch 29/70
Epoch 30/70
Epoch 31/70
Epoch 32/70
Epoch 33/70
Epoch 34/70
Epoch 35/70
Epoch 36/70
Epoch 37/70
Epoch 38/70
Epoch 39/70
Epoch 40/70
Epoch 41/70
Epoch 42/70
Epoch 43/70
Epoch 44/70
Epoch 45/70
Epoch 46/70
Epoch 47/70
Epoch 48/70
Epoch 49/70
Epoch 50/70
Epoch 51/70
Epoch 52/70
Epoch 53/70
Epoch 54/70
Epoch 55/70
Epoch 56/70
Epoch 57/70
Epoch 58/70
Epoch 59/70
Epoch 60/70
Epoch 61/70
Epoch 62/70
Epoch 63/70
Epoch 64/70
Epoch 65/70
Epoch 66/70
Epoch 67/70
Epoch 68/70
Epoch 69/70
Epoch 70/70


<keras.callbacks.History at 0x7fa6930e4990>

In [27]:
predictions = model.predict(mono_input)
print(predictions.shape)

reconstruction_array = []
for i in range(predictions.shape[0]):
    # don't forget to scale up again
    coeffs = vector_to_list(predictions[i,:]) # *100000
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

reconstruction_array = np.concatenate(reconstruction_array,axis=1)
print(reconstruction_array.shape)
# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)
print("coeffs shape: " + str([len(j) for j in coeffs]))
print("coeffs type: " + str(type(coeffs[0][0])))
print(len(rec_split))

write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_network1.wav', input_rate, write_array)

(16458, 276)
(1, 3620760)
coeffs shape: [44, 44, 69, 119]
coeffs type: <type 'numpy.float32'>
2
reconstruction shape: (1810380, 2)


# now for the second (LSTM) model

In [14]:
95000*2

190000

In [50]:
def scale_down(data):
    # -95000, 95000 just fits over the max and min
    return (data/190000.0)+0.5

def scale_up(data):
#     print(np.min(data))
    return (data-0.5)*190000.0

In [52]:
# pre-process into sequences
# first we need to scale (lstm doesn't like huge inputs)
scaled_in = scale_down(mono_input)
scaled_out = scale_down(mono_output)

print(np.min(scaled_out))
print(np.max(scaled_out))

# cut the text in semi-redundant sequences of maxlen characters
maxlen = 40
step = 1
vec_sequences = []
next_vecs = []
for i in range(0, len(scaled_in) - maxlen, step):
    vec_sequences.append(scaled_in[i: i + maxlen])
    next_vecs.append(scaled_out[i + maxlen])
#     next_vecs.append(mono_output[i + maxlen])
vec_sequences = np.array(vec_sequences)
next_vecs = np.array(next_vecs)
print('nb sequences:', len(vec_sequences))
# print(next_vecs[0].shape)
# print(vec_sequences[0].shape)
print(next_vecs.shape)
print(vec_sequences.shape)
print(type(next_vecs[0,0]))

# x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
# y = np.zeros((len(sentences), len(chars)), dtype=np.bool)




0.0227254395685
0.962889479605
('nb sequences:', 16418)
(16418, 276)
(16418, 40, 276)
<type 'numpy.float64'>


In [81]:
# # Define an input sequence and process it.
# encoder_inputs = Input(shape=(None, num_encoder_tokens))
# encoder = LSTM(latent_dim, return_state=True)
# encoder_outputs, state_h, state_c = encoder(encoder_inputs)
# # We discard `encoder_outputs` and only keep the states.
# encoder_states = [state_h, state_c]

# # Set up the decoder, using `encoder_states` as initial state.
# decoder_inputs = Input(shape=(None, num_decoder_tokens))
# # We set up our decoder to return full output sequences,
# # and to return internal states as well. We don't use the 
# # return states in the training model, but we will use them in inference.
# decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
# decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
#                                      initial_state=encoder_states)
# decoder_dense = Dense(num_decoder_tokens, activation='softmax')
# decoder_outputs = decoder_dense(decoder_outputs)

# # Define the model that will turn
# # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
# model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

lstm_model = Sequential()
lstm_model.add(LSTM(300, input_shape=(maxlen, vec_length)))

lstm_model.add(Dense(200, activation='relu', input_dim=vec_length))
lstm_model.add(Dense(200, activation='relu'))
lstm_model.add(Dense(vec_length, activation='relu'))

# For a mean squared error regression problem
lstm_model.compile(optimizer='rmsprop', loss='mse')

# lstm_model.add(Dense(vec_length))
# # lstm_model.add(Activation('softmax'))
# lstm_model.add(Activation("linear"))


# # lstm_model.add(Activation("sigmoid")) 

# optimizer = RMSprop(lr=0.1)
# lstm_model.compile(loss='mse', optimizer=optimizer)

lstm_model.fit(vec_sequences, next_vecs, batch_size=128, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fa5f5cae850>

In [69]:
e_test = np.array([[1000,-234235,54666,-123,222],[1000,-234235,54666,-123,222]])
low_e = scale_down(e_test)
high_e = scale_up(low_e)
print(high_e)

[[  1.00000000e+03  -2.34235000e+05   5.46660000e+04  -1.23000000e+02
    2.22000000e+02]
 [  1.00000000e+03  -2.34235000e+05   5.46660000e+04  -1.23000000e+02
    2.22000000e+02]]


In [82]:
predictions = lstm_model.predict(vec_sequences)
print("--RAW PREDICTION DATA--")
print(predictions.shape)
print("min, max")
print(np.min(predictions),np.max(predictions))

print("--TARGET DATA--")
print(next_vecs.shape)
print("min, max")
print(np.min(next_vecs),np.max(next_vecs))

scaled_up_predictions = scale_up(predictions)
print("--SCALED UP PREDICTION--")
print(scaled_up_predictions.shape)
print("min, max")
print(np.min(scaled_up_predictions),np.max(scaled_up_predictions))

print("--RAW TARGET--")
print(mono_output.shape)
print("min, max")
print(np.min(mono_output),np.max(mono_output))

scaled_up_target = scale_up(next_vecs)
print("--SCALED UP TARGET--")
print(scaled_up_target.shape)
print("min, max")
print(np.min(scaled_up_target),np.max(scaled_up_target))




reconstruction_array = []
for i in range(predictions.shape[0]):
    # don't forget to scale up again
#     coeffs = vector_to_list(np.array(scale_up(predictions[i,:]),dtype=np.float64))
    coeffs = vector_to_list(sclaed_up_predictions[i,:])
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

print("reconstruction done")
print("coeffs shape: " + str([len(j) for j in coeffs]))
print("coeffs type: " + str(type(coeffs[0][0])))

# scale it up before writing
reconstruction_array = np.concatenate(reconstruction_array,axis=1)
# scaled_up_out = scale_up(reconstruction_array)
# print("max, min for original output")
# print("max, min for scaled up recon")
# print(np.max(coeffs[0]))
# print(np.min(coeffs[0]))
# print(reconstruction_array.shape)
# print("max, min for original output")
# print(np.max(mono_output))
# print(np.min(mono_output))
# print(scaled_up_out.shape)
# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)
print(len(rec_split))

write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_network2.wav', input_rate, write_array)

--RAW PREDICTION DATA--
(16418, 276)
min, max
(0.0, 0.50962889)
--TARGET DATA--
(16418, 276)
min, max
(0.022725439568491657, 0.96288947960481064)
--SCALED UP PREDICTION--
(16418, 276)
min, max
(-95000.0, 1829.4895)
--RAW TARGET--
(16458, 276)
min, max
(-90682.166481986584, 87949.001124914023)
--SCALED UP TARGET--
(16418, 276)
min, max
(-90682.166481986584, 87949.001124914023)
reconstruction done
coeffs shape: [44, 44, 69, 119]
coeffs type: <type 'numpy.float32'>
2
reconstruction shape: (1805980, 2)


In [78]:
for i in range(150,200):
    print(predictions[1003,i], next_vecs[1003,i])
#     print(next_vecs[10000,i])
# print(predictions[10000,90:100])


(0.49083388, 0.49998259269456513)
(0.49014306, 0.50001232054436817)
(0.49065757, 0.5000209328511025)
(0.49058855, 0.50001488950860762)
(0.48991048, 0.50001346089001653)
(0.49096224, 0.50000368411431551)
(0.49101716, 0.50001706517046862)
(0.48992157, 0.50001358466243806)
(0.48961061, 0.49997704666606835)
(0.49075043, 0.49999569294121593)
(0.48955449, 0.50000156172726429)
(0.49045569, 0.50002705724033902)
(0.49036357, 0.49997686044744033)
(0.49035597, 0.49999624832033479)
(0.48974282, 0.50000880581480334)
(0.49089664, 0.50001774595722626)
(0.48977643, 0.49998900824396575)
(0.49041963, 0.50002394174554921)
(0.49065423, 0.50000331564987543)
(0.48976395, 0.49999149466988901)
(0.49084002, 0.50000439142194331)
(0.4911465, 0.49996972578703808)
(0.49071544, 0.49998242931699227)
(0.48989797, 0.49999722009805697)
(0.48986825, 0.49997012119216966)
(0.49077523, 0.49998880444316951)
(0.49032211, 0.50001444482778079)
(0.49044919, 0.50000588558443015)
(0.49013022, 0.50000658255666908)
(0.49043936, 0.5

In [None]:
print("predictions shape, max, min, type, example")
print(predictions.shape)
print(np.max(predictions))
print(np.min(predictions))
print(type(predictions[0,0]))
print(predictions[0,0])

print("scaled down output shape, max, min, type, example")
print(next_vecs.shape)
print(np.max(next_vecs))
print(np.min(next_vecs))
print(type(next_vecs[0,0]))
print(next_vecs[0,0])

# reconstruct target output for bug


reconstruction_array = []
for i in range(predictions.shape[0]):
    # don't forget to scale up again
    coeffs = vector_to_list(scale_up(next_vecs[i,:]))
    rec_arr = np.array([pywt.waverec(coeffs, wavetype)]).astype('int16')
    reconstruction_array.append(rec_arr)

print("reconstruction done")
print("coeffs shape: " + str([len(j) for j in coeffs]))
print("coeffs type: " + str(type(coeffs[0][0])))

# scale it up before writing
reconstruction_array = np.concatenate(reconstruction_array,axis=1)

# write_array = np.reshape(reconstruction_array, (-1,2))
rec_split = np.split(reconstruction_array.T, 2)


write_array = np.concatenate((rec_split[0], rec_split[1]),axis=1)
print('reconstruction shape: ' + str(write_array.shape))

# output wav for auditory test
wavfile.write('output/recons_target_lstm.wav', input_rate, write_array)

## LEFTOVERS

In [19]:
# a = []
# a.append('asdsd')
# a.append('adfgbsdfgbdfg')
# a
range(1,6,2)

[1, 3, 5]

In [20]:
# DEPRECATED
# reshapes total sequence into batches
def get_batches(batch_size, sequence_length, input_sequence, output_sequence):
    batch_amount = int(math.floor(total_samples/float(batch_size*sequence_length)))
    input_batches = []
    output_batches = []
    print('creating ' + str(batch_amount) + ' batches')
    for batch_index in range(1, batch_amount + 1):
        print(batch_index)
        for sequence_index in range(0,sequence_length*batch_size,sequence_length):
            print(sequence_index)
            sequence = input_sequence[sequence_index:sequence_index+sequence_length,:]
            
    
# https://keras.io/layers/wrappers/

# Consider a batch of 32 samples, where each sample is a sequence of 10 vectors of 16 dimensions. 
# The batch input shape of the layer is then (32, 10, 16), and the input_shape, not including 
# the samples dimension, is (10, 16)

# batch_size = 32
# sequence_length = 20   # no. of vectors in each sequence
# vector_length = input_matrix.shape[1]
# total_samples = input_matrix.shape[0]


# input_batches, output_batches = get_batches(batch_size, sequence_length, input_matrix, output_matrix)

In [21]:

# build model
# model = Sequential()
# model.add(LSTM(1, input_shape=(timesteps, data_dim), return_sequences=True))
# model.add(TimeDistributed(Dense(vector_length), input_shape=(sequence_length, vector_length)))

In [22]:
# def group_list(l, group_size):
#     """
#     :param l:           list
#     :param group_size:  size of each group
#     :return:            Yields successive group-sized lists from l.
#     """
# #     res_arr = 
#     for i in xrange(0, len(l), group_size):
#         yield l[i:i+group_size,:]

# def get_np_batch()

In [23]:
print('leftovers')

# max_features = 2124
# maxlen = 80  # cut texts after this number of words (among top max_features most common words)
# batch_size = 32
# lstm_units = 250

# # output_res = output_matrix #.reshape((-1, 1))

# input_batches = group_list(input_matrix, batch_size)
# output_batches = group_list(output_matrix, batch_size)

# model = Sequential()

# model.add(LSTM(max_features, input_dim=max_features))
# model.compile(loss='mean_squared_error', optimizer='rmsprop')
# model.fit(input_batches,output_batches, nb_epoch=10)



# model.add(Embedding(max_features, lstm_units))
# model.add(LSTM(lstm_units, dropout=0.2, recurrent_dropout=0.2, input_shape=(max_features, )))
# model.add(Dense(max_features, activation='sigmoid'))

# # try using different optimizers and different optimizer configs
# model.compile(loss='sparse_categorical_crossentropy',
#               optimizer='adam',
#               metrics=['accuracy'])

# print('Train...')
# model.fit(input_matrix, output_res,
# #           batch_size=batch_size,
#           epochs=15,
#           validation_data=(input_matrix, output_res))
# score, acc = model.evaluate(input_matrix, output_res) #, batch_size=batch_size)
# print('Test score:', score)
# print('Test accuracy:', acc)

leftovers
