## This is my exploration of Keras models. See second half for non-sequential.

In [83]:
from keras.preprocessing import sequence
from keras.models import Sequential, Model
from keras.layers import *
from keras.optimizers import Adam
import numpy as np

In [13]:
model = Sequential()

# Dimension: (288, 5, 1)
model.add(TimeDistributed(Conv2D(filters=10,
								 kernel_size=(9, 2),
								 strides=(1, 1),
								 padding='SAME',
								 activation='relu',
								 input_shape=(288, 5, 1))))
# Dimension: (288, 5, 10)
model.add(TimeDistributed(MaxPooling2D(pool_size=(4, 2), strides=(2, 1))))
# Dimension: (144, 4, 10)
model.add((LSTM(1, input_shape=(141, 3, 20), return_sequences=True)))
# Dimension: (144, 4, 10)
model.add(TimeDistributed(Conv2D(20, kernel_size=(5, 2), strides=(1, 1), activation='relu')))
# Dimension: (140, 3, 20)
model.add(TimeDistributed(MaxPooling2D(pool_size=(4, 1), strides=(4, 1))))
# Dimension: (35, 3, 20)
model.add((LSTM(1, input_shape=(35, 3, 20), return_sequences=True)))
# Dimension: (35, 3, 20)
model.add(TimeDistributed(Flatten()))
# Dimension: (2100)
model.add(TimeDistributed(Dense(1000)))
model.add(TimeDistributed(Dense(500)))
model.add(Dense(88))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

In [20]:
import os
cqt_slice_filenames = sorted(os.listdir("sample-cqt-aitan\\sample-cqt-aitan"))
pr_slice_filenames = sorted(os.listdir("sample-pianoroll-aitan\\sample-pianoroll-aitan"))
num_slices = len(cqt_slice_filenames)
assert(len(cqt_slice_filenames) == len(pr_slice_filenames))

In [28]:
# # Prepare x and y data
# x_train = np.zeros((1, num_slices, 288, 5, 1))
# y_train = np.zeros((1, num_slices, 88, 1))
# for i in range(num_slices):
#     cqt_slice = np.fromfile("sample-cqt-aitan\\sample-cqt-aitan\\" + cqt_slice_filenames[i]).reshape((288, 5, 1))
#     pr_slice = np.fromfile("sample-pianoroll-aitan\\sample-pianoroll-aitan\\" + pr_slice_filenames[i]).reshape((88, 1))
#     x_train[0, i, :, :, :] = cqt_slice
#     y_train[0, i, :, :] = pr_slice

In [None]:
model.fit(x_train, y_train, epochs=5)

In [None]:
score, acc = model.evaluate(x_train, y_train)

## Abort and try non-sequential

In [41]:
from sklearn.model_selection import train_test_split

In [42]:
# Prepare x and y data
x = np.zeros((num_slices, 288, 5, 1))
y = np.zeros((num_slices, 88))
for i in range(num_slices):
    cqt_slice = np.fromfile("sample-cqt-aitan\\sample-cqt-aitan\\" + cqt_slice_filenames[i]).reshape((288, 5, 1))
    pr_slice = np.fromfile("sample-pianoroll-aitan\\sample-pianoroll-aitan\\" + pr_slice_filenames[i]).reshape((88))
    x[i, :, :, :] = cqt_slice
    y[i, :] = pr_slice

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [62]:
model = Sequential()

# Dimension: (288, 5, 1)
model.add(Conv2D(filters=10,
                 kernel_size=(9, 2),
                 strides=(1, 1),
                 padding='SAME',
                 activation='relu',
                 input_shape=(288, 5, 1)))
# Dimension: (288, 5, 10)
model.add(MaxPooling2D(pool_size=(4, 2), strides=(2, 1)))
# Dimension: (143, 4, 10)
model.add(Conv2D(20, kernel_size=(5, 2), strides=(1, 1), activation='relu'))
# Dimension: (139, 3, 20)
model.add(MaxPooling2D(pool_size=(4, 1), strides=(4, 1)))
# Dimension: (34, 3, 20)
model.add(Flatten())
# Dimension: (2100)
model.add(Dense(1000))
model.add(Dense(500))
model.add(Dense(88))
model.add(Activation('sigmoid'))

# try using different optimizers and different optimizer configs
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_22 (Conv2D)           (None, 288, 5, 10)        190       
_________________________________________________________________
max_pooling2d_21 (MaxPooling (None, 143, 4, 10)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 139, 3, 20)        2020      
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 34, 3, 20)         0         
_________________________________________________________________
flatten_11 (Flatten)         (None, 2040)              0         
_________________________________________________________________
dense_31 (Dense)             (None, 1000)              2041000   
_________________________________________________________________
dense_32 (Dense)             (None, 500)             

In [63]:
model.fit(x_train[:10], y_train[:10], epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x27319d414c8>

In [64]:
score, acc = model.evaluate(x_test, y_test)



In [65]:
acc

1.0

## Big deep dive into many-to-many

Start by defining input/output pairs for a simple model.

In [44]:
# Initialize empty data structures
num_samples = 1
sequence_len = 100
input_slice_width = 5
input_slice_height = 264
output_slice_height = 88

X = np.zeros((num_samples, sequence_len, input_slice_height, input_slice_width))
Y = np.zeros((num_samples, sequence_len, output_slice_height))

In [45]:
# Randomly populate the data structures
for i in range(sequence_len):
    random_note = np.random.randint(88)
    X[0, i, random_note*3:random_note*3 + 3, :] = np.full((3, input_slice_width), 1)
    Y[0, i, random_note] = 1

In [47]:
print(X[0, 44, :, 0])
print(Y[0, 44, :])

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0

### Test out the model with a for-loop (a la machine translation assignment) 

In [60]:
# Dimension: (264, 5, 10)
conv1 = Conv2D(filters=10,
                kernel_size=(9, 2),
                strides=(1, 1),
                padding='SAME',
                activation='relu',
                input_shape=(1, 1, 264, 5, 1))
# Dimension: (264, 5, 10)
maxpool1 = MaxPooling2D(pool_size=(4, 2), strides=(2, 1))
# Dimension: (131, 4, 10)
conv2 = Conv2D(20, kernel_size=(5, 2), strides=(3, 2), activation='relu')
# Dimension: (43, 2, 20)
maxpool2 = MaxPooling2D(pool_size=(4, 1), strides=(4, 1))
# Dimension: (10, 2, 20)
flat = Flatten()
# Dimension: (400)

# lstm = LSTM(211, input_shape=(sequence_len, 400), return_sequences=True)
lstm = LSTM(211, return_state=True)

densor = Dense(88)

activator = Activation('sigmoid', name='attention_weights')

In [61]:
# X shape is (num_samples, sequence_len, input_slice_height, input_slice_width)
model = Sequential()

Xs = [X[0, i, :, :].reshape((input_slice_height, input_slice_width, 1)) for i in range(sequence_len)]

inputs = []
outputs = []

# Loop before LSTM layer
for t in range(sequence_len):
    a = Input(shape=(input_slice_height, input_slice_width, 1))
    inputs.append(a)
    
    a = conv1(a)
    a = maxpool1(a)
    a = conv2(a)
    a = maxpool2(a)
    a = flat(a)
    a = lstm(a)
    a = densor(a)
    a = activator(a)
    
    outputs.append(a)
    
model = Model(inputs=inputs, outputs=outputs)

model.summary()

ValueError: Input 0 is incompatible with layer lstm_7: expected ndim=3, found ndim=2

Couldn't figure out how to get the dimensions to work, so let's try a completely flat/FC model.

In [73]:
n_s = 100

flat_ = Flatten()
dense1_ = Dense(500)
dense2_ = Dense(250)
dense3_ = Dense(125)
reshape_ = Reshape((1, 1, 125), input_shape=(125,))
lstm_ = LSTM(n_s, return_state=True)

In [74]:
# X shape is (num_samples, sequence_len, input_slice_height, input_slice_width)
model = Sequential()

Xs = [X[0, i, :, :] for i in range(sequence_len)]

s0 = Input(shape=(n_s,), name='s0')
c0 = Input(shape=(n_s,), name='c0')
s = s0
c = c0
inputs = []
outputs = []

# Loop before LSTM layer
for t in range(sequence_len):
    a = Input(shape=(input_slice_height, input_slice_width))
    inputs.append(a)

    a = flat_(a)
    a = dense1_(a)
    a = dense2_(a)
    a = dense3_(a)
#     a = reshape_(a)
    s, _, c = lstm(inputs=a, initial_state=[s, c])
    s = Dense(88)(s)
    
    outputs.append(s)
    
model = Model(inputs=(inputs + [s0, c0]), outputs=outputs)

model.summary()

ValueError: Layer lstm_7 expects 15 inputs, but it received 3 input tensors. Input received: [<tf.Tensor 'dense_41/BiasAdd:0' shape=(None, 125) dtype=float32>, <tf.Tensor 's0_8:0' shape=(None, 100) dtype=float32>, <tf.Tensor 'c0_8:0' shape=(None, 100) dtype=float32>]

`ValueError: Layer lstm_7 expects 15 inputs, but it received 3 input tensors.` And the 15 keep climbing...

We're trying to do something that hasn't quite been done in the homeworks, which is pass the result of a single NN to an LSTM, even though LSTMs really want to be inputted a time-distributed vector. Instead of figuring out how to merge a bunch of layers, let's see if we can just cut to the chase and use TimeDistributed.

In [127]:
inp = Input(shape=(sequence_len, input_slice_height, input_slice_width))

a = TimeDistributed(Reshape((sequence_len, (input_slice_height * input_slice_width)), input_shape=(sequence_len, input_slice_height, input_slice_width)))(inp)
a = TimeDistributed(Dense(500))(a)
a = TimeDistributed(Dense(250))(a)
a = TimeDistributed(Dense(125, name="a"))(a)
a = LSTM(100, input_shape=(sequence_len, input_slice_height * input_slice_width), return_sequences=True)(a)
a = TimeDistributed(Dense(88, activation = "sigmoid"))(a)

model = Model(inputs=inp, outputs=a)

ValueError: total size of new array must be unchanged

Incredible! Reshape doesn't work...

But it does work if we use a Sequential model and use the model.add() syntax!

## Most promising approach so far:

In [141]:
model = Sequential()
model.add(Reshape((sequence_len, (input_slice_height * input_slice_width)), input_shape=(sequence_len, input_slice_height, input_slice_width)))
model.add(TimeDistributed(Dense(500)))
model.add(LSTM(100, input_shape=(sequence_len, 500), return_sequences=True))
model.add(TimeDistributed(Dense(88, activation = "sigmoid")))

opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=["accuracy"])

model.summary()

Model: "sequential_26"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_26 (Reshape)         (None, 100, 1320)         0         
_________________________________________________________________
time_distributed_86 (TimeDis (None, 100, 500)          660500    
_________________________________________________________________
lstm_37 (LSTM)               (None, 100, 100)          240400    
_________________________________________________________________
time_distributed_87 (TimeDis (None, 100, 88)           8888      
Total params: 909,788
Trainable params: 909,788
Non-trainable params: 0
_________________________________________________________________


In [138]:
model.fit(X, Y, batch_size = 10, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x174053d5f08>

## Now try adding a convolutional layer or two

In [203]:
model = Sequential()

conv = Conv2D(filters=10,
             kernel_size=(9, 2),
             strides=(1, 1),
             padding='SAME',
             activation='relu')
model.add(TimeDistributed(conv, input_shape=(sequence_len, input_slice_height, input_slice_width, 1)))
model.add(TimeDistributed(MaxPooling2D(pool_size=(4, 2), strides=(2, 1))))
model.add(TimeDistributed(Conv2D(20, kernel_size=(5, 2), strides=(3, 2), activation='relu')))
model.add(TimeDistributed(Flatten()))
model.add(TimeDistributed(Dense(500)))
model.add(LSTM(500, input_shape=(sequence_len, 500), return_sequences=True))
model.add(LSTM(200, input_shape=(sequence_len, 500), return_sequences=True))
model.add(TimeDistributed(Dense(88, activation = "sigmoid")))

opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.01)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=["accuracy"])

In [211]:
X_copy = np.expand_dims(X, axis=4)

In [212]:
X_copy = np.array([X_copy[0] for i in range(70)])
Y_copy = np.array([Y[0] for i in range(70)])
X_copy.shape, Y_copy.shape

((70, 100, 264, 5, 1), (70, 100, 88))

In [213]:
model.fit(X_copy, Y_copy, batch_size = 10, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x1741a2e4588>

In [205]:
model.summary()

Model: "sequential_53"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
time_distributed_157 (TimeDi (None, 100, 264, 5, 10)   190       
_________________________________________________________________
time_distributed_158 (TimeDi (None, 100, 131, 4, 10)   0         
_________________________________________________________________
time_distributed_159 (TimeDi (None, 100, 43, 2, 20)    2020      
_________________________________________________________________
time_distributed_160 (TimeDi (None, 100, 1720)         0         
_________________________________________________________________
time_distributed_161 (TimeDi (None, 100, 500)          860500    
_________________________________________________________________
lstm_54 (LSTM)               (None, 100, 500)          2002000   
_________________________________________________________________
lstm_55 (LSTM)               (None, 100, 200)        