# Testing the model with transposition-based data augmentation

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, Dropout
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import pandas as pd
import numpy as np
import random

from utilities import sample
from data_preprocessing import get_chorales_as_list_with_transpositions
from data_preprocessing import array_is_nan
from chorale_player import play_chorale

# Load the preprocessed chorales, transpositions included
data_array = get_chorales_as_list_with_transpositions()
# Flatten everything into a single 1-D array
data_array = data_array.reshape(-1)
# Positions flagged by array_is_nan, i.e. the (nan, nan) entries
index = np.argwhere(array_is_nan(data_array))
# Drop those positions and turn what remains into a plain Python list
corpus = np.delete(data_array, index).tolist()

# Vocabulary: every distinct note exactly once, in sorted order
notes = sorted(set(corpus))
print('total notes:', len(notes))
# Lookup tables: note -> vocabulary position, and the reverse
note_indices = {note: position for position, note in enumerate(notes)}
indices_note = dict(enumerate(notes))

# Windowing parameters: the corpus is cut into semi-redundant sequences of
# maxlen notes, with consecutive sequences starting step notes apart
maxlen = 10
step = 3


In [None]:

# Slide a window of maxlen notes over the corpus, advancing by step notes
# each time; every window is paired with the note that directly follows it.
phrases = []
next_notes = []
for i in range(0, len(corpus) - maxlen, step):
    phrases.append(corpus[i: i + maxlen])
    next_notes.append(corpus[i + maxlen])
print('nb sequences:', len(phrases))

print('Vectorization...')
# One-hot encode the inputs (sequence, timestep, note) and the targets
# (sequence, note). The builtin bool is used as dtype because the np.bool
# alias was deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(phrases), maxlen, len(notes)), dtype=bool)
y = np.zeros((len(phrases), len(notes)), dtype=bool)
for i, (phrase, target) in enumerate(zip(phrases, next_notes)):
    for t, note in enumerate(phrase):
        x[i, t, note_indices[note]] = True
    y[i, note_indices[target]] = True


In [None]:

# build the model: a single LSTM followed by dropout and a softmax output
# with one unit per distinct note
print('Build model...')
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(notes))))
# The dropout layer must be passed to model.add() to become part of the
# network; constructing it on its own line has no effect on the model.
model.add(Dropout(0.5))
model.add(Dense(len(notes)))
model.add(Activation('softmax'))

# NOTE(review): newer Keras releases name this argument `learning_rate`;
# `lr` is kept here to match the Keras API the rest of this file targets.
optimizer = RMSprop(lr=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

