# Overfitting Example
We try to overfit a single simple song with an encoder-decoder LSTM.


In [4]:
from music21 import stream, note, metadata

def get():
    """Build the song 'Alle meine Entchen' as a single-part music21 score.

    :return: tuple ``(piece, notes)`` -- the score, and the flat list of the
             note objects that were appended to its only part
    """
    quarter, half = 'quarter', 'half'

    # (pitch name, note type) pairs, one row group per phrase of the melody.
    melody = [
        ('C4', quarter), ('D4', quarter), ('E4', quarter), ('F4', quarter),
        ('G4', half), ('G4', half),

        ('A4', quarter), ('A4', quarter), ('A4', quarter), ('A4', quarter),
        ('G4', half),

        ('A4', quarter), ('A4', quarter), ('A4', quarter), ('A4', quarter),
        ('G4', half),

        ('F4', quarter), ('F4', quarter), ('F4', quarter), ('F4', quarter),
        ('E4', half), ('E4', half),

        ('D4', quarter), ('D4', quarter), ('D4', quarter), ('D4', quarter),
        ('C4', half),
    ]
    notes = [note.Note(pitchName, type=noteType) for pitchName, noteType in melody]

    part = stream.Part()
    part.id = 'part1'
    part.append(notes)

    piece = stream.Score()
    piece.insert(0, metadata.Metadata())
    piece.metadata.title = 'Alle meine Entchen'
    piece.insert(0, part)
    return piece, notes

In [5]:
# Build the example song once; `notes` (the flat note list) is reused by later cells.
piece, notes = get()
# Show the score using music21's 'midi' output format.
piece.show('midi')
#piece.show() # doesn't work inside the notebook for me

![alle.PNG](attachment:alle.PNG)

# Part 1: Encoding & Data Preparation

We use 128 MIDI notes and 3 additional symbols (Start, Stop, EndOfFrame). Therefore, we encode each timestep of our notes as a 131-dimensional vector.

* The encoder gets half of the song as input
* the decoder has to produce the missing half
* EndOfFrame (EOF) symbol is currently not used

In [8]:
import music21
from music21 import pitch, interval, stream
import numpy as np

In [11]:
def generateInput(notes, split=0.5, delta=0.25):
    """Split a note list into encoder input and teacher-forcing decoder data.

    :param notes: list of notes
    :param split: fraction of ``notes`` (by element count) handed to the encoder
    :param delta: quantization step, passed on to ``encode``
    :return: tuple ``(encoderInput, decoderInput, decoderTarget)`` of 2d arrays
    """
    cut = int(len(notes) * split)
    firstPart, secondPart = notes[:cut], notes[cut:]

    encoderInput = encode(firstPart, delta)
    decoderInput = encode(['start'] + secondPart + ['stop'], delta)

    # The target is the decoder input moved one timestep ahead (so it does not
    # contain the start token); the freed last row is filled with the stop token.
    decoderTarget = np.zeros_like(decoderInput)
    decoderTarget[:-1] = decoderInput[1:]
    decoderTarget[-1, getStopIndex()] = 1

    return encoderInput, decoderInput, decoderTarget

def encode(notes, delta):
    '''
    One-hot encode a sequence of notes and control tokens on a fixed time grid.

    Every 'start' / 'stop' token occupies exactly one timestep. A note occupies
    ``int(quarterLength * (1 / delta))`` consecutive timesteps, each marking the
    column returned by ``getNoteIndex``.

        :param notes: List of notes (single Part of a piece); may also contain
                      the strings 'start' and 'stop'
        :param delta: smallest note (quantization), in quarter lengths
        :return: 2d array with shape (timesteps, getTotalTokens())
        :raises AssertionError: if a note is shorter than ``delta``
        :raises NotImplementedError: for chords and every other unsupported element
    '''

    vectorSize = getTotalTokens()

    # First pass: resolve every element to (column, number of timesteps).
    # The array length is derived from these very numbers, so the allocation
    # and the filling below can never disagree because of rounding.
    spans = []
    for n in notes:
        if isinstance(n, str):
            # Compare by value: `is` on string literals only works by accident
            # of interning and triggers a SyntaxWarning on Python 3.8+.
            if n == 'start':
                spans.append((getStartIndex(), 1))
            elif n == 'stop':
                spans.append((getStopIndex(), 1))
            else:
                raise NotImplementedError
        elif n.isNote:
            assert n.quarterLength >= delta
            stepsOn = int(n.quarterLength * (1 / delta))  # todo: rounding issues?
            spans.append((getNoteIndex(n), stepsOn))
        elif n.isChord:
            raise NotImplementedError  # no chords at the moment
        else:
            raise NotImplementedError

    totalTimesteps = sum(steps for _, steps in spans)
    encoded = np.zeros((totalTimesteps, vectorSize))

    # Second pass: switch on the element's column for each timestep it covers.
    currentTimestep = 0
    for column, steps in spans:
        end = currentTimestep + steps
        encoded[currentTimestep:end, column] = 1
        currentTimestep = end

    return encoded

def getTotalTokens():
    """Return the size of the token vocabulary, i.e. the width of one encoded vector."""
    midiNotes = 128
    controlSymbols = 3  # Start + Stop + EndOfFrame
    return midiNotes + controlSymbols


def getNoteIndex(n):
    """Return the vector index of a note, which is the MIDI number of its pitch."""
    # todo: tied?
    notePitch = n.pitch
    return notePitch.midi

def getStartIndex():
    """Return the vector index of the Start symbol."""
    startIndex = 128
    return startIndex

def getStopIndex():
    """Return the vector index of the Stop symbol."""
    stopIndex = 129
    return stopIndex

def getEOFIndex():
    """Return the vector index of the EndOfFrame (EOF) symbol."""
    eofIndex = 130
    return eofIndex

In [10]:
# Encode the song with one timestep per quarter note (delta=1) and put a batch
# axis of size 1 in front of each array.
encoderInput, decoderInput, decoderTarget = (
    encoded[np.newaxis, ...] for encoded in generateInput(notes, delta=1)
)
print("encoderInput:", encoderInput.shape)
print("decoderInput:", decoderInput.shape)
print("decoderTarget:", decoderTarget.shape)

encoderInput: (1, 16, 131)
decoderInput: (1, 20, 131)
decoderTarget: (1, 20, 131)


# Part 2: Model definition & Training
We use an encoder-decoder model from here: [https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html](https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html) 

In [12]:
# Model / training configuration.
# The vocabulary size is taken from the encoding helper instead of repeating
# the literal 131, so it cannot drift from the width of the encoded arrays.
num_encoder_tokens = getTotalTokens()
num_decoder_tokens = num_encoder_tokens
epochs = 50
batch_size = 1
hidden_state_size = 100

# Training arrays under the names used by the model cells.
encoder_input_data = encoderInput
decoder_input_data = decoderInput
decoder_target_data = decoderTarget


In [14]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense

# Encoder: reads the input sequence; only the final LSTM states are kept
# (`encoder_outputs` is not used again in the visible notebook).
encoder_inputs = Input(shape=(None, num_encoder_tokens))
encoder = LSTM(hidden_state_size, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_inputs)
encoder_states = [state_h, state_c]

# Decoder: an LSTM that starts from the encoder states and returns an output
# for every timestep, followed by a softmax layer over the token vocabulary.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
decoder_lstm = LSTM(hidden_state_size, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Run training
# validation_split=0: nothing is held out for validation (the notebook's goal
# is to overfit its training data).
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0)


Using TensorFlow backend.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1f7bdce1080>

# Part 3: Inference
* Use the trained model and predict the second half of the training data
* Represent the generated sequence as a music21 piece in order to display and play it

In [15]:
# Inference encoder: maps an input sequence to the encoder's [state_h, state_c].
encoder_model = Model(encoder_inputs, encoder_states)

# Inference decoder: receives the LSTM states as explicit inputs and returns
# the updated states next to the token probabilities, so that decoding can be
# driven one step at a time. It calls the same `decoder_lstm` / `decoder_dense`
# layer objects that were used to build the training model.
# NOTE: this rebinds `decoder_outputs`, `state_h` and `state_c` from the
# training cell; `encoder_model` above has already been built at this point.
decoder_state_input_h = Input(shape=(hidden_state_size,))
decoder_state_input_c = Input(shape=(hidden_state_size,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_outputs, state_h, state_c = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,[decoder_outputs] + decoder_states)


In [20]:
def decode_sequence(input_seq, max_length=51):
    """Greedily decode a token sequence for one encoded input.

    The encoder states for ``input_seq`` seed the decoder, which is then run
    one timestep at a time: it is fed the Start token first and afterwards
    always the token it has just produced (argmax of the output distribution).

    :param input_seq: encoder input with a leading batch axis of size 1
    :param max_length: upper bound on the number of decoded tokens; the default
                       reproduces the previously hard-coded cut-off. At least
                       one token is always decoded.
    :return: list of decoded token indices as plain ints; the last element is
             the Stop index unless decoding was cut off by ``max_length``
    """
    states_value = encoder_model.predict(input_seq)

    # Target sequence of length 1 holding the start token.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    target_seq[0, 0, getStartIndex()] = 1.

    decoded_sentence = []
    while True:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        # Greedy choice; int() turns the numpy integer into a plain Python int.
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        decoded_sentence.append(sampled_token_index)

        if sampled_token_index == getStopIndex() or len(decoded_sentence) >= max_length:
            break

        # Feed the sampled token and the updated states into the next step.
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.
        states_value = [h, c]

    return decoded_sentence

In [21]:
def decodeSequence(seq, input=None):
    """Turn a list of decoded token indices into a one-part music21 score.

    Only tokens that are MIDI pitches (0..127) become notes. The control
    symbols (Start, Stop, EndOfFrame) are skipped; previously the Start token
    (128) slipped through the ``< 129`` check and was turned into a note.
    Every note is created with music21's default duration, because the number
    of timesteps a pitch is held is not evaluated yet (see the todos).

    :param seq: sequence of token indices, e.g. the result of ``decode_sequence``
    :param input: optional list of music21 elements placed before the decoded notes
    :return: music21 Score titled 'Title' containing a single part 'part1'
    """
    #todo: take delta & length into account
    #todo: tied notes
    midiPitches = 128  # tokens 0..127 are MIDI pitches, everything above is a control symbol

    notes = []
    for token in seq:
        if token < midiPitches:
            n = music21.note.Note()
            n.pitch.midi = token
            notes.append(n)

    if input is not None:
        notes = input + notes

    piece = stream.Score()
    p1 = stream.Part()
    p1.id = 'part1'

    p1.append(notes)
    piece.insert(0, music21.metadata.Metadata())
    piece.metadata.title = 'Title'
    piece.insert(0, p1)
    return piece

In [22]:
# Decode a continuation for the first training example; the [0:1] slice keeps
# the batch axis.
input_seq = encoder_input_data[0:1]
decoded_sentence = decode_sequence(input_seq)
print('-')
print('Input sentence:', input_seq)
print('Decoded sentence:', decoded_sentence)


# First / second half of the original note list, cut at the same index that
# generateInput computes for its default split=0.5.
x = notes[:int(len(notes)*0.5)]
y = notes[int(len(notes)*0.5):]  # NOTE(review): `y` is not used in the visible cells
# NOTE(review): wildcard import from a project module in the middle of a cell.
# It may shadow names defined in this notebook (e.g. decodeSequence) -- confirm
# whether it is still needed.
from tools.encodeNotes import *
# Put the encoder's half of the song and a half rest in front of the decoded notes.
p = decodeSequence(decoded_sentence, x + [music21.note.Rest(type='half')])
#p.show()

-
Input sentence: [[[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]
Decoded sentence: [69, 69, 67, 67, 65, 65, 65, 64, 64, 64, 64, 62, 62, 62, 62, 60, 129]


Now let's have a look and play the piece. The first half (until the half rest) is the input. The second half is generated by the network.

In [23]:
# Show the combined piece (input half, half rest, decoded notes) using music21's 'midi' output format.
p.show('midi')
#p.show()