In [89]:
import tensorflow as tf
import numpy as np
import pretty_midi as midi
import os
import fluidsynth
import matplotlib.pyplot as plt
from tensorflow import keras 
from tensorflow.keras import layers
from matplotlib import pyplot as plt
import time
import pandas as pd

In [91]:
#midi_data is a list of PrettyMIDI objects
midi_data = []
directory = "data"

for filename in os.listdir(directory):
    midi_data.append(midi.PrettyMIDI(os.path.join(directory, filename)))
    

In [142]:
#Pull notes from midi file
velocities = []
pitches = []
starts = []
ends = []
durations = []
steps = []
#Iterate through all midi objects, pull notes and zip together in a list 
for midi_obj in midi_data:
    piano = sorted(midi_obj.instruments[0].notes, key = lambda note: note.start)
    prev_start = piano[0].start
    for note in piano:
        velocities.append(note.velocity)
        pitches.append(note.pitch)
        starts.append(note.start)
        ends.append(note.end)
        durations.append(note.get_duration())
        steps.append(note.start - prev_start)
        prev_start = note.start


note_data_lists = list(zip(velocities, pitches, starts, ends, durations, steps))

#Move zipped lists into dataframe and sort by start time so notes are chronological 
note_data = pd.DataFrame(note_data_lists, columns = ['Velocities', 'Pitches', 'Starts', 'Ends', 'Durations', 'Steps'])
note_data


Unnamed: 0,Velocities,Pitches,Starts,Ends,Durations,Steps
0,80,69,0.980469,1.074219,0.093750,0.000000
1,57,45,0.993490,1.407552,0.414062,0.013021
2,72,71,1.065104,1.199219,0.134115,0.071615
3,69,72,1.188802,1.261719,0.072917,0.123698
4,73,76,1.437500,1.519531,0.082031,0.248698
...,...,...,...,...,...,...
22292,62,37,441.944792,443.887500,1.942708,0.022917
22293,74,65,442.621875,443.868750,1.246875,0.677083
22294,79,66,443.769792,449.291667,5.521875,1.147917
22295,71,54,443.776042,449.029167,5.253125,0.006250


In [126]:
train_data = note_data.to_numpy()
train_data = np.delete(train_data, 2, axis = 1)
train_data = np.delete(train_data, 3, axis = 1)


(22297, 4)

In [133]:
pitches = 128
num_features = 4

In [134]:
init_dataset = tf.data.Dataset.from_tensor_slices(train_data)
init_dataset.element_spec

TensorSpec(shape=(4,), dtype=tf.float64, name=None)

In [135]:
seq_length = 25
windows = init_dataset.window(seq_length + 1, shift = 1, stride = 1, drop_remainder = True)
map_func = lambda x : x.batch(seq_length + 1, drop_remainder = True)
flattened_init_dataset = windows.flat_map(map_func)

TensorSpec(shape=(26, 4), dtype=tf.float64, name=None)

In [130]:
def label_make(x):
    input_data = x[:-1]
    label_value = x[-1]
    label_dict = {
        key : label_value[i] for i, key in enumerate(['Velocity', 'Pitch', 'Duration', 'Step'])
    }
    return input_data, label_dict


In [136]:
nonbatched_indata = flattened_init_dataset.map(label_make, num_parallel_calls = tf.data.AUTOTUNE)

In [137]:
batch_size = 64
#Shuffle populates a buffer of defined size with dataset and randomly samples it to shuffle examples 
#Batch takes dataset and divides into batches of specified size 
#Cache, assuming complete iteration of dataset, will cache the values and use cached data for further iteration 
#Prefetch allows for faster processing of data by preprocessing future elements while simultaneously processing currnet 
    # - Autotune parameter allows tensorflow to dynamically adjust buffer size for prefetch  as necessary
batched_indata = (nonbatched_indata
                    .shuffle(len(note_data))
                    .batch(batch_size, drop_remainder = True)
                    .cache()
                    .prefetch(tf.data.AUTOTUNE)
                )

batched_indata.element_spec

(TensorSpec(shape=(64, 25, 4), dtype=tf.float64, name=None),
 {'Velocity': TensorSpec(shape=(64,), dtype=tf.float64, name=None),
  'Pitch': TensorSpec(shape=(64,), dtype=tf.float64, name=None),
  'Duration': TensorSpec(shape=(64,), dtype=tf.float64, name=None),
  'Step': TensorSpec(shape=(64,), dtype=tf.float64, name=None)})

In [141]:
input_shape = (seq_length, 4)
alpha = 0.003


inputs = tf.keras.Input(input_shape)
x = tf.keras.layers.LSTM(128)(inputs)

#Build LSTM network with Keras Functional API

outputs = {
    'velocity': tf.keras.layers.Dense(128, name = 'velocity')(x),
    'pitch': tf.keras.layers.Dense(128, name = 'pitch')(x),
    'duration': tf.keras.layers.Dense(1, name = 'duration')(x),
    'step': tf.keras.layers.Dense(1, name = 'step')(x)
}

model = tf.keras.Model(inputs, outputs, name = 'Chad')

loss = {
    'velocity': tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    'pitch': tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True),
    'duration': tf.keras.losses.MeanSquaredError(),
    'step': tf.keras.losses.MeanSquaredError()
}

model.compile(
            loss = loss, 
            loss_weights = {
                'pitch': 0.05,
                'velocity': 1,
                'duration': 1,
                'step': 1
            },
            optimizer = tf.keras.optimizers.Adam(learning_rate = alpha)
        )
        
model.summary()


Model: "Chad"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_21 (InputLayer)          [(None, 25, 4)]      0           []                               
                                                                                                  
 lstm_20 (LSTM)                 (None, 128)          68096       ['input_21[0][0]']               
                                                                                                  
 duration (Dense)               (None, 1)            129         ['lstm_20[0][0]']                
                                                                                                  
 pitch (Dense)                  (None, 128)          16512       ['lstm_20[0][0]']                
                                                                                               

In [139]:
model.evaluate(batched_indata, return_dict = True)

ValueError: in user code:

    File "/opt/anaconda3/envs/gilbert/lib/python3.10/site-packages/keras/engine/training.py", line 1557, in test_function  *
        return step_function(self, iterator)
    File "/opt/anaconda3/envs/gilbert/lib/python3.10/site-packages/keras/engine/training.py", line 1546, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/anaconda3/envs/gilbert/lib/python3.10/site-packages/keras/engine/training.py", line 1535, in run_step  **
        outputs = model.test_step(data)
    File "/opt/anaconda3/envs/gilbert/lib/python3.10/site-packages/keras/engine/training.py", line 1502, in test_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/opt/anaconda3/envs/gilbert/lib/python3.10/site-packages/keras/engine/training.py", line 987, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/opt/anaconda3/envs/gilbert/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 480, in update_state
        self.build(y_pred, y_true)
    File "/opt/anaconda3/envs/gilbert/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 393, in build
        self._metrics = tf.__internal__.nest.map_structure_up_to(

    ValueError: The two structures don't have the same sequence length. Input structure has length 8, while shallow structure has length 4.


In [119]:
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath='./training_checkpoints/ckpt_{epoch}',
        save_weights_only=True),
    tf.keras.callbacks.EarlyStopping(
        monitor='loss',
        patience=5,
        verbose=1,
        restore_best_weights=True),
]