In [26]:
import random
import time

import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
import sys
sys.path.append('..')
from data.load_data import *
from processing.utils import *

print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

seed = 2022
tf.random.set_seed(seed)
np.random.seed(seed)

Num GPUs Available:  0


In [27]:
# read in a midi file as an array of events
def read_midi(midi_path):
    note_items, tempo_items = read_items(midi_path)
    note_items = quantize_items(note_items)
    max_time = note_items[-1].end
    chord_items = extract_chords(note_items)
    items = chord_items + tempo_items + note_items
    groups = group_items(items, max_time)
    events = item2event(groups)
    return np.array(events, dtype=object)

# read in a series of midi files as a list of sequence of events
def transform_midi(midi_paths):
    events = []
    for path in midi_paths:
        events.append(read_midi(path))
    return np.asarray(events, dtype=object)

In [3]:
print("Loading data...")
midi_paths = get_all_files(dataset_name="MOZART_SMALL")
dataset = transform_midi(midi_paths=midi_paths)
print(f">> {dataset.shape}")

Loading data...
>> (21,)


In [4]:
dataset[0][:10]

array([Event(name=Bar, time=None, value=None, text=1),
       Event(name=Position, time=0, value=1/16, text=0),
       Event(name=Chord, time=0, value=A#:maj, text=A#:maj),
       Event(name=Position, time=0, value=1/16, text=0),
       Event(name=Tempo Class, time=0, value=fast, text=None),
       Event(name=Tempo Value, time=0, value=10, text=None),
       Event(name=Position, time=0, value=1/16, text=0),
       Event(name=Note Velocity, time=0, value=12, text=50/48),
       Event(name=Note On, time=0, value=70, text=70),
       Event(name=Note Duration, time=0, value=15, text=960/960)],
      dtype=object)

In [5]:
# STRUCTURE BASED DATA
# Group events into a series of event structures:
# Structure 1: Bar
# Structure 2: Position, Note Velocity, Note On, Note Duration
# Strucutre 3: Position, Chord
# Structure 4: Position, Tempo Class, Tempo Value
event_structs = []

for e in dataset:
    event_struct = []

    for i in range(len(e)-3):
        if e[i].name == 'Bar' and i > 0:
            bar = Event(e[i].name, None, e[i].value, None)
            event_struct.append(tuple([e[i]]))
        elif e[i].name == 'Position' and \
            e[i+1].name == 'Note Velocity' and \
            e[i+2].name == 'Note On' and \
            e[i+3].name == 'Note Duration':
            position = Event(e[i].name, None, e[i].value, None)
            velocity = Event(e[i+1].name, None, e[i+1].value, None)
            pitch = Event(e[i+2].name, None, e[i+2].value, None)
            duration = Event(e[i+3].name, None, e[i+3].value, None)
            event_struct.append(tuple([e[i], e[i+1], e[i+2], e[i+3]]))
        elif e[i].name == 'Position' and e[i+1].name == 'Chord':
            position = Event(e[i].name, None, e[i].value, None)
            chord = Event(e[i+1].name, None, e[i+1].value, None)
            event_struct.append(tuple([e[i], e[i+1]]))
        elif e[i].name == 'Position' and \
            e[i+1].name == 'Tempo Class' and \
            e[i+2].name == 'Tempo Value':
            position = Event(e[i].name, None, e[i].value, None)
            t_class = Event(e[i+1].name, None, e[i+1].value, None)
            t_value = Event(e[i+2].name, None, e[i+2].value, None)
            event_struct.append(tuple([e[i], e[i+1], e[i+2]]))

    event_structs.append(np.asarray(event_struct, dtype=object))

event_structs = np.asarray(event_structs, dtype=object)

In [6]:
# Encode all event structures as a indices and build a lookup table
all_event_structs = np.asarray(np.concatenate(event_structs), dtype=object).flat
print(f"All Event Structures: {len(all_event_structs)}")

_, indices = np.unique([s for s in all_event_structs], return_index=True)
unique_event_structs = np.asanyarray([all_event_structs[i] for i in indices], dtype=object)
print(f"Unique Event Structures: {len(unique_event_structs)}")

struct2int = dict(zip(unique_event_structs, list(range(0, len(unique_event_structs)))))
int2struct = {i: e for e, i in struct2int.items()}

All Event Structures: 60750
Unique Event Structures: 55983


In [7]:
# Build training sequences of length 64
# Define a list of targets which is the event struture that follows the training sequence
sequenceLength = 64

train_structs = []
target_structs = []
for i in range(len(event_structs)):
    struct_list = [struct2int[s] for s in event_structs[i]]
    for i in range(len(struct_list) - sequenceLength):
        train_structs.append(struct_list[i:i+sequenceLength])
        target_structs.append(struct_list[i+1])

train_structs = np.asarray(train_structs, dtype=int64)[:4096,:]
target_structs = np.asarray(target_structs)[:4096]

train_structs.shape

In [9]:
# SET BASED DATA

# Define input layers
struct_input = tf.keras.layers.Input(shape = (1, train_structs.shape[1]))

# Define LSTM layer
lstm_layer = tf.keras.layers.LSTM(512, return_sequences=True)(struct_input)

# Define dense layer
dense_layer = tf.keras.layers.Dense(256)(lstm_layer)

# Define output layers
struct_output = tf.keras.layers.Dense(len(unique_event_structs), activation = 'softmax')(dense_layer)

# Define model
lstm = tf.keras.Model(inputs = struct_input, outputs = struct_output)

# Compile the model
lstm.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')

lstm.summary()


Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1, 64)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 1, 512)            1181696   
_________________________________________________________________
dense (Dense)                (None, 1, 256)            131328    
_________________________________________________________________
dense_1 (Dense)              (None, 1, 55983)          14387631  
Total params: 15,700,655
Trainable params: 15,700,655
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Train the model
#lstm.fit(train_structs.reshape((train_structs.shape[0], 1, train_structs.shape[1])), target_structs, epochs=100, batch_size=64)

Train on 4096 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100

<tensorflow.python.keras.callbacks.History at 0x7fe089b70290>

In [13]:
# Save the model to file
lstm.save('../checkpoints/event_structure_based_lstm.h5')

In [None]:
from keras.models import load_model

# Load the model from file
lstm = load_model('../checkpoints/event_structure_based_lstm.h5')

In [24]:
# Start by inputing the first event structure from the training data
initial_structs = np.expand_dims(train_structs[0,:].copy(), 0)
print(initial_structs.shape)

# Predict the next event structure
def predict_structs(struct_sequence):
    predicted_structs= lstm.predict(struct_sequence)
    return np.argmax(predicted_structs)

# Define empty lists for generated event structures
new_structs = []

# Generate event structures using 500 rounds of prediction
for j in range(500):
    new_struct = predict_structs(initial_structs)
    new_structs.append(new_struct)
    initial_structs[0][:-1] = initial_structs[0][1:]
    initial_structs[0][-1] = new_struct

(1, 64)


ValueError: Error when checking input: expected input_1 to have 3 dimensions, but got array with shape (1, 64)

In [None]:
new_structs[:15]

In [None]:
new_events = np.asarray(sum(new_structs,()), dtype=object)

