In [1]:
import random
import time

import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
import sys
sys.path.append('..')
from data.load_data import *
from processing.utils import *

#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Seed every random number generator this notebook has in scope so that
# re-running it is as repeatable as possible.
seed = 2022
random.seed(seed)         # Python's stdlib RNG (imported above, previously left unseeded)
np.random.seed(seed)      # NumPy's legacy global RNG
tf.random.set_seed(seed)  # TensorFlow's global seed

In [2]:
def read_midi(midi_path):
    """Convert a single MIDI file into an object array of events.

    The file is pushed through the project's preprocessing helpers in this
    order: ``read_items`` -> ``quantize_items`` -> ``extract_chords`` ->
    ``group_items`` -> ``item2event``.

    Args:
        midi_path: Location of the MIDI file; handed unchanged to ``read_items``.

    Returns:
        A ``np.ndarray`` with ``dtype=object`` built from the output of
        ``item2event``.
    """
    raw_notes, tempo_items = read_items(midi_path)
    quantized_notes = quantize_items(raw_notes)

    # The `end` of the last quantized note is the time bound given to group_items.
    last_end = quantized_notes[-1].end

    # Chords are derived from the quantized notes; the merged list keeps the
    # order chords, then tempo items, then notes.
    merged_items = extract_chords(quantized_notes) + tempo_items + quantized_notes
    grouped = group_items(merged_items, last_end)
    return np.array(item2event(grouped), dtype=object)

def transform_midi(midi_paths):
    """Convert several MIDI files into a 1-D object array of event sequences.

    Args:
        midi_paths: Iterable of MIDI file paths; each one is passed to
            ``read_midi``.

    Returns:
        A ``np.ndarray`` of shape ``(len(midi_paths),)`` with ``dtype=object``
        whose i-th element is the event sequence of the i-th path.
    """
    sequences = [read_midi(path) for path in midi_paths]

    # Fill a pre-allocated object array slot by slot. Handing the list directly
    # to np.asarray(..., dtype=object) produces a 2-D array whenever all
    # sequences happen to have the same length (or there is only one file), so
    # the result shape would depend on the data instead of always being 1-D.
    dataset = np.empty(len(sequences), dtype=object)
    for idx, sequence in enumerate(sequences):
        dataset[idx] = sequence
    return dataset

In [3]:
# Look up the files of the "MOZART_SMALL" dataset (presumably MIDI paths, given
# they are fed to transform_midi) and convert them into event sequences.
print("Loading data...")
midi_paths = get_all_files(dataset_name="MOZART_SMALL")
dataset = transform_midi(midi_paths=midi_paths)
# Show the shape of the resulting array as a quick sanity check.
print(f">> {dataset.shape}")

Loading data...
>> (21,)


In [4]:
# Pool the events of every piece into one flat stream.
all_events = np.concatenate(dataset).flat

# np.unique reports, for each distinct event, the position of its first
# occurrence in the stream; those positions are used to pick the events.
_, indices = np.unique(list(all_events), return_index=True)
unique_events = np.asarray([all_events[position] for position in indices], dtype=object)
print(f"Unique Events: {len(unique_events)}")

# Forward (event -> integer code) and reverse (integer code -> event) lookups.
event2int = {event: code for code, event in enumerate(unique_events)}
int2events = dict(zip(event2int.values(), event2int.keys()))

Unique Events: 105874


In [5]:
sequenceLength = 32


def _sliding_windows(codes, length):
    """Split a code sequence into fixed-length windows and their next codes.

    Args:
        codes: List of integer codes.
        length: Number of codes per input window.

    Returns:
        ``(inputs, targets)`` where ``inputs[i]`` is ``codes[i:i + length]`` and
        ``targets[i]`` is ``codes[i + length]``, the code that follows that
        window. Both lists are empty when ``codes`` has at most ``length`` items.
    """
    n_windows = len(codes) - length
    inputs = [codes[i:i + length] for i in range(n_windows)]
    # The target is the code right AFTER the window. The earlier `codes[i + 1]`
    # pointed at the second element of the input window itself.
    targets = [codes[i + length] for i in range(n_windows)]
    return inputs, targets


train_notes, target_notes = [], []
train_tempo, target_tempo = [], []

for piece in dataset:
    # Split the piece into two code streams based on the event name:
    # names containing 'Note' or 'Chord' go to the note stream, the rest to
    # the tempo stream.
    note_list = []
    tempo_list = []
    for e in piece:
        if 'Note' in e.name or 'Chord' in e.name:
            note_list.append(event2int[e])
        else:
            tempo_list.append(event2int[e])

    note_windows, note_targets = _sliding_windows(note_list, sequenceLength)
    train_notes.extend(note_windows)
    target_notes.extend(note_targets)

    tempo_windows, tempo_targets = _sliding_windows(tempo_list, sequenceLength)
    train_tempo.extend(tempo_windows)
    target_tempo.extend(tempo_targets)

# NOTE(review): the two streams are windowed independently, so the number of
# note samples and tempo samples may differ — confirm before feeding both to
# a single model.
train_notes = np.asarray(train_notes)
train_tempo = np.asarray(train_tempo)
# Targets are converted as well so inputs and targets share the same container type.
target_notes = np.asarray(target_notes)
target_tempo = np.asarray(target_tempo)

In [6]:
#sequenceLength = 32

#train_events = []
#for s in range(len(dataset)):
#    event_list = [event2int[e] for e in dataset[s]]
#    for i in range(len(event_list) - sequenceLength):
#        train_events.append(event_list[i:i+sequenceLength])

#train_events = np.asarray(train_events)

In [7]:
n_samples = train_notes.shape[0]

# Number of distinct classes for each stream. Both streams are encoded with the
# shared `event2int` table, so every code lies in [0, len(event2int)). The
# earlier `train_notes.shape[1]` was the window length (32), not a vocabulary
# size, yet these values are used as the Embedding input_dim and the softmax
# width when the model is built.
n_notes = len(event2int)
n_tempo = len(event2int)

inputDim = n_notes * sequenceLength

# Width of the embedding vectors.
embedDim = 64

train_notes.shape

(119279, 32)

In [8]:
# Two integer-code inputs: one window of note codes, one window of tempo codes.
note_input = tf.keras.layers.Input(shape = (None,))
tempo_input = tf.keras.layers.Input(shape = (None,))

# Map each integer code to a dense vector of size embedDim.
note_embedding = tf.keras.layers.Embedding(n_notes, embedDim, input_length = sequenceLength)(note_input)
tempo_embedding = tf.keras.layers.Embedding(n_tempo, embedDim, input_length = sequenceLength)(tempo_input)

# Join the two embedded sequences along axis 1 (the time axis), i.e. the tempo
# window is appended after the note window.
merge_layer = tf.keras.layers.Concatenate(axis=1)([note_embedding, tempo_embedding])

# Each training sample has exactly one target per stream (the next code), so
# the LSTM must emit only its final state. With return_sequences=True the
# heads produced one distribution per timestep, which does not match the
# targets.
# NOTE(review): the NotImplementedError recorded for this cell ("Cannot convert
# a symbolic Tensor ... to a numpy array") looks like a TensorFlow/NumPy
# version mismatch rather than a problem in this code — confirm the environment.
lstm_layer = tf.keras.layers.LSTM(512, return_sequences=False)(merge_layer)

# Shared hidden projection feeding both output heads.
dense_layer = tf.keras.layers.Dense(256)(lstm_layer)

# One softmax head per stream.
note_output = tf.keras.layers.Dense(n_notes, activation = 'softmax')(dense_layer)
tempo_output = tf.keras.layers.Dense(n_tempo, activation = 'softmax')(dense_layer)

# Two-input, two-output model.
lstm = tf.keras.Model(inputs = [note_input, tempo_input], outputs = [note_output, tempo_output])

NotImplementedError: Cannot convert a symbolic Tensor (lstm/strided_slice:0) to a numpy array.

In [None]:
# Targets are integer class ids (codes from event2int), so use the sparse
# variant of cross-entropy; plain 'categorical_crossentropy' expects one-hot
# encoded targets.
lstm.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')

# Train the model. Targets are passed as arrays; np.asarray is a no-op when
# they already are arrays and converts them when they are Python lists.
# NOTE(review): the note and tempo streams are windowed independently, so the
# two inputs may contain different numbers of samples — confirm they match.
lstm.fit(
    [train_notes, train_tempo],
    [np.asarray(target_notes), np.asarray(target_tempo)],
    epochs=500,
    batch_size=64,
)

In [None]:
# Seed the generator with copies of the first training windows, each with a
# leading batch axis of size 1. Copies are used because the windows are
# overwritten in place during generation.
initial_notes = train_notes[0].copy()[np.newaxis, :]
initial_tempos = train_tempo[0].copy()[np.newaxis, :]

def predictChords(note_sequence, tempo_sequence):
    """Predict the next note code and tempo code for one pair of windows.

    Args:
        note_sequence: Batch of note-code windows passed to ``lstm.predict``.
        tempo_sequence: Batch of tempo-code windows passed to ``lstm.predict``.

    Returns:
        ``(note_code, tempo_code)`` as Python ints: the arg-max class of each
        output head. When a head returns more than one distribution (several
        batch rows or one per timestep), the last one is used.
    """
    # Run the model exactly once. The earlier code wrapped this call in a second
    # lstm.predict(...), feeding the predicted probabilities back in as input.
    predicted_notes, predicted_tempo = lstm.predict([note_sequence, tempo_sequence])

    def _last_step_argmax(probabilities):
        # Flatten all leading axes so the class axis is last, then take the
        # arg-max over classes only (not over the whole flattened output).
        probabilities = np.asarray(probabilities)
        per_step = probabilities.reshape(-1, probabilities.shape[-1])
        return int(np.argmax(per_step[-1]))

    return _last_step_argmax(predicted_notes), _last_step_argmax(predicted_tempo)

# Accumulators for the generated note codes and tempo codes.
new_notes, new_tempos = [], []

# Generate 500 steps: predict the next pair of codes, record it, then slide
# both seed windows one position to the left and place the prediction at the end.
for step in range(500):
    new_note, new_tempo = predictChords(initial_notes, initial_tempos)
    new_notes.append(new_note)
    new_tempos.append(new_tempo)

    # After the roll the last slot holds the old first element; it is
    # overwritten with the new prediction right away.
    initial_notes[0] = np.roll(initial_notes[0], -1)
    initial_notes[0, -1] = new_note
    initial_tempos[0] = np.roll(initial_tempos[0], -1)
    initial_tempos[0, -1] = new_tempo