In [1]:
import random
import time

import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
import sys
sys.path.append('..')
from data.load_data import *
from processing.utils import *

#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

seed = 2022
tf.random.set_seed(seed)
np.random.seed(seed)

In [2]:
def read_midi(midi_path):
    note_items, tempo_items = read_items(midi_path)
    note_items = quantize_items(note_items)
    max_time = note_items[-1].end
    chord_items = extract_chords(note_items)
    items = chord_items + tempo_items + note_items
    groups = group_items(items, max_time)
    events = item2event(groups)
    return np.array(events, dtype=object)

def transform_midi(midi_paths):
    # extract events
    events = []
    for path in midi_paths:
        events.append(read_midi(path))
    return np.asarray(events, dtype=object)

In [3]:
print("Loading data...")
midi_paths = get_all_files(dataset_name="MOZART_SMALL")
dataset = transform_midi(midi_paths=midi_paths)
print(f">> {dataset.shape}")

Loading data...
>> (21,)


In [5]:
# STRUCTURE BASED DATA
event_structs = []

for e in dataset:
    event_struct = []

    for i in range(len(e)-3):
        if e[i].name == 'Bar' and i > 0:
            bar = Event(e[i].name, None, e[i].value, None)
            event_struct.append(tuple([e[i]]))
        elif e[i].name == 'Position' and \
            e[i+1].name == 'Note Velocity' and \
            e[i+2].name == 'Note On' and \
            e[i+3].name == 'Note Duration':
            position = Event(e[i].name, None, e[i].value, None)
            velocity = Event(e[i+1].name, None, e[i+1].value, None)
            pitch = Event(e[i+2].name, None, e[i+2].value, None)
            duration = Event(e[i+3].name, None, e[i+3].value, None)
            event_struct.append(tuple([e[i], e[i+1], e[i+2], e[i+3]]))
        elif e[i].name == 'Position' and e[i+1].name == 'Chord':
            position = Event(e[i].name, None, e[i].value, None)
            chord = Event(e[i+1].name, None, e[i+1].value, None)
            event_struct.append(tuple([e[i], e[i+1]]))
        elif e[i].name == 'Position' and \
            e[i+1].name == 'Tempo Class' and \
            e[i+2].name == 'Tempo Value':
            position = Event(e[i].name, None, e[i].value, None)
            t_class = Event(e[i+1].name, None, e[i+1].value, None)
            t_value = Event(e[i+2].name, None, e[i+2].value, None)
            event_struct.append(tuple([e[i], e[i+1], e[i+2]]))

    event_structs.append(np.asarray(event_struct, dtype=object))

event_structs = np.asarray(event_structs, dtype=object)

In [6]:
all_event_structs = np.asarray(np.concatenate(event_structs), dtype=object).flat
print(f"All Event Structures: {len(all_event_structs)}")

_, indices = np.unique([s for s in all_event_structs], return_index=True)
unique_event_structs = np.asanyarray([all_event_structs[i] for i in indices], dtype=object)
print(f"Unique Event Structures: {len(unique_event_structs)}")

struct2int = dict(zip(unique_event_structs, list(range(0, len(unique_event_structs)))))
int2struct = {i: e for e, i in struct2int.items()}

All Event Structures: 60750
Unique Event Structures: 55983


In [17]:
sequenceLength = 32

train_structs = []

target_structs = []
for i in range(len(event_structs)):
    struct_list = [struct2int[s] for s in event_structs[i]]
    for i in range(len(struct_list) - sequenceLength):
        train_structs.append(struct_list[i:i+sequenceLength])
        target_structs.append(struct_list[i+1])

train_structs = np.asarray(train_structs)
target_structs = np.asarray(target_structs)

In [10]:
n_samples = train_structs.shape[0]
n_structs = train_structs.shape[1]

inputDim = n_structs * sequenceLength

train_structs.shape

(60078, 32)

In [20]:
# SET BASED DATA

# Define input layers
struct_input = tf.keras.layers.Input(shape = train_structs.shape)

# Define LSTM layer
lstm_layer = tf.keras.layers.LSTM(512, return_sequences=True)(struct_input)

# Define dense layer
dense_layer = tf.keras.layers.Dense(256)(lstm_layer)

# Define output layers
set_output = tf.keras.layers.Dense(n_structs, activation = 'softmax')(dense_layer)

# Define model
lstm = tf.keras.Model(inputs = struct_input, outputs = set_output)

In [18]:
# Compile the model
lstm.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Train the model
lstm.fit(train_structs, target_structs, epochs=500, batch_size=64)

ValueError: Error when checking input: expected input_3 to have 3 dimensions, but got array with shape (60078, 32)

In [None]:
#initial_notes = np.expand_dims(train_notes[0,:].copy(), 0)
#initial_tempos = np.expand_dims(train_tempo[0,:].copy(), 0)

#def predictChords(note_sequence, tempo_sequence):
#    predicted_notes, predicted_tempo = lstm.predict(lstm.predict([note_sequence, tempo_sequence]))
#    return np.argmax(predicted_notes), np.argmax(predicted_tempo)

# Define empty lists for generated chords and durations
#new_notes, new_tempos = [], []

# Generate chords and durations using 500 rounds of prediction
#for j in range(500):
#    new_note, new_tempo = predictChords(initial_notes, initial_tempos)
#    new_notes.append(new_note)
#    new_tempos.append(new_tempo)
#    initial_notes[0][:-1] = initial_notes[0][1:]
#    initial_notes[0][-1] = new_note
#    initial_tempos[0][:-1] = initial_tempos[0][1:]
#    initial_tempos[0][-1] = new_tempo

In [None]:
initial_sets = np.expand_dims(train_sets[0,:].copy(), 0)

def predictChords(set_sequence):
    predicted_sets= lstm.predict(set_sequence)
    return np.argmax(predicted_sets)

# Define empty lists for generated chords and durations
new_sets = []

# Generate chords and durations using 500 rounds of prediction
for j in range(500):
    new_set = predictChords(initial_sets)
    new_sets.append(new_set)
    initial_sets[0][:-1] = initial_sets[0][1:]
    initial_sets[0][-1] = new_set

In [None]:
new_sets[:15]

In [None]:
new_events = np.asarray(sum(new_sets,()), dtype=object)

