In [1]:
import pickle
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.models import load_model

# Local project packages live one directory up; extend the path before importing them.
sys.path.append('..')
from data.load_data import *
from processing.utils import *

#print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# Seed both TensorFlow's and NumPy's global random generators with the same value
# so that the random draws made later in the notebook start from a known state.
seed = 2022
tf.random.set_seed(seed)
np.random.seed(seed)

# Absolute form of the relative directory 'resource/gen2/v1' (resolved against the
# current working directory). The data, dictionary, model and output file paths
# used further down are all built underneath it.
checkpoint_path = Path('resource/gen2/v1').absolute()

In [2]:
def read_midi(midi_path):
    """Read one MIDI file and return its events as a 1-D object array.

    The file is read into note and tempo items, the notes are quantized,
    chords are extracted from the quantized notes, and the combined items are
    grouped (up to the end time of the last note item) and converted to events.
    """
    notes, tempos = read_items(midi_path)
    notes = quantize_items(notes)

    # End time of the last note item bounds the grouping step below.
    last_end = notes[-1].end

    chords = extract_chords(notes)
    merged_items = chords + tempos + notes
    grouped = group_items(merged_items, last_end)

    return np.array(item2event(grouped), dtype=object)

def transform_midi(midi_paths):
    """Read a series of MIDI files into a collection of event sequences.

    Each path is converted with ``read_midi``. Conversion is best-effort: a file
    that raises an ordinary exception is reported and skipped so one bad file
    does not abort the whole run.

    Args:
        midi_paths: iterable of paths to MIDI files.

    Returns:
        An object-dtype NumPy array holding one event sequence per file that
        was read successfully (empty when nothing could be read).
    """
    events = []
    for path in midi_paths:
        try:
            events.append(read_midi(path))
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt / SystemExit still stop a long conversion run.
        except Exception as exc:
            # Report why the file failed instead of hiding the reason.
            print(f"Failed: {path} ({type(exc).__name__}: {exc})")
    return np.asarray(events, dtype=object)

In [3]:
# STRUCTURE BASED DATA
# Group events into a series of event structures:
# Structure Component 1: Bar
# Structure Component 2: Position, Note Velocity, Note On, Note Duration
# Structure Component 3: Position, Chord
# Structure Component 4: Position, Tempo Class, Tempo Value
def build_structures(midi_paths):
    """Group the events of every MIDI file into per-bar event structures.

    Each file's event sequence is scanned once. Recognised groups are copied
    into the pending structure as fresh ``Event`` objects that keep only the
    name and value (the other two constructor arguments are passed as None):

    * ``Position, Note Velocity, Note On, Note Duration``
    * ``Position, Chord``
    * ``Position, Tempo Class, Tempo Value``

    A ``Bar`` event at any index other than 0 is appended to the pending
    structure and closes it; the closed structure is stored as a tuple.
    Whatever is still pending at the end of the sequence is stored as a final
    structure.

    Unlike a fixed ``range(len(events) - 3)`` scan, every event index is
    examined and each look-ahead is bounds-checked, so a Bar, chord or tempo
    group in the last three positions of a sequence is no longer dropped.

    Args:
        midi_paths: iterable of MIDI file paths, forwarded to ``transform_midi``.

    Returns:
        A 1-D object array with one entry per successfully read file; each
        entry is a 1-D object array of event-structure tuples.
    """
    # Event names that must directly follow a 'Position' event, tried in order.
    position_patterns = (
        ('Note Velocity', 'Note On', 'Note Duration'),
        ('Chord',),
        ('Tempo Class', 'Tempo Value'),
    )

    def strip(event):
        # Keep only the name and value of an event.
        return Event(event.name, None, event.value, None)

    def object_array(items):
        # Fill element by element so the result is always 1-D, even when all
        # items happen to share the same length (np.asarray would then build a
        # multi-dimensional array instead of an array of tuples/arrays).
        arr = np.empty(len(items), dtype=object)
        for idx, item in enumerate(items):
            arr[idx] = item
        return arr

    dataset = transform_midi(midi_paths=midi_paths)
    event_structs = []

    for events in dataset:
        total = len(events)
        event_struct = []
        event_set = []

        for i in range(total):
            name = events[i].name
            if name == 'Bar':
                # The very first event is skipped when it is a Bar: there is
                # nothing before it to close.
                if i > 0:
                    event_set.append(strip(events[i]))
                    event_struct.append(tuple(event_set))
                    event_set = []
            elif name == 'Position':
                for pattern in position_patterns:
                    stop = i + 1 + len(pattern)
                    if stop > total:
                        continue  # not enough events left for this pattern
                    if all(events[i + 1 + k].name == expected
                           for k, expected in enumerate(pattern)):
                        event_set.extend(strip(events[k]) for k in range(i, stop))
                        break

        # Flush events collected after the last Bar.
        if event_set:
            event_struct.append(tuple(event_set))

        event_structs.append(object_array(event_struct))

    return object_array(event_structs)

In [4]:
def build_lookups(midi_paths, training_set_path, dictionary_path):
    """Build the event structures and their integer lookup tables, and pickle both.

    Args:
        midi_paths: MIDI file paths forwarded to ``build_structures``.
        training_set_path: file that receives the pickled event structures.
        dictionary_path: file that receives the pickled
            ``[struct2int, int2struct]`` pair.

    Returns:
        ``(event_structs, struct2int, int2struct)`` where ``struct2int`` maps
        each distinct event structure to an integer index and ``int2struct``
        is the reverse mapping.
    """
    event_structs = build_structures(midi_paths=midi_paths)

    with open(training_set_path, 'wb') as out_file:
        pickle.dump(event_structs, out_file, protocol=pickle.HIGHEST_PROTOCOL)

    # Flatten the per-file structures into one sequence of structures.
    flat_structs = np.asarray(np.concatenate(event_structs), dtype=object).flat
    print(f"All Event Structures: {len(flat_structs)}")

    # Index of the first occurrence of each distinct structure.
    first_positions = np.unique(list(flat_structs), return_index=True)[1]
    distinct_structs = np.asanyarray(
        [flat_structs[pos] for pos in first_positions], dtype=object
    )
    print(f"Unique Event Structures: {len(distinct_structs)}")

    # Encode every distinct structure as an integer, and build the reverse table.
    struct2int = {struct: code for code, struct in enumerate(distinct_structs)}
    int2struct = {code: struct for struct, code in struct2int.items()}

    with open(dictionary_path, 'wb') as out_file:
        pickle.dump([struct2int, int2struct], out_file, protocol=pickle.HIGHEST_PROTOCOL)

    return event_structs, struct2int, int2struct

In [62]:
dataset_name = "ADL_MOZART"
training_set_path = f"{checkpoint_path}/data/training_set_{dataset_name}.pkl"
dictionary_path = f"{checkpoint_path}/dictionary/dictionary_{dataset_name}.pkl"

midi_paths = get_all_files(dataset_name=dataset_name)

# To rebuild the training set and lookup dictionaries from the MIDI files
# (instead of loading the cached pickles below), uncomment:
#event_structs, struct2int, int2struct = build_lookups(midi_paths=midi_paths, training_set_path=training_set_path, dictionary_path=dictionary_path)

# Load the cached training set and dictionaries. The files are opened with
# `with` so the handles are closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code; only load files you created yourself.
with open(training_set_path, 'rb') as handle:
    event_structs = pickle.load(handle)
with open(dictionary_path, 'rb') as handle:
    struct2int, int2struct = pickle.load(handle)

In [63]:
# Build training windows of `sequenceLength` consecutive event structures
# (encoded as integers). The target of each window is the event structure that
# immediately follows the window, i.e. the element at index start + sequenceLength.
sequenceLength = 32

train_structs = []
target_structs = []
for piece_structs in event_structs:
    struct_list = [struct2int[s] for s in piece_structs]
    # A distinct loop variable: the window start must not shadow the piece loop.
    for window_start in range(len(struct_list) - sequenceLength):
        window_end = window_start + sequenceLength
        train_structs.append(struct_list[window_start:window_end])
        # Previously struct_list[i+1] was used, which is the second element of
        # the input window rather than the structure that comes after it.
        target_structs.append(struct_list[window_end])

# reshape(-1, sequenceLength) keeps the array 2-D even when no piece is long
# enough to yield a window, so later `.shape[1]` lookups stay valid.
train_structs = np.asarray(train_structs, dtype=np.int64).reshape(-1, sequenceLength)
target_structs = np.asarray(target_structs, dtype=np.int64)

train_structs.shape

(1557, 32)

In [7]:
def train_model(train_structs, target_structs, output_size, output_file,
                epochs=100, batch_size=64):
    """Build, train and save the LSTM that predicts the next event structure.

    The network is Input -> LSTM(512) -> Dense(256) -> Dense(output_size, softmax).
    Each training window is fed as a single timestep whose features are the
    window's integer-encoded structures (input shape ``(1, window_length)``).

    Args:
        train_structs: 2-D integer array, one training window per row.
        target_structs: integer class index for each training window.
        output_size: number of output classes (size of the softmax layer).
        output_file: path the trained model is saved to.
        epochs: number of training epochs (default 100, the previous fixed value).
        batch_size: training batch size (default 64, the previous fixed value).

    Returns:
        The trained Keras model.
    """
    window_length = train_structs.shape[1]

    # Layers are created in the same order as before so Keras assigns the same
    # automatic layer names.
    struct_input = tf.keras.layers.Input(shape=(1, window_length))
    # NOTE(review): return_sequences=True with a single timestep makes every
    # downstream output carry an extra length-1 axis -- confirm this is intended.
    lstm_layer = tf.keras.layers.LSTM(512, return_sequences=True)(struct_input)
    dense_layer = tf.keras.layers.Dense(256)(lstm_layer)
    struct_output = tf.keras.layers.Dense(output_size, activation='softmax')(dense_layer)

    lstm = tf.keras.Model(inputs=struct_input, outputs=struct_output)
    lstm.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop')

    # Add the single-timestep axis expected by the input layer.
    training_x = train_structs.reshape((train_structs.shape[0], 1, window_length))
    training_y = target_structs

    lstm.fit(training_x, training_y, epochs=epochs, batch_size=batch_size)

    lstm.save(output_file)

    return lstm

In [57]:
# File holding the saved model for this dataset and window length.
model_file = f"{checkpoint_path}/model/lstm_{dataset_name}_s{sequenceLength}.h5"

# To train a new model instead of loading the saved one, uncomment:
#lstm = train_model(train_structs=train_structs, target_structs=target_structs, output_size=len(struct2int.keys()), output_file=model_file)

# Restore the saved model without compiling it; 'Functional' is mapped to the
# Keras Model class through custom_objects.
lstm = load_model(
    model_file,
    custom_objects={'Functional': tf.keras.models.Model},
    compile=False,
)

In [58]:
# Print the loaded model's layers, output shapes and parameter counts.
lstm.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1, 32)]           0         
_________________________________________________________________
lstm (LSTM)                  (None, 1, 512)            1116160   
_________________________________________________________________
dense (Dense)                (None, 1, 256)            131328    
_________________________________________________________________
dense_1 (Dense)              (None, 1, 2936)           754552    
Total params: 2,002,040
Trainable params: 2,002,040
Non-trainable params: 0
_________________________________________________________________


In [11]:
def init_structs(train_structs, rnd_idx, is_rand=False):
    """Create the initial model input window with shape ``(1, 1, window_length)``.

    Args:
        train_structs: 2-D array of training windows.
        rnd_idx: row of ``train_structs`` to copy when ``is_rand`` is False.
        is_rand: when True, ignore ``rnd_idx`` and draw every entry uniformly
            from ``[0, len(struct2int))`` instead.

    Returns:
        The seed window with two leading length-1 axes added.
    """
    if not is_rand:
        # Copy so later in-place edits never touch the training data.
        seed_window = train_structs[rnd_idx, :].copy()
    else:
        # One independent draw per window slot.
        seed_window = [
            np.random.randint(0, len(struct2int.values()))
            for _ in range(train_structs.shape[1])
        ]

    batched = np.expand_dims(seed_window, 0)
    target_shape = (1, batched.shape[0], batched.shape[1])
    return batched.reshape(*target_shape)

def predictChords(struct_sequence):
    """Run the module-level ``lstm`` on ``struct_sequence`` and return the
    index of the largest value in the (flattened) prediction."""
    return np.argmax(lstm.predict(struct_sequence))

In [55]:
def generate(is_rand=False, num_structs=20):
    """Generate a sequence of event structures and write it out as a MIDI file.

    A seed window is taken from a randomly chosen row of ``train_structs`` (or
    drawn at random when ``is_rand`` is True). The model then predicts one
    structure at a time; after each prediction the window is shifted left by
    one slot and the prediction is placed in the last slot.

    Args:
        is_rand: use a random seed window instead of a training window. The
            randomly chosen row index is still printed and used in the output
            file name in that case.
        num_structs: number of event structures to generate (default 20, the
            previous fixed value).
    """
    from itertools import chain

    rnd_idx = np.random.randint(0, train_structs.shape[0])
    print(f"Index: {rnd_idx}")

    window = init_structs(train_structs, rnd_idx, is_rand)
    new_structs_list = []

    for _ in range(num_structs):
        new_struct = predictChords(window)
        new_structs_list.append(new_struct)
        # Slide the window: drop the oldest entry, append the new prediction.
        window[0, 0, :-1] = window[0, 0, 1:]
        window[0, 0, -1] = new_struct

    new_structs = [int2struct[s] for s in new_structs_list]
    # Flatten the structure tuples into one tuple of events in linear time
    # (sum(..., ()) re-copies the accumulated tuple on every addition).
    new_events = np.asarray(tuple(chain.from_iterable(new_structs)), dtype=object)

    output_path = f"{checkpoint_path}/outputs/sample_{dataset_name}_s{sequenceLength}_{rnd_idx}.mid"
    events_to_midi(new_events, output_path)

In [67]:
# Generate a sample starting from a randomly drawn seed window (is_rand=True).
generate(True)

Index: 1066
/Users/alecyu/Desktop/Repos/AppliedDeepLearning/note-zart/models/resource/gen2/v1/outputs/sample_ADL_MOZART_s32_1066.mid
