# Author: Samuel Hickey

### Word-level Music-Generating RNN (each note or chord is treated as a single word)

## Imports

In [1]:
import collections
import glob
import keras
import numpy as np
import pathlib
import pandas as pd
import pretty_midi
import seaborn as sns
import tensorflow as tf
import warnings

from numba import jit
from keras.layers import (BatchNormalization, Bidirectional, Embedding, Dense, Dropout, LSTM)
from keras.utils import timeseries_dataset_from_array
from keras.preprocessing.text import Tokenizer
from utilities.DatasetUtils import (midi_to_notes)

warnings.filterwarnings("ignore")

# Sampling rate for audio playback
_SAMPLING_RATE = 16_000

# Number of consecutive tokens in each window cut from the token stream;
# also passed to the Embedding layer as its input_length.
SEQ_LEN = 25

In [2]:
# Root of the MAESTRO download; MIDI files sit one directory level below it.
data_dir = pathlib.Path('data/maestro-v2.0.0')
# glob.glob returns plain string paths, which later cells index and slice.
filenames = glob.glob(str(data_dir.joinpath('*/*.mid*')))
print('Number of files:', len(filenames))

Number of files: 1282


## Extract the Notes

In [3]:
def midi_to_notes(midi_file: str) -> pd.DataFrame:
    """Convert a MIDI file into a table of chord "words" keyed by onset time.

    Only the first instrument of the file is read. Notes are sorted by onset,
    converted to note names, and notes sharing the same (rounded) onset are
    merged into a single comma-joined token. A ``START`` token is placed at
    time 0 and an ``END`` token 3 seconds after the last onset.

    Note: this definition shadows the ``midi_to_notes`` imported from
    ``utilities.DatasetUtils`` at the top of the file.

    Args:
        midi_file: Path of the MIDI file to parse.

    Returns:
        A DataFrame with columns ``start`` (onset, rounded to 3 decimals) and
        ``pitch`` (sorted note names joined by ``,``), one row per onset.

    Raises:
        IndexError: If the file contains no instruments.
    """
    pm = pretty_midi.PrettyMIDI(midi_file)
    instrument = pm.instruments[0]

    # Sort the notes by start time
    sorted_notes = sorted(instrument.notes, key=lambda note: note.start)

    pitches = ['START']
    starts = [0]
    for note in sorted_notes:
        pitches.append(pretty_midi.note_number_to_name(note.pitch))
        starts.append(round(note.start, 3))

    # Take the last onset from the sorted list rather than from the loop
    # variable, which is unbound when the instrument has no notes.
    last_start = sorted_notes[-1].start if sorted_notes else 0
    pitches.append('END')
    starts.append(round(last_start, 2) + 3)

    result = pd.DataFrame({'pitch': np.array(pitches), 'start': np.array(starts)})
    # Notes with an identical onset form one chord: collect and sort their names.
    result = result.groupby('start').agg({'pitch': sorted}).reset_index()
    result.pitch = result.pitch.str.join(',')
    return result

## Preprocessing

In [4]:
num_files = 50  # len(filenames)
vocab = []
# One notes DataFrame per MIDI file, limited to the first `num_files` files.
all_notes = [midi_to_notes(path) for path in filenames[:num_files]]

In [5]:
# vocab = np.unique(pd.concat(all_notes).pitch.to_numpy())
# ids_from_chars = {v:k for k, v in enumerate(vocab)}
# chars_from_ids = {k:v for k, v in enumerate(vocab)}
# np.save(
#     pathlib.Path('data/preprocessed/vocab_ids_chars.npy'),
#     [vocab, ids_from_chars, chars_from_ids],
#     allow_pickle=True
# )

# all_notes = np.array([padded_song(song) for song in all_notes])
# np.save(
#     pathlib.Path('data/preprocessed/test.npy'),
#     all_notes,
#     allow_pickle=True
# )

# all_notes = np.load(pathlib.Path('data/preprocessed/test.npy'), allow_pickle=True)
# Restore the cached vocabulary together with its token->id and id->token tables.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load files
# that this project produced itself.
vocab, ids_from_chars, chars_from_ids = np.load(
    file=pathlib.Path('data/preprocessed/vocab_ids_chars.npy'),
    allow_pickle=True,
)
def split_input_target(seq):
    """Split a batch of token windows into model inputs and one-hot targets.

    Every token except the last in each window becomes the input; the last
    token is the prediction target, one-hot encoded over ``len(vocab)+1``
    classes.

    Args:
        seq: Batch of windows, indexed as ``[batch, position]``.

    Returns:
        Tuple ``(inputs, one_hot_targets)``.
    """
    context = seq[:, :-1]
    next_token = seq[:, -1]
    to_one_hot = keras.layers.CategoryEncoding(num_tokens=len(vocab)+1, output_mode="one_hot")
    return context, to_one_hot(next_token)
# Flatten every song into one stream of integer token ids. A token missing
# from the saved vocabulary raises KeyError.
all_notes = (
    pd.concat(all_notes)
    .pitch
    .map(ids_from_chars.__getitem__)
    .reset_index(drop=True)
)
# Slide a SEQ_LEN-wide window over the stream, batch the windows, then split
# each window into (inputs, one-hot target).
dataset = timeseries_dataset_from_array(
    data=all_notes,
    targets=None,
    sequence_length=SEQ_LEN,
    batch_size=64,
).map(split_input_target)

In [7]:
# tokenizer = Tokenizer(filters='!"$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n')
# tokenizer.fit_on_texts(pd.concat(all_notes).pitch.to_numpy().astype(str))
# sequences = np.array([tokenizer.texts_to_sequences(i.pitch.to_numpy().astype(str)) for i in all_notes])
# vocab_size = len(tokenizer.word_index) + 1

# THIS CELL IS FOR CHARACTER LEVEL

In [7]:
# Next-token classifier: embedding -> two stacked bidirectional LSTMs with
# dropout -> dense head with a softmax over len(vocab)+1 classes.
# NOTE(review): input_length is SEQ_LEN, but split_input_target feeds windows
# of SEQ_LEN-1 tokens during training - confirm this mismatch is intended.
model = keras.models.Sequential()
model.add(Embedding(input_dim=len(vocab)+1, output_dim=128, input_length=SEQ_LEN))
model.add(Bidirectional(LSTM(units=128, return_sequences=True)))
model.add(Dropout(rate=.2))
model.add(Bidirectional(LSTM(units=128)))
model.add(Dropout(rate=.2))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=len(vocab)+1, activation='softmax'))

# Save weights after every epoch; stop once the training loss has not improved
# for 5 epochs and roll back to the best weights seen.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        save_weights_only=True,
        filepath='./training_checkpoints/ckpt_{epoch}',
    ),
    keras.callbacks.EarlyStopping(
        restore_best_weights=True,
        verbose=1,
        patience=5,
        monitor='loss',
    ),
]

# Adam with element-wise gradient clipping at +/-0.5.
optimizer = tf.keras.optimizers.Adam(clipvalue=.5, learning_rate=0.001)

model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [None]:
# Train for up to 200 epochs; the EarlyStopping callback may end the run sooner.
history = model.fit(
    x=dataset,
    callbacks=callbacks,
    epochs=200,
)

In [None]:
# Persist the full trained model under models/.
model.save(filepath=pathlib.Path('models/word_based_dense256_batch64'))

In [8]:
# Replace the in-memory model with one restored from disk.
# NOTE(review): this reads 'models/word_based', not the
# 'models/word_based_dense256_batch64' path written by the save cell - confirm
# which saved model is intended.
# NOTE(review): custom_objects normally maps names to custom classes or
# functions; passing the optimizer instance under 'optimizer' looks
# unnecessary - confirm before relying on it.
model = keras.models.load_model('models/word_based', custom_objects={'optimizer':optimizer})

In [51]:
# Grab the index of the output, y, by bringing it to numpy wrapped in a list, and taking index(1.0)
# Try adding prefetch to see if that speeds up training without using too much memory

In [9]:
# INPUTS FOR PREDICTION
# Window the first SEQ_LEN*3 token ids and run the model on the first batch
# of windows only; the result is converted to a NumPy array.
tmp = model(
    next(iter(timeseries_dataset_from_array(
        data=all_notes[:SEQ_LEN*3].to_numpy(),
        targets=None,
        sequence_length=SEQ_LEN,
        batch_size=32,
    )))
).numpy()

In [42]:
# Inspect the prediction array. The recorded output is (32, 11619) -
# presumably (windows in the batch, vocabulary size + 1); confirm against the
# loaded model.
tmp.shape

(32, 11619)

In [38]:
# Greedily decode each predicted distribution into a timed chord.
step = .2        # seconds between consecutive generated chords
duration = .1    # seconds each generated note is held
notes = []
j = 0.75         # running onset; the first chord lands one step later
for probs in tmp:
    j += step
    # np.argmax returns the first index of the maximum, matching
    # list(...).index(max) without building an 11k-element Python list per row.
    token = chars_from_ids[int(np.argmax(probs))]
    # midi_to_notes inserts START/END markers that can end up inside tokens;
    # they are not note names and would make note_name_to_number raise.
    pitches = [name for name in token.split(',') if name not in ('START', 'END')]
    if not pitches:
        # The token held only markers: keep the time slot but emit no note.
        continue
    notes.append((pitches, j, j + duration, duration, step))
# One row per individual pitch; pitches of the same chord share the same index.
notes_df = pd.DataFrame(notes, columns=['pitch', 'start', 'end', 'duration', 'step']).explode('pitch')
notes_df

Unnamed: 0,pitch,start,end,duration,step
0,A#3,0.95,1.05,0.1,0.2
0,A#6,0.95,1.05,0.1,0.2
0,F3,0.95,1.05,0.1,0.2
1,C5,1.15,1.25,0.1,0.2
1,F#5,1.15,1.25,0.1,0.2
...,...,...,...,...,...
30,G#5,6.95,7.05,0.1,0.2
31,A#1,7.15,7.25,0.1,0.2
31,A#2,7.15,7.25,0.1,0.2
31,D6,7.15,7.25,0.1,0.2


In [39]:
def df_to_midi(
        notes: pd.DataFrame,
        out_file: str, 
        instrument_name: str,
        velocity: int = 90,  # note loudness
) -> pretty_midi.PrettyMIDI:
    """Render a table of notes as a single-instrument MIDI file.

    Args:
        notes: Frame with columns ``pitch`` (note name), ``start`` and ``end``;
            one row per note.
        out_file: Path the MIDI file is written to.
        instrument_name: Instrument name, resolved to a MIDI program via
            ``pretty_midi.instrument_name_to_program``.
        velocity: Velocity applied to every note.

    Returns:
        The ``PrettyMIDI`` object that was written to ``out_file``.
    """
    program = pretty_midi.instrument_name_to_program(instrument_name)
    track = pretty_midi.Instrument(program=program)

    for pitch_name, onset, offset in zip(notes['pitch'], notes['start'], notes['end']):
        track.notes.append(
            pretty_midi.Note(
                velocity=velocity,
                pitch=int(pretty_midi.note_name_to_number(pitch_name)),
                start=onset,
                end=offset,
            )
        )

    midi = pretty_midi.PrettyMIDI()
    midi.instruments.append(track)
    midi.write(out_file)
    return midi

In [40]:
# Reuse the instrument of the first dataset file for the generated piece.
file = pretty_midi.PrettyMIDI(filenames[0])
instrument_name = pretty_midi.program_to_instrument_name(
    file.instruments[0].program
)
# Write the generated notes to 'tmp.midi'; the returned object is the cell's
# displayed value.
df_to_midi(notes=notes_df, out_file='tmp.midi', instrument_name=instrument_name)

<pretty_midi.pretty_midi.PrettyMIDI at 0x1994d88ca00>