# JAM JENERATION

Starter code taken from this blog post on [jazz improvisation](https://www.hackerearth.com/blog/machine-learning/jazz-music-using-deep-learning/) with [Github repo](https://github.com/shubham3121/music-generation-using-rnn)

In [160]:
import sys
import re 
from collections import Counter

import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import sklearn.metrics
from keras.utils import np_utils

from glob import glob
import IPython
import pickle

import music21

# import play # ERIC: This is me being dumb, doesn't work for me, skipping
# Note: play needs to be imported from pygame

In [161]:
from music21 import converter, instrument, note, chord, stream

In [181]:
# Any directory with .mid files in here is acceptable. We have 3 in Jazz and 26 in Beethoven.

songs = glob('Beethoven/*.mid')
print("Number of songs: {}".format(len(songs)))
print(songs[:5])

Number of songs: 26
['Beethoven/waldstein_1.mid', 'Beethoven/beethoven_opus90_2.mid', 'Beethoven/waldstein_2.mid', 'Beethoven/beethoven_opus90_1.mid', 'Beethoven/waldstein_3.mid']


# Baseline: Logistic Regression

In this super simple baseline, we ignore chords and pretend that all songs are just sequences of individual notes. Given a window of previous notes, we attempt to predict the next one.

### Extract Notes Simply

In [163]:
def get_simple_notes():
    """
    Returns a list of notes comprising our music.
    For chords in our piece, return the note of our top note.
    i.e. [F#5, C#7, C5, etc.]
    """
    notes = []
    for file in songs:
        # converting .mid file to stream object
        midi = converter.parse(file)
        notes_to_parse = []
        try:
            # Given a single stream, partition into a part for each unique instrument
            parts = instrument.partitionByInstrument(midi)
        except:
            pass
        if parts: # if parts has instrument parts 
            notes_to_parse = parts.parts[0].recurse()
        else:
            notes_to_parse = midi.flat.notes
    
        for element in notes_to_parse: 
            if isinstance(element, note.Note):
                # if element is a note, extract pitch
                notes.append(str(element.pitch))
            elif(isinstance(element, chord.Chord)):
                # if element is a chord, append the first note
                notes.append(str(element.pitches[0]))
        print("Processed song {}".format(file))
    with open('data/simple_notes', 'wb') as filepath:
        pickle.dump(notes, filepath)
    
    return notes

In [164]:
simple_notes = get_simple_notes()
print("Number of notes in dataset: {}".format(len(simple_notes)))
print(simple_notes[:10])

Processed song Beethoven/waldstein_1.mid
Processed song Beethoven/beethoven_opus90_2.mid
Processed song Beethoven/waldstein_2.mid
Processed song Beethoven/beethoven_opus90_1.mid
Processed song Beethoven/waldstein_3.mid
Processed song Beethoven/beethoven_opus10_2.mid
Processed song Beethoven/beethoven_opus10_3.mid
Processed song Beethoven/elise.mid
Processed song Beethoven/beethoven_opus10_1.mid
Processed song Beethoven/beethoven_les_adieux_3.mid
Processed song Beethoven/pathetique_1.mid
Processed song Beethoven/mond_1.mid
Processed song Beethoven/beethoven_les_adieux_2.mid
Processed song Beethoven/mond_3.mid
Processed song Beethoven/pathetique_2.mid
Processed song Beethoven/pathetique_3.mid
Processed song Beethoven/mond_2.mid
Processed song Beethoven/beethoven_les_adieux_1.mid
Processed song Beethoven/beethoven_opus22_1.mid
Processed song Beethoven/beethoven_opus22_2.mid
Processed song Beethoven/beethoven_hammerklavier_4.mid
Processed song Beethoven/beethoven_opus22_3.mid
Processed son

In [175]:
simple_note_counts = Counter(simple_notes)
print("Number of distinct notes in dataset: {}".format(len(simple_note_counts)))
print(simple_note_counts.most_common(3))
print("Guess-most-common classifier accuracy: {}".format(simple_note_counts.most_common(1)[0][1]/len(simple_notes)))

Number of distinct notes in dataset: 78
[('C4', 2410), ('G3', 2373), ('E-4', 2266)]
Guess-most-common classifier accuracy: 0.03230433092502982


### Prepare Logistic Regression Sequences

In [176]:
def one_hot_encoding(note, note_to_int):
    """ Returns one-hot encoded vector given note, dictionary from notes to indices """
    n_vocab = len(note_to_int)
    one_hot = np.zeros(n_vocab)
    note_idx = note_to_int[note]
    one_hot[note_idx] = 1
    return one_hot

In [177]:
def prepare_simple_sequences(notes, sequence_length):
    """
    Prepares vectors of simple notes for one-hot encoding input into LogisticRegression classifer
    
    returns X, y
    X: a list of training examples, where each training example are concatenations of one-hot encodings
        Each training input in X is thus (sequence_length * n_vocab) in length
    y: a list of notes. Each note corresponds to the next note in corresponding sequence from X
    """

    # Extract the unique pitches in the list of notes.
    pitchnames = sorted(set(item for item in notes))
    n_vocab = len(pitchnames)

    # Create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    # Slide a window over our notes, adding sequences to dataset
    X, y = [], []
    for i in range(0, len(notes) - sequence_length):
        sequence_in = notes[i : i + sequence_length]
        sequence_in = [one_hot_encoding(n, note_to_int) for n in sequence_in]
        sequence_in = np.concatenate(sequence_in, axis=None)
        note_out = notes[i + sequence_length]
        X.append(sequence_in)
        y.append(note_out)
    
    # TODO: Should we turn our labels into categorical one-hot encodings using np_utils.to_categorical
    # TODO: Should we normalize input?
    return X, y

In [178]:
simple_sequence_length = 10
simple_X, simple_y = prepare_simple_sequences(simple_notes, simple_sequence_length)
print("Sequence length: {}".format(simple_sequence_length))
print("Number of distinct notes: {}".format(len(set(simple_notes))))
print("Number of training examples: {}".format(len(simple_X)))
print("First five output notes: {}".format(simple_y[:5]))

Sequence length: 10
Number of distinct notes: 78
Number of training examples: 74593
First five output notes: ['G2', 'C3', 'G2', 'C3', 'G2']


Note that the following train-test split procedure could be somewhat flawed. We don't split on unseen (unheard) songs, so a melody that appears multiple times in one song could be picked up later on in the model.

In [188]:
simple_X_train, simple_X_test, simple_y_train, simple_y_test = train_test_split(simple_X, simple_y, test_size=0.15)
simple_y_train[:5]
print("Train set size: {}".format(len(simple_y_train)))
print("Test set size: {}".format(len(simple_y_test)))

Train set size: 63404
Test set size: 11189


### Train Simple Logistic Regression Multi-Class Classification Model

In this simpler problem, we assume that each sequence of notes has one and only one note succeeding it. This makes it a multi-class classification problem.

Later we'll have to expand it to multi-label problem for chords.

In [182]:
logreg = LogisticRegression()
logreg.fit(simple_X_train, simple_y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [189]:
simple_preds_test = logreg.predict(simple_X_test)
sklearn.metrics.accuracy_score(simple_preds_test, simple_y_test)

0.35624273840378945

# LSTM Models

## Extracting Chords and Notes

In [53]:
def get_notes():
    notes = []
    for file in songs:
        # converting .mid file to stream object
        midi = converter.parse(file)
        notes_to_parse = []
        try:
            # Given a single stream, partition into a part for each unique instrument
            parts = instrument.partitionByInstrument(midi)
        except:
            pass
        if parts: # if parts has instrument parts 
            notes_to_parse = parts.parts[0].recurse()
        else:
            notes_to_parse = midi.flat.notes
    
        for element in notes_to_parse: 
            if isinstance(element, note.Note):
                # if element is a note, extract pitch
                notes.append(str(element.pitch))
            elif(isinstance(element, chord.Chord)):
                # if element is a chord, append the normal form of the 
                # chord (a list of integers) to the list of notes. 
                notes.append('.'.join(str(n) for n in element.normalOrder))
    with open('data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)
    
    return notes

In [54]:
get_notes()[145:155]

['G3', 'F2', 'F3', 'E2', '10.2.4', 'E3', '7.10.0', '0', '10.0.4', '7.10.0']

## Preparing Sequence Vectors

In [6]:
def prepare_sequences(notes, n_vocab): 
    sequence_length = 100

    # Extract the unique pitches in the list of notes.
    pitchnames = sorted(set(item for item in notes))

    # Create a dictionary to map pitches to integers
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    network_input = []
    network_output = []

    # create input sequences and the corresponding outputs
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i: i + sequence_length]
        sequence_out = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])
        network_output.append(note_to_int[sequence_out])
    
    n_patterns = len(network_input)
    
    # reshape the input into a format comatible with LSTM layers 
    network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))
    
    # normalize input
    network_input = network_input / float(n_vocab)
    
    # one hot encode the output vectors
    network_output = np_utils.to_categorical(network_output)
    
    return (network_input, network_output)

## LSTM Model

In [35]:
from keras.models import Sequential
from keras.layers import Activation, Dense, LSTM, Dropout, Flatten
def create_network(network_in, n_vocab): 
    """Create the model architecture"""
    model = Sequential()
    model.add(LSTM(128, input_shape=(100,1), return_sequences=True)) # network_in.shape[1:]
    model.add(Dropout(0.2))
    model.add(LSTM(128, return_sequences=True))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(Dropout(0.3))
    model.add(Dense(n_vocab))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
#     print(network_in.shape[1:])

    return model

In [27]:
from keras.callbacks import ModelCheckpoint
def train(model, network_input, network_output, epochs): 
    """
    Train the neural network
    """
    # Create checkpoint to save the best model weights.
    filepath = 'weights.best.music3.hdf5'
    checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=0, save_best_only=True)
    
    model.fit(network_input, network_output, epochs=epochs, batch_size=32, callbacks=[checkpoint])

In [40]:
def train_network():
    """
    Get notes
    Generates input and output sequences
    Creates a model 
    Trains the model for the given epochs
    """
    
    epochs = 50
    
    notes = get_notes()
    print('Notes processed')
    
    n_vocab = len(set(notes))
    print('Vocab generated')
    
    network_in, network_out = prepare_sequences(notes, n_vocab)
    print('Input and Output processed')
    
    model = create_network(network_in, n_vocab)
    print('Model created')
#     return model
    print('Training in progress')
    train(model, network_in, network_out, epochs)
    print('Training completed')
    
    return model
    

In [41]:
### Train the model 
model = train_network()

Notes processed
Vocab generated
Input and Output processed
Model created
Training in progress
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Training completed


In [42]:
def generate(model=None):
    """ Generate a piano midi file """
    #load the notes used to train the model
    with open('data/notes', 'rb') as filepath:
        notes = pickle.load(filepath)

    # Get all pitch names
    pitchnames = sorted(set(item for item in notes))
    # Get all pitch names
    n_vocab = len(set(notes))
    
    print('Initiating music generation process.......')
    
    network_input = get_inputSequences(notes, pitchnames, n_vocab)
    
#             reshaped_input = np.reshape(pattern, (1, len(pattern), 1))
#         prediction_input = prediction_input / float(n_vocab)
        
    if not model:
        model = create_network(network_input, n_vocab)
        print('Loading Model weights.....')
        model.load_weights('weights.best.music3.hdf5')
    else:
        print('Using given model')
    
    print('Model Loaded')
    prediction_output = generate_notes(model, network_input, pitchnames, n_vocab)
    create_midi(prediction_output)

In [43]:
def get_inputSequences(notes, pitchnames, n_vocab):
    """ Prepare the sequences used by the Neural Network """
    # map between notes and integers and back
    note_to_int = dict((note, number) for number, note in enumerate(pitchnames))

    sequence_length = 100
    network_input = []
    for i in range(0, len(notes) - sequence_length, 1):
        sequence_in = notes[i:i + sequence_length]
        network_input.append([note_to_int[char] for char in sequence_in])

    return (network_input)

In [44]:
def generate_notes(model, network_input, pitchnames, n_vocab):
    """ Generate notes from the neural network based on a sequence of notes """
    # Pick a random integer
    start = np.random.randint(0, len(network_input)-1)

    int_to_note = dict((number, note) for number, note in enumerate(pitchnames))
    
    # pick a random sequence from the input as a starting point for the prediction
    pattern = network_input[start]
    prediction_output = []
    
    print('Generating notes........')

    # generate 500 notes
    for note_index in range(500):
        prediction_input = np.reshape(pattern, (1, len(pattern), 1))
        prediction_input = prediction_input / float(n_vocab)

        prediction = model.predict(prediction_input, verbose=0)
        
        # Predicted output is the argmax(P(h|D))
        index = np.argmax(prediction)
        # Mapping the predicted interger back to the corresponding note
        result = int_to_note[index]
        # Storing the predicted output
        prediction_output.append(result)

        pattern.append(index)
        # Next input to the model
        pattern = pattern[1:len(pattern)]

    print('Notes Generated...')
    return prediction_output

In [45]:
def create_midi(prediction_output):
    """ convert the output from the prediction to notes and create a midi file
        from the notes """
    offset = 0
    output_notes = []

    # create note and chord objects based on the values generated by the model
    for pattern in prediction_output:
        # pattern is a chord
        if ('.' in pattern) or pattern.isdigit():
            notes_in_chord = pattern.split('.')
            notes = []
            for current_note in notes_in_chord:
                new_note = note.Note(int(current_note))
                new_note.storedInstrument = instrument.Piano()
                notes.append(new_note)
            new_chord = chord.Chord(notes)
            new_chord.offset = offset
            output_notes.append(new_chord)
        # pattern is a note
        else:
            new_note = note.Note(pattern)
            new_note.offset = offset
            new_note.storedInstrument = instrument.Piano()
            output_notes.append(new_note)

        # increase offset each iteration so that notes do not stack
        offset += 0.5

    midi_stream = stream.Stream(output_notes)
    
    print('Saving Output file as midi....')

    midi_stream.write('midi', fp='test_output4.mid')

In [46]:
#### Generate a new jazz music 
generate(model)

Initiating music generation process.......
Using given model
Model Loaded
Generating notes........
Notes Generated...
Saving Output file as midi....


In [47]:
### Play the Jazz music
play.play_midi('test_output4.mid')

Music file test_output4.mid loaded!


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
