# Markov Generator from Bach Inventions

In [1]:
from mido import MidiFile, Message, MidiTrack
import numpy as np
import matplotlib.pyplot as plt
import itertools

We extract all the note beginnings and store it in a numpy array `tracks`.

In [2]:
tracks = []
for i in range(1, 16):
    input_file = MidiFile(str(i) + '.mid')
    for track in input_file.tracks[1:]:
        t = np.array([i.note for i in track[2:] if i.type=='note_on'])
        tracks.append(t)
        
input_file = MidiFile('goldberg.mid')
for track in input_file.tracks[1:]:
    t = np.array([i.note for i in track[2:] if i.type=='note_on'])
    tracks.append(t)
    
input_file = MidiFile('toccata.mid')
for track in input_file.tracks[1:]:
    t = np.array([i.note for i in track[2:] if i.type=='note_on'])
    tracks.append(t)

In [3]:
tracks = np.concatenate(tracks)

Now tracks contains 17594 notes. We ignore the side effects at the end of each track.

In [4]:
len(tracks)

49478

Let us store the probability for each note to be followed by another one. The highest note reached in Bach's invention is

In [5]:
max_note = max(tracks)
min_note = min(tracks)

Or, a C6 (the highest note on most harpsichords).
Therefore we build a 2-dimensional matrix of $85\times85$, and we compute the probability that one note follows another. This gives a Markov matrix of memory $1$.

In [None]:
def get_markov(level):
    res = {}
    for key in itertools.product(list(range(int(max_note)+1)), repeat=level):
        res[tuple(key)] = np.zeros(int(max_note)+1)
    for group in range(len(tracks)-level):
        key = tuple(tracks[group:group+level])
        res[key][int(tracks[group+level])] += 1
    for group in range(len(tracks)-level):
        key = tuple(tracks[group:group+level])
        res[key] /= np.sum(res[key])
    return res
markov = get_markov(4)

In [None]:
markov[(60, 62, 64, 65)]

As an example, the following plot gives the statistical distribution of all the notes following notes $60$ and $60$ in a group.

In [None]:
plt.plot(markov[(60, 62, 64, 65)])

Let us now build a method that, given a certain group, outputs a random note according to the previously computed distributions.

In [None]:
def output_note(input_note):
    if np.sum(markov[input_note]) > 0:
        return np.random.choice(list(range(max_note+1)), p=markov[input_note])
    else:
        return np.random.choice(list(range(min_note, max_note)))

Let us now build a method that, given a first group, outputs a line of different notes (forming a melody).

In [None]:
def output_melody(input_group, length, level):
    res = list(input_group)
    for _ in range(length):
        res.append(output_note(tuple(res[-level:])))
    return res

In [None]:
melody = output_melody((60, 62, 64, 65), 60, 4)

Let us now convert this melody to a midi file:

In [None]:
def convert_to_midi(melody, filename):
    mid = MidiFile()
    track = MidiTrack()
    mid.tracks.append(track)
    t = 500
    for i in melody:
        track.append(Message('note_on', note=i, time=0))
        track.append(Message('note_off', note=i, time=300))
    mid.save(filename)
convert_to_midi(melody, 'test.mid')