## Download ABC tunes and convert to MIDI.

In [None]:
import requests


url_prefix = 'http://abc.sourceforge.net/NMD/nmd/'

filenames = [
    'jigs.txt',
    'hpps.txt',
    'morris.txt',
    'playford.txt',
    'reelsa-c.txt',
    'reelsd-g.txt',
    'reelsh-l.txt',
    'reelsm-q.txt',
    'reelsr-t.txt',
    'reelsu-z.txt',
    'slip.txt',
    'waltzes.txt',
    'xmas.txt',
    'ashover.txt',
]

!mkdir -p dataset
for fn in filenames:
    with open(f'dataset/{fn}', 'w') as f:
        f.write(requests.get(url_prefix + fn).text)
    !/abcmidi/abc2midi.exe dataset/{fn} > /dev/null

## Load MIDI files. Keep only those with 3 tracks (most of them).

In [None]:
import random
from collections import defaultdict
from pathlib import Path

import pretty_midi
from IPython.display import Audio, display
from matplotlib import pyplot as plt
from pypianoroll import Multitrack


# Every `.mid` file sitting directly inside a `dataset` directory found
# anywhere below the current working directory.
midi_files = list(Path().rglob('dataset/*.mid'))

# Parse each file with pretty_midi and report how many were loaded.
midis = list(map(pretty_midi.PrettyMIDI, map(str, midi_files)))
print(len(midis))

# Keep only the tunes that have exactly three instruments, then report again.
midis = [tune for tune in midis if len(tune.instruments) == 3]
print(len(midis))

## Listen and plot tracks.

In [None]:
import random

# Tune to inspect. Swap in the commented line to pick a random file instead.
# f = random.choice(midi_files)
f = 'dataset/reelsh-l65.mid'

# Synthesize the tune and embed an audio player in the notebook output.
midi = pretty_midi.PrettyMIDI(str(f))
waveform = midi.fluidsynth()
display(Audio(waveform, rate=44100))

# Plot each track, keeping only the first 500 rows of its piano roll.
pp = Multitrack(str(f))
for track in pp.tracks:
    track.pianoroll = track.pianoroll[:500, :]
    fig, ax = track.plot()
    plt.show()

## Pitch frequency by track number.

In [None]:
import pandas as pd
import plotly.express as px


# counters[track index][pitch] -> number of notes with that pitch on that
# track, accumulated over every tune in `midis`.
counters = defaultdict(lambda: defaultdict(int))
for tune in midis:
    for track_no, instrument in enumerate(tune.instruments):
        for note in instrument.notes:
            counters[track_no][note.pitch] += 1

# One column per track index, one row per pitch (sorted), with the pitch
# moved out of the index into its own 'pitch' column.
counters_df = pd.DataFrame.from_dict(counters)
counters_df = counters_df.sort_index().reset_index()
counters_df = counters_df.rename(columns={'index': 'pitch'})

# Stacked bar chart with one series per track.
fig = px.bar()
for track_no, label in enumerate(('melody', 'bass', 'harmony')):
    fig.add_bar(x=counters_df['pitch'], y=counters_df[track_no], name=label)
fig.update_layout(barmode='stack')
fig.show()

## Pitch distance between consecutive notes in melody.

In [None]:
import numpy as np

def to_intervals(notes):
    """Return the difference between each element of `notes` and the one before it.

    `notes` is expected to be a 1-D sequence of pitches. The result has one
    element fewer than the input, and is empty for inputs of length 0 or 1.
    """
    return np.diff(notes)

# counters[interval] -> how many times that pitch step occurs between
# consecutive notes of the first track (the melody) across all tunes.
counters = defaultdict(int)
for tune in midis:
    melody = tune.instruments[0]
    pitches = np.asarray([note.pitch for note in melody.notes])
    for step in to_intervals(pitches):
        counters[step] += 1

# Bar chart of interval size against number of occurrences.
fig = px.bar()
fig.add_bar(x=list(counters), y=list(counters.values()), name='intervals')
fig.show()