# Midi2Matrix Functions

In [81]:
import mido
import string
import numpy as np
import pandas as pd

In [10]:
def msg2dict(msg):
    result = dict()
    if 'note_on' in msg:
        on_ = True
    elif 'note_off' in msg:
        on_ = False
    else:
        on_ = None
    result['time'] = int(msg[msg.rfind('time'):].split(' ')[0].split('=')[1].translate(
        str.maketrans({a: None for a in string.punctuation})))

    if on_ is not None:
        for k in ['note', 'velocity']:
            result[k] = int(msg[msg.rfind(k):].split(' ')[0].split('=')[1].translate(
                str.maketrans({a: None for a in string.punctuation})))
    return [result, on_]

In [11]:
def switch_note(last_state, note, velocity, on_=True):
    # piano has 88 notes, corresponding to note id 21 to 108, any note out of this range will be ignored
    result = [0] * 88 if last_state is None else last_state.copy()
    if 21 <= note <= 108:
        result[note-21] = velocity if on_ else 0
    return result

In [12]:
def get_new_state(new_msg, last_state):
    new_msg, on_ = msg2dict(str(new_msg))
    new_state = switch_note(last_state, note=new_msg['note'], velocity=new_msg['velocity'], on_=on_) if on_ is not None else last_state
    return [new_state, new_msg['time']]
def track2seq(track):
    # piano has 88 notes, corresponding to note id 21 to 108, any note out of the id range will be ignored
    result = []
    last_state, last_time = get_new_state(str(track[0]), [0]*88)
    for i in range(1, len(track)):
        new_state, new_time = get_new_state(track[i], last_state)
        if new_time > 0:
            result += [last_state]*new_time
        last_state, last_time = new_state, new_time
    return result

In [13]:
def mid2arry(mid, min_msg_pct=0.1):
    tracks_len = [len(tr) for tr in mid.tracks]
    min_n_msg = max(tracks_len) * min_msg_pct
    # convert each track to nested list
    all_arys = []
    for i in range(len(mid.tracks)):
        if len(mid.tracks[i]) > min_n_msg:
            ary_i = track2seq(mid.tracks[i])
            all_arys.append(ary_i)
    # make all nested list the same length
    max_len = max([len(ary) for ary in all_arys])
    for i in range(len(all_arys)):
        if len(all_arys[i]) < max_len:
            all_arys[i] += [[0] * 88] * (max_len - len(all_arys[i]))
    all_arys = np.array(all_arys)
    all_arys = all_arys.max(axis=0)
    # trim: remove consecutive 0s in the beginning and at the end
    sums = all_arys.sum(axis=1)
    ends = np.where(sums > 0)[0]
    return all_arys[min(ends): max(ends)]

In [157]:
def iteration_interval_calculator(midi, resolution, bpm):
  SECOND_MS = 1000
  MINUTE_MS = 60000
  ppq = midi.ticks_per_beat
  single_tick_ms = MINUTE_MS / (bpm * ppq)
  single_second_ticks = SECOND_MS / single_tick_ms
  column_iter_interval = single_second_ticks / resolution

  return int(column_iter_interval)

In [150]:
def extract_nth_columns(df, n):
    extracted_columns = df.iloc[:,::n]
    return extracted_columns

In [151]:
def extract_bpm(midi_object):
  bpms = []
  for track in midi_object.tracks:
    for msg in track:
        if msg.type == 'set_tempo':
            bpm = 60000000 / msg.tempo
            bpms.append(bpm)
  
  return np.mean(bpms)

In [160]:
def midi_2_matrix(midi, resolution, filename):
  raw_file = mido.MidiFile(midi, clip=True)
  bpm = int(extract_bpm(raw_file))
  print(f'BPM of {filename} is {bpm}')
  sampling_frequency = iteration_interval_calculator(raw_file, 
                                                     resolution=resolution, 
                                                     bpm = bpm)
  df_file = pd.DataFrame(mid2arry(raw_file)).T
  print(f'{filename}: Shape of the original dataframe: \n {df_file.shape}')

  simplified_df = extract_nth_columns(df_file, sampling_frequency)
  print(f'{filename}: Shape of the simplified (sampled at each {sampling_frequency}th column): \n {simplified_df.shape}')

  simplified_df.to_csv(f'{filename}_rawmatrix.csv', sep='\t')


# Application

## Baroque / Classical

In [162]:
midi_2_matrix('goldberg_variation1.mid', resolution = 8, filename= 'goldberg')
midi_2_matrix('bach_847.mid', resolution = 8, filename = 'bach_847')

BPM of goldberg is 74
goldberg: Shape of the original dataframe: 
 (88, 92159)
goldberg: Shape of the simplified (sampled at each 74th column): 
 (88, 1246)
BPM of bach_847 is 118
bach_847: Shape of the original dataframe: 
 (88, 132476)
bach_847: Shape of the simplified (sampled at each 117th column): 
 (88, 1133)


In [163]:
midi_2_matrix('haydn_sonata50.mid', resolution=8, filename='haydn50')
midi_2_matrix('mz_331_3.mid', resolution = 8, filename = 'mozart_turca')

BPM of haydn50 is 150
haydn50: Shape of the original dataframe: 
 (88, 79000)
haydn50: Shape of the simplified (sampled at each 29th column): 
 (88, 2725)
BPM of mozart_turca is 139
mozart_turca: Shape of the original dataframe: 
 (88, 214079)
mozart_turca: Shape of the simplified (sampled at each 139th column): 
 (88, 1541)


## Romantic

In [165]:
midi_2_matrix('Waltz-in-C-Sharp-Minor-Opus-64-Nr-2.mid', resolution=8, filename = 'chopinwaltz')
midi_2_matrix('liz_liebestraum.mid', resolution=8, filename = 'liszt')
midi_2_matrix('schubert_impromptu2.mid', resolution = 8, filename = 'schubertimp')
midi_2_matrix('tchaikovsky_june.mid', resolution = 8, filename = 'tchaikovsky')

BPM of chopinwaltz is 120
chopinwaltz: Shape of the original dataframe: 
 (88, 185231)
chopinwaltz: Shape of the simplified (sampled at each 96th column): 
 (88, 1930)
BPM of liszt is 139
liszt: Shape of the original dataframe: 
 (88, 260639)
liszt: Shape of the simplified (sampled at each 139th column): 
 (88, 1876)
BPM of schubertimp is 180
schubertimp: Shape of the original dataframe: 
 (88, 407039)
schubertimp: Shape of the simplified (sampled at each 180th column): 
 (88, 2262)
BPM of tchaikovsky is 101
tchaikovsky: Shape of the original dataframe: 
 (88, 184319)
tchaikovsky: Shape of the simplified (sampled at each 101th column): 
 (88, 1825)


## Early Modern

In [166]:
midi_2_matrix('bartok_piano_sonata_1_cunknown.mid', resolution=8, filename = 'bartoksonata')
midi_2_matrix('debussyprelude.mid', resolution=8, filename = 'debussyprelude')
midi_2_matrix('vers_la_flamme_72_(c)lefeldt.mid', resolution = 8, filename = 'scriabin_vers')
midi_2_matrix('schoenberg_drei_klavierstucke_11_3_(c)simonetto.mid', resolution = 8, filename = 'schoenberg')

BPM of bartoksonata is 132
bartoksonata: Shape of the original dataframe: 
 (88, 553535)
bartoksonata: Shape of the simplified (sampled at each 281th column): 
 (88, 1970)
BPM of debussyprelude is 40
debussyprelude: Shape of the original dataframe: 
 (88, 22047)
debussyprelude: Shape of the simplified (sampled at each 16th column): 
 (88, 1378)
BPM of scriabin_vers is 123
scriabin_vers: Shape of the original dataframe: 
 (88, 629247)
scriabin_vers: Shape of the simplified (sampled at each 262th column): 
 (88, 2402)
BPM of schoenberg is 120
schoenberg: Shape of the original dataframe: 
 (88, 107007)
schoenberg: Shape of the simplified (sampled at each 96th column): 
 (88, 1115)


## Contemporary

In [167]:
midi_2_matrix('BoulezPREMIERE_SONATE1946.mid', resolution=8, filename = 'boulez_sonata')
midi_2_matrix('evryalixx.mid', resolution=8, filename = 'xenakis_evyrali')
midi_2_matrix('messiaen_la_nativite_du_seigneur_6_(c)mccoy.mid', resolution = 8, filename = 'messiaen_lesagnes')
midi_2_matrix('tude_1_Dsordre__Gyrgy_Ligeti_with_annotationcolor_coding.mid', resolution = 8, filename = 'ligeti_disordre')

BPM of boulez_sonata is 62
boulez_sonata: Shape of the original dataframe: 
 (88, 76912)
boulez_sonata: Shape of the simplified (sampled at each 62th column): 
 (88, 1241)
BPM of xenakis_evyrali is 96
xenakis_evyrali: Shape of the original dataframe: 
 (88, 453527)
xenakis_evyrali: Shape of the simplified (sampled at each 96th column): 
 (88, 4725)
BPM of messiaen_lesagnes is 107
messiaen_lesagnes: Shape of the original dataframe: 
 (88, 37169)
messiaen_lesagnes: Shape of the simplified (sampled at each 26th column): 
 (88, 1430)
BPM of ligeti_disordre is 252
ligeti_disordre: Shape of the original dataframe: 
 (88, 255106)
ligeti_disordre: Shape of the simplified (sampled at each 252th column): 
 (88, 1013)
