In [1]:
from mido import MidiFile, MidiTrack, Message
from music21 import *
import pandas as pd
import numpy as np
import math
import glob
# device=torch.device('cuda')

In [2]:
def parse_notes(file_name):
    # function to take a midi file and create a dataframe with columns representing note played, duration and time
    
    # start by reading the file:
    message_strings_split = []
    mid = MidiFile(file_name) 
    for i in mid.tracks[1][2:-1]: 
        message_string = str(i)
        message_strings_split.append(message_string.split(" "))
        
    # now extract all the relevant information from the message and create a data frame:
    message_type = []
    for item in message_strings_split:
        message_type.append(item[0])
    df1 = pd.DataFrame(message_type)
    attributes = []
    for item in message_strings_split:
        attributes.append(item[1:])
    attributes_dict = [{}]    
    for item in attributes:
        for i in item:
            key, val = i.split("=")
            if key in attributes_dict[-1]:
                attributes_dict.append({})
            attributes_dict[-1][key] = val
    df2 = pd.DataFrame.from_dict(attributes_dict)
    df_complete = pd.concat([df1, df2], axis=1)
    
    # control change messages are for the pedal...let's simplify by not having those, and don't need all columns:
    df_notes = df_complete[df_complete[0] == 'note_on'].drop(columns={0,'channel'}).reset_index(drop=True)
    if 'control' in df_notes.columns:
        df_notes = df_notes.drop(columns={'control','value'})
    
    # change some of the data types:
    df_notes.time = df_notes.time.astype(float)
    df_notes.note = df_notes.note.astype(int)
    df_notes.velocity = df_notes.velocity.astype(int)
    
    # create a time elapsed attribute equal to the cumulative sum of time.
    df_notes['time_elapsed'] = df_notes.time.cumsum()
    
    return df_notes


def subset_length(df_notes, frame_start, song_len):
    # function to take a dataframe with columns representing note played, duration and time and 
    # subset a fixed length (by note) section, returning normalized columns
    
    # we'll work with a small section of the song, starting at the frame_start and going for about song_len notes:
    frame_stop = min(song_len*3, len(df_notes)) # don't know just how long the frame will be with note stop signals
    df_song_section = df_notes.loc[frame_start:frame_stop].reset_index(drop=True).copy()
    
    # drop the first row if it's a stop signal:
    if df_song_section['velocity'][0] == 0:
        df_song_section.drop([0])
    
    # reset the indices for easier working:
    df_song_section = df_song_section.reset_index(drop=True)
    
    # find the duration each note was played based on the stop note signals (note on with velocity == 0)
    duration = [0] * len(df_song_section)
    for i in range(len(df_song_section)):
        if df_notes['velocity'][i] != 0 and i < len(df_song_section) - 1:            
            j = i + 1
            while df_song_section['note'][j] != df_song_section['note'][i]:
                if j >= len(df_song_section) - 1:
                    break
                else:
                    j += 1
            duration[i] = df_song_section['time_elapsed'][j] - df_song_section['time_elapsed'][i]
    df_song_section['duration'] = duration

    # now drop the "notes off" signal rows (this info is in the duration column)
    df_song_section = df_song_section[df_song_section['velocity'] != 0].reset_index(drop=True)
    # simplify to start without dynamics TODO: try with dynamics
    df_song_section = df_song_section.drop(columns={'time','velocity'})
    
    # now formally take just the first bit of the section, returning if there are not ample notes before the piece end
    df_first_notes = df_song_section.loc[0:song_len-1].copy()
    if len(df_first_notes) < song_len:
        return np.zeros((1,song_len,3))
    
    # now, let's normalize the time elapsed and make duration a fraction of time elapsed:
    df_first_notes['time_elapsed'] -= df_first_notes['time_elapsed'][0]
    df_first_notes['duration'] /= df_first_notes['time_elapsed'][song_len-1]
    df_first_notes['time_elapsed'] /= df_first_notes['time_elapsed'][song_len-1]
    
    # finally, let's recreate the "time since last event" nature of a midi file for time_elapsed:
    time_since_last = [0] * song_len
    for i in range(1, song_len):
        time_since_last[i] = df_first_notes['time_elapsed'][i] - df_first_notes['time_elapsed'][i-1]
    df_first_notes['time_since_last'] = time_since_last
    df_first_notes = df_first_notes.drop(columns='time_elapsed')
    
    # need to normalize the notes...MIDI for piano returns 21 to 108, so:
    df_first_notes['note'] -= 20
    df_first_notes['note'] /= 88
    
    # last bit is we've found that many time_elapsed and durations are very small numbers, so we will multiply by 10
    # truncating at 1 to still work with a sigmoid activation function
    df_first_notes['duration'] = [i*10 if i*10 < 1 else 1 for i in df_first_notes['duration']]
    df_first_notes['time_since_last'] = [i*10 if i*10 < 1 else 1 for i in df_first_notes['time_since_last']]

    
    return df_first_notes

In [6]:
def recreate_midi(df_first_notes, speed=20000):
    # function to take a dataframe created by something like parse_notes() or a gan and return a midi
    
    # Can start by reverse scaling the note:
    df_reversed = df_first_notes.copy()
    df_reversed['note'] = round(df_reversed['note'] * 88 + 20)
    df_reversed['duration'] /= 10
    df_reversed['time_since_last'] /= 10
    df_reversed.note = df_reversed.note.astype(int)
    df_reversed['velocity'] = 60  # create a uniform middling velocity

    # recreate the absolute time index and drop time_since_last (we'll recreate it with the stop signals)
    df_reversed['time_index'] = df_reversed.time_since_last.cumsum()
    df_reversed = df_reversed.drop(columns = 'time_since_last')

    # create a stop signal for each note at the appropriate time_index:
    for i in range(len(df_reversed)):
        stop_note = pd.DataFrame([[df_reversed.note[i], 0, 0, df_reversed.duration[i] + df_reversed.time_index[i]]],
                                 columns=['note', 'duration', 'velocity', 'time_index'])
        df_reversed = df_reversed.append(stop_note, ignore_index=True)
    df_reversed = df_reversed.sort_values('time_index').reset_index(drop=True)

    # recreate time_since last with the stop note signals
    df_reversed['time'] = [0] + [df_reversed.time_index[i+1] - df_reversed.time_index[i] 
                                 for i in range(len(df_reversed)-1)]
    # and now we don't need duration or time_index so can drop those
    df_reversed = df_reversed.drop(columns = {'time_index','duration'})

    # finally, we need to scale the time since last note appropriately:
    df_reversed['time'] = round(df_reversed['time'] * speed)
    df_reversed.time = df_reversed.time.astype(int)

    # finally, recreate the midi and return
    mid_remade = MidiFile()
    track = MidiTrack()
    mid_remade.tracks.append(track)
    track.append(Message('program_change', program=0, time=0))
    for i in range(len(df_reversed)):
        track.append(Message('note_on', note=df_reversed.note[i], velocity=df_reversed.velocity[i], time=df_reversed.time[i]))

    return mid_remade

In [5]:
# Parse all files into np array:
song_len = 256
frame_shift = 20
all_songs = np.zeros((1,song_len,3))  # create a blank first "song" to just append things to uniformly in loop
all_files = glob.glob("All_Maestro/*.midi")

for file_name in all_files:
    
    # first parse all the notes in a song:
    song_notes = parse_notes(file_name)
    
    # take first section of notes:
    frame_start = 0
    subset_notes = subset_length(song_notes, frame_start, song_len)
    
    # continue shifting through the song, taking the subset of notes, transposing many times and logging values
    while not np.array_equal(subset_notes, np.zeros((1,song_len,3))):  # will stop when too close to end of song
        
        transpose_notes = subset_notes.copy()
        for i in range(-5,7):
            transpose_notes['note'] = subset_notes['note'] + i/88
            transpose_notes['note'] = [1/88 if i <= 0 else i for i in transpose_notes['note']] # can't go below bottom A
            transpose_notes['note'] = [1 if i > 1 else i for i in transpose_notes['note']] # can't go above top C
            # append each transposition to the all_songs log
            all_songs = np.append(all_songs, transpose_notes.to_numpy().reshape((1,song_len,3)), axis=0)
            
        frame_start += frame_shift
        subset_notes = subset_length(song_notes, frame_start, song_len)
        
    print(str(round((all_files.index(file_name)+1)/len(all_files)*100,2)))  # print progress
            
all_songs = np.delete(all_songs, 0, 0)  # delete that first blank song

0.08
0.16
0.24
0.31
0.39
0.47
0.55
0.63
0.71
0.78
0.86
0.94
1.02
1.1
1.18
1.25
1.33
1.41
1.49
1.57
1.65
1.72
1.8
1.88
1.96
2.04
2.12
2.19
2.27
2.35
2.43
2.51
2.59
2.66
2.74
2.82
2.9
2.98
3.06
3.13
3.21
3.29
3.37
3.45
3.53
3.61
3.68
3.76
3.84
3.92
4.0
4.08
4.15
4.23
4.31
4.39
4.47
4.55
4.62
4.7
4.78
4.86
4.94
5.02
5.09
5.17
5.25
5.33
5.41
5.49
5.56
5.64
5.72
5.8
5.88
5.96
6.03
6.11
6.19
6.27
6.35
6.43
6.5
6.58
6.66
6.74
6.82
6.9
6.97
7.05
7.13
7.21
7.29
7.37
7.45
7.52
7.6
7.68
7.76
7.84
7.92
7.99
8.07
8.15
8.23
8.31
8.39
8.46
8.54
8.62
8.7
8.78
8.86
8.93
9.01
9.09
9.17
9.25
9.33
9.4
9.48
9.56
9.64
9.72
9.8
9.87
9.95
10.03
10.11
10.19
10.27
10.34
10.42
10.5
10.58
10.66
10.74
10.82
10.89
10.97
11.05
11.13
11.21
11.29
11.36
11.44
11.52
11.6
11.68
11.76
11.83
11.91
11.99
12.07
12.15
12.23
12.3
12.38
12.46
12.54
12.62
12.7
12.77
12.85
12.93
13.01
13.09
13.17
13.24
13.32
13.4
13.48
13.56
13.64
13.71
13.79
13.87
13.95
14.03
14.11
14.18
14.26
14.34
14.42
14.5
14.58
14.66
14.73
14.81
14.89
14.97

In [7]:
all_songs = np.float32(all_songs)
np.save('All_Maestro_Parsed', all_songs) # save the parse data for later use
# all_songs = np.load('All_Maestro_Parsed.npy') # test loading of saved array

In [8]:
# can test various elements of array
all_songs.shape
# all_songs

(200880, 256, 3)

In [29]:
# test recreating a midi
test_song = pd.DataFrame(all_songs_rev[0,:,:].reshape((song_len,3)), columns=["note", "duration","time_since_last"])
mid_remade = recreate_midi(test_song, 20000)
mid_remade.save('mid_test.mid')

In [30]:
# see if we can play in jupyter notebook
mf = midi.MidiFile()
mf.open('mid_test.mid')
mf.read()
mf.close()
s = midi.translate.midiFileToStream(mf)
s.show('midi') # note at this stage there will be no dynamics and no pedal