In [2]:
import os
import time
import shutil
import pypianoroll
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pretty_midi

from tqdm import tqdm
from pypianoroll import Multitrack, Track, BinaryTrack
from mido import Message, MidiFile, MidiTrack, bpm2tempo, tempo2bpm, MetaMessage

# Display every column/row when a frame is shown.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Turn off pandas' chained-assignment warning for the whole notebook.
pd.set_option('mode.chained_assignment', None)

# min_pitch: 
# max_pitch: 
# Homework: Get results for 1k files 

# Project root. Every relative path below is resolved against it. It can be
# overridden with the CSC2506_PROJECT_DIR environment variable so the notebook
# also runs on machines other than the original author's.
PROJECT_DIR = os.environ.get('CSC2506_PROJECT_DIR', "/Users/sorensabet/Desktop/MSC/CSC2506_Project")
os.chdir(PROJECT_DIR)

# Song and track tables are read from JSON; the message table is read from HDF5.
song_df = pd.read_json('data/Dataframes/song_df.json')
track_df = pd.read_json('data/Dataframes/track_df.json')
msg_df = pd.read_hdf('data/Dataframes/msg_df.h5', key='data')

In [3]:
# song_df: keep only songs with a known ticks-per-beat value and index them by song_idx
# (the song_idx column itself is kept as well).
song_df = song_df.loc[song_df['ticks_per_beat'].notnull()]
song_df.index = song_df['song_idx']

# track_df: keep one (song_idx, track_num) row per track whose message types mention 'note'.
# Only these two columns survive the selection, so no separate column drop is needed.
has_notes = track_df['track_msg_types'].astype(str).str.contains('note')
track_df = track_df.loc[has_notes, ['song_idx', 'track_num']]
track_df = track_df.drop_duplicates(subset=['song_idx', 'track_num'])

# msg_df: keep only messages that belong to the songs retained above
msg_df = msg_df.loc[msg_df['song_idx'].isin(song_df['song_idx'].unique())]

In [4]:
def split_tracks(tdf, mdf, n_meas=4, n_copies=1, n_transpose=0, merge_tracks=False, song_tpb=480, song_idx= 0, folder=None):
    """
    Split one song's note messages into fixed-length sections and write each
    section of each track as a MIDI file (.mid) plus a pypianoroll file (.npz).

    tdf:           Track level dataframe. Only its 'track_num' column is read.
    mdf:           Message level dataframe with the columns 'type', 'song_idx',
                   'track_num', 'time', 'velocity', 'note', 'ctime', 'cbeats' and
                   'bar'. It is copied on entry; the caller's frame is not modified.
    n_meas:        Number of bars per output file. A message goes to section
                   int(bar / n_meas).
    n_copies:      The track is appended to each MIDI file n_copies + 1 times.
                   The file is written as MIDI type 1 when n_copies >= 1, else type 0.
    n_transpose:   Number of octaves to add above and below (capped at 5). When > 0,
                   every note is first folded into the octave starting at middle C
                   (60-71); then, right after each message, copies shifted by
                   -n..-1 and +1..+n octaves are inserted with delta time 0.
                   Results outside 0-127 are dropped. Values <= 0 disable it.
    merge_tracks:  If true, all tracks are interleaved in 'cbeats' order into a single
                   track numbered 1 and 'time' is recomputed from the 'cbeats' gaps.
    song_tpb:      The ticks per beat defined in the original song. Used as the
                   ticks_per_beat of the written files and to convert beats to ticks.
    song_idx:      Song identifier, used in the output file names.
    folder:        Output prefix. File names are built as
                   folder + '<song_idx>_<track>_<section>.mid' / '.npz' by plain string
                   concatenation, so it must end with a path separator.

    Returns None. Sections that contain no 'note_on' message are skipped and
    nothing is written for them.
    """

    tracks = list(tdf['track_num'].unique())

    # Work on a private copy: the transposition and merge steps below rewrite columns,
    # and the caller's dataframe must stay untouched.
    mdf = mdf.copy()

    if (n_transpose > 0):
        print('Transposing!')

        # 60 corresponds to middle C; this preserves the pitch class but might alter harmonics
        mdf['note'] = mdf['note'] % 12 + 60

        # Bound the number of octave shifts (anything still outside 0-127 is dropped below)
        n_transpose = min(n_transpose, 5)

        # The unshifted note must come first because it is the one that keeps the delta time
        octave_shifts = [0] + list(range(-n_transpose, 0)) + list(range(1, n_transpose + 1))

        columns = ['type', 'song_idx', 'track_num', 'time', 'velocity', 'note', 'ctime', 'cbeats', 'bar']
        records = [
            {**row, 'time': row['time'] if shift == 0 else 0, 'note': row['note'] + shift * 12}
            for row in mdf[columns].to_dict('records')
            for shift in octave_shifts
        ]
        mdf = pd.DataFrame(records, columns=columns)
        mdf = mdf.loc[mdf['note'].between(0, 127)]

    if merge_tracks:
        print('Merging!')

        # Interleave all tracks by absolute position, then rebuild the delta times from
        # the gap (in beats) to the previous row; the first row is measured from beat 0.
        mdf = mdf.sort_values(by=['cbeats', 'track_num']).reset_index(drop=True)
        prev_cbeats = mdf['cbeats'].shift(1).fillna(0)
        mdf['time'] = ((mdf['cbeats'] - prev_cbeats) * song_tpb).round().astype(int)
        mdf['track_num'] = 1
        tracks = [1]

    # Only note messages are written; a note_on with velocity 0 is treated as a note_off.
    mdf = mdf.loc[mdf['type'].isin(['note_on', 'note_off'])].copy()
    mdf['type'] = np.where(mdf['velocity'] == 0, 'note_off', mdf['type'])
    mdf['outfile'] = (mdf['bar']/n_meas).astype(int)
    midi_type = 1 if n_copies >= 1 else 0

    # NOTE(review): every message keeps the delta time it had in the full track, so the
    # first message of a section still carries the gap back to the previous message
    # (which may sit in an earlier section or be a non-note message dropped above).
    # Confirm whether sections should instead be re-based to their own start.

    for t in tracks:
        track_msgs = mdf.loc[mdf['track_num'] == t]

        # Grouping per track visits only the sections this track actually has messages in.
        for f, tmdf in track_msgs.groupby('outfile', sort=False):

            # A section without any note_on is not written.
            if not (tmdf['type'] == 'note_on').any():
                print('Skipping track %s, section %s: no note_on messages' % (t, f))
                continue

            # Create the track specific MIDI file
            mid = MidiFile(ticks_per_beat=int(song_tpb), type=midi_type)
            midiTrack = MidiTrack()

            # Tempo MIDI Message (500000 microseconds per beat, i.e. standardized to 120bpm)
            midiTrack.append(MetaMessage('set_tempo', time=0, tempo=500000))

            # Time Signature MIDI Message (standardized to 4/4)
            midiTrack.append(MetaMessage('time_signature', time=0, numerator=4, denominator=4,
                                         clocks_per_click=24, notated_32nd_notes_per_beat=8))

            # Key Signature MIDI Message (Shouldn't matter since MIDI note number determines the correct note)
            midiTrack.append(MetaMessage('key_signature', time=0, key='C'))

            # Individual Messages corresponding to notes, read by column name
            midiTrack += [
                Message(kind, time=int(ticks), note=int(pitch), velocity=int(vel), channel=0)
                for kind, ticks, pitch, vel in zip(tmdf['type'], tmdf['time'], tmdf['note'], tmdf['velocity'])
            ]

            # End of Track MIDI Message
            midiTrack.append(MetaMessage('end_of_track', time=0))

            # The track is written once, plus once more per requested copy
            for i in range(0, n_copies+1):
                mid.tracks.append(midiTrack)
            filename = folder + str(song_idx) + '_' + str(t) + '_' + str(f) + '.mid'
            filename_npz = folder + str(song_idx) + '_' + str(t) + '_' + str(f) + '.npz'

            # Save the MIDI file, then read it back with pypianoroll to produce the NPZ file
            mid.save(filename)

            try:
                pyp_mid = pypianoroll.read(filename)
                pyp_mid.save(filename_npz)
            except ValueError as ex:
                print(ex)
                print('Unable to create npz file, skipping!')

In [15]:
n_meas = 16
n_copies = 0
n_transpose = 0
merge_tracks = False 

outpath = "/Users/sorensabet/Desktop/MSC/CSC2506_Project/data/Splitting MIDI Files/"
# Start every run from an empty output folder (this deletes any previous results).
if os.path.exists(outpath):
    shutil.rmtree(outpath)
os.mkdir(outpath)

# total= lets tqdm report real progress; a bare itertuples() generator has no length.
for song in tqdm(song_df.itertuples(), total=len(song_df)):
    # Read fields by name rather than by tuple position, so that a change in the
    # column order of song_df cannot silently pick up the wrong value.
    song_tpb = song.ticks_per_beat # Ticks per beat 
    
    t_df = track_df.loc[track_df['song_idx'] == song.Index]
    m_df = msg_df.loc[(msg_df['song_idx'] == song.Index) & (msg_df['track_num'].isin(t_df['track_num']))]

    # Per track: cumulative time in ticks (ctime) and in beats (cbeats).
    # assign() returns a new frame, so nothing is written into a slice of m_df.
    new_m_dfs = [] 
    for t in t_df['track_num']:
        temp_msgs = m_df.loc[m_df['track_num'] == t].assign(
            ctime=lambda d: d['time'].cumsum(),
            cbeats=lambda d: d['ctime'] / song_tpb,
        )
        new_m_dfs.append(temp_msgs)
    
    # Songs without any note-bearing track are skipped
    if (len(new_m_dfs) == 0):
        continue
    
    m_df = pd.concat(new_m_dfs)
    m_df['bar'] = (m_df['cbeats']/4).astype(int)  # bar index, counting 4 beats per bar

    # NOTE(review): the loop stops after the first song that has tracks, and the
    # split step below is commented out, so this cell currently only previews m_df.
    break


#     # Step 1. Copy original song over to the new folder 
#     # Step 2. Write all split files into the new folder 
    
#     orig_path = song[1]
#     song_folder = outpath + str(song[0]) + '/'
#     os.mkdir(song_folder)
    
#     shutil.copy('Raw Data/' + orig_path, song_folder + str(song[0]) +'_original.midi')

#     split_tracks(t_df, m_df, n_meas=n_meas, n_copies=n_copies, n_transpose=n_transpose, merge_tracks=merge_tracks, song_tpb=song_tpb, song_idx=song[0], folder=song_folder)
        
#     #print('Finished song: %d' % (song[0]))
    
#     # Make a folder for the new song based on song_idx 
#     # Paste the original as well as the subsections to the new folder 

# Preview only the first rows; display.max_rows is unlimited in this notebook.
m_df.head(10)

0it [00:00, ?it/s]


Unnamed: 0,type,song_idx,track_num,time,velocity,note,ctime,cbeats,bar
13,midi_port,1,1,0,,,0,0.0,0
14,note_on,1,1,962,115.0,36.0,962,8.016667,2
15,note_on,1,1,183,0.0,36.0,1145,9.541667,2
16,note_on,1,1,17,83.0,36.0,1162,9.683333,2
17,note_on,1,1,23,0.0,36.0,1185,9.875,2
18,note_on,1,1,17,113.0,36.0,1202,10.016667,2
19,note_on,1,1,160,0.0,36.0,1362,11.35,2
20,note_on,1,1,0,90.0,43.0,1362,11.35,2
21,note_on,1,1,40,99.0,31.0,1402,11.683333,2
22,note_on,1,1,3,0.0,43.0,1405,11.708333,2


In [61]:
# Toy sequence: four consecutive pitches (60-63), each sounding for exactly one beat.
notes = []
for beat, pitch in enumerate(range(60, 64)):
    notes.extend([
        {'type': 'note_on', 'velocity': 64, 'note': pitch, 'start_beat': beat},
        {'type': 'note_off', 'velocity': 0, 'note': pitch, 'start_beat': beat + 1},
    ])

note_df = pd.DataFrame.from_records(notes)

# Absolute position of every message in ticks, at 480 ticks per beat
note_df['ctime'] = note_df['start_beat'] * 480

# 1-based row number
note_df['tmp_idx'] = note_df.index + 1

# Absolute position of the previous row (0 for the first row); the difference
# to the row's own position is its delta time.
note_df['new_ctime'] = note_df['ctime'].shift(1).fillna(0)
note_df['time'] = note_df['ctime'] - note_df['new_ctime']

column_order = ['type', 'velocity', 'note', 'time', 'start_beat', 'ctime', 'new_ctime', 'tmp_idx']
note_df = note_df[column_order]
note_df

Unnamed: 0,type,velocity,note,time,start_beat,ctime,new_ctime,tmp_idx
0,note_on,64,60,0.0,0,0,0.0,1
1,note_off,0,60,480.0,1,480,0.0,2
2,note_on,64,61,0.0,1,480,480.0,3
3,note_off,0,61,480.0,2,960,480.0,4
4,note_on,64,62,0.0,2,960,960.0,5
5,note_off,0,62,480.0,3,1440,960.0,6
6,note_on,64,63,0.0,3,1440,1440.0,7
7,note_off,0,63,480.0,4,1920,1440.0,8
