In [1]:
import os
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mido import MidiFile
from mido import MidiTrack
from mido import bpm2tempo
from mido import tempo2bpm 
from mido import Message
from mido import MetaMessage

# Display every column/row when a frame is shown, and silence pandas'
# chained-assignment warning for the whole notebook.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('mode.chained_assignment', None)
#pd.options.mode.chained_assignment = None

# min_pitch: 
# max_pitch: 
# Homework: Get results for 1k files 

# Project root. All data paths below (and the MIDI output path used later) are
# relative to it. It can be overridden with the CSC2506_PROJECT_DIR environment
# variable so the notebook also runs on machines other than the author's.
PROJECT_DIR = os.environ.get(
    'CSC2506_PROJECT_DIR',
    "/Users/sorensabet/Desktop/Master's Coursework/CSC2506_Project",
)
os.chdir(PROJECT_DIR)

# Read in hdf5 versions of data
song_df = pd.read_hdf('Raw Data/song_df.h5', key='data')
track_df = pd.read_hdf('Raw Data/track_df.h5', key='data')
msg_df = pd.read_hdf('Raw Data/msgs_df.h5', key='data').astype({'type': 'category'})

# Keep only tracks whose message-type summary mentions 'note', with one row per
# (song, track). Built as a single chain so the frame read from disk is never
# mutated in place and re-running the cell is safe.
track_df = (
    track_df
    .drop(columns=['track_has_pitchwheel', 'track_smpte'])
    .loc[lambda df: df['track_msg_types'].astype(str).str.contains('note')]
    .drop_duplicates(subset=['song_idx', 'track_num'])
)

# Number of measures to go into a single output
num_measures = 4

In [2]:
# For now, restrict every table to a single song (song_idx == 1).
# Messages are additionally limited to track numbers still present in track_df.
song_df = song_df[song_df['song_idx'].eq(1)]
track_df = track_df[track_df['song_idx'].eq(1)]
msg_df = msg_df[msg_df['song_idx'].eq(1) & msg_df['track_num'].isin(track_df['track_num'])]

In [3]:
# Cumulative time has to be accumulated for each track separately: 'time' is a
# per-message delta, so the running sum must restart with every track.
num_tracks = 8  # NOTE(review): not read by any visible cell; kept in case a later cell uses it.

ticks_per_beat = song_df['ticks_per_beat'].iloc[0]

# A stable sort groups the rows by track while keeping each track's messages in
# their existing order (same row order the per-track loop + concat produced).
# groupby().cumsum() then yields the per-track running tick count without
# assigning into slices, without dropping tracks whose number falls outside
# range(n_tracks), and without failing when there are no tracks at all.
msg_df = msg_df.sort_values('track_num', kind='mergesort')
msg_df['ctime'] = msg_df.groupby('track_num')['time'].cumsum()
msg_df['cbeats'] = msg_df['ctime'] / ticks_per_beat

# Everything is treated as 4/4, so the bar (measure) number is cbeats / 4, truncated.
# The bar number is later divided by the number of measures per output file to
# obtain the section each message belongs to.
msg_df['bar'] = (msg_df['cbeats']/4).astype(int)

In [4]:
def split_tracks(tdf, mdf, n_meas=4, n_copies=1, n_transpose=0, merge_tracks=False, song_tpb=480,
                 song_idx=1, out_dir='Splitting MIDI Files'):
    """
    Split a song's note messages into sections of n_meas bars and write one MIDI file per (track, section).

    tdf:           Track level dataframe. Only its 'track_num' column is read; it decides which tracks are exported.
    mdf:           Message level dataframe. Must provide the columns 'type', 'track_num', 'velocity', 'note',
                   'cbeats' (absolute position in beats) and 'bar' (absolute bar number). It is never modified.
    n_meas:        The number of measures (bars) that go into one output file.
    n_copies:      The number of extra copies of the track placed in each file (the track is written
                   n_copies + 1 times). With n_copies >= 1 the file is saved as MIDI type 1, otherwise type 0.
    n_transpose:   The number of octaves to add above and below the middle octave. 0 (or less) disables
                   transposition; values above 5 are capped at 5. When enabled, every note is first folded
                   into the octave starting at middle C (note 60) and then copied to each octave.
    merge_tracks:  If True, all tracks are interleaved by time into a single track numbered 1.
    song_tpb:      The ticks per beat defined in the original song. Used both for the output file header and
                   to convert beat positions back into tick delta times.
    song_idx:      Song identifier used in the output file name.
    out_dir:       Directory the files are written to (created if missing).

    Returns None. Files are written to '<out_dir>/si<song_idx>_tr<track>_section<section>.mid'.
    Raises ValueError if n_meas is smaller than 1.
    """
    if n_meas < 1:
        raise ValueError('n_meas must be at least 1, got %r' % (n_meas,))

    beats_per_bar = 4        # output is standardised to 4/4 (see the time_signature message below)
    max_octave_shift = 5     # keeps transposed copies close to the MIDI note range 0..127
    song_tpb = int(song_tpb)

    tracks = list(tdf['track_num'].unique())

    # reset_index returns a new frame, so nothing below can leak back into the caller's dataframe.
    mdf = mdf.reset_index(drop=True)

    # Testing Transposing up and down from middle octave
    if n_transpose > 0:
        print('Transposing!')
        n_transpose = min(n_transpose, max_octave_shift)

        # Middle octave first, then the octaves below, then the octaves above.
        octave_shifts = [0] + list(range(-n_transpose, 0)) + list(range(1, n_transpose + 1))
        n_rows = len(mdf)

        # 60 corresponds to middle C; this preserves the pitch class but might alter harmonics.
        mdf['note'] = mdf['note'] % 12 + 60

        # Repeat every message once per octave (copies stay next to their source message),
        # then shift each copy by its octave offset.
        mdf = mdf.iloc[np.repeat(np.arange(n_rows), len(octave_shifts))].reset_index(drop=True)
        mdf['note'] = mdf['note'] + 12 * np.tile(octave_shifts, n_rows)

        # Drop copies that fall outside the MIDI note range (this also drops rows without a note).
        mdf = mdf.loc[mdf['note'].between(0, 127)]

    if merge_tracks:
        print('Merging!')
        # Interleave all tracks by absolute position; ties keep their existing relative order.
        mdf = mdf.sort_values(by=['cbeats', 'track_num'], kind='mergesort').reset_index(drop=True)
        mdf['track_num'] = 1
        tracks = [1]

    mdf = mdf.loc[mdf['type'].isin(['note_on', 'note_off'])].copy()
    mdf['outfile'] = (mdf['bar'] / n_meas).astype(int)

    # Absolute position of every message in ticks. Delta times are rebuilt from this instead of
    # reusing the stored 'time' column: once non-note messages are dropped and the track is cut
    # into sections, the stored deltas no longer describe the gap to the previous *kept* message
    # (or to the start of the section), which would shift the notes in time.
    mdf['abs_tick'] = (mdf['cbeats'] * song_tpb).round().astype(int)
    ticks_per_section = n_meas * beats_per_bar * song_tpb

    midi_type = 1 if n_copies >= 1 else 0
    sections = mdf['outfile'].unique()
    if len(sections) > 0 and len(tracks) > 0:
        os.makedirs(out_dir, exist_ok=True)

    for t in tracks: 
        for f in sections:
            print('Track: %d, Section: %d' % (t,f))
            # Get Specific messages for the track
            tmdf = mdf.loc[(mdf['track_num'] == t) & (mdf['outfile'] == f)]

            # Delta of each message to its predecessor; the first one is relative to the section start.
            deltas = np.diff(tmdf['abs_tick'].to_numpy(), prepend=f * ticks_per_section)
            deltas = np.maximum(deltas, 0)   # guard: MIDI delta times cannot be negative

            # Create the track specific MIDI file 
            mid = MidiFile(ticks_per_beat=song_tpb, type=midi_type)
            midiTrack = MidiTrack()

            # Tempo MIDI Message (standardize to 120bpm = 500000 microseconds per beat)
            midiTrack.append(MetaMessage('set_tempo', time=0, tempo=500000))

            # Time Signature MIDI Message
            midiTrack.append(MetaMessage('time_signature', time=0, numerator=beats_per_bar, denominator=4, 
                                         clocks_per_click=24, notated_32nd_notes_per_beat=8))

            # Key Signature MIDI Message (Shouldn't matter since MIDI note number determines the correct note)
            midiTrack.append(MetaMessage('key_signature', time=0, key='C'))

            # Individual Messages corresponding to notes (columns accessed by name, not position)
            midiTrack.extend(
                Message(row.type, time=int(delta), note=int(row.note), velocity=int(row.velocity), channel=0)
                for row, delta in zip(tmdf.itertuples(index=False), deltas)
            )

            # End of Track MIDI Message
            midiTrack.append(MetaMessage('end_of_track', time=0))

            # If we want to duplicate the track 
            mid.tracks.extend([midiTrack] * (n_copies + 1))
            filename = os.path.join(out_dir, 'si%s_tr%s_section%s.mid' % (song_idx, t, f))

            mid.save(filename)

# Export the selected song: one file per (track, 4-measure section), with no
# extra track copies, no transposition and no merging of tracks.
split_tracks(
    track_df,
    msg_df,
    n_meas=4,
    n_copies=0,
    n_transpose=0,
    merge_tracks=False,
    song_tpb=song_df['ticks_per_beat'].iloc[0],
)

Merging!
Track: 1, Section: 0
Track: 1, Section: 1
Track: 1, Section: 2
Track: 1, Section: 3
Track: 1, Section: 4
Track: 1, Section: 5
Track: 1, Section: 6
Track: 1, Section: 7


In [5]:
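# Scratch prototype of the logic that combines all tracks into one (the same steps now live inside split_tracks, enabled with merge_tracks=True). Kept commented out for reference.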
# msg_df.sort_values(by=['cbeats', 'track_num'], inplace=True)
# msg_df.reset_index(drop=True, inplace=True)
# msg_df['tmp_idx'] = msg_df.index

# msg_df_2 = msg_df[['cbeats']].rename(columns={'cbeats': 'prev_row_cbeats'})
# msg_df_2['tmp_idx'] = msg_df_2.index + 1

# msg_df = msg_df.merge(msg_df_2, on=['tmp_idx'], how='left')
# msg_df.fillna({'prev_row_cbeats': 0}, inplace=True)
# msg_df['beat_delta'] = msg_df['cbeats'] - msg_df['prev_row_cbeats']
# msg_df['time'] = (msg_df['beat_delta']*song_df['ticks_per_beat'].iloc[0])
# msg_df = msg_df.round({'time': 0})
# msg_df['time'] = msg_df['time'].astype(int)
# msg_df['track_num'] = 1

# msg_df