In [None]:
import os
import pickle
import sparse
import fluidsynth
import pypianoroll
import pretty_midi

import numpy as np
import IPython.display

from time import time
from glob import glob
from utils.common_utils import *

### Preprocessing

In [None]:
# Root folder of the MIDI dataset. The preprocessing loop globs every entry
# directly under this path and then every file inside each of those entries,
# so the trailing slash is required (the path is concatenated with '*').
data_path = '../../data/lakh/'

In [None]:
# Build `loop_list`: fixed-length drum+bass piano-roll windows extracted from
# every MIDI file found two levels below `data_path`.
#
# For each file:
#   1. parse it with pretty_midi and keep it only if it passes the 4/4 check;
#   2. convert it to a pypianoroll Multitrack and keep the tracks chosen by
#      `get_inst` (files for which it returns None contribute no loops);
#   3. resample to 4 steps per beat, pad tracks to equal length and binarize;
#   4. concatenate the standardized drum roll and the pitch-clipped bass roll
#      along the feature axis;
#   5. slide a window of `loop_len` steps with a stride of one bar and keep
#      the windows whose drum and bass parts are both dense enough.
count = 0        # files that parsed, passed the 4/4 check and raised no error
error_count = 0  # files skipped because processing raised an exception
loop_list = []
start_time = time()

num_bar_unit = 16                   # time steps per bar (16th-note grid in 4/4)
num_bars = 8                        # bars per extracted loop
loop_len = num_bar_unit * num_bars  # window length in time steps
num_drum = 9                        # leading columns holding the drum part
min_notes = 32                      # minimum sum required in each of the two parts

for folder_path in glob(data_path+'*'):
    for file_path in glob(folder_path+'/*'):
        # basename handles the platform's path separator, unlike split('/').
        file_name = os.path.basename(file_path)
        try:
            pm = pretty_midi.PrettyMIDI(file_path)

            # time signature 4/4 check
            if not check_time_sign(pm, num=4, denom=4):
                continue

            multitrack = pypianoroll.from_pretty_midi(pm, algorithm='strict')

            # inst check
            new_tracks = get_inst(multitrack.tracks)

            if new_tracks is not None:
                multitrack.tracks = new_tracks
                multitrack.set_resolution(4, 'floor') # 16th note unit
                multitrack.pad_to_same()
                multitrack.binarize()

                # Reset per file so a track missing from this file can never be
                # silently replaced by the array left over from a previous file.
                drum = bass = None
                for inst in multitrack:
                    if inst.is_drum:
                        drum = standarize_drum(inst.pianoroll) # 9 components
                    else:
                        bass = inst.pianoroll[:, 24:72] # clip pitch (C1 ~ B4)
                        bass = remove_dup_step(bass)

                if drum is None or bass is None:
                    # Incomplete instrumentation: skip the file without counting it.
                    continue

                np_pianoroll = np.concatenate([drum, bass], axis=1)

                # get loop from pianoroll: only start positions that leave room
                # for a full window are visited, so no truncated tail windows
                # are sliced or summed.
                last_start = np_pianoroll.shape[0] - loop_len
                for j in range(0, last_start + 1, num_bar_unit):
                    loop = np_pianoroll[j:j+loop_len] # 8 bars

                    drum_ok = np.sum(loop[:, :num_drum]) >= min_notes
                    bass_ok = np.sum(loop[:, num_drum:]) >= min_notes
                    if drum_ok and bass_ok:
                        loop_list.append(sparse.COO(loop))
        except Exception:
            # Best-effort: a broken or unsupported file must not abort the run.
            # Catching Exception (not a bare except) keeps KeyboardInterrupt and
            # SystemExit working, so the cell can still be interrupted.
            error_count += 1
            continue

        count += 1
        # Report once per 1000 counted files; the timer restarts after each
        # report, so the printed duration covers the latest batch only.
        if count % 1000 == 0:
            print('Processing %s (%0.3f sec)' % (file_name, time()-start_time))
            start_time = time()

print('Done: %d files processed, %d files skipped on error, %d loops extracted'
      % (count, error_count, len(loop_list)))

In [None]:
# play demo: render only the first extracted loop (nothing happens when
# loop_list is empty) and embed it as an audio player in the notebook output.
if loop_list:
    loop = loop_list[0]
    pm = play_pianoroll(loop, fs=8)
    demo_audio = pm.fluidsynth(fs=16000)  # synthesized at 16 kHz
    IPython.display.display(IPython.display.Audio(demo_audio, rate=16000))

In [None]:
# save pickle: serialize the list of extracted loops to disk.
save_path = './data/midi_full_strict_pianoroll.pkl'

# Create the target directory if needed, so the write cannot fail with
# FileNotFoundError after the expensive preprocessing has already run.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

with open(save_path, 'wb') as f:
    pickle.dump(loop_list, f, protocol=pickle.HIGHEST_PROTOCOL)

# Report success only after the dump has completed and the file is closed.
print('File saved!')