# Slicing the tracks 
In this notebook we split the tracks into small pieces and save them in a format that can be loaded as data for the neural network.

In [1]:
import pretty_midi
import numpy as np
import collections
import os
import pypianoroll as ppr
from PIL import Image

In [2]:
# Get all files
files = list()
dirName = os.path.join("..", "data_mapped") # path to mapped data
for (dirpath, dirnames, filenames) in os.walk(dirName):
    files += [os.path.join(dirpath, file) for file in filenames]
    
print("Number of files found: ", len(files))

# Save song names as txt
# The names go into their own list. `files` has to keep the full paths,
# because the processing loop later in the notebook hands each entry of
# `files` to the parser; writing the names back into `files` would break it.
names_out = []
for idx, fn in enumerate(files, start=1):
    sn = fn.split(os.sep)[-1]  # file name without its directory
    # Drops the last three characters of the file name (presumably a
    # three-letter extension such as "mid"); the dot in front of them is kept.
    # NOTE(review): confirm whether the trailing dot is wanted.
    sn = sn[0:(len(sn)-3)]
    # Prefix with the 1-based, zero-padded position in `files`.
    names_out.append(str(idx).zfill(4) + '_' + sn)
    
np.savetxt('song_names.txt', names_out, delimiter=" ", fmt="%s")

Number of files found:  5042


In [4]:
# Check which tracks exist and return them in a fixed order
def get_tracks(pr, active_length, max_length_diff=None):
    """Stack the drum, bass, piano and lead tracks of `pr` into one array.

    Every track in `pr.tracks` is assigned to one of four slots: a track with
    `is_drum` set goes to the drum slot; a non-drum track with program 34, 0
    or 56 goes to the bass, piano or lead slot respectively. Tracks with any
    other program are ignored. If several tracks map to the same slot, the
    last one wins.

    Parameters
    ----------
    pr : object
        Anything with a `tracks` iterable whose items expose `is_drum`,
        `program` and a 2-D `pianoroll` array indexed as [time step, pitch].
    active_length : int
        Number of rows (time steps) of the returned array.
    max_length_diff : int or None, optional
        If given, a track is skipped when the difference between its number
        of time steps and `active_length` is `max_length_diff` or more.
        Earlier versions of this function had such a check with a tolerance
        of 5, but it compared `active_length` with itself and therefore never
        rejected anything; the default `None` keeps that effective behaviour.

    Returns
    -------
    numpy.ndarray
        Array of dtype uint8 and shape `(active_length, 8 + 3*64)`.
        Columns 0-7 hold the eight selected drum pitches, columns 8-71 the
        bass (pitches 0-63), columns 72-135 the piano (pitches 16-79) and
        columns 136-199 the lead (pitches 48-111). A slot with no matching
        track stays zero. Tracks longer than `active_length` are truncated,
        shorter ones are zero-padded at the end.
    """
    drum_pitches = [35, 38, 42, 43, 44, 47, 49, 51]
    n_drums = len(drum_pitches)
    n_pitches = 64

    # Slot order: drum, bass, piano, lead. None marks "no such track".
    slots = [None, None, None, None]
    for trk in pr.tracks:
        if trk.is_drum:
            slots[0] = trk.pianoroll[:, drum_pitches] # drum
        elif trk.program == 34:
            slots[1] = trk.pianoroll[:, 0:n_pitches] # bass
        elif trk.program == 0:
            slots[2] = trk.pianoroll[:, 16:(16 + n_pitches)] # piano
        elif trk.program == 56:
            slots[3] = trk.pianoroll[:, 48:(48 + n_pitches)] # lead

    # Column boundaries of the four slots inside the combined array.
    bounds = [0,
              n_drums,
              n_drums + n_pitches,
              n_drums + 2 * n_pitches,
              n_drums + 3 * n_pitches]
    song = np.zeros((active_length, bounds[-1]))

    for slot, roll in enumerate(slots):
        if roll is None:
            continue
        if (max_length_diff is not None
                and abs(active_length - roll.shape[0]) >= max_length_diff):
            continue
        n_steps = min(song.shape[0], roll.shape[0])
        # Copy from time step 0 on; starting the slice at 1 would silently
        # drop the first time step of every track.
        song[:n_steps, bounds[slot]:bounds[slot + 1]] = roll[:n_steps]
    return song.astype('uint8')
                
# Function for splitting
def split_and_save_tracks(fn, p1, p2, blen=128, min_rate=0.7):
    """Cut one song into fixed-length segments and save them as PNG images.

    The file `fn` is parsed with pypianoroll at a beat resolution of 4 and
    binarized, the tracks are combined with `get_tracks`, and the result is
    cut into consecutive segments of `blen` time steps. A segment is written
    to disk only if more than `min_rate` of its time steps contain at least
    one active note.

    Segment 0 is never written, and the loop stops at segment
    `alen // blen - 2`, so the last complete segment and any incomplete one
    after it are skipped as well.

    Parameters
    ----------
    fn : str
        Path of the file to parse.
    p1 : str
        Output folder for this song; created if it does not exist yet.
    p2 : str
        File name prefix. Images are saved as
        `<p1>/<p2>_<4-digit segment index>.png`.
    blen : int, optional
        Segment length in time steps (default 128).
    min_rate : float, optional
        Minimum fraction of non-silent time steps a segment must exceed to
        be saved (default 0.7).

    Returns
    -------
    None
    """
    # makedirs + exist_ok: also creates missing parent folders and does not
    # fail when the notebook is re-run and the folder is already there.
    os.makedirs(p1, exist_ok=True)
    pr = ppr.parse(fn, beat_resolution = 4)
    pr.binarize()
    alen = pr.get_active_length()
    SONG = get_tracks(pr, alen)
    # Built-in int: the `np.int` alias was removed from NumPy.
    nbar = int(alen // blen) - 2
    for ibar in range(1, nbar + 1):
        out = os.path.join(p1,p2) + '_' + str(ibar).zfill(4) + '.png'
        BAR = SONG[ibar*blen:(ibar+1)*blen, :]
        # Fraction of time steps in this segment with at least one note on.
        rate = np.count_nonzero(BAR.any(axis = 1))/blen
        if(rate > min_rate):
            # Scale 0/1 to 0/255 so active notes show up as white pixels.
            img = Image.fromarray(255*BAR)
            img.save(out)

Process all files

In [None]:
N = len(files)  # lower this to process only the first N files

# Each song gets its own subfolder below 'img'; create the root folder here
# so it does not have to be prepared by hand before running the notebook.
os.makedirs('img', exist_ok=True)

for idx, fn in enumerate(files[0:N], start=1):
    num = str(idx).zfill(4)  # 1-based, zero-padded song index
    p1 = os.path.join('img', 'img' + num) # output folder of this song
    p2 = 'img' + num + "_"                # file name prefix of its images
    split_and_save_tracks(fn, p1, p2)
    if(idx%10==0):
        print(idx)  # progress indicator, every tenth song