# Listening to the data samples
In this notebook we convert some of the training and test data samples to MIDI files that can be listened to.

In [1]:
import numpy as np
import os
import pypianoroll as ppr
from PIL import Image
from matplotlib import pyplot as plt
from matplotlib import gridspec as grid

Define functions for loading, visualizing and converting.

In [4]:
def file_to_numpy(fn):
    """Load an image file as a 2-D float array scaled to the range [0, 1].

    The image is converted to 8-bit grayscale (PIL mode ``'L'``) and every
    pixel value is divided by 255.

    Parameters
    ----------
    fn : str or path-like
        Path of the image file to read.

    Returns
    -------
    numpy.ndarray
        Float array of shape (height, width) with values between 0 and 1.
    """
    # The context manager guarantees the underlying file handle is released
    # even if decoding or the grayscale conversion fails part-way.
    with Image.open(fn) as img:
        pixels = np.array(img.convert('L'))
    return pixels / 255

def binarize(track, threshold):
    """Map an array onto the two integer levels 0 and 100.

    Entries strictly greater than ``threshold`` become 100; every other
    entry becomes 0.

    Parameters
    ----------
    track : numpy.ndarray
        Array of values to compare against the threshold.
    threshold : float
        Cut-off value; the comparison is strict (``>``).

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``track``.
    """
    is_on = track > threshold
    return is_on.astype('int') * 100

def numpy_to_tracks(a, THRESHOLDS):
    """Split a combined image array into four instrument tracks.

    Column layout read from ``a``:

    * columns 0-7     -> drums, scattered to roll columns
      35, 38, 42, 43, 46, 47, 49, 51
    * columns 8-71    -> bass,  placed at roll columns 0-63
    * columns 72-135  -> piano, placed at roll columns 16-79
    * columns 136-199 -> lead,  placed at roll columns 48-111

    Each slice is passed through ``binarize`` before being written into a
    zero-initialised 128 x 128 roll.

    Parameters
    ----------
    a : numpy.ndarray
        2-D array; the assignments below require 128 rows and at least
        200 columns.
        NOTE(review): axis 0 is presumably time steps and axis 1 pitch,
        following the pypianoroll convention -- confirm.
    THRESHOLDS : sequence of float
        Binarization thresholds indexed as [piano, bass, lead, drums].

    Returns
    -------
    pypianoroll.Multitrack
        Tracks in the order lead, piano, bass, drums.
    """
    roll_shape = (128, 128)
    n_drum_cols = 8     # leading columns of `a` that hold the drum hits
    span = 64           # number of columns per melodic instrument
    drum_columns = (35, 38, 42, 43, 46, 47, 49, 51)

    # Start offsets of the melodic instrument blocks inside `a`.
    bass_src = n_drum_cols
    piano_src = n_drum_cols + span
    lead_src = n_drum_cols + 2 * span

    piano_roll = np.zeros(roll_shape)
    bass_roll = np.zeros(roll_shape)
    lead_roll = np.zeros(roll_shape)
    drum_roll = np.zeros(roll_shape)

    piano_roll[:, 16:16 + span] = binarize(a[:, piano_src:piano_src + span], THRESHOLDS[0])
    bass_roll[:, 0:span] = binarize(a[:, bass_src:bass_src + span], THRESHOLDS[1])
    lead_roll[:, 48:48 + span] = binarize(a[:, lead_src:lead_src + span], THRESHOLDS[2])
    drum_roll[:, drum_columns] = binarize(a[:, 0:n_drum_cols], THRESHOLDS[3])

    piano_track = ppr.Track(pianoroll=piano_roll, program=0, is_drum=False, name='piano')
    bass_track = ppr.Track(pianoroll=bass_roll, program=34, is_drum=False, name='bass')
    lead_track = ppr.Track(pianoroll=lead_roll, program=56, is_drum=False, name='lead')
    drum_track = ppr.Track(pianoroll=drum_roll, program=0, is_drum=True, name='drums')

    # The lead track is deliberately listed first, ahead of piano, bass, drums.
    return ppr.Multitrack(
        tracks=[lead_track, piano_track, bass_track, drum_track],
        tempo=120.0,
        downbeat=[0, 32, 64, 96],
        beat_resolution=4,
    )

def get_tracks(fn, h):
    """Read an image file and convert it straight into a multitrack.

    Parameters
    ----------
    fn : str or path-like
        Image file handed to ``file_to_numpy``.
    h : sequence of float
        Thresholds forwarded unchanged to ``numpy_to_tracks``.

    Returns
    -------
    The multitrack object produced by ``numpy_to_tracks``.
    """
    return numpy_to_tracks(file_to_numpy(fn), h)

def get_pianoroll(track):
    """Return the 64-column window of a track's piano roll.

    The window start depends on ``track.name``: 16 for ``'piano'``, 0 for
    ``'bass'`` and 48 for any other name.

    Parameters
    ----------
    track : object
        Anything exposing ``name`` and a 2-D indexable ``pianoroll``.

    Returns
    -------
    The slice ``track.pianoroll[:, start:start + 64]``.
    """
    window_start = {'piano': 16, 'bass': 0}
    first = window_start.get(track.name, 48)  # every other name uses 48
    return track.pianoroll[:, first:first + 64]
    
def get_drumroll(track):
    """Pick the eight drum columns out of a track's piano roll.

    Parameters
    ----------
    track : object
        Anything exposing a 2-D ``pianoroll`` array.

    Returns
    -------
    Array with all rows of ``track.pianoroll`` and, in this order, columns
    35, 38, 42, 43, 46, 47, 49 and 51.
    """
    drum_columns = (35, 38, 42, 43, 46, 47, 49, 51)
    return track.pianoroll[:, drum_columns]

def generate_midi(fn, outname, thresholds = 0.5*np.ones(4)):
    """Convert one image file to a multitrack and write it to ``outname``.

    Parameters
    ----------
    fn : str or path-like
        Image file to convert (passed to ``get_tracks``).
    outname : str
        Destination handed to the multitrack's ``write`` method.
    thresholds : sequence of float, optional
        Four binarization thresholds; the default is 0.5 for each of them.

    Prints a confirmation line containing ``outname`` once writing returns.
    """
    get_tracks(fn, thresholds).write(outname)
    print('You can now listen to', outname)
    
def listen(idx_song, idx_sample=1, is_test = False, thresholds=None):
    """Convert one data sample to MIDI and print the song it belongs to.

    The input image is ``./<split>/img<SSSS>/img<SSSS>__<NNNN>.png`` where
    ``<split>`` is ``train_data`` or ``test_data`` and ``SSSS`` / ``NNNN`` are
    the song and sample indices zero-padded to four digits. The output is
    written to ``data/<split>_<SSSS>_<NNNN>`` via ``generate_midi``.

    Parameters
    ----------
    idx_song : int
        Song index; also used as the 1-based line number in
        ``song_names.txt``.
    idx_sample : int, optional
        Sample index within the song (default 1).
    is_test : bool, optional
        Read from ``test_data`` instead of ``train_data``.
    thresholds : sequence of float, optional
        Binarization thresholds passed to ``generate_midi``. When omitted,
        the module-level ``THRESH`` is used, as in earlier versions of this
        function.

    Raises
    ------
    IndexError
        If ``song_names.txt`` has fewer than ``idx_song`` lines.
    NameError
        If ``thresholds`` is omitted and ``THRESH`` has not been defined.
    """
    split = 'test_data' if is_test else 'train_data'
    song_id = str(idx_song).zfill(4)
    sample_id = str(idx_sample).zfill(4)
    data_path = os.path.join('.', split, 'img' + song_id,
                             'img' + song_id + '__' + sample_id + '.png')

    # `with` closes the file even if reading fails.
    with open('song_names.txt', "r") as f:
        lines = f.readlines()

    # The first five characters of the line are dropped (presumably an index
    # prefix -- confirm against song_names.txt); the trailing newline is kept.
    sn = lines[idx_song-1][5:]
    print("The song name is", sn)

    out_path = os.path.join('data', split + '_' + song_id + '_' + sample_id)
    if thresholds is None:
        # Backward-compatible fallback to the notebook-level global.
        thresholds = THRESH
    generate_midi(data_path, out_path, thresholds)

Below we convert and visualize an example sample from the training set (the folder name indices are off by one, so set the song index according to `song_names.txt`).

In [8]:
# Binarization thresholds, indexed by numpy_to_tracks as
# [piano, bass, lead, drums]. listen() reads this module-level name, so it
# must be defined before listen() is called.
THRESH = [0.3, 0.3, 0.6, 0.3]
# Song index: used for the image folder name and as the 1-based line number
# looked up in song_names.txt.
idx_song = 3445
# Sample index within the song (zero-padded to four digits in file names).
idx_sample = 3
listen(idx_song, idx_sample)

The song name is rebel_yell.

You can now listen to data\train_data_3445_0003
