In [4]:
import random
import pretty_midi
import IPython
import numpy as np
from tqdm import tnrange, tqdm_notebook, tqdm
from random import shuffle, seed
import numpy as np
from numpy.random import choice
import matplotlib.pyplot as plt
import glob

In [5]:
def piano_roll_to_pretty_midi(piano_roll, fs=100, program=0):
    '''Convert a Piano Roll array into a PrettyMidi object
     with a single instrument.
    Parameters
    ----------
    piano_roll : np.ndarray, shape=(128,frames), dtype=int
        Piano roll of one instrument
    fs : int
        Sampling frequency of the columns, i.e. each column is spaced apart
        by ``1./fs`` seconds.
    program : int
        The program number of the instrument.
    Returns
    -------
    midi_object : pretty_midi.PrettyMIDI
        A pretty_midi.PrettyMIDI class instance describing
        the piano roll.
    '''
    notes, frames = piano_roll.shape
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(program=program)

    # pad 1 column of zeros so we can acknowledge inital and ending events
    piano_roll = np.pad(piano_roll, [(0, 0), (1, 1)], 'constant')

    # use changes in velocities to find note on / note off events
    velocity_changes = np.nonzero(np.diff(piano_roll).T)

    # keep track on velocities and note on times
    prev_velocities = np.zeros(notes, dtype=int)
    note_on_time = np.zeros(notes)

    for time, note in zip(*velocity_changes):
        # use time + 1 because of padding above
        velocity = piano_roll[note, time + 1]
        time = time / fs
        if velocity > 0:
            if prev_velocities[note] == 0:
                note_on_time[note] = time
                prev_velocities[note] = velocity
        else:
            pm_note = pretty_midi.Note(
                velocity=prev_velocities[note],
                pitch=note,
                start=note_on_time[note],
                end=time)
            instrument.notes.append(pm_note)
            prev_velocities[note] = 0
    pm.instruments.append(instrument)
    return pm

In [6]:
def get_list_midi(folder = 'maestro-v1.0.0/**/*.midi', seed_int = 666):
    """
  Get the list of all midi file in the folders
  
  Parameters
  ==========
  folder : str
    The midi folder.
  seed_int : int
    the random seed.
  
  Returns
  =======
  The midi files
  
  """
    
    list_all_midi = glob.glob(folder)
    seed(seed_int)
    shuffle(list_all_midi)
    return list_all_midi

list_all_midi = get_list_midi()

In [7]:
class NoteTokenizer:
    
    def __init__(self):
        self.notes_to_index = {}
        self.index_to_notes = {}
        self.num_of_word = 0
        self.unique_word = 0
        self.notes_freq = {}
        
    def transform(self,list_array):
        """ 
      Transform a list of note in string into index.
      
      Parameters
      ==========
      list_array : list
        list of note in string format
      
      Returns
      =======
      The transformed list in numpy array.
      
      """
        transformed_list = []
        for instance in list_array:
            transformed_list.append([self.notes_to_index[note] for note in instance])
        return np.array(transformed_list, dtype=np.int32)
 
    def partial_fit(self, notes):
        """ Partial fit on the dictionary of the tokenizer
        
        Parameters
        ==========
        notes : list of notes
        
        """
        for note in notes:
            note_str = ','.join(str(a) for a in note)
            if note_str in self.notes_freq:
                self.notes_freq[note_str] += 1
                self.num_of_word += 1
            else:
                self.notes_freq[note_str] = 1
                self.unique_word += 1
                self.num_of_word += 1
                self.notes_to_index[note_str], self.index_to_notes[self.unique_word] = self.unique_word, note_str
            
    def add_new_note(self, note):
        """ Add a new note into the dictionary

        Parameters
        ==========
        note : str
          a new note who is not in dictionary.  

        """
        assert note not in self.notes_to_index
        self.unique_word += 1
        self.notes_to_index[note], self.index_to_notes[self.unique_word] = self.unique_word, note

        
        
def generate_batch_song(list_all_midi, batch_music=16, start_index=0, fs=30, seq_len=50, use_tqdm=False):
    """
    Generate Batch music that will be used to be input and output of the neural network
    
    Parameters
    ==========
    list_all_midi : list
      List of midi files
    batch_music : int
      A number of music in one batch
    start_index : int
      The start index to be batched in list_all_midi
    fs : int
      Sampling frequency of the columns, i.e. each column is spaced apart
        by ``1./fs`` seconds.
    seq_len : int
      The sequence length of the music to be input of neural network
    use_tqdm : bool
      Whether to use tqdm or not in the function
    
    Returns
    =======
    Tuple of input and target neural network
    
    """
    
    assert len(list_all_midi) >= batch_music
    dict_time_notes = generate_dict_time_notes(list_all_midi, batch_music, start_index, fs, use_tqdm=use_tqdm)
    
    list_musics = process_notes_in_song(dict_time_notes, seq_len)
    collected_list_input, collected_list_target = [], []
     
    for music in list_musics:
        list_training, list_target = generate_input_and_target(music, seq_len)
        collected_list_input += list_training
        collected_list_target += list_target
    return collected_list_input, collected_list_target

def generate_dict_time_notes(list_all_midi, batch_song = 16, start_index=0, fs=30, use_tqdm=True):
    """ Generate map (dictionary) of music ( in index ) to piano_roll (in np.array)

    Parameters
    ==========
    list_all_midi : list
        List of midi files
    batch_music : int
      A number of music in one batch
    start_index : int
      The start index to be batched in list_all_midi
    fs : int
      Sampling frequency of the columns, i.e. each column is spaced apart
        by ``1./fs`` seconds.
    use_tqdm : bool
      Whether to use tqdm or not in the function

    Returns
    =======
    dictionary of music to piano_roll (in np.array)

    """
    assert len(list_all_midi) >= batch_song
    
    dict_time_notes = {}
    process_tqdm_midi = tqdm_notebook(range(start_index, min(start_index + batch_song, len(list_all_midi)))) if use_tqdm else range(start_index,  min(start_index + batch_song, len(list_all_midi)))
    for i in process_tqdm_midi:
        midi_file_name = list_all_midi[i]
        if use_tqdm:
            process_tqdm_midi.set_description("Processing {}".format(midi_file_name))
        try: # Handle exception on malformat MIDI files
            midi_pretty_format = pretty_midi.PrettyMIDI(midi_file_name)
            piano_midi = midi_pretty_format.instruments[0] # Get the piano channels
            piano_roll = piano_midi.get_piano_roll(fs=fs)
            dict_time_notes[i] = piano_roll
        except Exception as e:
            print(e)
            print("broken file : {}".format(midi_file_name))
            pass
    return dict_time_notes

def generate_input_and_target(dict_keys_time, seq_len=50):
    """ Generate input and the target of our deep learning for one music.
    
    Parameters
    ==========
    dict_keys_time : dict
      Dictionary of timestep and notes
    seq_len : int
      The length of the sequence
      
    Returns
    =======
    Tuple of list of input and list of target of neural network.
    
       
    """
    # Get the start time and end time
    start_time, end_time = list(dict_keys_time.keys())[0], list(dict_keys_time.keys())[-1]
    list_training, list_target = [], []
    for index_enum, time in enumerate(range(start_time, end_time)):
        list_append_training, list_append_target = [], []
        start_iterate = 0
        flag_target_append = False # flag to append the test list
        if index_enum < seq_len:
            start_iterate = seq_len - index_enum - 1
            for i in range(start_iterate): # add 'e' to the seq list. 
                list_append_training.append('e')
                flag_target_append = True

        for i in range(start_iterate,seq_len):
            index_enum = time - (seq_len - i - 1)
            if index_enum in dict_keys_time:
                list_append_training.append(','.join(str(x) for x in dict_keys_time[index_enum]))      
            else:
                list_append_training.append('e')

        # add time + 1 to the list_append_target
        if time+1 in dict_keys_time:
            list_append_target.append(','.join(str(x) for x in dict_keys_time[time+1]))
        else:
            list_append_target.append('e')
        list_training.append(list_append_training)
        list_target.append(list_append_target)
    return list_training, list_target

def process_notes_in_song(dict_time_notes, seq_len = 50):
    """
    Iterate the dict of piano rolls into dictionary of timesteps and note played
    
    Parameters
    ==========
    dict_time_notes : dict
      dict contains index of music ( in index ) to piano_roll (in np.array)
    seq_len : int
      Length of the sequence
      
    Returns
    =======
    Dict of timesteps and note played
    """
    list_of_dict_keys_time = []
    
    for key in dict_time_notes:
        sample = dict_time_notes[key]
        times = np.unique(np.where(sample > 0)[1])
        index = np.where(sample > 0)
        dict_keys_time = {}

        for time in times:
            index_where = np.where(index[1] == time)
            notes = index[0][index_where]
            dict_keys_time[time] = notes
        list_of_dict_keys_time.append(dict_keys_time)
    return list_of_dict_keys_time




In [13]:
sampled_midi = list_all_midi[0:100]

In [14]:
batch = 1
start_index = 0
note_tokenizer = NoteTokenizer()

for i in tqdm(range(len(sampled_midi))):
    dict_time_notes = generate_dict_time_notes(sampled_midi, batch_song=1, start_index=i, use_tqdm=False, fs=5)
    full_notes = process_notes_in_song(dict_time_notes)
    for note in full_notes:
        note_tokenizer.partial_fit(list(note.values()))

100%|██████████| 100/100 [00:39<00:00,  2.52it/s]


In [10]:
note_tokenizer.__dict__

{'notes_to_index': {'78': 1,
  '49,78': 2,
  '44,49,78': 3,
  '44,49,58,78,80': 4,
  '44,49,54,58,78,80': 5,
  '44,49,54,58,78,80,81': 6,
  '44,49,54,58,61,78,80,81,83': 7,
  '44,49,54,58,61,78,80,81,82,83': 8,
  '49,82': 9,
  '49,61,82': 10,
  '49,54,61,82': 11,
  '49,54,61,78,82': 12,
  '49,78,80': 13,
  '49,61,78,80': 14,
  '49,54,61,78,80': 15,
  '49,54,61,78,80,81': 16,
  '44,49,54,61,78,80,81': 17,
  '44,49,54,61,78,80,81,83': 18,
  '49,78,82,83': 19,
  '49,54,61,78,80,81,83': 20,
  '49,54,61,78,80,81,83,85': 21,
  '49,83,85': 22,
  '45,49,85': 23,
  '45,49,85,88': 24,
  '45,49,61,83,85,88': 25,
  '45,49,55,61,83,85,88': 26,
  '45,49,55,61,81,83,85,88': 27,
  '45,49,55,61,77,81,83,85,88': 28,
  '50,77': 29,
  '50,62,76,77': 30,
  '50,56,62,76,77': 31,
  '53,74': 32,
  '53,62,70,74': 33,
  '53,56,62,70,74': 34,
  '52,53,56,61,62,70,74': 35,
  '52,61,70': 36,
  '52,61,63,70': 37,
  '52,56,61,63,70': 38,
  '52,56,61,63,64,68,70': 39,
  '44,52,64,68': 40,
  '44,52,64,66,68': 41,
  '4

In [15]:
sample_song = sampled_midi[0]

In [19]:
sample_song

'maestro-v1.0.0/2015/MIDI-Unprocessed_R1_D2-13-20_mid--AUDIO-from_mp3_19_R1_2015_wav--4.midi'

In [20]:
# input a list of midi file and get the output of 128 notes played across time
dict_time_notes = generate_dict_time_notes([sample_song], batch_song=1, start_index=0, use_tqdm=False, fs=5)
dict_time_notes

{0: array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])}

In [21]:
# 128 keys of a piano
len(dict_time_notes[0])

128

In [22]:
# total number of frames (sample frequency) within the song
len(dict_time_notes[0][0])

1093

In [23]:
# generate, across "time" which notes are getting played together
full_notes = process_notes_in_song(dict_time_notes)
full_notes

[{4: array([78]),
  5: array([49, 78]),
  6: array([49, 78]),
  7: array([49, 78]),
  8: array([44, 49, 78]),
  9: array([44, 49, 78]),
  10: array([44, 49, 78]),
  11: array([44, 49, 58, 78, 80]),
  12: array([44, 49, 58, 78, 80]),
  13: array([44, 49, 54, 58, 78, 80]),
  14: array([44, 49, 54, 58, 78, 80]),
  15: array([44, 49, 54, 58, 78, 80, 81]),
  16: array([44, 49, 54, 58, 78, 80, 81]),
  17: array([44, 49, 54, 58, 78, 80, 81]),
  18: array([44, 49, 54, 58, 61, 78, 80, 81, 83]),
  19: array([44, 49, 54, 58, 61, 78, 80, 81, 83]),
  20: array([44, 49, 54, 58, 61, 78, 80, 81, 83]),
  21: array([44, 49, 54, 58, 61, 78, 80, 81, 83]),
  22: array([44, 49, 54, 58, 61, 78, 80, 81, 82, 83]),
  23: array([49, 82]),
  24: array([49, 82]),
  25: array([49, 82]),
  26: array([49, 61, 82]),
  27: array([49, 61, 82]),
  28: array([49, 54, 61, 82]),
  29: array([49, 54, 61, 82]),
  30: array([49, 54, 61, 82]),
  31: array([49, 54, 61, 82]),
  32: array([49, 54, 61, 78, 82]),
  33: array([49, 54

### my though process is, if we can use index to represent a particular set of node that is getting played and treat it as one entity, then using LSTM we can predict the next "entity" to generate music