In [1]:
from music21 import *
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
pd.options.display.max_rows = 999
# Listing current data on our folder.
import os
print(os.listdir("."))
from music21 import converter, corpus, instrument, midi, note, chord, pitch, roman

import pandas as pd
import numpy as np
import string
from string import digits
import re
from sklearn.model_selection import train_test_split
# Keras seq2seq building blocks (adapted from an English-to-French translator example; see the links below)

from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model
from keras.utils import plot_model

import pickle
import random

#https://github.com/devm2024/nmt_keras/blob/master/base.ipynb
#https://medium.com/@dev.elect.iitd/neural-machine-translation-using-word-level-seq2seq-model-47538cba8cd7

import h5py
from keras.models import model_from_json

['including_rests_clean-Copy1.ipynb', '.DS_Store', 'get_harmony.ipynb', 'encoder_model.json', 'get_music.ipynb', 'best_model_farm.h5', 'encoder_model.h5', 'target_token_index.pkl', 'all_4.py', 'dict.pkl', 'processed_songs.pkl', '.ipynb_checkpoints', 'decoder_model.h5', 'run_model.ipynb', 'reverse_target_char_index.pkl', 'get_data-Copy1.ipynb', 'decoder_model.json']


Using TensorFlow backend.


### Loading model

In [2]:
# Rebuild the encoder from its JSON architecture, then load its trained weights.
# `with` guarantees the file handle is closed even if reading fails.
with open("encoder_model.json", 'r') as json_file:
    loaded_model_json = json_file.read()
encoder_model = model_from_json(loaded_model_json)
encoder_model.load_weights("encoder_model.h5")
print("Loaded model from disk")

# Same procedure for the decoder.
with open("decoder_model.json", 'r') as json_file:
    loaded_model_json = json_file.read()
decoder_model = model_from_json(loaded_model_json)
decoder_model.load_weights("decoder_model.h5")
print("Loaded model from disk")

# NOTE(review): pickle.load can execute arbitrary code; only load pickles
# that were produced by a trusted run of the training notebook.

# token -> index lookup (decode_sequence reads the 'START_' entry)
with open('target_token_index.pkl', 'rb') as picklefile:
    target_token_index = pickle.load(picklefile)

# index -> token lookup used to turn predictions back into text
with open('reverse_target_char_index.pkl', 'rb') as picklefile:
    reverse_target_char_index = pickle.load(picklefile)

Loaded model from disk
Loaded model from disk


In [4]:
# Number of melody events every group is padded or truncated to before
# it is handed to the encoder.
input_max = 16

# All-zero feature row used as padding. 23 slots = the 22 entries of
# notes_unique (21 scale degrees + 'rest') plus one slot for the duration.
zeros = [0 for _ in range(23)]

In [5]:
# Load the pickled chord-name cache. simplify_roman_name reads and updates it,
# keyed by (str(chord), str(key)).
# NOTE(review): pickle.load can execute arbitrary code; only load a dict.pkl you trust.
with open('dict.pkl', 'rb') as picklefile:
    d = pickle.load(picklefile)

### Processing music functions

In [6]:
def open_midi(midi_path, remove_drums):
    '''
    Read a MIDI file and guess which part carries the melody.

    There is a one-line method to read MIDIs, but to remove the drums we
    need to manipulate some low-level MIDI events.

    Parameters
    ----------
    midi_path : path of the MIDI file to read.
    remove_drums : when truthy, every event on MIDI channel 10 is dropped
        from every track before the stream is built.

    Returns
    -------
    (stream, melody_track) : the stream produced by
        midi.translate.midiFileToStream and the index callers use to pick
        the melody part.
    '''
    mf = midi.MidiFile()
    mf.open(midi_path)
    try:
        mf.read()
    finally:
        # Release the file handle even when the file cannot be parsed.
        mf.close()

    if remove_drums:
        for track in mf.tracks:
            # By convention channel 10 is reserved for percussion.
            track.events = [ev for ev in track.events if ev.channel != 10]

    # It is hard to identify the melody track. We do our best by looking for
    # keywords; even if the melody is confused with the bass line, it is still
    # assumed to be useful for capturing the relationship with the harmony.
    keywords = ('lead', 'voice', 'melody', 'karaoke')

    # By convention the first part is the melody when no keyword matches.
    melody_track = 0
    for index, track in enumerate(mf.tracks):
        if len(track.events) < 2:
            # Nothing to inspect (e.g. a track emptied by the drum filter).
            continue
        # The second event is inspected for a name; presumably it is the
        # track-name meta event -- TODO confirm.
        label = str(track.events[1].data).lower()
        if any(word in label for word in keywords):
            # NOTE(review): the part index is taken to be "track index - 1",
            # presumably because the first track of a multi-track file holds
            # no notes -- confirm. Clamped at 0 so that a match on track 0
            # does not select the last part through a negative index.
            melody_track = max(index - 1, 0)
            break

    return (midi.translate.midiFileToStream(mf), melody_track)

In [7]:
# Melody vocabulary: each of the seven scale degrees in natural, flat ('b')
# and sharp ('#') form, in that order, followed by 'rest'.
notes_unique = [
    accidental + degree
    for degree in ('i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii')
    for accidental in ('', 'b', '#')
] + ['rest']

In [8]:
def get_melo_encoded(df,melody_track):
    ''' Return the encoded melody of one part as a DataFrame.

    Every Note or Rest found directly in df.parts[melody_track] becomes a
    vector of len(notes_unique) + 1 values: a one-hot marker over
    notes_unique followed by the duration in quarter lengths. Other
    elements are ignored.

    Parameters
    ----------
    df : music21 stream of the whole song.
    melody_track : index of the melody part inside df.parts.

    Returns
    -------
    DataFrame with columns 'offset' (float), 'input' (the vector) and
    'group' (floor(offset / 4)).

    Raises
    ------
    ValueError : when a note's roman figure is not listed in notes_unique.
    '''
    music_key = df.analyze('key')

    # Derive the slot positions from the vocabulary instead of hard-coding them.
    rest_slot = notes_unique.index('rest')
    duration_slot = len(notes_unique)  # extra trailing slot for the duration

    time = []
    melody_roman = []
    for nt in df.parts[melody_track].notesAndRests:
        if isinstance(nt, note.Note):
            # For some reason music21 doesn't allow converting augmented notes
            # into a chord, so 'C' is added and then removed again.
            roman_chord = chord.Chord(nt.pitch.name + " " + "C")
            roman_chord.remove('C')
            roman_chord_numeral = roman.romanNumeralFromChord(roman_chord,music_key).figure
            slot = notes_unique.index(roman_chord_numeral)
        elif isinstance(nt, note.Rest):
            slot = rest_slot
        else:
            continue

        note_array = np.zeros(len(notes_unique) + 1)
        note_array[slot] = 1
        note_array[duration_slot] = nt.duration.quarterLength
        time.append(float(nt.offset))
        melody_roman.append(note_array)

    melody = pd.DataFrame({'offset':time,'input':melody_roman})
    # Split the sequence in groups of 4 offsets.
    melody['group'] = melody['offset'].apply(lambda x: np.floor(x/float(4)))
    return melody

In [9]:
def get_chords(df):
    '''Extract the harmony of the song with music21's chordify.

    Returns a DataFrame with one row per chord found in the chordified
    stream: 'offset' (float), 'target' (the name produced by
    simplify_roman_name for the analysed key) and 'group'
    (floor(offset / 4)).
    '''
    music_key = df.analyze('key')
    chordified = df.chordify()

    offsets, names = [], []
    for current in chordified.recurse().getElementsByClass('Chord'):
        offsets.append(float(current.offset))
        names.append(simplify_roman_name(current, music_key))

    harmony = pd.DataFrame({'offset': offsets, 'target': names})
    harmony['group'] = harmony['offset'].apply(lambda beat: np.floor(beat / 4.0))
    return harmony

In [10]:
def simplify_roman_name(thisChord, music_key):
    '''Return a simplified roman-numeral name for thisChord in music_key.

    Thanks @wfaria for this code!
    https://www.kaggle.com/wfaria/midi-music-data-extraction-using-music21/notebook
    Names are simplified, even if that ends in a different chord, to reduce
    the chord vocabulary and the number of classes for the decoder model.

    Results are memoised in the module-level dict `d`, keyed by
    (str(thisChord), str(music_key)).

    Returns
    -------
    str : the roman numeral, followed by the inversion name when the
        inversion is valid, otherwise by "M7" for a dominant seventh or
        "o7" for a diminished seventh.
    '''
    cache_key = (str(thisChord), str(music_key))

    # .get() instead of d[...]: a cache miss must fall through to the
    # computation below rather than raise KeyError on a plain dict.
    cached = d.get(cache_key)
    if cached:
        return cached

    roman_numeral = roman.romanNumeralFromChord(thisChord, music_key)

    ret = roman_numeral.romanNumeral
    inversion_name = None
    inversion = roman_numeral.inversion()

    # Checking valid inversions.
    if ((roman_numeral.isTriad() and inversion < 3) or
            (inversion < 4 and
                 (roman_numeral.seventh is not None or roman_numeral.isSeventh()))):
        inversion_name = roman_numeral.inversionName()

    if inversion_name is not None:
        ret = ret + str(inversion_name)
    elif roman_numeral.isDominantSeventh():
        ret = ret + "M7"
    elif roman_numeral.isDiminishedSeventh():
        ret = ret + "o7"

    d[cache_key] = ret
    return ret

In [11]:
def group_measure_encoded(df):
    ''' Group melodies and harmonies by 4 offsets.

    Parameters
    ----------
    df : DataFrame with columns 'group_x', 'input' (one vector per row) and
        'target' (chord name, possibly missing).

    Returns
    -------
    DataFrame with one row per distinct 'group_x' value:
        'group'  : the group key,
        'input'  : 2-D array stacking the group's input vectors,
        'target' : the group's targets joined with single spaces; missing
                   targets appear as the literal "null".
    '''
    df = df.fillna("null")
    grouped = df.groupby('group_x')

    group, input_texts, target_texts = [], [], []
    for key in grouped.groups:
        rows = grouped.get_group(key)
        group.append(key)
        # Kept as a plain list of per-group arrays. Wrapping the list in
        # np.array() fails for groups of different sizes on current NumPy and
        # produces a 3-D array (rejected by DataFrame) when sizes are equal.
        input_texts.append(np.array(list(rows['input'])))
        target_texts.append(' '.join(list(rows['target'])))

    return pd.DataFrame({'group':group,'input':input_texts,'target':target_texts})

In [12]:
def get_melo_chord(df,melody_track):
    ''' Build the final per-group DataFrame holding melody and harmony.

    The encoded melody is left-joined with the chord table on 'offset',
    grouped with group_measure_encoded, sorted by 'group', and its
    'input'/'target' columns are renamed to 'melody'/'harmony'. Any
    remaining missing value is replaced by 'rest'.
    '''
    melody_df = get_melo_encoded(df, melody_track)
    harmony_df = get_chords(df)

    # Left join: every melody event is kept, with or without a chord at its offset.
    aligned = melody_df.merge(harmony_df, on='offset', how='left')

    per_group = group_measure_encoded(aligned)
    per_group = per_group.sort_values(by='group')
    per_group = per_group.rename(columns={'input':'melody','target':'harmony'})
    return per_group.fillna('rest')

### Making predictions

In [13]:
def decode_note(array):
    ''' Return the note in roman notation.

    Parameters
    ----------
    array : 1-D sequence whose leading entries are a one-hot marker over
        notes_unique and whose last entry is the duration.

    Returns
    -------
    (name, duration) for the first entry equal to 1, or "" when no entry is
    set (e.g. an all-zero padding row) or the active position has no name in
    notes_unique.
    '''
    array = np.asarray(array)
    note_index, = np.where(array[:-1] == 1)
    # Explicit "nothing to decode" check instead of a bare except, so that
    # unrelated errors are no longer silently swallowed.
    if note_index.size == 0 or note_index[0] >= len(notes_unique):
        return ""
    return (notes_unique[note_index[0]], array[-1])

In [14]:
def decode_sequence(input_seq):
    ''' Return the harmony predicted for input_seq, in roman notation.

    The encoder turns the input into state vectors; the decoder is then run
    one token at a time (batch of size 1), always keeping the most likely
    token, until it emits '_END' or the text grows past 52 characters.
    Every token, '_END' included, is appended to the result preceded by a
    space.
    '''
    # Encode the input as state vectors.
    states = encoder_model.predict(input_seq)

    # Length-1 target sequence seeded with the start token.
    decoder_input = np.zeros((1, 1))
    decoder_input[0, 0] = target_token_index['START_']

    harmony = ''
    while True:
        token_probs, state_h, state_c = decoder_model.predict(
            [decoder_input] + states)

        # Greedy choice: most likely token of the last time step.
        best_index = np.argmax(token_probs[0, -1, :])
        token = reverse_target_char_index[best_index]
        harmony += ' ' + token

        # Stop on the end token or once the text is long enough.
        if token == '_END' or len(harmony) > 52:
            break

        # Feed the chosen token and the new states back into the decoder.
        decoder_input = np.zeros((1, 1))
        decoder_input[0, 0] = best_index
        states = [state_h, state_c]

    return harmony

In [16]:
def is_diatonic(rom_chord, music_key):
    ''' Tell whether a chord figure counts as diatonic in music_key.

    The digits 1-9 are stripped from rom_chord and the remainder is looked
    up in the list for major keys when 'major' appears in music_key.name,
    otherwise in the list for minor keys.
    '''
    major_figures = ('i','I','ii','iii','III','IV','V','vi','vii')
    minor_figures = ('i','ii','III','iv','v','V','VI','VII')

    allowed = major_figures if 'major' in music_key.name else minor_figures
    return re.sub(r"[1-9]", "", rom_chord) in allowed

In [17]:
# Upper-case roman numeral of a scale degree -> zero-based position of that
# degree (used to index the diatonic chord lists).
roman_to_int = {
    numeral: position
    for position, numeral in enumerate(('I', 'II', 'III', 'IV', 'V', 'VI', 'VII'))
}

In [18]:
def get_diatonic_chord(chord, music_key):
    ''' If the chord is not diatonic, convert it to diatonic.

    Parameters
    ----------
    chord : roman-numeral figure (str).
    music_key : object whose .name contains 'major' for major keys.

    Returns
    -------
    str : chord unchanged when is_diatonic accepts it, otherwise the
        diatonic chord built on the same scale degree.

    Raises
    ------
    KeyError : when no scale degree I..VII is left after stripping figures,
        accidentals and quality marks.
    '''
    if 'major' in music_key.name:
        diatonic_chords = ['I7','ii7','iii','IV7','V7','vi','vii5b7']
    else:
        diatonic_chords = ['i7','ii7','III','iv7','V7','VI','VII']
    if not is_diatonic(chord,music_key):
        # Strip figures, accidentals and quality marks to get the bare degree.
        # Besides digits, '-' and 'b', this also removes '0', '#', '+' and the
        # 'o' / 'M' of the "o7" / "M7" suffixes that simplify_roman_name
        # emits, which used to end in a KeyError.
        degree = re.sub(r"[0-9b#oM+-]", "", chord).upper()
        chord = diatonic_chords[roman_to_int[degree]]
    return chord

In [19]:
def convert_numeral_chords(dec_input,decoded_sentence,k):
    ''' Return chords in the key of the song.

    Initially, each chord gets the duration of the melody note it is paired
    with.

    Parameters
    ----------
    dec_input : decoded melody events, (name, duration) tuples; "" entries
        are ignored.
    decoded_sentence : space-separated chord figures; the text before the
        first space is discarded.
    k : key in which the roman numerals are realised.

    Returns
    -------
    list with one element per melody event: a Rest where the melody rests or
    where no chord could be built for that position, otherwise a Chord.
    '''
    events = [x for x in dec_input if x != ""]
    rom_chords = decoded_sentence.split(" ")[1:]

    decoded_chords = []
    for i, (melody_name, length) in enumerate(events):
        element = None
        if melody_name != 'rest':
            try:
                diatonic_roman = get_diatonic_chord(rom_chords[i],k)
                element = chord.Chord(roman.RomanNumeral(diatonic_roman,k).pitches)
            except Exception:
                # Fewer chords than melody notes, or a token that is not a
                # usable roman numeral: fall back to a rest. `Exception`
                # rather than a bare except, so KeyboardInterrupt and
                # SystemExit still propagate.
                element = None
        if element is None:
            element = note.Rest()
        element.duration.quarterLength = length
        decoded_chords.append(element)
    return decoded_chords

In [20]:
def reduce_chords(decoded_chords):
    ''' Reduce the total number of chords to only two.

    The positions of the two kept chords are random. The first one absorbs
    the durations of the elements between it and the second one; the second
    absorbs the durations of everything after it. When the first kept chord
    is not the first element, a leading Rest covers the skipped elements,
    so the total duration is preserved.

    Parameters
    ----------
    decoded_chords : list of elements exposing .duration.quarterLength;
        the kept elements are modified in place.

    Returns
    -------
    [] for empty input, the single element for a one-element input,
    otherwise [chord1, chord2] or [rest, chord1, chord2].
    '''
    # random.randint needs at least two positions to choose from.
    if not decoded_chords:
        return []
    if len(decoded_chords) == 1:
        only = decoded_chords[0]
        if isinstance(only, chord.Chord):
            only.closedPosition(forceOctave=3, inPlace=True)
        return [only]

    start_chord = random.randint(0, len(decoded_chords) - 2)
    end_chord = random.randint(start_chord + 1, len(decoded_chords) - 1)

    # The kept element is excluded by position rather than with `!=`: value
    # equality would also skip equal-looking neighbours and lose their duration.
    chord1 = decoded_chords[start_chord]
    for ch in decoded_chords[start_chord + 1:end_chord]:
        chord1.duration.quarterLength += ch.duration.quarterLength

    chord2 = decoded_chords[end_chord]
    for ch in decoded_chords[end_chord + 1:]:
        chord2.duration.quarterLength += ch.duration.quarterLength

    # Voice both kept chords in closed position once, whether or not they
    # absorbed any neighbour.
    for kept in (chord1, chord2):
        if isinstance(kept, chord.Chord):
            kept.closedPosition(forceOctave=3, inPlace=True)

    if start_chord != 0:
        chord0 = note.Rest()
        chord0.duration.quarterLength = sum(
            ch.duration.quarterLength for ch in decoded_chords[:start_chord])
        return [chord0, chord1, chord2]

    return [chord1, chord2]
        

In [21]:
def reduce_chords_rock(decoded_chords):
    ''' Reduce the total number of chords to only two.

    The first element and one randomly chosen later element are kept, each
    lasting 2 quarter lengths.

    Parameters
    ----------
    decoded_chords : list of elements exposing .duration.quarterLength;
        the kept elements are modified in place.

    Returns
    -------
    [] for empty input; [chord1] lasting 4 quarter lengths (the span of the
    usual two chords) for a one-element input; otherwise [chord1, chord2].
    '''
    # random.randint(1, len - 1) needs at least two elements.
    if not decoded_chords:
        return []

    chord1 = decoded_chords[0]
    if len(decoded_chords) == 1:
        chord1.duration.quarterLength = 4
        if isinstance(chord1, chord.Chord):
            chord1.closedPosition(forceOctave=3, inPlace=True)
        return [chord1]

    end_chord = random.randint(1, len(decoded_chords) - 1)
    chord2 = decoded_chords[end_chord]

    for kept in (chord1, chord2):
        kept.duration.quarterLength = 2
        if isinstance(kept, chord.Chord):
            kept.closedPosition(forceOctave=3, inPlace=True)

    return [chord1, chord2]
        

In [47]:
def get_harmony(midi_name):
    ''' Generate a harmony for the melody stored in a MIDI file.

    Parameters
    ----------
    midi_name : path of the MIDI file to harmonize.

    Returns
    -------
    stream.Stream holding the melody part and a new part with the
    generated chords.

    The analysed key is printed as a side effect.
    '''
    path = midi_name
    song, melody_track = open_midi(path,True)
    music_key = song.analyze('key')
    print(music_key)

    ### Encoding melody ###
    song_grouped = get_melo_chord(song, melody_track)

    # Width of one melody event, taken from the padding row instead of being
    # repeated as a literal.
    feature_len = len(zeros)

    padded = []
    for measure in song_grouped['melody'].values:
        # Truncate to input_max events, then pad with all-zero rows in a
        # single step (instead of appending one row at a time).
        measure = np.asarray(measure)[:input_max]
        missing = input_max - len(measure)
        if missing > 0:
            measure = np.append(measure, np.zeros((missing, feature_len)), axis=0)
        padded.append(measure)

    encoder_song = np.array(padded).reshape((len(padded), input_max, feature_len))

    ### --------------- ###

    output_melody = song.parts[melody_track]
    output_chords = stream.Part()

    ### Decoding harmony ###

    # NOTE(review): the last group is not harmonized (range stops at len - 1).
    # Kept as is; confirm whether skipping it is intentional.
    for seq_index in range(len(encoder_song)-1):
        input_seq = encoder_song[seq_index: seq_index + 1]
        dec_input = [decode_note(x) for x in input_seq[0]]
        decoded_sentence = decode_sequence(input_seq)

        decoded_chords = convert_numeral_chords(dec_input,decoded_sentence,music_key)
        decoded_chords = reduce_chords(decoded_chords)

        for ch in decoded_chords:
            output_chords.append(ch)

    ### ----------------- ###

    return stream.Stream([output_melody,output_chords])

In [48]:
# Harmonize 'melody.mid' and write the resulting stream (melody part plus
# generated chord part) to 'test.midi' in MIDI format.
get_harmony('melody.mid').write('midi','test.midi')

a minor


'test.midi'