In [4]:
from midi_to_dataframe import NoteMapper, MidiReader, MidiWriter
import IPython
from IPython.display import Image, IFrame
from PIL import Image
import seaborn as sns
import pandas as pd
import numpy as np
import os
import json
import music21
import pickle
import nltk
nltk.download('punkt')

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import Embedding
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

[nltk_data] Downloading package punkt to /home/maarij/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [5]:
# Path to the JSON config handed to NoteMapper.
# NOTE(review): presumably this defines how MIDI notes are grouped — confirm against the midi_to_dataframe docs.
note_mapping_config_path = "./config/map-to-group.json"
# Mapper built from that config; the same instance is reused later to build the MidiWriter.
note_mapper = NoteMapper(note_mapping_config_path)
# Reader used further down to convert a MIDI file into a DataFrame.
reader = MidiReader(note_mapper)

In [13]:
# Load the fitted tokenizer object from its pickle file.
# NOTE: pickle.load executes arbitrary code from the file — only load pickles you created yourself.
with open("./tokenizer.pkl", 'rb') as f:
    tokenizer = pickle.load(f)
    
# Load the note-vocabulary mapping from its pickle file.
# It is indexed by note name below and its values are converted with int().
with open("./vocabularyMappings.pkl", 'rb') as f:
    vocabMappings = pickle.load(f)

In [7]:
# Restore the saved Keras model from the HDF5 checkpoint; it is used by next_tokens() for prediction.
checkpoint_path = "./model_checkpoint.h5"
loaded_model = keras.models.load_model(checkpoint_path)

In [9]:
# Invert tokenizer.word_index (word -> index) into an index -> word lookup,
# so a predicted class index can be turned back into its token text.
reverse_word_map = {index: word for word, index in tokenizer.word_index.items()}

In [63]:
def next_tokens(input_str, n, seq_len=50):
    """Greedily generate ``n`` tokens that continue ``input_str``.

    Parameters
    ----------
    input_str : str
        Space-separated seed tokens. It is printed once before generation.
    n : int
        Number of tokens to generate.
    seq_len : int, optional
        Length of the window fed to the model (default 50, the value the
        original code hard-coded).

    Returns
    -------
    str
        The generated tokens, each followed by a single space
        (an empty string when ``n`` is 0).

    Notes
    -----
    Relies on the notebook-level ``tokenizer``, ``loaded_model``,
    ``reverse_word_map`` and ``pad_sequences``. A predicted index that is
    missing from ``reverse_word_map`` raises ``KeyError``.
    """
    print("Seed -", input_str, sep='\n\n')
    generated = []
    for _ in range(n):
        token_ids = tokenizer.texts_to_sequences([input_str])[0]
        # Always build a (1, seq_len) batch: pad_sequences left-pads short
        # inputs and, with truncating="pre", keeps only the last seq_len ids.
        # Previously inputs of 50+ tokens were passed to predict() as a flat list.
        batch = pad_sequences([token_ids], maxlen=seq_len, padding="pre", truncating="pre")
        prediction = loaded_model.predict(batch, verbose=0)
        next_word = reverse_word_map[np.argmax(prediction[0])]
        generated.append(next_word)
        # Slide the window: append the new token, then drop the oldest one.
        input_str = input_str + " " + next_word
        input_str = " ".join(input_str.split(" ")[1:])
    return "".join(word + " " for word in generated)

In [83]:
# Locate the MIDI file to use as the seed material.
filepath = "./datasets/extra/"
filename = "YouBelongWithMe.midi"
fullFilePath = filepath+filename

# Convert the MIDI file into a DataFrame.
# DataFrame.append was removed in pandas 2.0; pd.concat over a list of frames
# is the supported replacement and still returns a fresh copy.
tempDF = reader.convert_to_dataframe(fullFilePath)
MidiDataDF = pd.concat([tempDF])

In [84]:
# Keep only the "notes" column, as a single-column DataFrame.
NotesDataDF = MidiDataDF.loc[:, ["notes"]]

In [85]:
# Encode every note as its integer vocabulary id.
# Rows holding several comma-separated notes get negated ids so they can be
# regrouped when decoding; rows with a single note keep a positive id.
notesTemp = list(NotesDataDF["notes"])
mappedNotes = []

for rowNotes in notesTemp:
    noteNames = rowNotes.split(",")
    sign = -1 if len(noteNames) > 1 else 1
    mappedNotes.extend(int(vocabMappings[name]) * sign for name in noteNames)

print(len(mappedNotes))

9919


In [86]:
# Token vocabulary: the digits "0".."9", plus "10" for minus (-) and "11" for comma (,).
vocabularyChars = [str(code) for code in range(12)]
mappedNotesChars = []

# Spell each id out character by character, replacing a leading minus sign
# with "10" and closing every id with the separator token "11".
for noteId in mappedNotes:
    pieces = list(str(noteId))
    if pieces[0] == "-":
        pieces[0] = "10"
    mappedNotesChars += pieces + ["11"]

# One space-separated string of tokens.
mappedNotesString = " ".join(mappedNotesChars)

print(len(mappedNotesString))
print(mappedNotesString[0:100])

144655
0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 0 11 2 0 2 5 6 11 0 11 0 


In [93]:
# Seed the generator with characters 100-199 of the encoded string.
# NOTE(review): this is a character slice, not a token slice, so it can cut a
# two-character token ("10"/"11") in half — confirm that is acceptable.
input_str = mappedNotesString[slice(100, 200)]

# Ask the model for 500 further tokens and show them.
output = next_tokens(input_str, n=500)
print("\nGenerated string -\n\n", output)

Seed -

11 0 11 2 0 2 5 6 11 0 11 0 11 0 11 2 0 2 5 8 11 0 11 0 11 0 11 2 0 2 5 6 11 0 11 6 8 7 5 11 10 7 3 

Generated string -

 2 8 11 10 7 2 2 0 11 10 7 2 0 0 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8 6 11 10 6 7 8

In [94]:
# Prepend the first 200 characters of the original encoding to the generated tokens.
# The prefix is a character slice and may end without trailing whitespace, so join
# with an explicit space; otherwise the last prefix token and the first generated
# token would fuse into one bogus token.
outputStr = mappedNotesString[0:200].rstrip() + " " + output

In [95]:
# Split the combined string into individual tokens.
# Guarded so that re-running this cell (when outputStr is already a list) is a no-op
# instead of raising AttributeError.
if isinstance(outputStr, str):
    outputStr = outputStr.split()

In [118]:
# Decode the token stream back into integer note ids.
# "10" is the minus sign, "0".."9" are digits, and any other token ("11") closes the
# current number. A trailing number with no closing separator is treated as incomplete
# and dropped.
output = []
malformedGroups = []  # groups that could not be parsed as an int, kept for inspection
temp = ""

for idx in outputStr:
    if idx == "10":
        temp += "-"
    elif int(idx) < 10:
        temp += idx
    else:
        # Generated text can contain consecutive separators or a lone minus sign,
        # which give "" or "-"; record and skip those rather than crash on int().
        try:
            output.append(int(temp))
        except ValueError:
            malformedGroups.append(temp)
        temp = ""

In [119]:
# Translate integer ids back into note names, rebuilding comma-joined groups.
#
# Build the id -> name lookup once instead of scanning vocabMappings for every id.
# setdefault keeps the first name for a given id, matching the previous
# "first match wins" behaviour; int() mirrors how the ids were produced when encoding.
idToNote = {}
for noteName, noteId in vocabMappings.items():
    idToNote.setdefault(int(noteId), noteName)

reverseMapping = []
negatives = []

for i in output:
    if i < 0:
        # Negative ids belong to a multi-note row; collect them until the run ends.
        negatives.append(idToNote[-i])
    else:
        # Flush the pending group BEFORE the single note that follows it. Previously
        # the group was emitted one position late (after this note) and was lost
        # entirely when this note was the last item.
        if negatives:
            reverseMapping.append(",".join(negatives))
            negatives = []
        reverseMapping.append(idToNote[i])

# A group at the very end of the sequence still has to be emitted.
if negatives:
    reverseMapping.append(",".join(negatives))

In [120]:
# Assemble the frame for MidiWriter: a constant bpm of 125 on every row,
# with "bpm" placed ahead of "notes".
outputDF = (
    pd.DataFrame(reverseMapping, columns=['notes'])
    .assign(bpm=125)
    .loc[:, ['bpm', 'notes']]
)

outputDF

Unnamed: 0,bpm,notes
0,125,rest
1,125,rest
2,125,rest
3,125,rest
4,125,rest
5,125,rest
6,125,rest
7,125,rest
8,125,rest
9,125,rest


In [121]:
# Write the generated DataFrame to disk as a playable MIDI file
writer = MidiWriter(note_mapper)
writer.convert_to_midi(outputDF, "./output.midi")

# Render the MIDI as sheet music (PNG via MusicXML, PDF via LilyPond).
# The target directory must exist before music21 writes into it.
os.makedirs("./sheets", exist_ok=True)
parsed = music21.converter.parse("./output.midi")
parsed.write('musicxml.png', fp='./sheets/Score')
pdfPath = parsed.write('lily.pdf', fp='./sheets/Score')

# Flatten each rendered PNG onto a white background and store it as JPG.
# `Image` here is PIL.Image (its import shadows IPython.display.Image).
filepath = "./sheets/"
for filename in os.listdir(filepath):
    if filename.endswith(".png"):
        pngPath = filepath+filename
        # Close the file handle before deleting the PNG, and convert to RGBA so an
        # alpha channel always exists; paste(im, im) fails on images without one.
        with Image.open(pngPath) as im:
            rgba = im.convert("RGBA")
        bg = Image.new("RGB", rgba.size, (255,255,255))
        bg.paste(rgba, mask=rgba.split()[-1])
        os.remove(pngPath)
        filename = filename.replace(".png",".jpg")
        bg.save(filepath+filename)

# Remove intermediate files left by the renderers; tolerate ones that were never created.
for leftover in ("Score", "Score.musicxml"):
    leftoverPath = filepath + leftover
    if os.path.exists(leftoverPath):
        os.remove(leftoverPath)

IFrame(str(pdfPath), width=900, height=800)

Changing working directory to: `sheets'
Processing `sheets/Score'
Parsing...
Interpreting music...
Preprocessing graphical objects...
                      < 
                        cis'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  f'  > 16  ~   
Finding the ideal number of pages...
Fitting music on 1 page...
Drawing systems...
                \new Voice { 
                             r 2...  
                \new Voice { 
                             r 2...  
Converting to `Score.pdf'...
Success: compilation successfully completed
