In [None]:
"""Implementation of a generator based on Bach fugues using a LSTM for training 
a dataset of midi files
Stages:
1. Download data in midi format
2. Data cleaning and preparation
3. Data exploration
4. Convert notes into integers...
5. Model definition and fit
6. New music creation!
7. Convert results into midi files for curating

Data source: http://www.bachcentral.com/midiindexcomplete.html


IMPORTANT: FOR THE MOMENT THIS PROCESSOR WILL ONLY WORK WITH MELODIES (SINGLE NOTES)
SO THE SONGS CONTAINING CHORDS WILL BE FILTERED OUT"""


'Implementation of a generator based on Bach fugues using a LSTM for training a dataset of midi files\nStages:\n#\n2 Data cleaning and preparation\n3. Data exploration\n4. Convert notes into integers...\n5. Model definition and fit\n6. New music creation!\n7. Convert results into midi files for curating\n\nData source: http://www.bachcentral.com/midiindexcomplete.html\n\n\nIMPORTANT: FOR THE MOMENT THIS PROCESSOR WILL ONLY WORK WITH MELODIES (SINGLE NOTES)\nSO THE SONGS CONTAINING CHORDS WILL BE FILTERED OUT'

In [None]:
import requests
import copy
import music21 as m21
from music21 import *
from pathlib import Path
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import json
import tensorflow.keras as keras


from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

import os
import glob 
from pathlib import Path

import pandas as pd

In [None]:
# Colab only: mount Google Drive so the dataset folders under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#1. Download data in midi format into project data folder

# Commented-out alternative dataset locations (local machine paths):
#DATA_PATH = "/Users/mauricioalfaro/Documents/mae_code/Bach/data/fugues"

# DATA_PATH = "/Users/mauricioalfaro/Documents/mae_code/Bach/data/chorales/JSBChorales/total"
# SAVE_DIR = "/Users/mauricioalfaro/Documents/mae_code/Bach/data/chorales/JSBChorales/encoded_dataset"
# #DATA_PATH = "/Users/mauricioalfaro/Documents/mae_code/Bach/data/Trane"


# Folder with the original midi files. NOTE: SAVE_DIR lives inside DATA_PATH; the encoded
# files are saved without a .mid/.midi extension, so the midi loader does not pick them up.
DATA_PATH = "/content/drive/MyDrive/maeGenerator22/Jazz"
SAVE_DIR = "/content/drive/MyDrive/maeGenerator22/Jazz/encoded_dataset"# where the encoded data will be saved


# durations are expressed in quarter length
ACCEPTABLE_DURATIONS = [
    0.25, # 16th note
    0.5, # 8th note
    0.75, # dotted 8th note
    1.0, # quarter note
    1.25, # quarter note + 16th note
    1.5, # dotted quarter note
    2, # half note
    3, # dotted half note
    4 , # whole note
    4.25]# whole note + 16th note



In [None]:
def load_songs(data_path):
    """Load every MIDI file found under ``data_path`` as a music21 object.

    The folder is walked recursively with ``os.walk``. A file is treated as
    MIDI when its extension is ``.mid`` or ``.midi`` (case-insensitive).
    Parsing is the process of recognizing and identifying the components of
    a particular input.

    Args:
        data_path: root folder that contains the MIDI files.

    Returns:
        list: one object per successfully parsed file, exactly as returned by
        ``m21.converter.parse``. Files that fail to parse are reported on
        stdout and skipped.
    """
    songs = []
    for path, _, files in os.walk(data_path):
        for i, file in enumerate(files):
            # Compare the real extension: slicing the last characters also
            # accepted names that merely end in "mid" and rejected ".MID".
            extension = os.path.splitext(file)[1].lower()
            if extension not in (".mid", ".midi"):
                continue
            try:
                song = m21.converter.parse(os.path.join(path, file))
            except Exception:
                # Best effort: one unreadable file must not abort the whole
                # load. `Exception` (instead of a bare except) keeps Ctrl-C
                # and kernel interrupts working.
                print("Failed loading the {} song".format(i))
                continue
            songs.append(song)

    print("{} songs successfully loaded and converted to m21 stream objects".format(len(songs)))

    return songs


def has_acceptable_duration(song, acceptable_durations):
    """Tell whether every note and rest of the song has an allowed duration.

    Iterating ``song.flat.notesAndRests`` goes through the whole song. The
    check matters because the reference time step used for encoding the
    songs is the sixteenth note (0.25 quarter lengths).

    Args:
        song: m21 stream.
        acceptable_durations: list containing the acceptable durations,
            expressed in quarter lengths.

    Returns:
        bool: False as soon as one element has a duration outside
        ``acceptable_durations``; True otherwise.
    """
    return all(
        event.duration.quarterLength in acceptable_durations
        for event in song.flat.notesAndRests
    )
    
def songs_has_no_chords(song):
    """Return True when the song contains no chord, False when it has at least one.

    Every element of ``song.flat.notesAndRests`` is tested against
    ``m21.chord.Chord``; the scan stops at the first chord found.
    """
    contains_chord = any(
        isinstance(event, m21.chord.Chord)
        for event in song.flat.notesAndRests
    )
    return not contains_chord


def check_durations(song, acceptable_durations):
    """Collect the duration of every note and rest of the song.

    Args:
        song: m21 stream.
        acceptable_durations: not used by this function; it is only part of
            the signature.

    Returns:
        list: the ``duration.quarterLength`` of each element of
        ``song.flat.notesAndRests``, in iteration order.
    """
    return [event.duration.quarterLength for event in song.flat.notesAndRests]


def transpose_song(song):
    """Transpose the song to C major / A minor.

    The key is first looked up in the song itself: element 4 of the first
    measure of the first part. When that lookup fails (no parts, no
    measures, fewer elements), or yields something that is not a
    ``m21.key.Key``, or yields a key whose mode is neither "major" nor
    "minor", the key is estimated with ``song.analyze("key")``.

    Args:
        song: song as m21 object.

    Returns:
        The result of ``song.transpose(interval)``, where ``interval`` goes
        from the tonic of the key to A (minor mode) or to C (otherwise).
    """
    # Try to read the key stored in the song. The whole lookup is guarded so a
    # song without parts or measures falls back to key estimation instead of
    # raising IndexError.
    try:
        parts = song.getElementsByClass(m21.stream.Part)  # all parts of the song (violin, viola, ...)
        measures_part0 = parts[0].getElementsByClass(m21.stream.Measure)  # first part is the reference
        key = measures_part0[0][4]  # element 4 of the first measure is expected to be the key
    except Exception:
        key = None

    # Estimate the key when none is stored, or when the stored one has a mode
    # for which no transposition target is defined below.
    if not isinstance(key, m21.key.Key) or key.mode not in ("major", "minor"):
        key = song.analyze("key")

    # Distance to transpose: minor keys go to A, major keys go to C.
    # key.tonic is the tonic pitch of the current key.
    target_tonic = "A" if key.mode == "minor" else "C"
    interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch(target_tonic))

    transposed_song = song.transpose(interval)

    return transposed_song


def encode_song(song, time_step = 0.25):
    """Convert a stream object into a time-series string.

    The series advances in steps of ``time_step`` quarter lengths (0.25 is
    one sixteenth note). For every event of ``song.flat.notesAndRests``:

    1. a symbol is extracted: the MIDI number for a note, the MIDI numbers
       joined by "." for a chord (e.g. ``60.64``), and ``r`` for a rest;
    2. its duration is converted to a number of time steps;
    3. the symbol is emitted once, followed by ``_`` for each remaining
       step of the event.

    Events that are neither a note nor a chord nor a rest are encoded as a
    rest, so they still occupy their duration in the series.

    Args:
        song: m21 stream.
        time_step: length of one step, in quarter lengths.

    Returns:
        str: the symbols separated by single spaces, e.g.
        ``"60 _ _ r 67 _ 74 _ _ 34"``.
    """
    encoded_song = []
    for event in song.flat.notesAndRests:
        if isinstance(event, m21.note.Note):
            symbol = event.pitch.midi  # MIDI value of the note
        elif isinstance(event, m21.chord.Chord):
            symbol = ".".join(str(n.pitch.midi) for n in event)
        else:
            # Rests, plus any other event type: previously such events reused
            # the symbol of the preceding event (or raised if they came first).
            symbol = "r"

        # Number of time steps the event lasts. The division is truncated, so
        # an event shorter than one time step contributes nothing.
        nr_time_steps = int(event.duration.quarterLength / time_step)

        # The first step carries the symbol, the remaining ones the hold marker.
        if nr_time_steps > 0:
            encoded_song.append(symbol)
            encoded_song.extend(["_"] * (nr_time_steps - 1))

    # Cast every element to str and join them separated by a space.
    encoded_song = " ".join(map(str, encoded_song))
    return encoded_song



#Saving all the encoded songs in one file
SEQUENCE_LENGTH = 64 # time steps per training sequence; also the number of repetitions of "/ " in the song delimiter
SINGLE_FILE_PATH = "single_dataset" #name of the single file to be created 

def load(dataset_path):
    """Read one text file (e.g. an encoded song) and return its content as a string."""
    return Path(dataset_path).read_text()

def create_single_file_dataset(dataset_path, single_file_path, sequence_length):
    """Merge every encoded song into one string and save it to a single file.

    Each song is followed by a space and by a delimiter made of the symbol
    "/ " repeated ``sequence_length`` times, which marks the beginning of a
    new song.

    Args:
        dataset_path: directory with the individual, already encoded songs.
        single_file_path: path/name of the single file to be created.
        sequence_length: number of repetitions of "/ " in the delimiter.

    Returns:
        str: the text that was written to ``single_file_path``.
    """
    song_separator = "/ " * sequence_length
    print("Creating single file sequence...")

    # The text starts with one space; each song contributes
    # "<song> <separator>".
    pieces = [" "]
    for folder, _, file_names in os.walk(dataset_path):
        for file_name in file_names:
            pieces.append(load(os.path.join(folder, file_name)))
            pieces.append(" " + song_separator)

    # Drop the last character (the trailing space of the final delimiter).
    songs = "".join(pieces)[:-1]

    with open(single_file_path, "w") as fp:
        fp.write(songs)
    print("Single sequence file created...")
    return songs

def songs_with_chords(dataset_path):
    """Load the songs of a folder and report which ones contain chords.

    Args:
        dataset_path: path where the midi files are.

    Returns:
        list: positions (within the list returned by ``load_songs``) of the
        songs for which ``songs_has_no_chords`` is False.
    """
    loaded_songs = load_songs(dataset_path)  # list of m21 converted songs
    return [
        position
        for position, song in enumerate(loaded_songs)
        if not songs_has_no_chords(song)
    ]



def preprocess(dataset_path):
    """Read the midi files, encode them and save each encoded song in SAVE_DIR.

    Steps: make sure SAVE_DIR exists and holds no files from a previous run,
    load the songs with ``load_songs``, then transpose and encode each one
    and write it to ``SAVE_DIR/<index>`` as text. Songs with chords are not
    filtered out.

    Args:
        dataset_path: original midi data folder.

    Returns:
        None. The encoded songs are written to SAVE_DIR.
    """
    # Create the output folder when missing (writing the first song used to
    # fail otherwise), then remove the files left by a previous run.
    save_dir = Path(SAVE_DIR)
    save_dir.mkdir(parents=True, exist_ok=True)
    for stale_file in save_dir.glob("*"):
        if stale_file.is_file():
            stale_file.unlink()

    # Load and convert songs to m21 streams...
    print("Loading songs...")
    songs = load_songs(dataset_path)
    print("Initially loaded songs:", len(songs))

    # Transpose and encode each song...
    enc_songs = 0
    for i, song in enumerate(songs):
        song = transpose_song(song)
        encoded_song = encode_song(song, time_step= 0.25)

        # Save the song as a text file named after its position in the list.
        save_path = os.path.join(SAVE_DIR, str(i))
        with open(save_path, "w") as fp:
            fp.write(encoded_song)
        enc_songs += 1
    print("Number of encoded songs:", enc_songs)

#Create a dictionary for mapping the symbols
MAPPING_JSON_NAME = "mapping.json" # name of the JSON file that will be created with the mapping

def create_mapping(songs, mapping_json_name):
    """Build the symbol -> integer mapping of the dataset and save it as JSON.

    Args:
        songs: string with all the encoded songs; symbols are separated by
            whitespace.
        mapping_json_name: path of the JSON file to write.

    Returns:
        None. The mapping is written to ``mapping_json_name``.
    """
    # Unique symbols in sorted order. Iterating a bare set gives an order that
    # changes between Python sessions (string hashing is randomized), which
    # would assign different integers to the same symbol on every run.
    vocabulary = sorted(set(songs.split()))
    mappings = {symbol: i for i, symbol in enumerate(vocabulary)}

    with open(mapping_json_name, "w") as fp:
        json.dump(mappings, fp, indent = 4)

    print("Mapping created...")

#Convert  the single file symbols into integers using the mapping
def convert_into_integers(single_file):
    """Translate the symbols of the single file into integers.

    The mapping is read from the JSON file named by MAPPING_JSON_NAME.

    Args:
        single_file: string of whitespace-separated symbols.

    Returns:
        list: the integer assigned to each symbol, in the same order.
    """
    with open (MAPPING_JSON_NAME, "r") as fp:
        symbol_to_int = json.load(fp)

    encoded = [symbol_to_int[token] for token in single_file.split()]
    print("Single file converted to integers using the mapping")
    return encoded

#Create training sequences
#Generating training sequences...
#An LSTM is trained by taking a sequence of notes and predicting which one comes next.
#Because the training is supervised, the network is given a sequence together with a target.
#Each sample is therefore a sequence of 64 time steps (equivalent to 4 bars of 4/4), and
#its target is the note or figure that follows. Remember that each time step is a sixteenth note.
#The sequences are built by treating the data as a time series and sliding
#a window forward.
#With a sequence length of 64 time steps, if there are 100 symbols in total
#and the window moves one step at a time, the total number of sequences is 100 - 64.

def generate_training_sequences(sequence_length):
    """Build the training samples and targets from the single dataset file.

    The file at SINGLE_FILE_PATH is loaded and converted to integers. A
    window of ``sequence_length`` symbols slides over it one step at a time:
    each window is a sample and the symbol right after it is its target.

    Side effects: writes ``training_inputs.csv`` (the integer-encoded
    windows, one row per sample) and ``training_targets.csv`` to the current
    working directory.

    Args:
        sequence_length: number of symbols per sample.

    Returns:
        tuple: ``(inputs, targets, vocab_size)`` where ``inputs`` is the
        one-hot encoding of the windows produced by
        ``keras.utils.to_categorical`` (nr_of_sequences x sequence_length x
        vocab_size), ``targets`` is a NumPy array with the integer target of
        each window, and ``vocab_size`` is the number of distinct integers in
        the dataset.
    """
    # Load the songs and map them to int.
    songs = load(SINGLE_FILE_PATH)
    int_song = convert_into_integers(songs)

    # Number of windows that can be generated.
    number_of_sequences = len(int_song) - sequence_length
    int_inputs = [int_song[i: i + sequence_length] for i in range(number_of_sequences)]
    targets = [int_song[i + sequence_length] for i in range(number_of_sequences)]  # the following note/rest

    # One-hot encode the windows to create the input vectors.
    # NOTE(review): assumes the integers are exactly 0..vocab_size-1, i.e. that
    # the mapping was built from this same file — confirm if that changes.
    vocab_size = len(set(int_song))  # unique elements
    print("Vocab size:", vocab_size)
    print("Ahora voy a ocupar keras...")
    inputs = keras.utils.to_categorical(int_inputs, num_classes = vocab_size)
    targets = np.array(targets)
    print("Training data successfully generated")

    # Save the generated data. The CSV holds the integer windows: the one-hot
    # array is 3-D and cannot be turned into a DataFrame (pandas requires
    # 2-D input), so the previous attempt to save it always raised.
    inputs_df = pd.DataFrame(int_inputs)
    targets_df = pd.DataFrame(targets)
    inputs_df.to_csv("training_inputs.csv")
    targets_df.to_csv("training_targets.csv")

    return inputs, targets, vocab_size


In [None]:
#Check if songs actually have chords, because in Jazz directory should only be melody solos
# In a notebook kernel __name__ is "__main__", so the guarded code runs when the cell is executed.
if __name__=="__main__":
    chords = songs_with_chords(DATA_PATH)  # positions of the loaded songs that contain chords

Failed loading the 76 song
Failed loading the 97 song
Failed loading the 133 song
464 songs successfully loaded and converted to m21 stream objects


SubConverterException: ignored

In [None]:
chords

[3,
 5,
 20,
 21,
 23,
 38,
 40,
 43,
 55,
 66,
 90,
 96,
 103,
 107,
 112,
 120,
 125,
 135,
 140,
 142,
 150,
 174,
 204,
 208,
 209,
 223,
 226,
 240,
 256,
 295,
 302,
 320,
 324,
 349,
 390,
 403,
 405,
 425,
 428,
 445,
 450]

In [None]:
##Sequence so far
    #read the songs from directory
    #convert songs to streams
    #check acceptable durations (optional for the moment...)
    #transpose to C major/ A minor
    #Encode songs

In [None]:
##Main processing sequence

# In a notebook kernel __name__ is "__main__", so the pipeline runs when the cell is executed.
if __name__ == "__main__":
    print("Processing data begings...")
    #Load, encode and save the individual m21 songs 
    preprocess(DATA_PATH)
    #Read the encoded songs and create a single file with all songs delimited
    single_file_songs = create_single_file_dataset(SAVE_DIR, SINGLE_FILE_PATH, SEQUENCE_LENGTH)
    #Create a dictionary of symbols for mapping and save it
    create_mapping(single_file_songs, MAPPING_JSON_NAME)
    #Convert  the single file symbols into integers using the mapping
    # NOTE: generate_training_sequences below reloads SINGLE_FILE_PATH and converts it again,
    # which is why the conversion message is printed twice.
    int_song = convert_into_integers(single_file_songs) #sequence converted to integers
    #Generating training sequences
    inputs, targets, vocab_size = generate_training_sequences(SEQUENCE_LENGTH)
    


Processing data begings...
Loading songs...
Failed loading the: 76 song
Failed loading the: 97 song
Failed loading the: 133 song
464 songs successfully loaded and converted to m21 stream objects
Initially loaded songs: 464
Number of encoded songs: 464
Creating single file sequence...
Single sequence file created...
Mapping created...
Single file converted to integers using the mapping
Single file converted to integers using the mapping
Vocab size: 120
Ahora voy a ocupar keras...


In [None]:
# Scratch cell: parse one song and list every element found by recursing into it.
# NOTE(review): `contents` is not defined in any visible cell — it must exist in the
# kernel before this cell is run.
file = m21.converter.parse(contents)

components = []

for element in file.recurse():
    components.append(element)

components

In [None]:
# Print every note, chord and rest of the parsed song.
# `notesAndRests` is used as a property everywhere else in this notebook;
# calling it with parentheses is not how it is accessed.
for note in file.flat.notesAndRests:
    print(note)

In [None]:
# Print the duration (in quarter lengths) of every note, chord and rest.
# `notesAndRests` is used as a property everywhere else in this notebook;
# calling it with parentheses is not how it is accessed.
for element in file.flat.notesAndRests:
    print(element.duration.quarterLength)

In [None]:
songs_has_no_chords(file)

In [None]:
# Scratch cell: print the MIDI pitches collected for every chord of the parsed song.
for event in file.flat.notesAndRests: # goes through all the elements of the song (notes, rests)
        # only chords are inspected here
        if isinstance(event, m21.chord.Chord):
            current_chord = [] # container for the pitches of the chord
            for chord_note in event:
                # keep the notes that are not tied or that start a tie
                if (chord_note.tie and chord_note.tie.type == "start") or not chord_note.tie:
                    current_chord.append(chord_note.pitch.midi)
            # NOTE(review): the original comment read "if current chord is empty",
            # but the test is len == 1 — confirm which one was intended
            if len(current_chord) == 1:
                    current_chord.append("r")
            
                
            symbol = current_chord
            print("Simbolo del acorde", symbol)

In [None]:
# Small encoded sample used to try out the mapping functions below.
songs = "86 _ r _ 84 81 _ 75.76"
#list(set(songs))

# Iterating a string yields single characters (including the spaces),
# not the space-separated symbols.
for s in songs:
    print(s)


In [None]:
def create_mapping(songs, mapping_json_name=None):
    """Build and return the symbol -> integer mapping of an encoded string.

    This definition replaces the earlier ``create_mapping(songs,
    mapping_json_name)`` once the cell is run, so it accepts the same
    optional second argument: re-running the main pipeline cell afterwards
    keeps working.

    Args:
        songs: string of whitespace-separated symbols.
        mapping_json_name: optional path; when given, the mapping is also
            written there as JSON.

    Returns:
        dict: each unique symbol mapped to its index in the sorted vocabulary.
    """
    # Sorted so the same input always yields the same integers; the order of a
    # bare set changes between Python sessions.
    vocabulary = sorted(set(songs.split()))
    mappings = {symbol: i for i, symbol in enumerate(vocabulary)}

    if mapping_json_name is not None:
        with open(mapping_json_name, "w") as fp:
            json.dump(mappings, fp, indent=4)

    print("Mapping created...")
    return mappings

In [None]:
create_mapping(songs)

In [None]:
def create_mapping2(songs):
    """Map every unique element of ``songs`` to an integer.

    ``songs`` is iterated directly (it is not split first), so a string
    contributes its individual characters.

    Returns:
        dict: each unique element mapped to its position in the vocabulary
        list built from ``set(songs)``.
    """
    unique_symbols = list(set(songs))  # list of the unique elements
    for symbol in unique_symbols:
        if isinstance(symbol, list):
            print("Acorde!")
    mappings = dict(zip(unique_symbols, range(len(unique_symbols))))

    print("Mapping created...")
    return mappings

In [None]:
create_mapping2(songs)

In [None]:
m21.chord.Chord([35, 57]).pitches

In [None]:
import numpy as np 
import pandas as pd
train = np.random.randn(3,2)

In [None]:
train

In [None]:
# `train` is a NumPy array at this point and arrays have no `to_csv`;
# wrap it in a DataFrame to write the CSV.
pd.DataFrame(train).to_csv("trainned.csv")

In [None]:
train = pd.DataFrame(train)

In [None]:
train.to_csv("trainned.csv")

In [None]:
ls -lrt "training_inputs.csv"

In [None]:
import tensorflow as tf
print(tf.__version__)

In [None]:
import tensorflow.keras as keras



In [None]:
print(keras.__version__)