In [1]:
#Data preprocessing tutorial implementation
#Url: https://youtu.be/coEgwnMBuo0

import music21 as m21
import os

In [2]:
"""Steps
1. Load data
2. Filter songs with no acceptable duration
3. Transpose songs yo cmaj/Amin
4. Encode songs with music time series representation
5. save songs to text file"""

'Steps\n1. Load data\n2. Filter songs with no acceptable duration\n3. Transpose songs yo cmaj/Amin\n4. Encode songs with music time series representation\n5. save songs to text file'

In [3]:
#Path: C:\Users\malfaro\Desktop\mae_code\SoundGeneration


In [16]:
#1. Data loading
DATASET_PATH = r"/Users/mauricioalfaro/Documents/mae_code/SoundGeneration/data/essen/europa/deutschl/test/"



#Go through all the .kern files and load them together using m21
def load_songs_in_kern(dataset_path):
   songs = []
   for path, subdirs, files in os.walk(dataset_path):
        for file in files:
           if file[-3:] == "krn":
               song = m21.converter.parse(os.path.join(path, file))#convertir a objeto de music21
               songs.append(song)

   return songs

def preprocess(dataset_path):
    print("Loading songs...")
    songs = load_songs_in_kern(dataset_path)
    print(f"Loaded {len(songs)} songs!")
              
              
#2. Filter by acceptable duration

ACCEPTABLE_DURATIONS = [0.25, 0.5, 0.75, 1, 1.5, 2, 3, 4]

def has_acceptable_duration(song, acceptable_durations):
    """Boolean method for checking if the songs complies with duration.
    Se considera como referncia una negra (quarter length)
    redonda = whole note = 4
    blanca = half note = 2
    blanca con punto = 3
    negra = quarter note = 1
    negra con punto = 1.5
    corchea = eigth note = 0.5
    corchea con punto = 0.75
    semicorchea = sixteenth note = 0.25
    """
    for note in song.flat.notesAndRests: 
        #flat toma todos los objetos de la cancion, los convierte en lista
        #notesAndRests deja solo las notas y silencios, excluyendo claves, simolos, etc
        if note.duration.quarterLength not in acceptable_durations:
            return False
        
        return True
    
def transpose(song):
    """
    - Detect the key or estimate it using music21
    - get the interval or distance necessary to transpose to Cmaj/Amin
    - transpose using m21 if necessary"""
    #Get the song key
    #usually the key is in the first measure of the song
    parts = song.getElementsByClass(m21.stream.Part) # extracts the parts adnd extracts all the elements by part 
    #go to the first part and take all the measures in part 0 
    measures_part0 = parts[0].getElementsByClass(m21.stream.Measure)
    key = measures_part0[0][4] #tomo la primera parte de measures y extraigo de esa lista el elemento 4 que es key
    
    #In case the key is not in the song we use m21 to estimate it
    
    if not isinstance(key, m21.key.Key):#if the song doesnt have a key stored
        key = song.analyze("key") #estimate it...
    #Now transpose to cmaj or A minor depending on the mode of the song...
    if key.mode == "major":
        interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch("C")) #calculates the interval
        
    elif key.mode == "minor":
        interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch("A ")) #calculates the interval
    
    print("Original key:", key)
    #transpose de song
    transposed_song = song.transpose(interval)
    
    return transposed_song


#4. Encode songs with music time series representation

def encode_song(song, time_step = 0.25):
    """takes a song as a music21 object
    and returns a string in which the song has
    been encoded into a time series music representation
    Example:
    a note of pitch 60 that lasts one bar would be encoded
    as: [60,"_", "_", "_"]
    Time_step = 0.25 significa que nos vamos moviendo en semicorcheas por
    toda la canción"""
    
    for event in song.flat.notesAndRests:#flat crea una lista de todos los elementos de la cancion
        """un event es una nota o rest. Por ejemplo: la canción empieza con 
        una nota larga de pitch 60 que dura 4 tiempos (un compás)"""
        encoded_song = []
        #pueden ser notes or rests
        #if note ---> guardar la nota
        if isinstance(event, m21.note.Note):#si el evento es una nota
            symbol = event.pitch.midi #guarda la nota como midi (60 en este caso)
        #if rest---> guardar como string "r"
        if isinstance(event, m21.note.Rest):
            symbol = "r"
    #ahora convierte todo a time series music notation. El evento del ejemplo
    #quedaria como [60,"_", "_", "_"] steps es en nro de timesteps que dura el evento. 
    #Para calcularlo tomo la duracion del evento en negras y la divido por time_step"""
        steps = int(event.duration.quarterLength / time_step)
        
        #tomo e evento dividido en steps y si estoy al comnienzo guardo el simbolo, si no 
        #guardo "_", ya que siempre va a ser así
        for step in range(steps):
            if step == 0:
                encoded_song.append(symbol)
            else:
                encoded_song.append("_")
            
    #cast the encoded song into a string
    #convierto con map todos los caracteres de encoded_song a str
    #y luego los uno separados por un " "
    encoded_song = " ".join(map(str, encoded_song))
    return encoded_song

    
    

SAVE_DIR = r"/Users/mauricioalfaro/Documents/mae_code/SoundGeneration/dataset" 




#nombre del directorio donde quedaran las canciones


    
def preprocess(dataset_path):
    print("Loading songs...")
    songs = load_songs_in_kern(dataset_path)
    print(f"Loaded {len(songs)} songs!")
    #Filter by duration
    for i, song in enumerate(songs):
        if not has_acceptable_duration(song, ACCEPTABLE_DURATIONS):
            continue #si la cancion no cumple la ignora
        #Transpose song
        song = transpose(song)
        # Encode songs with music time series representation
        encoded_song = encode_song(song)
        
        #5. save songs to text file  
        save_path = os.path.join(SAVE_DIR, str(i)) #guarda cada cancion con un nro en el dir "dataset"
        with open(save_path, "w") as fp:
            fp.write(encoded_song)
    

if __name__ == "__main__":
    songs = load_songs_in_kern(DATASET_PATH)
    print(f"Loaded {len(songs)} songs!")
    song = songs[0]
    print(f"Has acceptable duration? {has_acceptable_duration(song, ACCEPTABLE_DURATIONS)}")
    
    transposed_song = transpose(song)
    #transposed_song.show()
    print(os.getcwd())
    preprocess(DATASET_PATH)


Loaded 12 songs!
Has acceptable duration? True
Original key: g minor
/Users/mauricioalfaro/Documents/mae_code/SoundGeneration
Loading songs...
Loaded 12 songs!
Original key: g minor


FileNotFoundError: [Errno 2] No such file or directory: '/Users/mauricioalfaro/Documents/mae_code/SoundGeneration/dataset/0'

In [None]:
songs = load_songs_in_kern(DATASET_PATH)
print(f"Loaded {len(songs)} songs!")
song = songs[0]

encoded = encode_song(song)
encoded

In [None]:
#Ver los archivos de un directorio con os
files = os.listdir(DATASET_PATH)
for f in files:
    print(f)

In [None]:
#Ver ruta actual
os.getcwd()

In [None]:
songs = load_songs_in_kern(DATASET_PATH)

In [None]:
song = songs[0]
parts = song.getElementsByClass(m21.stream.Part)

In [None]:
parts

In [None]:
    """"ahora convierte todo a time series music notation. El evento del ejemplo
    quedaria como [60,"_", "_", "_"] steps es en nro de timesteps que dura el evento"""
    
    """Tomo la duracion del evento en negras y la divido por time_step"""