In [None]:
import zipfile
import os

# Archive uploaded to the session and the folder it is unpacked into.
zip_path = '/content/deutschl.zip'
extract_dir = '/content/dataset'

# Unpack every member of the archive into extract_dir.
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extract_dir)

# List the top-level entries that were extracted (shown as the cell result).
os.listdir(extract_dir)


['__MACOSX', 'deutschl']

preprocessing

In [None]:
import os
import json
import music21 as m21
import numpy as np
import tensorflow.keras as keras

# Root folder that is walked recursively for .krn files.
KERN_DATASET_PATH = "/content/dataset/deutschl"  
# Folder that receives one encoded text file per kept song.
SAVE_DIR = "/content/dataset/processed_dataset"  
# Single text file holding all encoded songs joined by "/" delimiters.
SINGLE_FILE_DATASET = "/content/dataset/file_dataset"  
# JSON file holding the symbol -> integer vocabulary mapping.
MAPPING_PATH = "/content/dataset/mapping.json" 
# Number of time steps per training input; also the number of "/" tokens
# written between two songs in the single-file dataset.
SEQUENCE_LENGTH = 64  

# Accepted durations (expressed in quarter lengths)
ACCEPTABLE_DURATIONS = [
    0.25,  # sixteenth note
    0.5,   # eighth note
    0.75,  # dotted eighth note
    1.0,   # quarter note
    1.5,   # dotted quarter note
    2,     # half note
    3,     # dotted half note
    4      # whole note
]

def load_songs_in_kern(dataset_path):
    """Parse every ``.krn`` file found under ``dataset_path``.

    The directory tree is walked recursively. A file that fails to parse is
    reported on stdout and skipped, so one bad file does not abort the load.

    Args:
        dataset_path: Root directory to search.

    Returns:
        A list with one parsed music21 object per successfully loaded file.
    """
    # Lazily enumerate candidate paths so walking and parsing stay interleaved.
    kern_paths = (
        os.path.join(folder, name)
        for folder, _, names in os.walk(dataset_path)
        for name in names
        if name.endswith(".krn")
    )

    songs = []
    for file_path in kern_paths:
        try:
            songs.append(m21.converter.parse(file_path))
        except Exception as e:
            print(f"Erreur lors du chargement de {file_path}: {e}")
    return songs


def has_acceptable_durations(song, acceptable_durations):
    """Tell whether every note and rest of ``song`` has an allowed duration.

    Args:
        song: Object exposing ``flat.notesAndRests`` (a music21 stream).
        acceptable_durations: Container of allowed quarter-length values.

    Returns:
        True when all events have a ``duration.quarterLength`` contained in
        ``acceptable_durations`` (also True for a song without events),
        False as soon as one event does not.
    """
    return all(
        event.duration.quarterLength in acceptable_durations
        for event in song.flat.notesAndRests
    )


def transpose(song):
    """Transpose ``song`` to C (major mode) or A (minor mode).

    The key is estimated with ``song.analyze("key")``.

    Args:
        song: music21 object supporting ``analyze`` and ``transpose``.

    Returns:
        The transposed song, or ``song`` itself unchanged when the analysed
        mode is neither "major" nor "minor".
    """
    key = song.analyze("key")

    # Tonic every song of a given mode is moved to.
    target_tonic = {"major": "C", "minor": "A"}.get(key.mode)
    if target_tonic is None:
        # Undetermined mode: return the song without transposing it.
        return song

    interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch(target_tonic))
    return song.transpose(interval)


def encode_song(song, time_step=0.25):
    """Encode ``song`` as a space-separated time-series string.

    Each event occupies ``int(quarterLength / time_step)`` slots: the first
    slot holds the symbol (MIDI pitch number for a note, "r" for a rest) and
    the remaining slots hold "_" to mark that the event is held.

    Args:
        song: Object exposing ``flat.notesAndRests`` (a music21 stream).
        time_step: Length of one slot, in quarter lengths.

    Returns:
        The encoded song, e.g. ``"60 _ _ _ r _ 62 _"``. Events that are
        neither a Note nor a Rest are skipped, and an event shorter than
        one slot contributes nothing.
    """
    tokens = []
    for event in song.flat.notesAndRests:
        if isinstance(event, m21.note.Note):
            symbol = event.pitch.midi
        elif isinstance(event, m21.note.Rest):
            symbol = "r"
        else:
            continue

        steps = int(event.duration.quarterLength / time_step)
        if steps > 0:
            # One onset slot followed by (steps - 1) hold markers.
            tokens.append(symbol)
            tokens.extend(["_"] * (steps - 1))

    return " ".join(str(token) for token in tokens)


def preprocess(dataset_path, save_dir):
    """Load, filter, transpose, encode and save every song of the dataset.

    Songs containing a duration outside ``ACCEPTABLE_DURATIONS`` are skipped.
    Each kept song is written to ``save_dir`` as ``song_<index>.txt``, where
    the index is the song's position in the loaded list.

    Args:
        dataset_path: Root directory containing the ``.krn`` files.
        save_dir: Output directory; created when it does not exist.
    """
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    songs = load_songs_in_kern(dataset_path)
    print(f"Nombre total de chansons chargées : {len(songs)}")

    for i, song in enumerate(songs):
        if has_acceptable_durations(song, ACCEPTABLE_DURATIONS):
            encoded_song = encode_song(transpose(song))

            # Save the preprocessed song to its own text file.
            save_path = os.path.join(save_dir, f"song_{i}.txt")
            with open(save_path, "w") as fp:
                fp.write(encoded_song)

            # Progress is only reported for kept songs whose index is a
            # multiple of 10.
            if i % 10 == 0:
                print(f"{i} chansons prétraitées sur {len(songs)}")

def load(file_path):
    """Return the whole text content of ``file_path`` as a single string."""
    with open(file_path, "r") as fp:
        return fp.read()


def create_single_file_dataset(dataset_path, file_dataset_path, sequence_length):
    """Merge all encoded songs into one delimiter-separated string and save it.

    Every ``.txt`` file found under ``dataset_path`` is appended, followed by
    a space and ``sequence_length`` repetitions of ``"/ "``. Surrounding
    whitespace is stripped from the final result.

    Args:
        dataset_path: Directory walked recursively for ``.txt`` files.
        file_dataset_path: Destination file for the merged dataset.
        sequence_length: Number of "/" delimiter tokens after each song.

    Returns:
        The merged string that was written to ``file_dataset_path``.
    """
    new_song_delimiter = "/ " * sequence_length

    chunks = []
    for root, _, files in os.walk(dataset_path):
        for file in files:
            if not file.endswith(".txt"):
                continue
            chunks.append(load(os.path.join(root, file)) + " " + new_song_delimiter)

    # Drop the whitespace left around the concatenated songs.
    songs = "".join(chunks).strip()

    with open(file_dataset_path, "w") as fp:
        fp.write(songs)
    return songs

def create_mapping(songs, mapping_path):
    """Build the symbol -> integer vocabulary and save it as JSON.

    The vocabulary is sorted before indices are assigned. Iterating a bare
    ``set`` of strings yields an order that can change between interpreter
    runs, so regenerating the mapping would silently reshuffle the indices
    and invalidate any model trained against the previous file.

    Args:
        songs: All encoded songs as one whitespace-separated string.
        mapping_path: Destination path of the JSON mapping file.
    """
    # Identify the vocabulary in a deterministic order.
    vocabulary = sorted(set(songs.split()))

    # Assign consecutive integer ids following that order.
    mappings = {symbol: index for index, symbol in enumerate(vocabulary)}

    # Save the vocabulary to a JSON file.
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)


def convert_songs_to_int(songs):
    """Translate an encoded song string into a list of integer ids.

    The symbol -> integer mapping is read from ``MAPPING_PATH``.

    Args:
        songs: Whitespace-separated symbols.

    Returns:
        One integer per symbol, in the same order.

    Raises:
        KeyError: If a symbol is missing from the mapping.
    """
    with open(MAPPING_PATH, "r") as fp:
        mappings = json.load(fp)

    return [mappings[symbol] for symbol in songs.split()]


def generate_training_sequences(sequence_length):
    """Build one-hot input windows and integer targets for training.

    The dataset is read from ``SINGLE_FILE_DATASET``. Every window of
    ``sequence_length`` consecutive symbols is an input, and the symbol that
    immediately follows it is the target.

    Args:
        sequence_length: Number of symbols per input window.

    Returns:
        A tuple ``(inputs, targets)``: ``inputs`` is the one-hot encoding of
        the windows (# of sequences, sequence length, vocabulary size) and
        ``targets`` is an array with one integer id per window.
    """
    int_songs = convert_songs_to_int(load(SINGLE_FILE_DATASET))

    # One window per start position that still has a following target symbol.
    window_starts = range(len(int_songs) - sequence_length)
    inputs = [int_songs[start:start + sequence_length] for start in window_starts]
    targets = [int_songs[start + sequence_length] for start in window_starts]

    # One-hot encode the input windows; targets stay as integer ids.
    vocabulary_size = len(set(int_songs))
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabulary_size)
    targets = np.array(targets)

    return inputs, targets

def main():
    """Run the preprocessing pipeline end to end.

    Encodes the dataset, merges it into a single file, writes the vocabulary
    mapping and finally builds the training sequences.
    """
    preprocess(dataset_path=KERN_DATASET_PATH, save_dir=SAVE_DIR)
    print("Prétraitement terminé et dataset enregistré !")

    songs = create_single_file_dataset(
        dataset_path=SAVE_DIR,
        file_dataset_path=SINGLE_FILE_DATASET,
        sequence_length=SEQUENCE_LENGTH,
    )
    create_mapping(songs=songs, mapping_path=MAPPING_PATH)

    # The sequences are built here but not used further by this function.
    inputs, targets = generate_training_sequences(sequence_length=SEQUENCE_LENGTH)


if __name__ == "__main__":
    main()






Nombre total de chansons chargées : 628
0 chansons prétraitées sur 628


  return self.iter().getElementsByClass(classFilterList)


10 chansons prétraitées sur 628
20 chansons prétraitées sur 628
30 chansons prétraitées sur 628
40 chansons prétraitées sur 628
50 chansons prétraitées sur 628
60 chansons prétraitées sur 628
70 chansons prétraitées sur 628
80 chansons prétraitées sur 628
90 chansons prétraitées sur 628
100 chansons prétraitées sur 628
110 chansons prétraitées sur 628
120 chansons prétraitées sur 628
130 chansons prétraitées sur 628
140 chansons prétraitées sur 628
150 chansons prétraitées sur 628
160 chansons prétraitées sur 628
170 chansons prétraitées sur 628
180 chansons prétraitées sur 628
190 chansons prétraitées sur 628
200 chansons prétraitées sur 628
210 chansons prétraitées sur 628
230 chansons prétraitées sur 628
240 chansons prétraitées sur 628
250 chansons prétraitées sur 628
260 chansons prétraitées sur 628
270 chansons prétraitées sur 628
280 chansons prétraitées sur 628
290 chansons prétraitées sur 628
300 chansons prétraitées sur 628
310 chansons prétraitées sur 628
320 chansons prétra

training

In [None]:
import os
import json
import music21 as m21
import numpy as np
import tensorflow.keras as keras

# NOTE(review): KERN_DATASET_PATH and SAVE_DIR are not read by any function
# in this cell; presumably kept for parity with the preprocessing cell.
KERN_DATASET_PATH = "/content/dataset/deutschl"  
SAVE_DIR = "/content/dataset/processed_dataset"  
# Single text file holding all encoded songs; read to build training data.
SINGLE_FILE_DATASET = "/content/dataset/file_dataset"  
# JSON file holding the symbol -> integer vocabulary mapping.
MAPPING_PATH = "/content/dataset/mapping.json"  
# Number of symbols per training input window.
SEQUENCE_LENGTH = 64  

# NOTE(review): OUTPUT_UNITS is not referenced in this cell; the output layer
# is sized from the vocabulary size computed at training time. Confirm
# whether this constant is still needed.
OUTPUT_UNITS = 38  
# LSTM layer widths; only the first entry is read by build_model.
NUM_UNITS = [256]  
# Targets are integer class ids, hence the sparse variant of the loss.
LOSS = "sparse_categorical_crossentropy"  
# Learning rate passed to the Adam optimizer.
LEARNING_RATE = 0.001
# Number of passes over the training data.
EPOCHS = 50
# Number of sequences per gradient update.
BATCH_SIZE = 64
# Where the trained model is saved.
SAVE_MODEL_PATH = "/content/model.h5"

def load(file_path):
    """Read ``file_path`` as text and return its entire content."""
    with open(file_path, "r") as fp:
        return fp.read()

def convert_songs_to_int(songs):
    """Map every symbol of an encoded song string to its integer id.

    The vocabulary is loaded from the JSON file at ``MAPPING_PATH``.

    Args:
        songs: Whitespace-separated symbols.

    Returns:
        A list of integers, one per symbol, preserving order.

    Raises:
        KeyError: If a symbol is not present in the mapping.
    """
    with open(MAPPING_PATH, "r") as fp:
        symbol_to_int = json.load(fp)

    return [symbol_to_int[symbol] for symbol in songs.split()]

def generate_training_sequences(sequence_length):
    """Build one-hot input windows, integer targets and the vocabulary size.

    The dataset is read from ``SINGLE_FILE_DATASET``. Each window of
    ``sequence_length`` consecutive symbols is an input and the symbol right
    after it is the matching target.

    Args:
        sequence_length: Number of symbols per input window.

    Returns:
        A tuple ``(inputs, targets, vocabulary_size)`` where ``inputs`` has
        shape (num_sequences, sequence_length, vocabulary_size), ``targets``
        has shape (num_sequences,) and ``vocabulary_size`` is the number of
        distinct ids found in the dataset.
    """
    int_songs = convert_songs_to_int(load(SINGLE_FILE_DATASET))

    # One window per start position that still has a following target symbol.
    window_starts = range(len(int_songs) - sequence_length)
    inputs = [int_songs[start:start + sequence_length] for start in window_starts]
    targets = [int_songs[start + sequence_length] for start in window_starts]

    # One-hot encode the inputs; targets stay as integer class ids.
    vocabulary_size = len(set(int_songs))
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabulary_size)
    targets = np.array(targets)

    return inputs, targets, vocabulary_size

def build_model(sequence_length, vocabulary_size, num_units, loss, learning_rate):
    """Create and compile the LSTM next-symbol classifier.

    Architecture: Input -> LSTM -> Dropout(0.2) -> Dense softmax over the
    vocabulary. The model summary is printed before returning.

    Args:
        sequence_length: Number of time steps per input window.
        vocabulary_size: One-hot width of the inputs and size of the output.
        num_units: Sequence of layer widths; only ``num_units[0]`` is used.
        loss: Loss passed to ``compile``.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled Keras model.
    """
    layers = keras.layers

    model_input = layers.Input(shape=(sequence_length, vocabulary_size))
    hidden = layers.LSTM(num_units[0], return_sequences=False)(model_input)
    hidden = layers.Dropout(0.2)(hidden)
    model_output = layers.Dense(vocabulary_size, activation="softmax")(hidden)

    model = keras.Model(model_input, model_output)

    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])

    model.summary()

    return model

def train(sequence_length=SEQUENCE_LENGTH, num_units=NUM_UNITS, loss=LOSS, learning_rate=LEARNING_RATE):
    """Train the model on the encoded dataset and save it.

    Training runs for ``EPOCHS`` epochs with batches of ``BATCH_SIZE`` and
    the resulting model is written to ``SAVE_MODEL_PATH``.

    Args:
        sequence_length: Number of symbols per input window.
        num_units: Layer widths forwarded to ``build_model``.
        loss: Loss forwarded to ``build_model``.
        learning_rate: Learning rate forwarded to ``build_model``.
    """
    inputs, targets, vocabulary_size = generate_training_sequences(sequence_length)

    model = build_model(
        sequence_length=sequence_length,
        vocabulary_size=vocabulary_size,
        num_units=num_units,
        loss=loss,
        learning_rate=learning_rate,
    )

    model.fit(x=inputs, y=targets, epochs=EPOCHS, batch_size=BATCH_SIZE)

    model.save(SAVE_MODEL_PATH)


if __name__ == "__main__":
    train()


Epoch 1/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 7ms/step - accuracy: 0.7227 - loss: 1.0838
Epoch 2/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - accuracy: 0.7695 - loss: 0.7144
Epoch 3/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - accuracy: 0.7878 - loss: 0.6627
Epoch 4/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.7959 - loss: 0.6367
Epoch 5/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 7ms/step - accuracy: 0.7998 - loss: 0.6209
Epoch 6/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - accuracy: 0.8062 - loss: 0.5985
Epoch 7/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 8ms/step - accuracy: 0.8090 - loss: 0.5868
Epoch 8/50
[1m2070/2070[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.8150 - loss: 0.5689
Epoch 9/50
[1m2



melody generation

In [None]:
import json
import numpy as np
import tensorflow.keras as keras
import music21 as m21

# NOTE(review): KERN_DATASET_PATH, SAVE_DIR and SINGLE_FILE_DATASET are not
# read by the code in this cell; presumably kept for parity with the other
# cells.
KERN_DATASET_PATH = "/content/dataset/deutschl"  
SAVE_DIR = "/content/dataset/processed_dataset"  
SINGLE_FILE_DATASET = "/content/dataset/file_dataset"  
# JSON file holding the symbol -> integer vocabulary mapping.
MAPPING_PATH = "/content/dataset/mapping.json"  
# Number of "/" start symbols prepended to a seed, and the window length
# passed to generate_melody by the entry script.
SEQUENCE_LENGTH = 64 

class MelodyGenerator:
    """Generate melodies with a trained Keras model and export them to MIDI.

    Melodies use a time-series encoding: a MIDI pitch number or "r" (rest)
    marks the onset of an event, "_" holds the previous event for one more
    step, and "/" delimits songs.
    """

    def __init__(self, model_path="model.h5"):
        """Load the model and the symbol vocabulary.

        Args:
            model_path: Path of the saved Keras model.
        """
        self.model_path = model_path
        self.model = keras.models.load_model(model_path)

        with open(MAPPING_PATH, "r") as fp:
            self._mappings = json.load(fp)

        # Every seed is prefixed with a full window of song delimiters.
        self._start_symbols = ["/"] * SEQUENCE_LENGTH

    def generate_melody(self, seed, num_steps, max_sequence_length, temperature):
        """Extend ``seed`` by sampling symbols from the model.

        Args:
            seed: Space-separated encoded melody used as the starting point.
            num_steps: Maximum number of symbols to generate.
            max_sequence_length: Number of most recent symbols fed to the
                model at each step.
            temperature: Sampling temperature, see
                ``_sample_with_temperature``.

        Returns:
            The seed symbols followed by the generated symbols, as a list of
            strings. Generation stops early when "/" is sampled.

        Raises:
            KeyError: If the seed contains a symbol absent from the mapping.
        """
        # Create the seed with start symbols.
        seed = seed.split()
        melody = seed
        seed = self._start_symbols + seed

        # Map the seed to integer ids.
        seed = [self._mappings[symbol] for symbol in seed]

        # Build the reverse lookup once instead of scanning the mapping at
        # every step; setdefault keeps the first symbol seen for an id.
        symbol_by_int = {}
        for symbol, index in self._mappings.items():
            symbol_by_int.setdefault(index, symbol)
        num_classes = len(self._mappings)

        for _ in range(num_steps):

            # Limit the seed to max_sequence_length.
            seed = seed[-max_sequence_length:]

            # One-hot encode the seed and add the batch dimension:
            # (1, max_sequence_length, number of symbols in the vocabulary)
            onehot_seed = keras.utils.to_categorical(seed, num_classes=num_classes)
            onehot_seed = onehot_seed[np.newaxis, ...]

            # verbose=0 silences the per-call progress bar that would
            # otherwise be printed once per generated symbol.
            probabilities = self.model.predict(onehot_seed, verbose=0)[0]
            output_int = int(self._sample_with_temperature(probabilities, temperature))

            # Update the seed.
            seed.append(output_int)

            # Map the integer back to our encoding.
            output_symbol = symbol_by_int[output_int]

            # Check whether we're at the end of a melody.
            if output_symbol == "/":
                break

            # Update the melody.
            melody.append(output_symbol)

        return melody

    def _sample_with_temperature(self, probabilites, temperature):
        """Sample an index after reshaping the distribution by ``temperature``.

        Values below 1 sharpen the distribution toward the most likely
        index; values above 1 flatten it.

        Args:
            probabilites: 1-D array-like of probabilities.
            temperature: Divisor applied to the log-probabilities; must be
                non-zero.

        Returns:
            The sampled index.
        """
        probabilites = np.asarray(probabilites, dtype=np.float64)

        # log(0) legitimately yields -inf (the index ends up with weight 0),
        # so the divide warning is suppressed.
        with np.errstate(divide="ignore"):
            predictions = np.log(probabilites) / temperature

        # Shift by the maximum before exponentiating so that at low
        # temperatures the weights cannot all underflow to 0 (which would
        # turn the normalisation into 0/0).
        predictions = predictions - np.max(predictions)
        weights = np.exp(predictions)
        probabilites = weights / np.sum(weights)

        return np.random.choice(len(probabilites), p=probabilites)

    @staticmethod
    def _group_symbols(melody):
        """Collapse a symbol sequence into ``(symbol, step_count)`` events.

        Each non-"_" symbol starts an event lasting one step, and every
        following "_" extends it by one step. The final event is always
        included with its full length. "_" symbols that appear before any
        event are ignored because there is nothing to prolong.
        """
        events = []
        for symbol in melody:
            if symbol == "_":
                if events:
                    events[-1][1] += 1
                continue
            events.append([symbol, 1])
        return [(symbol, steps) for symbol, steps in events]

    def save_melody(self, melody, step_duration=0.25, format="midi", file_name="mel.mid"):
        """Convert an encoded melody to a music21 stream and write it to disk.

        Args:
            melody: List of encoded symbols (pitch numbers, "r", "_").
            step_duration: Length of one time step, in quarter lengths.
            format: Output format passed to ``stream.write``.
            file_name: Destination file.
        """
        # Create a music21 stream.
        stream = m21.stream.Stream()

        # Turn every grouped event into a note/rest object.
        for symbol, steps in self._group_symbols(melody):
            quarter_length_duration = step_duration * steps  # 0.25 * 4 = 1

            if symbol == "r":
                m21_event = m21.note.Rest(quarterLength=quarter_length_duration)
            else:
                m21_event = m21.note.Note(int(symbol), quarterLength=quarter_length_duration)

            stream.append(m21_event)

        # Write the m21 stream to a file.
        stream.write(format, file_name)


if __name__ == "__main__":
    # Two example seeds in the time-series encoding; only `seed` is used.
    seed = "67 _ 64 _ 67 _ _ 65 64 _ 64 _ 64 _ _"
    seed2 = "67 _ _ _ _ _ 65 _ 64 _ 62 _ 60 _ _ _"

    mg = MelodyGenerator()

    # Generate at most 500 symbols at a low (conservative) temperature.
    melody = mg.generate_melody(
        seed=seed,
        num_steps=500,
        max_sequence_length=SEQUENCE_LENGTH,
        temperature=0.3,
    )
    print(melody)

    # Export with the default step duration, format and file name.
    mg.save_melody(melody=melody)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 91ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15