In [1]:
# Import required libraries
import glob
import pickle
import numpy as np
from music21 import converter, instrument, note, chord
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Embedding, Flatten
from keras.layers import MultiHeadAttention, LayerNormalization
from keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
import tensorflow as tf
import os

def train_transformer():
    """ Trains a Transformer model to generate music """
    notes = get_notes()  # Get all notes and chords from MIDI files
    vocab_size = len(set(notes))  # Vocabulary size
    network_input, network_output = prepare_sequences(notes, vocab_size)
    model = create_transformer(network_input, vocab_size)
    train_model(model, network_input, network_output)

def get_notes():
    """ Gets all notes and chords with their durations from MIDI files """
    notes = []
    
   
    midi_path = "/kaggle/input/full-51/*.mid"  
    for file in glob.glob(midi_path):
        midi = converter.parse(file)
        print(f"Parsing {file}")

        notes_to_parse = None
        try:
            s2 = instrument.partitionByInstrument(midi)
            notes_to_parse = s2.parts[0].recurse()
        except:
            notes_to_parse = midi.flat.notes

        for element in notes_to_parse:
            duration = element.duration.quarterLength
            if duration < 0.75:
                duration_class = 'short'
            elif duration < 1.5:
                duration_class = 'medium'
            else:
                duration_class = 'long'

            if isinstance(element, note.Note):
                note_str = f"{str(element.pitch)}_{duration_class}"
                notes.append(note_str)
            elif isinstance(element, chord.Chord):
                chord_str = f"{'.'.join(str(n) for n in element.normalOrder)}_{duration_class}"
                notes.append(chord_str)

    # Save notes to Kaggle's working directory
    os.makedirs('/kaggle/working/transformer_outputs/model_notes', exist_ok=True)
    with open('/kaggle/working/transformer_outputs/model_notes/full51_notes.pkl', 'wb') as filepath:
        pickle.dump(notes, filepath)

    return notes

def prepare_sequences(notes, vocab_size):
    """ Prepares the sequences used by the Transformer """
    sequence_length = 25
    note_names = sorted(set(notes))
    note_to_int = dict((note, number) for number, note in enumerate(note_names))

    network_input = []
    network_output = []

    for i in range(len(notes) - sequence_length):
        input_sequence = notes[i:i + sequence_length]
        output_sequence = notes[i + sequence_length]
        network_input.append([note_to_int[char] for char in input_sequence])
        network_output.append(note_to_int[output_sequence])

    network_input = np.array(network_input)
    network_output = to_categorical(network_output, num_classes=vocab_size)

    return network_input, network_output

def create_transformer(network_input, vocab_size):
    """ Creates the Transformer model structure """
    d_model = 256  # Embedding dimension
    num_heads = 8  # Number of attention heads
    dff = 512      # Feed-forward layer dimension

    input_length = network_input.shape[1]
    input_length = int(input_length)

    print(f"input_length: {input_length}")
    print(f"type(input_length): {type(input_length)}")

    inputs = Input(shape=(input_length,))
    embedding = Embedding(input_dim=vocab_size, output_dim=d_model)(inputs)

    # Positional Embedding
    positions = tf.range(start=0, limit=input_length, delta=1)
    positions = positions[tf.newaxis, :]
    position_embedding_layer = Embedding(input_dim=input_length, output_dim=d_model)
    position_embeddings = position_embedding_layer(positions)

    x = embedding + position_embeddings

    # Transformer block
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=d_model)(x, x)
    attn_output = Dropout(0.1)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(x + attn_output)

    ffn_output = Dense(dff, activation='relu')(out1)
    ffn_output = Dense(d_model)(ffn_output)
    ffn_output = Dropout(0.1)(ffn_output)
    out2 = LayerNormalization(epsilon=1e-6)(out1 + ffn_output)

    out_flat = Flatten()(out2)
    outputs = Dense(vocab_size, activation='softmax')(out_flat)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy')

    return model

def train_model(model, network_input, network_output):
    """ Trains the Transformer model """
    # Save weights every epoch in Kaggle's working directory
    os.makedirs('/kaggle/working/transformer_outputs/weights', exist_ok=True)
    filepath = "/kaggle/working/transformer_outputs/weights/weights_transformer-full_51-epoch{epoch:02d}-loss{loss:.4f}.keras"

    checkpoint = ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=1,
        save_best_only=False,  # Save every epoch
        save_weights_only=False,  # Save full model
        mode='min'
    )
    callbacks_list = [checkpoint]

    model.fit(network_input, network_output, epochs=100, batch_size=64, callbacks=callbacks_list)

if __name__ == '__main__':
    train_transformer()

2025-05-19 20:45:19.115856: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747687519.358738      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747687519.427894      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Parsing /kaggle/input/full-51/burg_geschwindigkeit.mid
Parsing /kaggle/input/full-51/br_im2.mid
Parsing /kaggle/input/full-51/alb_se2.mid
Parsing /kaggle/input/full-51/muss_7.mid
Parsing /kaggle/input/full-51/mendel_op62_3.mid
Parsing /kaggle/input/full-51/mendel_op19_6.mid
Parsing /kaggle/input/full-51/scn16_4.mid
Parsing /kaggle/input/full-51/schubert_D850_4.mid
Parsing /kaggle/input/full-51/elise.mid
Parsing /kaggle/input/full-51/mz_570_2.mid
Parsing /kaggle/input/full-51/burg_perlen.mid
Parsing /kaggle/input/full-51/chpn_op10_e05.mid
Parsing /kaggle/input/full-51/mendel_op30_4.mid
Parsing /kaggle/input/full-51/chpn-p11.mid
Parsing /kaggle/input/full-51/schub_d760_4.mid
Parsing /kaggle/input/full-51/liz_et_trans8.mid
Parsing /kaggle/input/full-51/brahms_opus1_4.mid
Parsing /kaggle/input/full-51/chpn-p18.mid
Parsing /kaggle/input/full-51/liz_et4.mid
Parsing /kaggle/input/full-51/scn16_2.mid
Parsing /kaggle/input/full-51/alb_esp4.mid
Parsing /kaggle/input/full-51/brahms_opus1_1.mid
Pa

I0000 00:00:1747687704.156548      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/100


I0000 00:00:1747687711.860030      95 service.cc:148] XLA service 0x7dc7c8014250 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1747687711.860780      95 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1747687712.347372      95 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  16/1086[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m11s[0m 11ms/step - loss: 8.8060

I0000 00:00:1747687715.368400      95 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 5.6727
Epoch 1: saving model to /kaggle/working/transformer_outputs/weights/weights_transformer-full_51-epoch01-loss4.9646.keras
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 13ms/step - loss: 5.6720
Epoch 2/100
[1m1081/1086[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - loss: 4.0520
Epoch 2: saving model to /kaggle/working/transformer_outputs/weights/weights_transformer-full_51-epoch02-loss3.9340.keras
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 11ms/step - loss: 4.0514
Epoch 3/100
[1m1081/1086[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 10ms/step - loss: 3.1424
Epoch 3: saving model to /kaggle/working/transformer_outputs/weights/weights_transformer-full_51-epoch03-loss3.1074.keras
[1m1086/1086[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 11ms/step - loss: 3.1422
Epoch 4/100
[1m1081/1086[0m [32m━━━━━━━━━━━━━━