In [9]:
import numpy as np
import pandas as pd
import joblib
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Attention, Concatenate, Flatten, Conv1D, MaxPooling1D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
from midiutil import MIDIFile
import pickle

In [10]:
def note_to_midi(note, octave):
    """Return the MIDI note number for a pitch name and an octave.

    Parameters
    ----------
    note : str
        Pitch name such as ``'c'`` or ``'F#'``. Case-insensitive; only the
        twelve sharp-based spellings are known.
    octave : int
        Octave number. Octave -1 maps ``'c'`` to MIDI note 0.

    Returns
    -------
    int
        ``(octave + 1) * 12`` plus the semitone offset of the pitch above C.

    Raises
    ------
    KeyError
        If the lower-cased ``note`` is not one of the known pitch names.
    """
    # Accept upper- and lower-case spellings alike
    key = note.lower()

    # Chromatic scale in ascending order: list index == semitone offset above C
    chromatic = ['c', 'c#', 'd', 'd#', 'e', 'f', 'f#', 'g', 'g#', 'a', 'a#', 'b']
    semitone_of = {name: offset for offset, name in enumerate(chromatic)}

    octave_base = (octave + 1) * 12
    return octave_base + semitone_of[key]

def melody_to_midi(melody, rhythm_pattern, velocity_pattern, filename, tempo=120):
    """Write a melody to a single-track MIDI file.

    Parameters
    ----------
    melody : sequence of str
        Note names of the form ``'<pitch><octave>'``, e.g. ``'c4'``, ``'F#3'``,
        ``'c-1'`` or ``'g10'``. The octave is the trailing (optionally
        negative) integer; everything before it is the pitch name.
    rhythm_pattern : sequence of numbers
        Note durations in beats. Cycled when shorter than ``melody``.
    velocity_pattern : sequence of int
        Note velocities. Cycled when shorter than ``melody``.
    filename : str or path-like
        Destination file, opened in binary write mode.
    tempo : int or float, optional
        Tempo in beats per minute. Defaults to 120, the previously
        hard-coded value.

    Raises
    ------
    ValueError
        If a note does not end in an integer octave.
    KeyError
        If the pitch name is unknown to ``note_to_midi``.
    ZeroDivisionError
        If ``melody`` is non-empty and one of the patterns is empty.
    """
    import re  # local import: only needed for parsing note names

    # Pitch name (shortest possible prefix) followed by a signed integer octave.
    # Unlike slicing off the last character, this also handles multi-digit
    # and negative octaves.
    note_pattern = re.compile(r'(.+?)(-?\d+)')

    midi = MIDIFile(1)
    midi.addTempo(0, 0, tempo)

    for i, note in enumerate(melody):
        parsed = note_pattern.fullmatch(note)
        if parsed is None:
            raise ValueError(
                f"Cannot parse note {note!r}: expected '<pitch><octave>', e.g. 'c4'"
            )
        pitch_class, octave = parsed.groups()
        pitch = note_to_midi(pitch_class.lower(), int(octave))
        duration = rhythm_pattern[i % len(rhythm_pattern)]
        velocity = velocity_pattern[i % len(velocity_pattern)]
        # Note i always starts on beat i; `duration` only controls how long it
        # sounds, so durations other than 1 produce overlaps or gaps.
        midi.addNote(0, 0, pitch, i, duration, velocity)

    with open(filename, 'wb') as output_file:
        midi.writeFile(output_file)

In [11]:
import os

# Project root. Defaults to the original JupyterLab workspace; set the
# CANTUS_AI_DIR environment variable to run the notebook from another checkout.
PROJECT_DIR = os.environ.get("CANTUS_AI_DIR", "/home/jovyan/workspace/cantus_ai/")

# Relative paths used in later cells (output/..., models/...) resolve against
# this directory.
os.chdir(PROJECT_DIR)

In [12]:
# Dataset/model name: selects the input file output/{MODEL}_melodies.csv and
# the directory models/{MODEL}/ where the trained model and encoders are saved.
MODEL = "antiphon"

In [13]:
# Fixed seed so the train/test split is identical on every run; without it the
# held-out ids change on each execution and cannot be recovered later.
SPLIT_SEED = 42
TEST_SET_SIZE = 100

# Read `mode` as text so that the string comparison below behaves the same
# whether pandas would otherwise infer the column as integers or as objects.
tb_chants = pd.read_csv(f'output/{MODEL}_melodies.csv', dtype={'mode': str})

# Keep only the eight numbered modes and drop rows with missing values.
tb_chants = tb_chants[tb_chants['mode'].isin([str(x) for x in range(1, 9)])].dropna()

# One id per distinct melody (the first id wins). The note lists are turned
# into tuples because tuples are hashable, which is what duplicate detection
# requires.
unique_ids = (
    tb_chants.groupby('id')['notes']
    .apply(list)
    .map(tuple)
    .reset_index()
    .drop_duplicates(subset='notes')['id']
    .tolist()
)
tb_chants = tb_chants[tb_chants['id'].isin(unique_ids)]

# Hold out TEST_SET_SIZE whole melodies (by id) for testing.
rng = np.random.default_rng(SPLIT_SEED)
test_set = rng.choice(unique_ids, TEST_SET_SIZE, replace=False)
is_test = tb_chants['id'].isin(test_set)
train_data = tb_chants[~is_test]
test_data = tb_chants[is_test]

# Both group-bys iterate ids in the same sorted order, so melodies[k] and
# modes[k] describe the same chant.
melodies = train_data.groupby('id')['notes'].apply(list).tolist()
modes = train_data.groupby('id')['mode'].first().to_list()

In [14]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Concatenate, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler

# Note vocabulary: every distinct note gets an integer id, with 0 kept free for padding
vocab = sorted({note for melody in melodies for note in melody})
vocab_dict = {note: idx for idx, note in enumerate(vocab, start=1)}
vocab_size = 1 + len(vocab_dict)  # ids 1..N plus the padding id 0

# Mode vocabulary, built the same way (ids start at 1)
mode_dict = {mode: idx for idx, mode in enumerate(sorted(set(modes)), start=1)}
mode_vocab_size = 1 + len(mode_dict)

# Integer-encode every melody and its mode
encoded_melodies = [[vocab_dict[note] for note in melody] for melody in melodies]
encoded_modes = [mode_dict[mode] for mode in modes]

# For each note, how many notes follow it in its melody: len-1, len-2, ..., 0
positions = [list(range(len(melody) - 1, -1, -1)) for melody in encoded_melodies]

# One training sample per melody prefix: the model sees the first i notes,
# the mode and the remaining-notes count of the last seen note, and must
# predict note i.
X_series, X_modes, X_positions, y = [], [], [], []

for melody, mode, position in zip(encoded_melodies, encoded_modes, positions):
    for i in range(1, len(melody)):
        X_series.append(melody[:i])
        X_modes.append([mode])
        X_positions.append([position[i - 1]])
        y.append(melody[i])

# Left-pad the prefixes with 0 so they all share the length of the longest one
X_series = tf.keras.preprocessing.sequence.pad_sequences(X_series, padding='pre')

# Everything handed to Keras as numpy arrays
X_series = np.array(X_series)
X_modes, X_positions, y = (np.array(values) for values in (X_modes, X_positions, y))

In [15]:
# Cosine-annealing schedule for the learning rate
def cosine_decay(epoch, initial_lr, total_epochs=None):
    """Return the cosine-annealed learning rate for a given epoch.

    The rate starts at ``initial_lr`` for ``epoch == 0`` and follows half a
    cosine wave down to 0 at ``epoch == total_epochs``.

    Parameters
    ----------
    epoch : int
        Zero-based index of the current epoch.
    initial_lr : float
        Learning rate at epoch 0.
    total_epochs : int, optional
        Length of the schedule in epochs. When omitted, the notebook-level
        ``epochs`` variable is looked up at call time, which is how the
        function behaved before this parameter existed.

    Returns
    -------
    float
        ``initial_lr * 0.5 * (1 + cos(pi * epoch / total_epochs))``.
    """
    if total_epochs is None:
        total_epochs = epochs
    # Named differently from the function so the function is not shadowed
    decay_factor = 0.5 * (1 + np.cos(np.pi * epoch / total_epochs))
    return float(initial_lr * decay_factor)

# Parameters
embedding_dim = 24
lstm_units = 64
mode_embedding_dim = 6
dense_units = 64
dropout_rate = 0.2
initial_lr = 0.001
epochs = 50

# Input layers
series_input = Input(shape=(None,), name='series_input')      # variable-length encoded note prefix
mode_input = Input(shape=(1,), name='mode_input')             # one encoded mode per sample
position_input = Input(shape=(1,), name='position_input')     # one remaining-notes count per sample

# Embedding layers
series_embedding = Embedding(input_dim=vocab_size, output_dim=embedding_dim, name='series_embedding')(series_input)
mode_embedding = Embedding(input_dim=mode_vocab_size, output_dim=mode_embedding_dim, name='mode_embedding')(mode_input)

# CNN branch: local patterns over the note embeddings, then halve the time axis
cnn1 = Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(series_embedding)
pool1 = MaxPooling1D(pool_size=2)(cnn1)

# Two recurrent summaries of the prefix: one over the CNN features, one over
# the raw note embeddings
lstm1 = LSTM(units=lstm_units, name='lstm1', dropout=0, recurrent_dropout=0)(pool1)
lstm2 = LSTM(units=lstm_units, name='lstm2', dropout=0, recurrent_dropout=0)(series_embedding)

# The mode embedding has a length-1 time axis. Remove it with a Keras layer
# rather than calling tf.squeeze on a symbolic tensor: the result has the same
# shape, but the model no longer depends on raw TF ops being accepted inside
# the functional graph.
mode_vector = Flatten(name='mode_flatten')(mode_embedding)

# Concatenate both sequence summaries, the mode vector and the position input
concatenated = Concatenate(name='concat_layer')([lstm1, lstm2, mode_vector, position_input])

# Dense layers with dropout and batch normalization
dense1 = Dense(units=dense_units, activation='relu', name='dense1')(concatenated)
dropout1 = Dropout(rate=dropout_rate, name='dropout1')(dense1)
batch_norm1 = BatchNormalization(name='batch_norm1')(dropout1)

dense2 = Dense(units=dense_units, activation='relu', name='dense2')(batch_norm1)
dropout2 = Dropout(rate=dropout_rate, name='dropout2')(dense2)
batch_norm2 = BatchNormalization(name='batch_norm2')(dropout2)

dense3 = Dense(units=dense_units, activation='relu', name='dense3')(batch_norm2)
dropout3 = Dropout(rate=dropout_rate, name='dropout3')(dense3)
batch_norm3 = BatchNormalization(name='batch_norm3')(dropout3)

# Output layer: probability distribution over the note vocabulary (incl. padding id 0)
output = Dense(units=vocab_size, activation='softmax', name='output')(batch_norm3)

# Model
model = Model(inputs=[series_input, mode_input, position_input], outputs=output, name='LSTM_Model')

# Compile model; targets are integer note ids, hence the sparse loss
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=initial_lr), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callbacks: stop once val_loss has not improved for 5 epochs (keeping the best
# weights) and anneal the learning rate with the cosine schedule
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_scheduler = LearningRateScheduler(schedule=lambda epoch: cosine_decay(epoch, initial_lr))

# Summary of the model
model.summary()

Model: "LSTM_Model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 series_input (InputLayer)   [(None, None)]               0         []                            
                                                                                                  
 series_embedding (Embeddin  (None, None, 24)             672       ['series_input[0][0]']        
 g)                                                                                               
                                                                                                  
 conv1d_1 (Conv1D)           (None, None, 32)             2336      ['series_embedding[0][0]']    
                                                                                                  
 mode_input (InputLayer)     [(None, 1)]                  0         []                   

In [16]:
# Train the network; 20% of the samples are held out for validation, which
# drives both early stopping and the reported val_loss.
train_inputs = [X_series, X_modes, X_positions]
model.fit(
    train_inputs,
    y,
    epochs=epochs,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stopping, lr_scheduler],
)


Epoch 1/50


2024-05-29 10:14:14.076362: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 417706400 exceeds 10% of free system memory.




2024-05-29 10:16:40.206318: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 104426600 exceeds 10% of free system memory.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


<keras.src.callbacks.History at 0x7fc5c53adc30>

In [18]:
# Make sure the target directory exists; otherwise saving fails on a fresh checkout
model_dir = f'models/{MODEL}'
os.makedirs(model_dir, exist_ok=True)

# Save the model
model.save(f'{model_dir}/melody_prediction_model_with_notes_until_end.h5')

# Save the dictionaries that map notes and modes to the integer ids the model
# was trained on; they are needed to encode inputs and decode predictions later
with open(f'{model_dir}/pitch_encoder.pkl', 'wb') as f:
    pickle.dump(vocab_dict, f)

with open(f'{model_dir}/mode_encoder.pkl', 'wb') as f:
    pickle.dump(mode_dict, f)