In [4]:
# Global Imports
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding, Flatten, Dropout, Input
from keras.utils import to_categorical
from tensorflow.keras.models import Model


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

from keras.optimizers import Adam


import joblib


In [5]:
# Load csv data
data = pd.read_csv('midi_training.csv')

data.head()

Unnamed: 0,Onset_Beats,Duration_Beats,Midi_Channel,Midi_Pitch,Velocity,Onset_Sec,Duration_Sec,Composer,Piece
0,1.0,1.0,2,59,127,0.0,0.521739,abel,sonata_e_minor_1_(nc)werths
1,2.0,2.0,2,67,127,0.521739,1.043478,abel,sonata_e_minor_1_(nc)werths
2,4.0,0.5,2,66,127,1.565217,0.260869,abel,sonata_e_minor_1_(nc)werths
3,4.5,0.483333,2,64,127,1.826086,0.252174,abel,sonata_e_minor_1_(nc)werths
4,5.0,0.333333,2,63,127,2.086956,0.173913,abel,sonata_e_minor_1_(nc)werths


In [6]:
# Features are everything except 'Midi_Pitch', 'Composer', and 'Piece'
X = data.drop(['Midi_Pitch', 'Composer', 'Piece'], axis=1).values

# Target variables: 'Midi_Pitch', 'Composer', and 'Piece' are categorical
y_continuous = data[['Onset_Beats', 'Duration_Beats', 'Midi_Channel', 'Velocity', 'Onset_Sec', 'Duration_Sec']].values
y_midi_pitch = data['Midi_Pitch'].values
y_composer = data['Composer'].values
y_piece = data['Piece'].values

In [7]:
# Check the unique values for Midi_Pitch, Composer, Piece
print("Unique Midi_Pitch values:", np.unique(data['Midi_Pitch']))
print("Unique Composer values:", np.unique(data['Composer']))
print("Unique Piece values:", np.unique(data['Piece']))

Unique Midi_Pitch values: [38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84]
Unique Composer values: ['abel' 'albrechtsberger' 'lasso' 'lidon']
Unique Piece values: [' duet 3f (gangi)' 'discendite_a_me_(mccloskey)'
 'expandi_manus_(mccloskey)' 'intellectum_tibi_(mccloskey)'
 'oculus_non_vidit_(mccloskey)' 'quartet_in_concert_form_2-3_(c)pajares'
 'quartet_in_concert_from_1_(c)pajares' 'sonata_e_minor_1_(nc)werths']


In [8]:
# Step 3: Normalize the features (except for categorical ones)
scaler = MinMaxScaler()
X = scaler.fit_transform(X)


In [9]:
# Step 4: Encode categorical features ('Midi_Pitch', 'Composer', and 'Piece')
encoder_midi_pitch = LabelEncoder()
encoder_composer = LabelEncoder()
encoder_piece = LabelEncoder()

In [10]:
y_midi_pitch = encoder_midi_pitch.fit_transform(y_midi_pitch)
y_composer = encoder_composer.fit_transform(y_composer)
y_piece = encoder_piece.fit_transform(y_piece)

In [11]:
print("Encoded Midi_Pitch values:", np.unique(y_midi_pitch))
print("Encoded Composer values:", np.unique(y_composer))
print("Encoded Piece values:", np.unique(y_piece))

Encoded Midi_Pitch values: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46]
Encoded Composer values: [0 1 2 3]
Encoded Piece values: [0 1 2 3 4 5 6 7]


In [12]:
# Step 5: Create sequences for the model
def create_sequences(data, sequence_length):
    sequences = []
    for i in range(len(data) - sequence_length):
        sequences.append(data[i:i + sequence_length])
    return np.array(sequences)

def create_target_sequences(data, sequence_length):
    target_sequences = []
    for i in range(len(data) - sequence_length):
        target_sequences.append(data[i + sequence_length])  # Shift by one step
    return np.array(target_sequences)

In [13]:
sequence_length = 10  # The length of each sequence

# Creating sequences for X (input data)
X_seq = create_sequences(X, sequence_length)

# Creating target sequences for continuous variables
y_continuous_seq = create_target_sequences(y_continuous, sequence_length)

In [14]:
# Creating target sequences for categorical variables
y_midi_pitch_seq = create_target_sequences(y_midi_pitch, sequence_length)
y_composer_seq = create_target_sequences(y_composer, sequence_length)
y_piece_seq = create_target_sequences(y_piece, sequence_length)

In [15]:
# Step 6: Train-test split (80% train, 20% test)
train_size = int(len(X_seq) * 0.8)
X_train_seq, X_test_seq = X_seq[:train_size], X_seq[train_size:]
y_train_continuous_seq, y_test_continuous_seq = y_continuous_seq[:train_size], y_continuous_seq[train_size:]
y_train_midi_pitch_seq, y_test_midi_pitch_seq = y_midi_pitch_seq[:train_size], y_midi_pitch_seq[train_size:]
y_train_composer_seq, y_test_composer_seq = y_composer_seq[:train_size], y_composer_seq[train_size:]
y_train_piece_seq, y_test_piece_seq = y_piece_seq[:train_size], y_piece_seq[train_size:]

In [16]:
# Step 7: Build the model
input_layer = Input(shape=(sequence_length, X_train_seq.shape[2]))

In [17]:
# LSTM layers
x = LSTM(128, return_sequences=True)(input_layer)
x = Dropout(0.2)(x)
x = LSTM(128)(x)
x = Dropout(0.2)(x)

In [18]:
# Continuous output (e.g., Onset_Beats, Duration_Beats, etc.)
continuous_output = Dense(y_train_continuous_seq.shape[1], activation='linear', name='continuous_output')(x)

# Categorical outputs (e.g., Midi_Pitch, Composer, Piece)
midi_pitch_output = Dense(6, activation='softmax', name='midi_pitch_output')(x)
composer_output = Dense(47, activation='softmax', name='composer_output')(x)
piece_output = Dense(8, activation='softmax', name='piece_output')(x)

In [19]:
# Define the model
model = Model(inputs=input_layer, outputs=[continuous_output, midi_pitch_output, composer_output, piece_output])


In [20]:
# Compile the model with metrics for each output
model.compile(optimizer=Adam(), 
              loss={'continuous_output': 'mse', 
                    'midi_pitch_output': 'sparse_categorical_crossentropy', 
                    'composer_output': 'sparse_categorical_crossentropy', 
                    'piece_output': 'sparse_categorical_crossentropy'},
              metrics={'continuous_output': ['mae'],  # Mean Absolute Error for continuous outputs
                       'midi_pitch_output': ['accuracy'], 
                       'composer_output': ['accuracy'], 
                       'piece_output': ['accuracy']})

In [21]:
# Step 9: Train the model
model.fit(X_train_seq, 
          {'continuous_output': y_train_continuous_seq, 
           'midi_pitch_output': y_train_midi_pitch_seq, 
           'composer_output': y_train_composer_seq, 
           'piece_output': y_train_piece_seq},
          epochs=50, batch_size=64, validation_data=(X_test_seq, 
                                                     {'continuous_output': y_test_continuous_seq, 
                                                      'midi_pitch_output': y_test_midi_pitch_seq, 
                                                      'composer_output': y_test_composer_seq, 
                                                      'piece_output': y_test_piece_seq}))

Epoch 1/50


[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - composer_output_accuracy: 0.0012 - composer_output_loss: 3.7306 - continuous_output_loss: 2.6420 - continuous_output_mae: 64.0599 - loss: 12110.8965 - midi_pitch_output_accuracy: 0.0018 - midi_pitch_output_loss: 12102.4854 - piece_output_accuracy: 0.3134 - piece_output_loss: 1.8740 - val_composer_output_accuracy: 0.0000e+00 - val_composer_output_loss: 3.5075 - val_continuous_output_loss: 1.7918 - val_continuous_output_mae: 70.9676 - val_loss: 14954.1670 - val_midi_pitch_output_accuracy: 0.0031 - val_midi_pitch_output_loss: 15920.2754 - val_piece_output_accuracy: 0.0000e+00 - val_piece_output_loss: 2.2522
Epoch 2/50
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - composer_output_accuracy: 0.0000e+00 - composer_output_loss: 3.4484 - continuous_output_loss: 1.8023 - continuous_output_mae: 65.0877 - loss: 12072.5674 - midi_pitch_output_accuracy: 8.1520e-04 - midi_pitch_output_loss: 12065

<keras.src.callbacks.history.History at 0x13d231d59d0>

In [22]:
model.summary()

In [24]:
model.save('trained_model.keras')