## LSTM Neural Network for music generation

### Model without the interval encoding

In [3]:
#Imports
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pickle

In [4]:
# Load the dataset stored under `path`.
# `load` is a static method defined on the base `tf.data.Dataset` class, so it
# is called there directly; going through `TFRecordDataset` resolved to the
# same method but wrongly suggested that TFRecord files were being parsed.
path = "Dataset"

data = tf.data.Dataset.load(path)

KeyboardInterrupt: 

In [None]:
# Hyper-parameters of the input pipeline.
n_notes = 37241
seq_length = 25
vocab_size = 8
batch_size = 64
key_order = ['pitch', 'step', 'duration']
buffer_size = n_notes - seq_length  # the number of items in the dataset

# Build the training pipeline.
# `cache()` must come BEFORE `shuffle()`: caching after shuffling/batching
# stores the batches produced during the first epoch and replays them
# unchanged, so every later epoch would see the same order and the same batch
# composition. Caching the raw elements first keeps the speed-up while letting
# `shuffle` draw a new order on every epoch.
train_ds = (data
            .cache()
            .shuffle(buffer_size)
            .batch(batch_size, drop_remainder=True)
            .prefetch(tf.data.AUTOTUNE))

In [None]:
# Baseline network: a single LSTM layer feeding three dense output heads.

learning_rate = 0.005
input_shape = (seq_length, 3)  # seq_length steps, 3 values per step

inputs = tf.keras.Input(input_shape)
x = tf.keras.layers.LSTM(128)(inputs)

# One head per predicted attribute. 'pitch' emits 128 raw logits (its loss is
# configured with from_logits=True); 'step' and 'duration' are scalar outputs.
pitch_out = tf.keras.layers.Dense(128, name='pitch')(x)
step_out = tf.keras.layers.Dense(1, name='step')(x)
duration_out = tf.keras.layers.Dense(1, name='duration')(x)
outputs = dict(pitch=pitch_out, step=step_out, duration=duration_out)

model = tf.keras.Model(inputs, outputs)

# Classification loss for the pitch head, mean squared error for the others.
pitch_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
mean_squared_error = tf.keras.losses.mse
loss = dict(pitch=pitch_loss, step=mean_squared_error, duration=mean_squared_error)

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

model.compile(loss=loss, optimizer=optimizer)

model.summary()

In [None]:
# Re-compile with per-output loss weights, then train the baseline model.
epochs = 50

# The pitch loss is scaled down relative to the step and duration losses.
loss_weights = {'pitch': 0.05, 'step': 1.0, 'duration': 1.0}
model.compile(loss=loss, loss_weights=loss_weights, optimizer=optimizer)

print(train_ds)
history = model.fit(train_ds, epochs=epochs)



<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 25, 3), dtype=tf.float64, name=None), {'pitch': TensorSpec(shape=(64,), dtype=tf.float64, name=None), 'step': TensorSpec(shape=(64,), dtype=tf.float64, name=None), 'duration': TensorSpec(shape=(64,), dtype=tf.float64, name=None)})>
Epoch 1/50
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - duration_loss: 0.0461 - loss: 2.4862 - pitch_loss: 4.3045 - step_loss: 2.2249
Epoch 2/50
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - duration_loss: 0.0276 - loss: 2.3986 - pitch_loss: 3.4373 - step_loss: 2.1991
Epoch 3/50
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - duration_loss: 0.0260 - loss: 2.3873 - pitch_loss: 3.3694 - step_loss: 2.1928
Epoch 4/50
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - duration_loss: 0.0256 - loss: 2.3810 - pitch_loss: 3.3352 - step_loss: 2.1886
Epoch 5/50
[1m108/108[0m [32m━━━━━━━━━━━━━━

In [None]:
#model.save('./models/pitch-model-Classic-Classifier.keras')

### Model with interval encoding

In [6]:

def _list_cells_to_array(frame):
    """Convert a DataFrame whose cells hold lists into a NumPy array.

    Every cell that is not a ``list`` is replaced by an empty list before the
    frame is converted.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame whose cells are expected to contain lists.

    Returns
    -------
    numpy.ndarray
        Array built from the nested lists of the cleaned frame.
    """
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; fall back to applymap on older pandas versions.
    if hasattr(pd.DataFrame, "map"):
        elementwise = frame.map
    else:
        elementwise = frame.applymap
    cleaned = elementwise(lambda cell: cell if isinstance(cell, list) else [])
    return np.array(cleaned.values.tolist())


# NOTE(security): pickle.load can execute arbitrary code; only load files that
# were produced by a trusted source.
with open("dataset.pkl", "rb") as f:
    df_x = pickle.load(f)

with open("dataset_target.pkl", "rb") as f:
    df_y = pickle.load(f)

seq_length = df_x.shape[1]
print(seq_length)
vocab_size = 300

X_train = _list_cells_to_array(df_x)
# From here on df_y is a NumPy array (it was a DataFrame right after loading).
df_y = _list_cells_to_array(df_y)
print(df_y.shape)

10


  extracted_data = df_x.applymap(lambda x: x if isinstance(x, list) else [])
  extracted_data = df_y.applymap(lambda x: x if isinstance(x, list) else [])


(139444, 1, 4)


In [7]:
# Split the target array into one DataFrame column per predicted attribute.
# df_y is a NumPy array whose last axis holds the four target values in the
# order (intervall, diff, step, duration). All leading axes are flattened so
# each target vector is emitted exactly once; the previous nested loops
# repeated the whole traversal df_y.shape[1] times and therefore duplicated
# every target whenever that axis was longer than 1.
target_columns = ['intervall', 'diff', 'step', 'duration']
flat_targets = df_y.reshape(-1, df_y.shape[-1])[:, :len(target_columns)]
y_train_e = pd.DataFrame(flat_targets, columns=target_columns)

batch_size = 64

print(y_train_e.shape[0],y_train_e.shape[1])
# Smallest 'diff' value; used to choose the offset for one-hot encoding.
y_train_e['diff'].min()

139444 4


-4

In [None]:
# Developing the model: one LSTM layer feeding four classification heads.
input_shape = (X_train.shape[1],X_train.shape[2])
learning_rate = 0.005
num_classes_intervall = 12
num_classes_diff = 9      # width of the one-hot 'diff' targets used for training
num_classes_time = 300    # width of the one-hot 'step' / 'duration' targets

inputs = tf.keras.Input(shape=input_shape)
x = tf.keras.layers.LSTM(128)(inputs)

# 'intervall' emits raw logits (its loss uses from_logits=True and integer
# labels); the other heads emit softmax probabilities for one-hot targets.
outputs = {'intervall': tf.keras.layers.Dense(num_classes_intervall,name='intervall')(x),
           'diff': tf.keras.layers.Dense(num_classes_diff,activation='softmax', name='diff')(x),
           'step': tf.keras.layers.Dense(num_classes_time,activation='softmax', name='step')(x),
           'duration': tf.keras.layers.Dense(num_classes_time,activation='softmax', name='duration')(x),
          }

model = tf.keras.Model(inputs, outputs)

loss = {'intervall': tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        'diff': tf.keras.losses.CategoricalCrossentropy(),
        'step': tf.keras.losses.CategoricalCrossentropy(),
        'duration': tf.keras.losses.CategoricalCrossentropy(),
       }

optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

# Track accuracy on every head. The 'step' entry used to be [''], which is not
# a valid metric identifier; the recorded training log reports step_accuracy,
# so 'accuracy' is the intended metric.
metrics = {
    'intervall': ['accuracy'],
    'diff': ['accuracy'],
    'step': ['accuracy'],
    'duration': ['accuracy'],}

model.compile(loss=loss, optimizer=optimizer,metrics=metrics)

model.summary()

In [9]:
# One-hot encode the categorical targets and train the interval model.
epochs = 30

# 'diff' is shifted by 4 before encoding: its smallest observed value is -4,
# and to_categorical needs class indices that start at 0.
diff_encoded = tf.keras.utils.to_categorical(y_train_e['diff'] + 4, num_classes=9)
step_encoded, duration_encoded = [
    tf.keras.utils.to_categorical(y_train_e[column], num_classes=300)
    for column in ('step', 'duration')
]

# 'intervall' keeps its integer labels (sparse loss); the rest are one-hot.
train_targets = {
    'intervall': y_train_e['intervall'],
    'diff': diff_encoded,
    'step': step_encoded,
    'duration': duration_encoded,
}

history = model.fit(X_train, train_targets, epochs=epochs)



Epoch 1/30
[1m4358/4358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 7ms/step - diff_accuracy: 0.5603 - diff_loss: 1.1388 - duration_accuracy: 0.4319 - duration_loss: 1.7416 - intervall_accuracy: 0.1058 - intervall_loss: 2.4856 - loss: 6.6925 - step_accuracy: 0.5807 - step_loss: 1.3266
Epoch 2/30
[1m4358/4358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 7ms/step - diff_accuracy: 0.5819 - diff_loss: 1.0389 - duration_accuracy: 0.4664 - duration_loss: 1.6080 - intervall_accuracy: 0.1566 - intervall_loss: 2.4024 - loss: 6.1895 - step_accuracy: 0.6126 - step_loss: 1.1402
Epoch 3/30
[1m4358/4358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 10ms/step - diff_accuracy: 0.5860 - diff_loss: 1.0209 - duration_accuracy: 0.4707 - duration_loss: 1.5934 - intervall_accuracy: 0.1723 - intervall_loss: 2.3679 - loss: 6.0843 - step_accuracy: 0.6244 - step_loss: 1.1020
Epoch 4/30
[1m4358/4358[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 7ms/step - diff_accuracy:

In [10]:
# Persist the trained interval model to a .keras file.
model_path = './models/intervall-model-CLassic.keras'
model.save(model_path)