In [25]:
import ast
import itertools
import random
import warnings
from collections import Counter

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from music21 import converter, note, stream, chord
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dropout, Dense, Activation
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.utils import to_categorical

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the notebook's own code - consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [19]:
# Load the MAESTRO metadata table.
# Forward slashes keep the path portable and avoid the invalid '\d' / '\m'
# escape sequences of a backslash-separated literal; this also matches the
# '../data/' prefix used when building the MIDI file paths.
path = '../data/maestro-v3.0.0.csv'
df = pd.read_csv(path)

In [20]:
# Draw a reproducible 10 % sample of the rows inside each composer group.
by_composer = df.groupby('canonical_composer', group_keys=False)
dfs = by_composer.apply(lambda grp: grp.sample(frac=0.1, random_state=42))

In [21]:
def extraction_notes(file):
    '''
    Extract the notes of a MIDI file as a list of MIDI pitch numbers
    (integers between 0 and 127).

    A single note contributes its own pitch; for a chord only the highest
    pitch is kept.

    Returns the list of pitches, or the string 'null' when fewer than 50
    notes were collected.
    '''
    score = converter.parse(file)
    pitches = []
    for element in score.recurse().notes:
        if element.isNote:
            pitches.append(element.pitch.midi)
        elif element.isChord:
            # Keep only the top voice of the chord.
            pitches.append(max([p.midi for p in element.pitches]))
    # Pieces shorter than 50 notes are flagged with the 'null' sentinel.
    return pitches if len(pitches) >= 50 else 'null'
    
def ex_notes_tab(df,col_out):
    '''
    Build the column that holds the note list of every MIDI file.

    Parameters
    ----------
    df : DataFrame with a 'midi_filename' column; each value is appended to
        the '../data/' prefix to form the path that is parsed.
    col_out : name of the column that receives the extracted note lists.

    Returns
    -------
    A new DataFrame carrying the extra 'midi_file_data' and `col_out`
    columns, restricted to the rows whose extraction did not return the
    'null' sentinel. The frame passed in is left unmodified.
    '''
    # Work on a copy so that calling the function has no side effect on the
    # caller's table.
    out = df.copy()
    out['midi_file_data'] = '../data/' + out['midi_filename']
    out[col_out] = out['midi_file_data'].apply(extraction_notes)
    # extraction_notes returns the string 'null' for pieces that are too short.
    return out[out[col_out] != 'null']

def sequence_list(L, sequence_length=50):
    '''
    Cut a note list into fixed-length input windows and their next-note targets.

    Parameters
    ----------
    L : sequence of notes (must support len() and slicing).
    sequence_length : number of notes per input window (default 50).

    Returns
    -------
    (input_sequences, output_notes) where input_sequences[i] is
    L[i:i + sequence_length] and output_notes[i] is the note that
    immediately follows that window. Both lists are empty when L holds
    `sequence_length` notes or fewer.

    Raises
    ------
    ValueError if `sequence_length` is smaller than 1.
    '''
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    # A negative count simply yields an empty range, i.e. no window at all.
    n_windows = len(L) - sequence_length
    input_sequences = [L[i:i + sequence_length] for i in range(n_windows)]
    output_notes = [L[i + sequence_length] for i in range(n_windows)]

    return input_sequences, output_notes

def sequence(dff):
    '''
    Apply the windowing to every piece of the table.

    Each entry of dff['notes'] is cut with sequence_list and the results of
    all pieces are concatenated in table order.

    Returns a DataFrame with one row per window: column "X" holds the input
    window and column "y" the note that follows it.
    '''
    windows, targets = [], []
    for piece_notes in dff['notes'].to_list():
        piece_windows, piece_targets = sequence_list(piece_notes)
        windows.extend(piece_windows)
        targets.extend(piece_targets)

    return pd.DataFrame({"X": windows, "y": targets})

def colonnes(df):
    '''
    Reshape the window table: one time step = one column = one note.

    Parameters
    ----------
    df : DataFrame with an 'X' column holding the note windows and a 'y'
        column holding the target note.

    Returns
    -------
    A new DataFrame whose integer-labelled columns 0, 1, ... hold the notes
    of each window, followed by the 'y' column. The index of `df` is kept.
    '''
    # Build the frame from the list of rows in a single call; this avoids
    # instantiating one Series per row as df['X'].apply(pd.Series) would.
    df_expanded = pd.DataFrame(df['X'].tolist(), index=df.index)
    df_expanded['y'] = df['y']
    return df_expanded

In [None]:
# Extract the notes, cut them into windows and spread each window over columns.
# NOTE: `dfs` is rebound from the sampled metadata table to the window table,
# so this cell can only be re-run after re-running the sampling cell.
dfs = colonnes(sequence(ex_notes_tab(dfs,'notes')))

# Keep the rows whose index label is a multiple of 10 (0, 10, 20, ...).
L_retenu = list(range(0, dfs.shape[0], 10))

dfs2 = dfs[dfs.index.isin(L_retenu)]
dfs2


In [31]:
# Two independent copies of the thinned table: one per prediction target.
df_octave = dfs2.copy()
df_note = dfs2.copy()

def def_octave(x):
    '''Return the octave index of a MIDI pitch, i.e. its floor division by 12.'''
    semitones_per_octave = 12
    return x // semitones_per_octave

def def_note(x):
    '''Return the position of a MIDI pitch inside its octave, i.e. its remainder modulo 12.'''
    semitones_per_octave = 12
    return x % semitones_per_octave

# Convert one whole column at a time: `//` and `%` are vectorised on a Series,
# so the helpers do not need to be called on every single cell.
df_octave = df_octave.apply(def_octave)
df_note = df_note.apply(def_note)

In [34]:
def _split_train_val_test(frame):
    '''
    Split a frame into 70 % train, then halve the remaining 30 % into
    validation and test, always with random_state=42.

    Returns (train, holdout, val, test) where holdout is the 30 % part
    before it is halved.
    '''
    train, holdout = train_test_split(frame, test_size=0.3, random_state=42)
    val, test = train_test_split(holdout, test_size=0.5, random_state=42)
    return train, holdout, val, test


dfo_train, dfo_temp, dfo_val, dfo_test = _split_train_val_test(df_octave)
dfn_train, dfn_temp, dfn_val, dfn_test = _split_train_val_test(df_note)

In [36]:
# Largest octave index found in each column of the training split
# (sanity check against the number of one-hot classes used for the octave model).
dfo_train.max()

0     8
1     8
2     8
3     8
4     8
5     8
6     8
7     8
8     8
9     8
10    8
11    8
12    8
13    8
14    8
15    8
16    8
17    8
18    8
19    8
20    8
21    8
22    8
23    8
24    8
25    8
26    8
27    8
28    8
29    8
30    8
31    8
32    8
33    9
34    8
35    8
36    8
37    8
38    8
39    8
40    8
41    8
42    8
43    9
44    8
45    8
46    8
47    8
48    8
49    8
y     8
dtype: int64

In [42]:
def df_to_tensor(df,num_classes):
    '''
    Turn a window table into one-hot encoded tensors, treating notes as categories.

    Every column other than 'y' is used as input and 'y' as target; values
    are cast to int and encoded by to_categorical with `num_classes` classes.

    Returns (X, y).
    NOTE(review): X is presumably (rows, time steps, num_classes) and y
    (rows, num_classes) - confirm against to_categorical.
    '''
    targets = df["y"].astype(int).values
    input_columns = [name for name in df.columns if name != 'y']
    windows = df[input_columns].values.astype(int)
    return (
        to_categorical(windows, num_classes=num_classes),
        to_categorical(targets, num_classes=num_classes),
    )

# Number of one-hot classes per model.
# MIDI pitches 0..127 give octave indices 0..10 after `// 12`, hence 11 classes.
nb_octave = 11
# NOTE(review): `% 12` only yields 0..11, so 12 classes would be enough; 13 is
# kept so the tensors stay compatible with anything already built on that size.
nb_note = 13

# The class counts come from the constants above so that the tensors and the
# model input/output sizes cannot drift apart.
Xo_train, yo_train = df_to_tensor(dfo_train, nb_octave)
Xo_val, yo_val = df_to_tensor(dfo_val, nb_octave)
Xo_test, yo_test = df_to_tensor(dfo_test, nb_octave)

Xn_train, yn_train = df_to_tensor(dfn_train, nb_note)
Xn_val, yn_val = df_to_tensor(dfn_val, nb_note)
Xn_test, yn_test = df_to_tensor(dfn_test, nb_note)


In [44]:
# Length of the input windows fed to both models.
sequence_length = 50

# Octave model: two stacked LSTM layers with dropout, softmax over the octave classes.
modelo = Sequential()
modelo.add(LSTM(256, input_shape=(sequence_length, nb_octave), return_sequences=True))
modelo.add(Dropout(0.3))
modelo.add(LSTM(256))
modelo.add(Dropout(0.3))
modelo.add(Dense(nb_octave))
modelo.add(Activation('softmax'))

optimizer = RMSprop(learning_rate=0.001)
modelo.compile(loss='categorical_crossentropy', optimizer=optimizer)

# Stop automatically once the validation loss has not improved for 10 epochs
# and restore the best weights. Without this the run has to be interrupted by
# hand, which leaves `historyo` unassigned for the plotting cell.
early_stop_o = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

historyo = modelo.fit(
    Xo_train, yo_train,
    validation_data=(Xo_val, yo_val),
    epochs=100,  # upper bound; early stopping usually ends training sooner
    batch_size=32,
    callbacks=[early_stop_o],
)

Epoch 1/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 65ms/step - loss: 1.3871 - val_loss: 1.3200
Epoch 2/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 70ms/step - loss: 1.2594 - val_loss: 1.2774
Epoch 3/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 66ms/step - loss: 1.2546 - val_loss: 1.2606
Epoch 4/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 68ms/step - loss: 1.2442 - val_loss: 1.2476
Epoch 5/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 70ms/step - loss: 1.2427 - val_loss: 1.2560
Epoch 6/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 71ms/step - loss: 1.2388 - val_loss: 1.2424
Epoch 7/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 72ms/step - loss: 1.2348 - val_loss: 1.2452
Epoch 8/100
[1m823/823[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 71ms/step - loss: 1.2169 - val_loss: 1.2464
Epoch 9/100
[1m

KeyboardInterrupt: 

In [None]:
# Note (pitch-class) model: same architecture as the octave model, softmax over
# the note classes.
modeln = Sequential()
modeln.add(LSTM(256, input_shape=(sequence_length, nb_note), return_sequences=True))
modeln.add(Dropout(0.3))
modeln.add(LSTM(256))
modeln.add(Dropout(0.3))
modeln.add(Dense(nb_note))
modeln.add(Activation('softmax'))

optimizer = RMSprop(learning_rate=0.001)
modeln.compile(loss='categorical_crossentropy', optimizer=optimizer)

# Stop automatically once the validation loss has not improved for 10 epochs
# and restore the best weights, so the 200-epoch budget is only an upper bound
# and `historyn` is always assigned for the plotting cell.
early_stop_n = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

historyn = modeln.fit(
    Xn_train, yn_train,
    validation_data=(Xn_val, yn_val),
    epochs=200,  # upper bound; early stopping usually ends training sooner
    batch_size=32,
    callbacks=[early_stop_n],
)

In [None]:
# Training / validation loss curves recorded by the two fits.
losso = historyo.history['loss']
val_losso = historyo.history['val_loss']

lossn = historyn.history['loss']
val_lossn = historyn.history['val_loss']

fig, axs = plt.subplots(1, 2, figsize=(12, 5))

# One panel per model: (axis, training curve, validation curve, title).
panels = (
    (axs[0], losso, val_losso, 'Évolution de la cross-entropy (historyo)'),
    (axs[1], lossn, val_lossn, 'Évolution de la cross-entropy (historyn)'),
)
for ax, train_curve, val_curve, title in panels:
    ax.plot(train_curve, label='Loss Entraînement')
    ax.plot(val_curve, label='Loss Validation')
    ax.set_title(title)
    ax.set_xlabel('Époques')
    ax.set_ylabel('Perte (Loss)')
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()