In [None]:
import random

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import plot_tree
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Seed the three random number generators used by this notebook (NumPy,
# TensorFlow and Python's built-in `random`) so repeated runs start from the
# same state. The scikit-learn calls further down pass random_state=42 themselves.

np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

In [None]:
# Load the four per-tempo recordings (semicolon-separated CSV files), one
# DataFrame per tempo, in the order 60, 80, 100, 120 bpm.
df_60, df_80, df_100, df_120 = (
    pd.read_csv(csv_path, sep=";")
    for csv_path in (
        'Dataset_60_bpm_complete_CSV.csv',
        'Dataset_80_bpm_complete_CSV.csv',
        'Dataset_100_bpm_complete_CSV.csv',
        'Dataset_120_bpm_complete_CSV.csv',
    )
)

## Add features

In [None]:
def add_note_features(df):
    """Add millisecond timings, onset deviations and lagged columns to one dataset.

    The new columns are written onto ``df`` itself (the frame passed in is
    modified), exactly as the per-tempo statements this helper replaces did.

    Parameters
    ----------
    df : pandas.DataFrame
        One per-tempo dataset. It must provide the columns ``bpm``,
        ``t_dt_note_on``, ``t_dt_note_off``, ``t_dt_note_on_ref``,
        ``prec_rest``, ``IOI``, ``PC``, ``SC``, ``velocity`` and
        ``ex_note_numb``.

    Returns
    -------
    pandas.DataFrame
        ``df`` (with the new columns) without the rows whose ``ex_note_numb``
        equals 1.
    """
    # Tick -> seconds conversion factor for each row's tempo.
    # Presumably 960 is the MIDI resolution in ticks per quarter note -- TODO confirm.
    seconds_per_tick = 60 / df['bpm'] / 960
    onset_deviation_ticks = df['t_dt_note_on'] - df['t_dt_note_on_ref']

    df['t_dt_note_on_ms'] = df['t_dt_note_on'] * seconds_per_tick * 1000
    df['t_dt_note_off_ms'] = df['t_dt_note_off'] * seconds_per_tick * 1000
    # Deviation of the played onset from the reference onset, rounded to whole ms.
    df['delta_onset_ms'] = np.rint(onset_deviation_ticks * seconds_per_tick * 1000)
    # "-1" columns hold the value of the previous row (lag of one note).
    df['delta_onset_ms-1'] = df['delta_onset_ms'].shift(+1)
    df['delta_onset'] = onset_deviation_ticks
    df['delta_onset-1'] = df['delta_onset'].shift(+1)
    df['prec_rest_ms'] = np.rint(df['prec_rest'] * seconds_per_tick * 1000)
    df['IOI_ms'] = np.rint(df['IOI'] * seconds_per_tick * 1000)
    df['PC-abs'] = df['PC'].abs()
    df['SC-abs'] = df['SC'].abs()
    df['velocity-1'] = df['velocity'].shift(+1)

    # The first note of each exercise is removed because its lagged columns
    # (delta_onset-1, delta_onset_ms-1, velocity-1) are not usable: shift() is
    # applied over the whole file, so they are NaN on the very first row and
    # otherwise come from whatever row precedes it (presumably the last note
    # of the previous exercise -- verify the row ordering of the CSV files).
    return df[df['ex_note_numb'] != 1]


# NOTE: this cell is not idempotent. It rebinds df_60 ... df_120 to the filtered
# frames, so running it twice would compute the lags on already-filtered data.
# Re-run the loading cell first if this cell has to be executed again.
df_60 = add_note_features(df_60)
df_80 = add_note_features(df_80)
df_100 = add_note_features(df_100)
df_120 = add_note_features(df_120)

## Onsets stats in MIDI ticks

In [None]:
def color_boxplot(data, color, pos=[0], ax=None):
    """Draw a box plot whose boxes, whiskers, fliers, medians and caps share one colour.

    Parameters
    ----------
    data : array-like
        Values forwarded unchanged to ``ax.boxplot``.
    color : colour specification
        Colour applied to every artist of the box plot.
    pos : list, optional
        Forwarded as ``positions`` to ``ax.boxplot``. The default list is
        never modified here.
    ax : Axes, optional
        Axes to draw on; when omitted, the current pyplot axes are used.

    Returns
    -------
    dict
        The dictionary returned by ``ax.boxplot`` (artist lists keyed by
        'boxes', 'whiskers', ...), so callers that store the result get the
        artists instead of ``None``.
    """
    # Explicit None check: do not rely on the truthiness of an Axes object.
    if ax is None:
        ax = plt.gca()
    bp = ax.boxplot(data, patch_artist=False, positions=pos)
    for item in ['boxes', 'whiskers', 'fliers', 'medians', 'caps']:
        for artist in bp[item]:
            artist.set_color(color)
    return bp

# Box plots of the onset deviation in MIDI ticks, one box per tempo.
fig, ax = plt.subplots()
ax.set_ylabel('Onsets deviations in MIDI ticks')
bp_60 = color_boxplot(df_60['delta_onset'], 'green', [1], ax=ax)
bp_80 = color_boxplot(df_80['delta_onset'], 'red', [2], ax=ax)
bp_100 = color_boxplot(df_100['delta_onset'], 'purple', [3], ax=ax)
bp_120 = color_boxplot(df_120['delta_onset'], 'blue', [4], ax=ax)
ax.autoscale()
ax.set(xticks=[1,2,3,4], xticklabels=['60 bpm','80 bpm','100 bpm','120 bpm'])
plt.savefig('onset_stats_ticks_high_res.png', dpi=300)
plt.show()

# Summary statistics of the onset deviation (ticks) for each tempo.
mean_df_60 = np.mean(df_60['delta_onset'])
mean_df_80 = np.mean(df_80['delta_onset'])
mean_df_100 = np.mean(df_100['delta_onset'])
mean_df_120 = np.mean(df_120['delta_onset'])

median_df_60 = np.median(df_60['delta_onset'])
median_df_80 = np.median(df_80['delta_onset'])
median_df_100 = np.median(df_100['delta_onset'])
median_df_120 = np.median(df_120['delta_onset'])

# np.std is called with its default ddof=0, i.e. the population standard deviation.
sd_df_60 = np.std(df_60['delta_onset'])
sd_df_80 = np.std(df_80['delta_onset'])
sd_df_100 = np.std(df_100['delta_onset'])
sd_df_120 = np.std(df_120['delta_onset'])

# int() truncates toward zero, so the printed values are not rounded.
print(f"Mean at 60 bpm: {int(mean_df_60)} ticks; Median: {int(median_df_60)} ticks; Standard Deviation: {int(sd_df_60)} ticks")
print(f"Mean at 80 bpm: {int(mean_df_80)} ticks; Median: {int(median_df_80)} ticks; Standard Deviation: {int(sd_df_80)} ticks")
print(f"Mean at 100 bpm: {int(mean_df_100)} ticks; Median: {int(median_df_100)} ticks; Standard Deviation: {int(sd_df_100)} ticks")
# Fixed label: this line reports the 120 bpm statistics (it used to say "100 bpm").
print(f"Mean at 120 bpm: {int(mean_df_120)} ticks; Median: {int(median_df_120)} ticks; Standard Deviation: {int(sd_df_120)} ticks")

## Onsets stats in ms

In [None]:
def color_boxplot(data, color, pos=[0], ax=None):
    """Draw a box plot whose boxes, whiskers, fliers, medians and caps share one colour.

    Parameters
    ----------
    data : array-like
        Values forwarded unchanged to ``ax.boxplot``.
    color : colour specification
        Colour applied to every artist of the box plot.
    pos : list, optional
        Forwarded as ``positions`` to ``ax.boxplot``. The default list is
        never modified here.
    ax : Axes, optional
        Axes to draw on; when omitted, the current pyplot axes are used.

    Returns
    -------
    dict
        The dictionary returned by ``ax.boxplot`` (artist lists keyed by
        'boxes', 'whiskers', ...), so callers that store the result get the
        artists instead of ``None``.
    """
    # Explicit None check: do not rely on the truthiness of an Axes object.
    if ax is None:
        ax = plt.gca()
    bp = ax.boxplot(data, patch_artist=False, positions=pos)
    for item in ['boxes', 'whiskers', 'fliers', 'medians', 'caps']:
        for artist in bp[item]:
            artist.set_color(color)
    return bp

# Box plots of the onset deviation in milliseconds, one box per tempo.
fig, ax = plt.subplots()
ax.set_ylabel('Onsets deviations in ms')
bp_60 = color_boxplot(df_60[["delta_onset_ms"]].to_numpy(), 'green', [1], ax=ax)
bp_80 = color_boxplot(df_80[["delta_onset_ms"]].to_numpy(), 'red', [2], ax=ax)
bp_100 = color_boxplot(df_100[["delta_onset_ms"]].to_numpy(), 'purple', [3], ax=ax)
bp_120 = color_boxplot(df_120[["delta_onset_ms"]].to_numpy(), 'blue', [4], ax=ax)
ax.autoscale()
ax.set(xticks=[1,2,3,4], xticklabels=['60 bpm','80 bpm','100 bpm','120 bpm'])
plt.savefig('onset_stats_ms_high_res.png', dpi=300)
plt.show()

# Summary statistics of the onset deviation (ms) for each tempo.
mean_onsets_dev_60_ms = np.mean(df_60[["delta_onset_ms"]].to_numpy())
mean_onsets_dev_80_ms = np.mean(df_80[["delta_onset_ms"]].to_numpy())
mean_onsets_dev_100_ms = np.mean(df_100[["delta_onset_ms"]].to_numpy())
mean_onsets_dev_120_ms = np.mean(df_120[["delta_onset_ms"]].to_numpy())

median_onsets_dev_60_ms = np.median(df_60[["delta_onset_ms"]].to_numpy())
median_onsets_dev_80_ms = np.median(df_80[["delta_onset_ms"]].to_numpy())
median_onsets_dev_100_ms = np.median(df_100[["delta_onset_ms"]].to_numpy())
median_onsets_dev_120_ms = np.median(df_120[["delta_onset_ms"]].to_numpy())

# np.std is called with its default ddof=0, i.e. the population standard deviation.
sd_onsets_dev_60_ms = np.std(df_60[["delta_onset_ms"]].to_numpy())
sd_onsets_dev_80_ms = np.std(df_80[["delta_onset_ms"]].to_numpy())
sd_onsets_dev_100_ms = np.std(df_100[["delta_onset_ms"]].to_numpy())
sd_onsets_dev_120_ms = np.std(df_120[["delta_onset_ms"]].to_numpy())

# int() truncates toward zero, so the printed values are not rounded.
print(f"Mean at 60 bpm: {int(mean_onsets_dev_60_ms)} ms; Median: {int(median_onsets_dev_60_ms)} ms; Standard Deviation: {int(sd_onsets_dev_60_ms)} ms")
print(f"Mean at 80 bpm: {int(mean_onsets_dev_80_ms)} ms; Median: {int(median_onsets_dev_80_ms)} ms; Standard Deviation: {int(sd_onsets_dev_80_ms)} ms")
print(f"Mean at 100 bpm: {int(mean_onsets_dev_100_ms)} ms; Median: {int(median_onsets_dev_100_ms)} ms; Standard Deviation: {int(sd_onsets_dev_100_ms)} ms")
# Fixed label: this line reports the 120 bpm statistics (it used to say "100 bpm").
print(f"Mean at 120 bpm: {int(mean_onsets_dev_120_ms)} ms; Median: {int(median_onsets_dev_120_ms)} ms; Standard Deviation: {int(sd_onsets_dev_120_ms)} ms")

## Concatenate df

In [None]:
# Column layout of the combined dataset: identifiers and reference timings
# first, then the fingering / technique descriptors, the measured timings and
# finally the derived millisecond and lag columns.
columns_order = [
    'bpm', 'exercise', 'ex_note_numb', 'start-stop', 'note_numb', 'note_name', 'onbeat',
    't_dt_note_on_ref', 't_dt_note_off_ref', 'note_lng_ref', 'IOI_ref', 'prec_rest_ref',
    'string', 'fret', 'finger', 'position',
    'PC', 'PC-abs', 'SC', 'SC-abs', 'HS', 'hammer-on', 'pull-off', 'bending',
    'dt_note_on', 't_dt_note_on', 'dt_note_off', 't_dt_note_off',
    'note_lng', 'IOI', 'prec_rest', 'fllw_rest',
    'velocity', 'velocity-1',
    't_dt_note_on_ms', 't_dt_note_off_ms', 'IOI_ms', 'prec_rest_ms',
    'delta_onset', 'delta_onset-1', 'delta_onset_ms', 'delta_onset_ms-1',
]

# Stack the four tempo datasets under a fresh 0..n-1 index, then select the
# columns in the order above.
df_complete = pd.concat(
    [df_60, df_80, df_100, df_120],
    ignore_index=True,
)[columns_order]

## PC - Previous rest

In [None]:
# Tick labels of the box plot, one per 'PC' group defined below
group_labels = ['-9 to -6', '-5 to -3', '-2 to -1', '1 to 2', '3 to 5', '6 to 9']

# 'PC' values pooled into each group (PC == 0 belongs to no group)
pc_groups = [
    [-9, -8, -7, -6],
    [-5, -4, -3],
    [-2, -1],
    [1, 2],
    [3, 4, 5],
    [6, 7, 8, 9],
]

# 'prec_rest_ms' values of every group, followed by their descriptive statistics
boxplot_data = [
    df_complete.loc[df_complete['PC'].isin(pc_values), 'prec_rest_ms']
    for pc_values in pc_groups
]
group_counts = [len(rests) for rests in boxplot_data]
group_means = [rests.mean() for rests in boxplot_data]
group_medians = [rests.median() for rests in boxplot_data]
group_stds = [rests.std() for rests in boxplot_data]

# Print one summary paragraph per group
summary_rows = list(zip(group_labels, group_counts, group_means, group_medians, group_stds))
for label, count, mean, median, std in summary_rows:
    print(f"Group {label}:")
    print(f"  Count: {count}")
    print(f"  Mean: {mean:.2f}")
    print(f"  Median: {median:.2f}")
    print(f"  Standard Deviation: {std:.2f}\n")

# Box plot of the preceding rest per PC group; values above 410 ms fall
# outside the visible y-range.
fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(boxplot_data, labels=group_labels)
ax.set_xlabel('PC')
ax.set_ylabel('prec_rest_ms')
ax.set_ylim(0, 410)
ax.grid(True)
fig.savefig('prev_rest_PC_high_res.png', dpi=300)
plt.show()

## SC - Previous rest

In [None]:
# Every distinct non-zero 'SC' value, in ascending order, gets its own box
unique_sc_values = [sc for sc in sorted(df_complete['SC'].unique()) if sc != 0]
group_labels = list(map(str, unique_sc_values))

# 'prec_rest_ms' values per 'SC' value, followed by their descriptive statistics
boxplot_data = [
    df_complete.loc[df_complete['SC'] == sc, 'prec_rest_ms']
    for sc in unique_sc_values
]
group_counts = [len(rests) for rests in boxplot_data]
group_means = [rests.mean() for rests in boxplot_data]
group_medians = [rests.median() for rests in boxplot_data]
group_stds = [rests.std() for rests in boxplot_data]

# Print one summary paragraph per 'SC' value
summary_rows = list(zip(group_labels, group_counts, group_means, group_medians, group_stds))
for label, count, mean, median, std in summary_rows:
    print(f"SC {label}:")
    print(f"  Count: {count}")
    print(f"  Mean: {mean:.2f}")
    print(f"  Median: {median:.2f}")
    print(f"  Standard Deviation: {std:.2f}\n")

# Box plot of the preceding rest per SC value; values above 310 ms fall
# outside the visible y-range.
fig, ax = plt.subplots(figsize=(10, 6))
ax.boxplot(boxplot_data, labels=group_labels)
ax.set_xlabel('SC')
ax.set_ylabel('prec_rest_ms')
ax.set_ylim(0, 310)
ax.grid(True)
fig.savefig('prev_rest_SC_high_res.png', dpi=300)
plt.show()

## Correlation

In [None]:
# Columns removed from every analysis / modelling view of df_complete
_excluded_columns = [
    'bpm', 'exercise', 'start-stop', 'note_name',
    't_dt_note_on_ref', 't_dt_note_off_ref', 'note_lng_ref', 'IOI_ref', 'prec_rest_ref',
    'string', 'fret', 'finger', 'position',
    'dt_note_on', 't_dt_note_on', 'dt_note_off', 't_dt_note_off',
    'note_lng', 'IOI', 'prec_rest', 'fllw_rest',
    't_dt_note_on_ms', 't_dt_note_off_ms', 'delta_onset', 'delta_onset-1',
]

# The correlation view and the decision-tree / random-forest view also drop
# 'ex_note_numb'; the LSTM / RNN view keeps that column.
df_complete_corr = df_complete.drop(columns=_excluded_columns + ['ex_note_numb'])
df_complete_dt_rf = df_complete.drop(columns=_excluded_columns + ['ex_note_numb'])
df_complete_LSTM_RNN_ref = df_complete.drop(columns=_excluded_columns)

## Correlation matrix

In [None]:
# seaborn (`sns`) is imported in the notebook's import cell together with the
# other dependencies, so this cell no longer carries its own import.

# Pairwise correlation between the columns of the correlation view
# (DataFrame.corr() with its default method).
correlation_matrix = df_complete_corr.corr()

# Heatmap generation: annotated cells, colour scale fixed to [-1, 1]

plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1, fmt='.2f')
plt.savefig('correlation_high_res.png', dpi=300, bbox_inches='tight')
plt.show()

## Decision tree onsets

In [None]:
# Modelling frame for the decision tree: the tree / forest view without the
# columns listed below.
df_decision_tree_onsets = df_complete_dt_rf.drop(
    columns=['velocity', 'prec_rest_ms', 'PC-abs', 'SC-abs', 'IOI_ms']
)

print(df_decision_tree_onsets.columns)

# Target is the onset deviation in ms; every other remaining column is a feature

y = df_decision_tree_onsets['delta_onset_ms']
X = df_decision_tree_onsets.drop('delta_onset_ms', axis=1)

# 80 / 20 train-test split with a fixed seed

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a depth-limited decision tree and predict on both splits

dt = DecisionTreeRegressor(max_depth=10, random_state=42)
dt.fit(X_train, y_train)
y_train_pred = dt.predict(X_train)
y_test_pred = dt.predict(X_test)

# Evaluation of the model (test set first, then training set)

mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)
mse_train = mean_squared_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)

print(f"Test Mean Squared Error: {mse_test}")
print(f"Test R-squared: {r2_test}")
print(f"Train Mean Squared Error: {mse_train}")
print(f"Train R-squared: {r2_train}")

# Per-sample comparison of real and predicted test values

df_results_dt = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_test_pred})
df_results_dt['difference'] = df_results_dt['Real Values'].sub(df_results_dt['Predicted Values'])

# Plot the real and predicted values for the first 100 test samples

n_samples = 100
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test.values[:n_samples], label='Real Values', color='blue')
ax.plot(y_test_pred[:n_samples], label='Predicted Values', color='red')
ax.set_xlabel('Index')
ax.set_ylabel('delta_onset_ms')
ax.set_ylim(-100, 100)
ax.legend()
fig.savefig('decision_tree_onsets_high_res.png', dpi=300)
plt.show()

## Random forest onsets

In [None]:
# Modelling frame for the random forest: the tree / forest view without the
# columns listed below.
df_random_forest_onsets = df_complete_dt_rf.drop(
    columns=['velocity', 'prec_rest_ms', 'PC-abs', 'SC-abs', 'IOI_ms']
)
print(df_random_forest_onsets.columns)

# Target is the onset deviation in ms; every other remaining column is a feature

y = df_random_forest_onsets['delta_onset_ms']
X = df_random_forest_onsets.drop('delta_onset_ms', axis=1)

# 80 / 20 train-test split with a fixed seed

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a random forest of depth-limited trees and predict on both splits

rf = RandomForestRegressor(max_depth=10, random_state=42)
rf.fit(X_train, y_train)
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)

# Evaluation of the model (test set first, then training set)

mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)
mse_train = mean_squared_error(y_train, y_train_pred)
r2_train = r2_score(y_train, y_train_pred)

print(f"Test Mean Squared Error: {mse_test}")
print(f"Test R-squared: {r2_test}")
print(f"Train Mean Squared Error: {mse_train}")
print(f"Train R-squared: {r2_train}")

# Export the fitted regressor for offline use

joblib.dump(rf, 'random_forest__regressor_onsets.pkl')

# Per-sample comparison of real and predicted test values

df_results_rf = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_test_pred})
df_results_rf['difference'] = df_results_rf['Real Values'].sub(df_results_rf['Predicted Values'])

# Plot the real and predicted values for the first 100 test samples

n_samples = 100
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test.values[:n_samples], label='Real Values', color='blue')
ax.plot(y_test_pred[:n_samples], label='Predicted Values', color='red')
ax.set_xlabel('Index')
ax.set_ylabel('delta_onset_ms')
ax.set_ylim(-100, 100)
ax.legend()
fig.savefig('random_forest_onsets_high_res.png', dpi=300)
plt.show()

## LSTM onsets

In [None]:
# Work on a private copy of the LSTM / RNN view
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Regression target and the input columns fed to the network

target = 'delta_onset_ms'
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]

X = df_complete_LSTM_RNN[features].to_numpy()
y = df_complete_LSTM_RNN[target].to_numpy()

# Min-max scale inputs and target separately; scaler_y is kept so predictions
# can be mapped back to milliseconds later. Both scalers are fitted on all rows.

scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# Create sequences

def create_sequences(X, y, time_steps=1):
    """Turn row-aligned inputs and targets into sliding windows.

    Window ``k`` holds rows ``k`` .. ``k + time_steps - 1`` of ``X`` and is
    paired with ``y[k + time_steps]``, the target of the row right after it.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``; both are empty when ``X`` has no more than
        ``time_steps`` rows.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    targets = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

time_steps = 1
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2

# Row positions (not index labels) of the flagged notes: sequence k is built
# from array positions k .. k + time_steps, so positions are what must be
# compared. With the 0..n-1 index of df_complete both coincide.
indices_to_remove = np.flatnonzero(
    (df_complete_LSTM_RNN['ex_note_numb'] == 2).to_numpy()
).tolist()

# A set gives O(1) membership tests; scanning the list for every sequence was
# quadratic in the number of rows.
flagged_rows = set(indices_to_remove)

# Sequence k covers its input rows plus the row of its target
# (k .. k + time_steps inclusive); keep it only if none of them is flagged.
kept_positions = np.array(
    [
        k for k in range(len(X_seq))
        if flagged_rows.isdisjoint(range(k, k + time_steps + 1))
    ],
    dtype=int,
)

filtered_X_seq = X_seq[kept_positions]
filtered_y_seq = y_seq[kept_positions]

# Train and evaluate a single-layer LSTM on the filtered sequences; nothing is
# trained when the filtering step left no sequence.
if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Shape the inputs as (n_sequences, time_steps, n_features) and hold out 20 %
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # LSTM model: one 40-unit LSTM layer followed by a single linear output

    model = Sequential()
    model.add(LSTM(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping: stop after 5 epochs without val_loss improvement and
    # roll back to the best weights seen

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model (at most 500 epochs)
    # NOTE(review): the test split is passed as validation_data, so it also
    # drives early stopping and weight restoration; the test metrics below are
    # therefore not computed on data unseen during model selection. Consider
    # carving a separate validation split out of the training data.

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict

    y_pred_scaled = model.predict(X_test)

    # Undo the target scaling; y_test is overwritten with its unscaled values
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    # Undo the target scaling; y_train is overwritten with its unscaled values
    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('delta_onset_ms')
    plt.ylim(-100, 100)
    plt.legend()
    plt.savefig('LSTM_onsets_high_res.png', dpi=300)
    plt.show()

## LSTM onsets 4 time steps

In [None]:
# Work on a private copy of the LSTM / RNN view
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Select features and target
# 'ex_note_numb' is deliberately not a model input (it is only used further
# down to filter sequences). It used to be listed here and then stripped again
# before scaling, which left X and X_scaled with different column counts and
# made this cell disagree with the other LSTM / RNN cells.

features = ['note_numb', 'onbeat', 'PC', 'SC', 'HS', 'hammer-on', 'pull-off', 'bending', 'delta_onset_ms-1', 'velocity-1']
target = 'delta_onset_ms'

X = df_complete_LSTM_RNN[features].values
y = df_complete_LSTM_RNN[target].values

# Data normalization: min-max scale inputs and target separately; scaler_y is
# kept so predictions can be mapped back to milliseconds later.

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# Create sequences for the LSTM

def create_sequences(X, y, time_steps=4):
    """Turn row-aligned inputs and targets into sliding windows.

    Window ``k`` holds rows ``k`` .. ``k + time_steps - 1`` of ``X`` and is
    paired with ``y[k + time_steps]``, the target of the row right after it.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``; both are empty when ``X`` has no more than
        ``time_steps`` rows.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    targets = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

time_steps = 4
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2, 3, 4, or 5

# Row positions (not index labels) of the flagged notes: sequence k is built
# from array positions k .. k + time_steps, so positions are what must be
# compared. With the 0..n-1 index of df_complete both coincide.
indices_to_remove = np.flatnonzero(
    df_complete_LSTM_RNN['ex_note_numb'].isin([2, 3, 4, 5]).to_numpy()
).tolist()

# A set gives O(1) membership tests; scanning the list for every sequence was
# quadratic in the number of rows.
flagged_rows = set(indices_to_remove)

# Sequence k covers its input rows plus the row of its target
# (k .. k + time_steps inclusive); keep it only if none of them is flagged.
kept_positions = np.array(
    [
        k for k in range(len(X_seq))
        if flagged_rows.isdisjoint(range(k, k + time_steps + 1))
    ],
    dtype=int,
)

filtered_X_seq = X_seq[kept_positions]
filtered_y_seq = y_seq[kept_positions]

# Train and evaluate a single-layer LSTM on the filtered 4-step sequences;
# nothing is trained when the filtering step left no sequence.
if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Shape the inputs as (n_sequences, time_steps, n_features) and hold out 20 %
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # LSTM model: one 40-unit LSTM layer followed by a single linear output

    model = Sequential()
    model.add(LSTM(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping: stop after 5 epochs without val_loss improvement and
    # roll back to the best weights seen

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model (at most 500 epochs)
    # NOTE(review): the test split is passed as validation_data, so it also
    # drives early stopping and weight restoration; the test metrics below are
    # therefore not computed on data unseen during model selection. Consider
    # carving a separate validation split out of the training data.

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict

    y_pred_scaled = model.predict(X_test)

    # Undo the target scaling; y_test is overwritten with its unscaled values
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    # Undo the target scaling; y_train is overwritten with its unscaled values
    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('delta_onset_ms')
    plt.ylim(-100, 100)
    plt.legend()
    plt.savefig('LSTM-4_onsets_high_res.png', dpi=300)
    plt.show()

## RNN onsets

In [None]:
# Work on a private copy of the LSTM / RNN view
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Regression target and the input columns fed to the network
target = 'delta_onset_ms'
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]

X = df_complete_LSTM_RNN[features].to_numpy()
y = df_complete_LSTM_RNN[target].to_numpy()

# Min-max scale inputs and target separately; scaler_y is kept so predictions
# can be mapped back to milliseconds later. Both scalers are fitted on all rows.

scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# Create sequences

def create_sequences(X, y, time_steps=1):
    """Turn row-aligned inputs and targets into sliding windows.

    Window ``k`` holds rows ``k`` .. ``k + time_steps - 1`` of ``X`` and is
    paired with ``y[k + time_steps]``, the target of the row right after it.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``; both are empty when ``X`` has no more than
        ``time_steps`` rows.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    targets = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

time_steps = 1
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2

# Row positions (not index labels) of the flagged notes: sequence k is built
# from array positions k .. k + time_steps, so positions are what must be
# compared. With the 0..n-1 index of df_complete both coincide.
indices_to_remove = np.flatnonzero(
    (df_complete_LSTM_RNN['ex_note_numb'] == 2).to_numpy()
).tolist()

# A set gives O(1) membership tests; scanning the list for every sequence was
# quadratic in the number of rows.
flagged_rows = set(indices_to_remove)

# Sequence k covers its input rows plus the row of its target
# (k .. k + time_steps inclusive); keep it only if none of them is flagged.
kept_positions = np.array(
    [
        k for k in range(len(X_seq))
        if flagged_rows.isdisjoint(range(k, k + time_steps + 1))
    ],
    dtype=int,
)

filtered_X_seq = X_seq[kept_positions]
filtered_y_seq = y_seq[kept_positions]

# Train and evaluate a single-layer SimpleRNN on the filtered sequences;
# nothing is trained when the filtering step left no sequence.
if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Shape the inputs as (n_sequences, time_steps, n_features) and hold out 20 %
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # RNN model: one 40-unit SimpleRNN layer followed by a single linear output

    model = Sequential()
    model.add(SimpleRNN(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping: stop after 5 epochs without val_loss improvement and
    # roll back to the best weights seen

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model (at most 500 epochs)
    # NOTE(review): the test split is passed as validation_data, so it also
    # drives early stopping and weight restoration; the test metrics below are
    # therefore not computed on data unseen during model selection. Consider
    # carving a separate validation split out of the training data.

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict

    y_pred_scaled = model.predict(X_test)

    # Undo the target scaling; y_test is overwritten with its unscaled values
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    # Undo the target scaling; y_train is overwritten with its unscaled values
    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('delta_onset_ms')
    plt.ylim(-100, 100)
    plt.legend()
    plt.savefig('RNN_onsets_high_res.png', dpi=300)
    plt.show()

## RNN onsets 4 time steps

In [None]:
# Work on a private copy of the LSTM / RNN view
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Regression target and the input columns fed to the network
# ('ex_note_numb' is not among them)

target = 'delta_onset_ms'
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]

X = df_complete_LSTM_RNN[features].to_numpy()
y = df_complete_LSTM_RNN[target].to_numpy()

# Min-max scale inputs and target separately; scaler_y is kept so predictions
# can be mapped back to milliseconds later. Both scalers are fitted on all rows.

scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# Create sequences

def create_sequences(X, y, time_steps=4):
    """Build sliding windows over X, each labelled with the row of y that follows it.

    Window k is X[k : k + time_steps]; its label is y[k + time_steps].
    Returns (windows, labels) as NumPy arrays; both are empty when
    len(X) <= time_steps.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    labels = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

time_steps = 4
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2, 3, 4, or 5.
# X_seq[i] is built from rows i .. i+time_steps-1 and y_seq[i] from row
# i+time_steps, all addressed by POSITION in X_scaled / y_scaled. The rows to
# exclude are therefore collected as positions (np.flatnonzero on the boolean
# mask) instead of index labels, which only match positions for a default
# RangeIndex. A set gives O(1) membership tests inside the loop.

indices_to_remove = set(
    np.flatnonzero(df_complete_LSTM_RNN['ex_note_numb'].isin([2, 3, 4, 5]).values).tolist()
)

filtered_X_seq = []
filtered_y_seq = []

for i in range(len(X_seq)):
    # Input rows plus the target row of sequence i
    sequence_indices = range(i, i + time_steps + 1)
    if indices_to_remove.isdisjoint(sequence_indices):
        filtered_X_seq.append(X_seq[i])
        filtered_y_seq.append(y_seq[i])

filtered_X_seq = np.array(filtered_X_seq)
filtered_y_seq = np.array(filtered_y_seq)

if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Enforce the (samples, time_steps, features) layout
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # RNN model: one 40-unit SimpleRNN layer feeding a single-unit Dense output

    model = Sequential()
    model.add(SimpleRNN(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping (patience of 5 epochs on val_loss, best weights restored)
    # NOTE(review): the test split is also passed as validation_data below, so
    # the reported test metrics are selected on the same data - consider
    # holding out a separate validation split.

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict; undo the target scaling on predictions and ground truth

    y_pred_scaled = model.predict(X_test)

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('delta_onset_ms')
    plt.ylim(-100, 100)
    plt.legend()
    plt.savefig('RNN-4_onsets_high_res.png', dpi=300)
    plt.show()

## Decision tree prec_rest

In [None]:
# Drop the columns not used in this experiment; 'prec_rest_ms' stays as target.
df_decision_tree_prec_rest = df_complete_dt_rf.drop(
    columns=['velocity', 'delta_onset_ms', 'PC-abs', 'SC-abs', 'IOI_ms']
)
print(df_decision_tree_prec_rest.columns)

# Target column and the remaining columns as features
y = df_decision_tree_prec_rest['prec_rest_ms']
X = df_decision_tree_prec_rest.drop(columns=['prec_rest_ms'])

# 80/20 train-test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Decision tree regressor limited to depth 10
dt = DecisionTreeRegressor(max_depth=10, random_state=42)
dt.fit(X_train, y_train)
y_test_pred, y_train_pred = dt.predict(X_test), dt.predict(X_train)

# Evaluation of the model: test set first, then training set
mse_test, r2_test = mean_squared_error(y_test, y_test_pred), r2_score(y_test, y_test_pred)
print(f"Test Mean Squared Error: {mse_test}")
print(f"Test R-squared: {r2_test}")

mse_train, r2_train = mean_squared_error(y_train, y_train_pred), r2_score(y_train, y_train_pred)
print(f"Train Mean Squared Error: {mse_train}")
print(f"Train R-squared: {r2_train}")

# Real vs. predicted test values and their difference
df_results_dt = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_test_pred})
df_results_dt['difference'] = df_results_dt['Real Values'] - df_results_dt['Predicted Values']

# Plot the real and predicted values for the first 100 test samples
window_size = 100
plt.figure(figsize=(10, 6))
plt.plot(y_test.values[:window_size], label='Real Values', color='blue')
plt.plot(y_test_pred[:window_size], label='Predicted Values', color='red')
plt.xlabel('Index')
plt.ylabel('prec_rest_ms')
plt.ylim(0, 250)
plt.legend()
plt.savefig('decision_tree_prec_rest_high_res.png', dpi=300)
plt.show()

## Random forest prec_rest

In [None]:
# Drop the columns not used in this experiment; 'prec_rest_ms' stays as target.
df_random_forest_prec_rest = df_complete_dt_rf.drop(
    columns=['velocity', 'delta_onset_ms', 'PC-abs', 'SC-abs', 'IOI_ms']
)
print(df_random_forest_prec_rest.columns)

# Target column and the remaining columns as features
y = df_random_forest_prec_rest['prec_rest_ms']
X = df_random_forest_prec_rest.drop(columns=['prec_rest_ms'])

# 80/20 train-test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random forest regressor with trees limited to depth 10
rf = RandomForestRegressor(max_depth=10, random_state=42)
rf.fit(X_train, y_train)
y_test_pred, y_train_pred = rf.predict(X_test), rf.predict(X_train)

# Evaluation of the model: test set first, then training set
mse_test, r2_test = mean_squared_error(y_test, y_test_pred), r2_score(y_test, y_test_pred)
print(f"Test Mean Squared Error: {mse_test}")
print(f"Test R-squared: {r2_test}")

mse_train, r2_train = mean_squared_error(y_train, y_train_pred), r2_score(y_train, y_train_pred)
print(f"Train Mean Squared Error: {mse_train}")
print(f"Train R-squared: {r2_train}")

# Real vs. predicted test values and their difference
df_results_rf = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_test_pred})
df_results_rf['difference'] = df_results_rf['Real Values'] - df_results_rf['Predicted Values']

# Plot the real and predicted values for the first 100 test samples
window_size = 100
plt.figure(figsize=(10, 6))
plt.plot(y_test.values[:window_size], label='Real Values', color='blue')
plt.plot(y_test_pred[:window_size], label='Predicted Values', color='red')
plt.xlabel('Index')
plt.ylabel('prec_rest_ms')
plt.ylim(0, 250)
plt.legend()
plt.savefig('random_forest_prec_rest_high_res.png', dpi=300)
plt.show()

## LSTM prec_rest

In [None]:
# Start from a fresh copy of the reference frame.
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Input columns and regression target
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]
target = 'prec_rest_ms'

X, y = df_complete_LSTM_RNN[features].values, df_complete_LSTM_RNN[target].values

# Min-max normalization; both scalers are fit on the complete data set
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y[:, None])

# Create sequences

def create_sequences(X, y, time_steps=1):
    """Pair every run of `time_steps` consecutive rows of X with the next row of y.

    The k-th pair is (X[k : k + time_steps], y[k + time_steps]). Both results
    are returned as NumPy arrays and are empty when len(X) <= time_steps.
    """
    pairs = [(X[k:k + time_steps], y[k + time_steps]) for k in range(len(X) - time_steps)]
    Xs = [window for window, _ in pairs]
    ys = [label for _, label in pairs]
    return np.array(Xs), np.array(ys)

time_steps = 1
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2.
# X_seq[i] is built from rows i .. i+time_steps-1 and y_seq[i] from row
# i+time_steps, all addressed by POSITION in X_scaled / y_scaled. The rows to
# exclude are therefore collected as positions (np.flatnonzero on the boolean
# mask) instead of index labels, which only match positions for a default
# RangeIndex. A set gives O(1) membership tests inside the loop.

indices_to_remove = set(
    np.flatnonzero((df_complete_LSTM_RNN['ex_note_numb'] == 2).values).tolist()
)

filtered_X_seq = []
filtered_y_seq = []

for i in range(len(X_seq)):
    # Input rows plus the target row of sequence i
    sequence_indices = range(i, i + time_steps + 1)
    if indices_to_remove.isdisjoint(sequence_indices):
        filtered_X_seq.append(X_seq[i])
        filtered_y_seq.append(y_seq[i])

filtered_X_seq = np.array(filtered_X_seq)
filtered_y_seq = np.array(filtered_y_seq)

if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Enforce the (samples, time_steps, features) layout
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # LSTM model: one 40-unit LSTM layer feeding a single-unit Dense output

    model = Sequential()
    model.add(LSTM(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping (patience of 5 epochs on val_loss, best weights restored)
    # NOTE(review): the test split is also passed as validation_data below, so
    # the reported test metrics are selected on the same data - consider
    # holding out a separate validation split.

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict; undo the target scaling on predictions and ground truth

    y_pred_scaled = model.predict(X_test)

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[0:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[0:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('prec_rest_ms')
    plt.ylim(0, 250)
    plt.legend()
    plt.savefig('LSTM_prec_rest_high_res.png', dpi=300)
    plt.show()

## LSTM prec_rest 4 time steps

In [None]:
# Start from a fresh copy of the reference frame.
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# 'ex_note_numb' heads the list but is left out of the scaled model inputs below
features = [
    'ex_note_numb',
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]
target = 'prec_rest_ms'

X, y = df_complete_LSTM_RNN[features].values, df_complete_LSTM_RNN[target].values

# Min-max normalization; the input scaler sees every listed feature except the
# leading 'ex_note_numb', and both scalers are fit on the complete data set
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(df_complete_LSTM_RNN[features[1:]].values)
y_scaled = scaler_y.fit_transform(y[:, None])

# Create sequences

def create_sequences(X, y, time_steps=4):
    """Build sliding windows over X, each labelled with the row of y that follows it.

    Window k is X[k : k + time_steps]; its label is y[k + time_steps].
    Returns (windows, labels) as NumPy arrays; both are empty when
    len(X) <= time_steps.
    """
    starts = range(len(X) - time_steps)
    windows = [X[s:s + time_steps] for s in starts]
    labels = [y[s + time_steps] for s in starts]
    return np.array(windows), np.array(labels)

time_steps = 4
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2, 3, 4, or 5.
# X_seq[i] is built from rows i .. i+time_steps-1 and y_seq[i] from row
# i+time_steps, all addressed by POSITION in X_scaled / y_scaled. The rows to
# exclude are therefore collected as positions (np.flatnonzero on the boolean
# mask) instead of index labels, which only match positions for a default
# RangeIndex. A set gives O(1) membership tests inside the loop.

indices_to_remove = set(
    np.flatnonzero(df_complete_LSTM_RNN['ex_note_numb'].isin([2, 3, 4, 5]).values).tolist()
)

filtered_X_seq = []
filtered_y_seq = []

for i in range(len(X_seq)):
    # Input rows plus the target row of sequence i
    sequence_indices = range(i, i + time_steps + 1)
    if indices_to_remove.isdisjoint(sequence_indices):
        filtered_X_seq.append(X_seq[i])
        filtered_y_seq.append(y_seq[i])

filtered_X_seq = np.array(filtered_X_seq)
filtered_y_seq = np.array(filtered_y_seq)

if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Enforce the (samples, time_steps, features) layout
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # LSTM model: one 40-unit LSTM layer feeding a single-unit Dense output

    model = Sequential()
    model.add(LSTM(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping (patience of 5 epochs on val_loss, best weights restored)
    # NOTE(review): the test split is also passed as validation_data below, so
    # the reported test metrics are selected on the same data - consider
    # holding out a separate validation split.

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict; undo the target scaling on predictions and ground truth

    y_pred_scaled = model.predict(X_test)

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[0:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[0:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('prec_rest_ms')
    plt.ylim(0, 250)
    plt.legend()
    plt.savefig('LSTM-4_prec_rest_high_res.png', dpi=300)
    plt.show()

## RNN prec_rest

In [None]:
# Start from a fresh copy of the reference frame.
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Input columns and regression target
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]
target = 'prec_rest_ms'

X, y = df_complete_LSTM_RNN[features].values, df_complete_LSTM_RNN[target].values

# Min-max normalization; both scalers are fit on the complete data set
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y[:, None])

# Create sequences

def create_sequences(X, y, time_steps=1):
    """Pair every run of `time_steps` consecutive rows of X with the next row of y.

    The k-th pair is (X[k : k + time_steps], y[k + time_steps]). Both results
    are returned as NumPy arrays and are empty when len(X) <= time_steps.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    labels = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

time_steps = 1
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2.
# X_seq[i] is built from rows i .. i+time_steps-1 and y_seq[i] from row
# i+time_steps, all addressed by POSITION in X_scaled / y_scaled. The rows to
# exclude are therefore collected as positions (np.flatnonzero on the boolean
# mask) instead of index labels, which only match positions for a default
# RangeIndex. A set gives O(1) membership tests inside the loop.

indices_to_remove = set(
    np.flatnonzero((df_complete_LSTM_RNN['ex_note_numb'] == 2).values).tolist()
)

filtered_X_seq = []
filtered_y_seq = []

for i in range(len(X_seq)):
    # Input rows plus the target row of sequence i
    sequence_indices = range(i, i + time_steps + 1)
    if indices_to_remove.isdisjoint(sequence_indices):
        filtered_X_seq.append(X_seq[i])
        filtered_y_seq.append(y_seq[i])

filtered_X_seq = np.array(filtered_X_seq)
filtered_y_seq = np.array(filtered_y_seq)

if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Enforce the (samples, time_steps, features) layout
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # RNN model: one 40-unit SimpleRNN layer feeding a single-unit Dense output

    model = Sequential()
    model.add(SimpleRNN(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping (patience of 5 epochs on val_loss, best weights restored)
    # NOTE(review): the test split is also passed as validation_data below, so
    # the reported test metrics are selected on the same data - consider
    # holding out a separate validation split.

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict; undo the target scaling on predictions and ground truth

    y_pred_scaled = model.predict(X_test)

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[0:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[0:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('prec_rest_ms')
    plt.ylim(0, 250)
    plt.legend()
    plt.savefig('RNN_prec_rest_high_res.png', dpi=300)
    plt.show()

## RNN prec_rest 4 time steps

In [None]:
# Start from a fresh copy of the reference frame.
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Input columns and regression target ('ex_note_numb' is not a model input)
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]
target = 'prec_rest_ms'

X, y = df_complete_LSTM_RNN[features].values, df_complete_LSTM_RNN[target].values

# Min-max normalization; both scalers are fit on the complete data set
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y[:, None])

# Persist the fitted scalers next to the exported model
joblib.dump(scaler_X, 'scaler_X_prec_rest.pkl')
joblib.dump(scaler_y, 'scaler_y_prec_rest.pkl')

# Create sequences

def create_sequences(X, y, time_steps=4):
    """Build sliding windows over X, each labelled with the row of y that follows it.

    Window k is X[k : k + time_steps]; its label is y[k + time_steps].
    Returns (windows, labels) as NumPy arrays; both are empty when
    len(X) <= time_steps.
    """
    pairs = [(X[k:k + time_steps], y[k + time_steps]) for k in range(len(X) - time_steps)]
    Xs = [window for window, _ in pairs]
    ys = [label for _, label in pairs]
    return np.array(Xs), np.array(ys)

time_steps = 4
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2, 3, 4, or 5.
# X_seq[i] is built from rows i .. i+time_steps-1 and y_seq[i] from row
# i+time_steps, all addressed by POSITION in X_scaled / y_scaled. The rows to
# exclude are therefore collected as positions (np.flatnonzero on the boolean
# mask) instead of index labels, which only match positions for a default
# RangeIndex. A set gives O(1) membership tests inside the loop.

indices_to_remove = set(
    np.flatnonzero(df_complete_LSTM_RNN['ex_note_numb'].isin([2, 3, 4, 5]).values).tolist()
)

filtered_X_seq = []
filtered_y_seq = []

for i in range(len(X_seq)):
    # Input rows plus the target row of sequence i
    sequence_indices = range(i, i + time_steps + 1)
    if indices_to_remove.isdisjoint(sequence_indices):
        filtered_X_seq.append(X_seq[i])
        filtered_y_seq.append(y_seq[i])

filtered_X_seq = np.array(filtered_X_seq)
filtered_y_seq = np.array(filtered_y_seq)

if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Enforce the (samples, time_steps, features) layout
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # RNN model: one 40-unit SimpleRNN layer feeding a single-unit Dense output

    model = Sequential()
    model.add(SimpleRNN(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping (patience of 5 epochs on val_loss, best weights restored)
    # NOTE(review): the test split is also passed as validation_data below, so
    # the reported test metrics are selected on the same data - consider
    # holding out a separate validation split.

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict; undo the target scaling on predictions and ground truth

    y_pred_scaled = model.predict(X_test)

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Export regressor for offline use

    model.save('RNN-4_regressor_prec_rests.h5')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[0:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[0:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    # The plotted quantity is the 'prec_rest_ms' target of this cell
    plt.ylabel('prec_rest_ms')
    plt.ylim(0, 250)
    plt.legend()
    plt.savefig('RNN-4_prec_rest_high_res.png', dpi=300)
    plt.show()

## Decision tree velocity

In [None]:
# Drop the columns not used in this experiment; 'velocity' stays as target.
df_decision_tree_velocity = df_complete_dt_rf.drop(
    columns=['prec_rest_ms', 'delta_onset_ms', 'PC-abs', 'SC-abs', 'IOI_ms']
)
print(df_decision_tree_velocity.columns)

# Target column and the remaining columns as features
y = df_decision_tree_velocity['velocity']
X = df_decision_tree_velocity.drop(columns=['velocity'])

# 80/20 train-test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Decision tree regressor limited to depth 10
dt = DecisionTreeRegressor(max_depth=10, random_state=42)
dt.fit(X_train, y_train)
y_test_pred, y_train_pred = dt.predict(X_test), dt.predict(X_train)

# Evaluation of the model: test set first, then training set
mse_test, r2_test = mean_squared_error(y_test, y_test_pred), r2_score(y_test, y_test_pred)
print(f"Test Mean Squared Error: {mse_test}")
print(f"Test R-squared: {r2_test}")

mse_train, r2_train = mean_squared_error(y_train, y_train_pred), r2_score(y_train, y_train_pred)
print(f"Train Mean Squared Error: {mse_train}")
print(f"Train R-squared: {r2_train}")

# Real vs. predicted test values and their difference
df_results_dt = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_test_pred})
df_results_dt['difference'] = df_results_dt['Real Values'] - df_results_dt['Predicted Values']

# Plot the real and predicted values for the first 100 test samples
n_samples = 100
plt.figure(figsize=(10, 6))
plt.plot(y_test.values[0:n_samples], label='Real Values', color='blue')
plt.plot(y_test_pred[0:n_samples], label='Predicted Values', color='red')
plt.xlabel('Index')
plt.ylabel('velocity')
plt.ylim(0, 127)
plt.legend()
plt.savefig('decision_tree_velocity_high_res.png', dpi=300)
plt.show()

## Random forest velocity

In [None]:
# Drop the columns not used in this experiment; 'velocity' stays as target.
df_random_forest_velocity = df_complete_dt_rf.drop(
    columns=['prec_rest_ms', 'delta_onset_ms', 'PC-abs', 'SC-abs', 'IOI_ms']
)
print(df_random_forest_velocity.columns)

# Target column and the remaining columns as features
y = df_random_forest_velocity['velocity']
X = df_random_forest_velocity.drop(columns=['velocity'])

# 80/20 train-test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random forest regressor with trees limited to depth 10
rf = RandomForestRegressor(max_depth=10, random_state=42)
rf.fit(X_train, y_train)
y_test_pred, y_train_pred = rf.predict(X_test), rf.predict(X_train)

# Evaluation of the model: test set first, then training set
mse_test, r2_test = mean_squared_error(y_test, y_test_pred), r2_score(y_test, y_test_pred)
print(f"Test Mean Squared Error: {mse_test}")
print(f"Test R-squared: {r2_test}")

mse_train, r2_train = mean_squared_error(y_train, y_train_pred), r2_score(y_train, y_train_pred)
print(f"Train Mean Squared Error: {mse_train}")
print(f"Train R-squared: {r2_train}")

# Real vs. predicted test values and their difference
df_results_rf = pd.DataFrame({'Real Values': y_test, 'Predicted Values': y_test_pred})
df_results_rf['difference'] = df_results_rf['Real Values'] - df_results_rf['Predicted Values']

# Plot the real and predicted values for the first 100 test samples
n_samples = 100
plt.figure(figsize=(10, 6))
plt.plot(y_test.values[0:n_samples], label='Real Values', color='blue')
plt.plot(y_test_pred[0:n_samples], label='Predicted Values', color='red')
plt.xlabel('Index')
plt.ylabel('velocity')
plt.ylim(0, 127)
plt.legend()
plt.savefig('random_forest_velocity_high_res.png', dpi=300)
plt.show()

## LSTM velocity

In [None]:
# Start from a fresh copy of the reference frame.
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Input columns and regression target
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]
target = 'velocity'

X, y = df_complete_LSTM_RNN[features].values, df_complete_LSTM_RNN[target].values

# Min-max normalization; both scalers are fit on the complete data set
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y[:, None])

# Create sequences 

def create_sequences(X, y, time_steps=1):
    """Pair every run of `time_steps` consecutive rows of X with the next row of y.

    The k-th pair is (X[k : k + time_steps], y[k + time_steps]). Both results
    are returned as NumPy arrays and are empty when len(X) <= time_steps.
    """
    starts = range(len(X) - time_steps)
    windows = [X[s:s + time_steps] for s in starts]
    labels = [y[s + time_steps] for s in starts]
    return np.array(windows), np.array(labels)

time_steps = 1
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2.
# X_seq[i] is built from rows i .. i+time_steps-1 and y_seq[i] from row
# i+time_steps, all addressed by POSITION in X_scaled / y_scaled. The rows to
# exclude are therefore collected as positions (np.flatnonzero on the boolean
# mask) instead of index labels, which only match positions for a default
# RangeIndex. A set gives O(1) membership tests inside the loop.

indices_to_remove = set(
    np.flatnonzero((df_complete_LSTM_RNN['ex_note_numb'] == 2).values).tolist()
)

filtered_X_seq = []
filtered_y_seq = []

for i in range(len(X_seq)):
    # Input rows plus the target row of sequence i
    sequence_indices = range(i, i + time_steps + 1)
    if indices_to_remove.isdisjoint(sequence_indices):
        filtered_X_seq.append(X_seq[i])
        filtered_y_seq.append(y_seq[i])

filtered_X_seq = np.array(filtered_X_seq)
filtered_y_seq = np.array(filtered_y_seq)

if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Enforce the (samples, time_steps, features) layout
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)

    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # LSTM model: one 40-unit LSTM layer feeding a single-unit Dense output

    model = Sequential()
    model.add(LSTM(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping (patience of 5 epochs on val_loss, best weights restored)
    # NOTE(review): the test split is also passed as validation_data below, so
    # the reported test metrics are selected on the same data - consider
    # holding out a separate validation split.

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict; undo the target scaling on predictions and ground truth

    y_pred_scaled = model.predict(X_test)

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('velocity')
    plt.ylim(0, 127)
    plt.legend()
    plt.savefig('LSTM_velocity_high_res.png', dpi=300)
    plt.show()

## LSTM velocity 4 time steps

In [None]:
# Start from a fresh copy of the reference frame.
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# 'ex_note_numb' heads the list but is left out of the scaled model inputs below
features = [
    'ex_note_numb',
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]
target = 'velocity'

X, y = df_complete_LSTM_RNN[features].values, df_complete_LSTM_RNN[target].values

# Min-max normalization; the input scaler sees every listed feature except the
# leading 'ex_note_numb', and both scalers are fit on the complete data set
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(df_complete_LSTM_RNN[features[1:]].values)
y_scaled = scaler_y.fit_transform(y[:, None])

# Create sequences

def create_sequences(X, y, time_steps=4):
    """Build sliding windows over X, each labelled with the row of y that follows it.

    Window k is X[k : k + time_steps]; its label is y[k + time_steps].
    Returns (windows, labels) as NumPy arrays; both are empty when
    len(X) <= time_steps.
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    labels = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(labels)

time_steps = 4
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2, 3, 4, or 5.
# X_seq[i] is built from rows i .. i+time_steps-1 and y_seq[i] from row
# i+time_steps, all addressed by POSITION in X_scaled / y_scaled. The rows to
# exclude are therefore collected as positions (np.flatnonzero on the boolean
# mask) instead of index labels, which only match positions for a default
# RangeIndex. A set gives O(1) membership tests inside the loop.

indices_to_remove = set(
    np.flatnonzero(df_complete_LSTM_RNN['ex_note_numb'].isin([2, 3, 4, 5]).values).tolist()
)

filtered_X_seq = []
filtered_y_seq = []

for i in range(len(X_seq)):
    # Input rows plus the target row of sequence i
    sequence_indices = range(i, i + time_steps + 1)
    if indices_to_remove.isdisjoint(sequence_indices):
        filtered_X_seq.append(X_seq[i])
        filtered_y_seq.append(y_seq[i])

filtered_X_seq = np.array(filtered_X_seq)
filtered_y_seq = np.array(filtered_y_seq)

if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Enforce the (samples, time_steps, features) layout
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # LSTM model: one 40-unit LSTM layer feeding a single-unit Dense output

    model = Sequential()
    model.add(LSTM(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping (patience of 5 epochs on val_loss, best weights restored)
    # NOTE(review): the test split is also passed as validation_data below, so
    # the reported test metrics are selected on the same data - consider
    # holding out a separate validation split.

    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model

    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict; undo the target scaling on predictions and ground truth

    y_pred_scaled = model.predict(X_test)

    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))

    # Calculate MSE and R-squared on the test set

    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict

    y_train_pred_scaled = model.predict(X_train)

    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))

    # Calculate MSE and R-squared on the training set

    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples

    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('velocity')
    plt.ylim(0, 127)
    plt.legend()
    plt.savefig('LSTM-4_velocity_high_res.png', dpi=300)
    plt.show()

## RNN velocity

In [None]:
# Start from a fresh copy of the reference frame.
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Input columns and regression target
features = [
    'note_numb', 'onbeat', 'PC', 'SC', 'HS',
    'hammer-on', 'pull-off', 'bending',
    'delta_onset_ms-1', 'velocity-1',
]
target = 'velocity'

X, y = df_complete_LSTM_RNN[features].values, df_complete_LSTM_RNN[target].values

# Min-max normalization; both scalers are fit on the complete data set
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y[:, None])

# Create sequences

def create_sequences(X, y, time_steps=1):
    """Slice ``X`` into overlapping windows and pair each with the next target.

    Window ``k`` holds rows ``k .. k + time_steps - 1`` of ``X``; its target is
    ``y[k + time_steps]``, i.e. the row immediately after the window.

    Parameters
    ----------
    X : sequence supporting ``len`` and slicing
        Input rows, one per time step.
    y : sequence supporting integer indexing
        Target values aligned row-by-row with ``X``.
    time_steps : int, default 1
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of np.ndarray
        ``(windows, targets)`` with ``len(X) - time_steps`` entries each
        (empty arrays when there are not enough rows).
    """
    n_windows = len(X) - time_steps
    windows = [X[start:start + time_steps] for start in range(n_windows)]
    targets = [y[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

time_steps = 1
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2
#
# Sequence i spans rows i .. i + time_steps of the scaled arrays (the input
# window plus its target row), counted by POSITION. The flagged rows are
# therefore located by position as well, not by DataFrame index label, so the
# filter stays correct even when the frame's index is not a default 0..n-1 range.
rows_to_remove = (df_complete_LSTM_RNN['ex_note_numb'] == 2).to_numpy(dtype=bool)
indices_to_remove = np.flatnonzero(rows_to_remove).tolist()

# flagged_before[k] = number of flagged rows among the first k rows, so rows
# a .. b-1 contain no flagged row exactly when flagged_before[b] == flagged_before[a].
# This replaces a per-sequence scan of a Python list with one vectorized pass.
flagged_before = np.concatenate(([0], np.cumsum(rows_to_remove)))
sequence_starts = np.arange(len(X_seq))
keep_sequence = (
    flagged_before[sequence_starts + time_steps + 1]
    == flagged_before[sequence_starts]
)

# Boolean-mask selection; both results have size 0 when nothing survives,
# which is what the emptiness check further down relies on.
filtered_X_seq = X_seq[keep_sequence]
filtered_y_seq = y_seq[keep_sequence]

# Train and evaluate a SimpleRNN velocity regressor on the filtered sequences.
# Nothing is trained when the filtering step left no sequences.
if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Lay the data out as (samples, time_steps, features-per-step) for the recurrent layer.
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    # Shuffled 80/20 split with a fixed seed.
    # NOTE(review): scaler_X / scaler_y were fitted on the whole dataset before
    # this split, so the scaling ranges already include the test rows.
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # RNN model: one SimpleRNN layer (40 units, only the last output is kept)
    # followed by a single Dense unit that outputs the scaled target.
    
    model = Sequential()
    model.add(SimpleRNN(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    # Adam with a learning rate of 1e-4 and gradient-norm clipping at 1.0; MSE loss on the scaled target.
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping: halt after 5 epochs without val_loss improvement and
    # restore the weights of the best epoch.
    
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model (at most 500 epochs, batches of 32).
    # NOTE(review): the test split is passed as validation data, so early
    # stopping and best-weight selection use the same samples that the test
    # metrics below are computed on.
    
    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    # Number of epochs actually run before training ended.
    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict
    
    y_pred_scaled = model.predict(X_test)

    # Map predictions and targets back to the original (unscaled) target units.
    # y_test is overwritten with its unscaled version here, so re-running only
    # this part would apply the inverse transform twice.
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))       

    # Calculate MSE and R-squared on the test set
    
    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict
    
    y_train_pred_scaled = model.predict(X_train)

    # Same un-scaling for the training split; y_train is overwritten as well.
    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))                 

    # Calculate MSE and R-squared on the training set
    
    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')

    # Plot the real and predicted values for the first 100 test samples
    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('velocity')
    # Fixed y-range of 0..127 — presumably the MIDI velocity range; TODO confirm.
    plt.ylim(0, 127)
    plt.legend()
    # Save the figure at 300 dpi before showing it.
    plt.savefig('RNN_velocity_high_res.png', dpi=300)
    plt.show()

## RNN velocity 4 time steps

In [None]:
# Work on a copy so the reference frame is left untouched by this cell
df_complete_LSTM_RNN = df_complete_LSTM_RNN_ref.copy()

# Select features and target
#
# 'ex_note_numb' heads the list but is not a model input: only features[1:]
# are fed to the scaler and the network. The column is used further down in
# this cell solely to decide which sequences to discard.

features = ['ex_note_numb', 'note_numb', 'onbeat', 'PC', 'SC', 'HS', 'hammer-on', 'pull-off', 'bending', 'delta_onset_ms-1', 'velocity-1']
target = 'velocity'

# Select the input columns directly. Dropping 'ex_note_numb' first is
# unnecessary (features[1:] never contains it) and would copy the whole frame.
X = df_complete_LSTM_RNN[features[1:]].values
y = df_complete_LSTM_RNN[target].values

# Data normalization
#
# The target is reshaped to one column because the scaler expects 2-D input.

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1))

# Save the scalers to disk
#
# scaler_X was fitted on the columns in the order of features[1:]; anything
# loading 'scaler_X_velocity.pkl' must supply the columns in that same order.

joblib.dump(scaler_X, 'scaler_X_velocity.pkl')
joblib.dump(scaler_y, 'scaler_y_velocity.pkl')

# Create sequences

def create_sequences(X, y, time_steps=4):
    """Build sliding input windows of ``time_steps`` rows with next-row targets.

    For every start position ``s`` the window is ``X[s:s + time_steps]`` and
    the matching target is ``y[s + time_steps]``.

    Parameters
    ----------
    X : sequence supporting ``len`` and slicing
        Input rows, one per time step.
    y : sequence supporting integer indexing
        Target values aligned row-by-row with ``X``.
    time_steps : int, default 4
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of np.ndarray
        ``(windows, targets)``, each with ``len(X) - time_steps`` entries
        (empty arrays when there are not enough rows).
    """
    starts = range(len(X) - time_steps)
    stacked_windows = np.array([X[s:s + time_steps] for s in starts])
    stacked_targets = np.array([y[s + time_steps] for s in starts])
    return stacked_windows, stacked_targets

time_steps = 4
X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)

# Remove sequences containing rows where 'ex_note_numb' == 2, 3, 4, or 5
#
# Sequence i spans rows i .. i + time_steps of the scaled arrays (the input
# window plus its target row), counted by POSITION. The flagged rows are
# therefore located by position as well, not by DataFrame index label, so the
# filter stays correct even when the frame's index is not a default 0..n-1 range.

rows_to_remove = df_complete_LSTM_RNN['ex_note_numb'].isin([2, 3, 4, 5]).to_numpy(dtype=bool)
indices_to_remove = np.flatnonzero(rows_to_remove).tolist()

# flagged_before[k] = number of flagged rows among the first k rows, so rows
# a .. b-1 contain no flagged row exactly when flagged_before[b] == flagged_before[a].
# This replaces a per-sequence scan of a Python list with one vectorized pass.
flagged_before = np.concatenate(([0], np.cumsum(rows_to_remove)))
sequence_starts = np.arange(len(X_seq))
keep_sequence = (
    flagged_before[sequence_starts + time_steps + 1]
    == flagged_before[sequence_starts]
)

# Boolean-mask selection; both results have size 0 when nothing survives,
# which is what the emptiness check further down relies on.
filtered_X_seq = X_seq[keep_sequence]
filtered_y_seq = y_seq[keep_sequence]

# Train, evaluate and export a SimpleRNN velocity regressor on the filtered
# 4-step sequences. Nothing is trained when the filtering step left no sequences.
if filtered_X_seq.size == 0:
    print("No sequences remaining after filtering.")
else:
    # Lay the data out as (samples, time_steps, features-per-step) for the recurrent layer.
    X_filtered = filtered_X_seq.reshape(filtered_X_seq.shape[0], time_steps, -1)
    # Shuffled 80/20 split with a fixed seed.
    # NOTE(review): scaler_X / scaler_y were fitted on the whole dataset before
    # this split, so the scaling ranges already include the test rows.
    X_train, X_test, y_train, y_test = train_test_split(X_filtered, filtered_y_seq, test_size=0.2, random_state=42, shuffle=True)

    # RNN model: one SimpleRNN layer (40 units, only the last output is kept)
    # followed by a single Dense unit that outputs the scaled target.
    
    model = Sequential()
    model.add(SimpleRNN(units=40, return_sequences=False, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dense(1))
    # Adam with a learning rate of 1e-4 and gradient-norm clipping at 1.0; MSE loss on the scaled target.
    optimizer = Adam(learning_rate=0.0001, clipnorm=1.0)
    model.compile(optimizer=optimizer, loss='mean_squared_error')

    # Add Early Stopping: halt after 5 epochs without val_loss improvement and
    # restore the weights of the best epoch.
    
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    # Train the model (at most 500 epochs, batches of 32).
    # NOTE(review): the test split is passed as validation data, so early
    # stopping and best-weight selection use the same samples that the test
    # metrics below are computed on.
    
    history = model.fit(X_train, y_train, epochs=500, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping])

    # Number of epochs actually run before training ended.
    epochs_used = len(history.epoch)
    print(f"Number of epochs used: {epochs_used}")

    # Test set predict
    
    y_pred_scaled = model.predict(X_test)

    # Map predictions and targets back to the original (unscaled) target units.
    # y_test is overwritten with its unscaled version here, so re-running only
    # this part would apply the inverse transform twice.
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))
    y_test = scaler_y.inverse_transform(y_test.reshape(-1, 1))       

    # Calculate MSE and R-squared on the test set
    
    mse_test = mean_squared_error(y_test, y_pred)
    r2_test = r2_score(y_test, y_pred)
    print(f'Mean Squared Error (Test): {mse_test}')
    print(f'R-squared (Test): {r2_test}')

    # Train set predict
    
    y_train_pred_scaled = model.predict(X_train)

    # Same un-scaling for the training split; y_train is overwritten as well.
    y_train_pred = scaler_y.inverse_transform(y_train_pred_scaled.reshape(-1, 1))
    y_train = scaler_y.inverse_transform(y_train.reshape(-1, 1))                 

    # Calculate MSE and R-squared on the training set
    
    mse_train = mean_squared_error(y_train, y_train_pred)
    r2_train = r2_score(y_train, y_train_pred)
    print(f'Mean Squared Error (Training): {mse_train}')
    print(f'R-squared (Training): {r2_train}')
    
    # Export the model for offline use.
    # The model consumes and produces min-max-scaled values; the fitted scalers
    # are written earlier in this cell to 'scaler_X_velocity.pkl' and
    # 'scaler_y_velocity.pkl'.
    
    model.save('RNN-4_regressor_velocity.h5')

    # Plot the real and predicted values for the first 100 test samples
    
    window_size = 100
    plt.figure(figsize=(10, 6))
    plt.plot(y_test.flatten()[:window_size], label='Real Values', color='blue')
    plt.plot(y_pred.flatten()[:window_size], label='Predicted Values', color='red')
    plt.xlabel('Index')
    plt.ylabel('velocity')
    # Fixed y-range of 0..127 — presumably the MIDI velocity range; TODO confirm.
    plt.ylim(0, 127)
    plt.legend()
    # Save the figure at 300 dpi before showing it.
    plt.savefig('RNN-4_velocity_high_res.png', dpi=300)
    plt.show()