# What does the data look like?

In [None]:
import pandas as pd

# Read repetition 1 of each file family (dp, norm, sp) so the raw column layout
# can be inspected before the full splits are assembled.
df_data_dp, df_data_norm, df_data_sp = map(
    pd.read_csv,
    ("/content/dp_rep1.csv", "/content/norm_rep1.csv", "/content/sp_rep1.csv"),
)

# Cell output: the first five rows of every frame, in dp / norm / sp order.
tuple(frame.head() for frame in (df_data_dp, df_data_norm, df_data_sp))

# Import Training Set

In [None]:
# Repetitions 1-8 of every file family (dp, norm, sp) form the training split.
train_file_paths = [
    '/content/dp_rep1.csv',
    '/content/dp_rep2.csv',
    '/content/dp_rep3.csv',
    '/content/dp_rep4.csv',
    '/content/dp_rep5.csv',
    '/content/dp_rep6.csv',
    '/content/dp_rep7.csv',
    '/content/dp_rep8.csv',
    '/content/norm_rep1.csv',
    '/content/norm_rep2.csv',
    '/content/norm_rep3.csv',
    '/content/norm_rep4.csv',
    '/content/norm_rep5.csv',
    '/content/norm_rep6.csv',
    '/content/norm_rep7.csv',
    '/content/norm_rep8.csv',
    '/content/sp_rep1.csv',
    '/content/sp_rep2.csv',
    '/content/sp_rep3.csv',
    '/content/sp_rep4.csv',
    '/content/sp_rep5.csv',
    '/content/sp_rep6.csv',
    '/content/sp_rep7.csv',
    '/content/sp_rep8.csv',
]

# Read every CSV in list order and stack the frames row-wise; ignore_index=True
# replaces the per-file row labels with one continuous 0..n-1 index.
train_data = pd.concat(list(map(pd.read_csv, train_file_paths)), ignore_index=True)
train_data.shape

# Finalise the Training Set

In [None]:
# Training arrays for the "Violin 1 leads" setup:
#   x -> the 'Violin 1' column as a 2-D (n_rows, 1) array,
#   y -> the 'Violin 2', 'Viola' and 'Cello' columns as an (n_rows, 3) array.
x_train_v1 = train_data['Violin 1'].values[:, None]
y_train_v1 = train_data.loc[:, ['Violin 2', 'Viola', 'Cello']].values

x_train_v1.shape, y_train_v1.shape

# Import Validation Set


In [None]:
# Repetitions 9-10 of every file family (dp, norm, sp) form the validation split.
validation_file_paths = [
    '/content/dp_rep9.csv',
    '/content/dp_rep10.csv',
    '/content/norm_rep9.csv',
    '/content/norm_rep10.csv',
    '/content/sp_rep9.csv',
    '/content/sp_rep10.csv',
]

# Read every CSV in list order and stack the frames row-wise under a fresh index.
validation_data = pd.concat(list(map(pd.read_csv, validation_file_paths)), ignore_index=True)
validation_data.shape

# Finalise the Validation Set

In [None]:
# Validation arrays for the "Violin 1 leads" setup:
#   x -> the 'Violin 1' column as a 2-D (n_rows, 1) array,
#   y -> the 'Violin 2', 'Viola' and 'Cello' columns as an (n_rows, 3) array.
x_validation_v1 = validation_data['Violin 1'].values[:, None]
y_validation_v1 = validation_data.loc[:, ['Violin 2', 'Viola', 'Cello']].values

x_validation_v1.shape, y_validation_v1.shape

# Import Testing Set

In [None]:
# Repetitions 11-12 of every file family (dp, norm, sp) form the testing split.
test_file_paths = [
    '/content/dp_rep11.csv',
    '/content/dp_rep12.csv',
    '/content/norm_rep11.csv',
    '/content/norm_rep12.csv',
    '/content/sp_rep11.csv',
    '/content/sp_rep12.csv',
]

# Read every CSV in list order and stack the frames row-wise under a fresh index.
test_data = pd.concat(list(map(pd.read_csv, test_file_paths)), ignore_index=True)
test_data.shape

# Finalise the Testing Set

In [None]:
# Testing arrays for the "Violin 1 leads" setup:
#   x -> the 'Violin 1' column as a 2-D (n_rows, 1) array,
#   y -> the 'Violin 2', 'Viola' and 'Cello' columns as an (n_rows, 3) array.
x_test_v1 = test_data['Violin 1'].values[:, None]
y_test_v1 = test_data.loc[:, ['Violin 2', 'Viola', 'Cello']].values

x_test_v1.shape, y_test_v1.shape

# Standardise the Training Set

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the training input: fit_transform learns the column mean/std on
# x_train_v1 and returns the scaled copy in one call. The fitted scaler is kept
# so the same statistics can be applied again later.
scaler_x_train_v1 = StandardScaler()
x_train_v1_std = scaler_x_train_v1.fit_transform(x_train_v1)

# Same for the three target columns; each column gets its own mean/std.
scaler_y_train_v1 = StandardScaler()
y_train_v1_std = scaler_y_train_v1.fit_transform(y_train_v1)

x_train_v1_std.shape, y_train_v1_std.shape

# Standardise the Validation Set

In [None]:
# Standardise the validation split with the statistics learned on the TRAINING
# split. Fitting separate scalers on the validation data would centre and scale it
# with its own mean/std, so the model would be validated on inputs and targets
# that are on a different scale from the ones it was trained on.
#
# The validation scaler names are kept as aliases of the training scalers so any
# cell that refers to them keeps working.
scaler_x_validation_v1 = scaler_x_train_v1
x_validation_v1_std = scaler_x_validation_v1.transform(x_validation_v1)

scaler_y_validation_v1 = scaler_y_train_v1
y_validation_v1_std = scaler_y_validation_v1.transform(y_validation_v1)

x_validation_v1_std.shape, y_validation_v1_std.shape

# Standardise the Testing Set

In [None]:
# Standardise the testing split with the statistics learned on the TRAINING split.
# Fitting separate scalers on the test data would centre and scale it with its own
# mean/std, so the reported test loss would be measured on a different scale from
# the one the model was trained on.
#
# The test scaler names are kept as aliases of the training scalers so any cell
# that refers to them keeps working.
scaler_x_test_v1 = scaler_x_train_v1
x_test_v1_std = scaler_x_test_v1.transform(x_test_v1)

scaler_y_test_v1 = scaler_y_train_v1
y_test_v1_std = scaler_y_test_v1.transform(y_test_v1)

x_test_v1_std.shape, y_test_v1_std.shape

# Model v7.0

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Bidirectional, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, LearningRateScheduler
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam

# Seed Python, NumPy and the Keras backend so weight initialisation, dropout masks
# and batch shuffling start from the same state on every Restart & Run All.
tf.keras.utils.set_random_seed(42)


# Define the model
def build_sequence_prediction_model(input_shape, l1_reg=1e-4, l2_reg=1e-4):
    """Build and compile the three-headed recurrent regressor.

    Architecture, in order:
      * Bidirectional LSTM(128) returning the full sequence, with input dropout
        0.3, recurrent dropout 0.2 and an L1/L2 penalty on its kernel weights.
      * BatchNormalization.
      * LSTM(128) returning only its final output, same dropout settings.
      * BatchNormalization.
      * Three independent heads, each Dense(1) followed by LeakyReLU. The
        LeakyReLU layers carry the output names 'violin2_output',
        'viola_output' and 'cello_output'.

    Args:
        input_shape: Per-sample input shape passed to ``Input`` (no batch axis).
        l1_reg: L1 factor for the first LSTM's kernel regulariser.
        l2_reg: L2 factor for the first LSTM's kernel regulariser.

    Returns:
        A ``Model`` compiled with Adam (learning rate 3e-4) and 'mse' loss.
    """
    main_input = Input(shape=input_shape, name='main_input')

    # LSTM layers
    x = Bidirectional(
        LSTM(
            128,
            return_sequences=True,
            dropout=0.3,
            recurrent_dropout=0.2,
            kernel_regularizer=tf.keras.regularizers.l1_l2(l1=l1_reg, l2=l2_reg),
        )
    )(main_input)
    x = BatchNormalization()(x)
    x = LSTM(128, return_sequences=False, dropout=0.3, recurrent_dropout=0.2)(x)
    x = BatchNormalization()(x)

    # Dense layers: one scalar head per instrument. The activation layer, not the
    # Dense layer, owns the '*_output' name that fit/evaluate target dicts use.
    violin2_output = Dense(1, name='violin2_dense')(x)
    violin2_output = LeakyReLU(name='violin2_output')(violin2_output)

    viola_output = Dense(1, name='viola_dense')(x)
    viola_output = LeakyReLU(name='viola_output')(viola_output)

    cello_output = Dense(1, name='cello_dense')(x)
    cello_output = LeakyReLU(name='cello_output')(cello_output)

    # Model compilation
    model = Model(inputs=main_input, outputs=[violin2_output, viola_output, cello_output])

    optimizer = Adam(learning_rate=3e-4)
    model.compile(optimizer=optimizer, loss='mse')

    return model


# Model instantiation
# The builder has its own name so that binding the model to
# `sequence_prediction_model` no longer overwrites the function that created it.
# NOTE(review): the input arrays are produced with reshape(-1, 1) and are therefore
# 2-D (n_rows, 1), while the model declares a (6000, 1) per-sample input. Confirm
# whether the series should be windowed into (n_windows, 6000, 1) before fit/predict.
input_shape = (6000, 1)
sequence_prediction_model = build_sequence_prediction_model(input_shape)
sequence_prediction_model.summary()

# Learning rate schedule
def lr_schedule(epoch):
    """Return the learning rate for ``epoch``: 3e-4 while ``epoch < 5``, otherwise 1e-4."""
    return 3e-4 if epoch < 5 else 1e-4

# Callbacks
# Stop once val_loss has not improved for 10 epochs and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement,
# never going below 1e-6.
# NOTE(review): this callback is constructed but the fit call in this notebook only
# passes lr_callback and early_stopping, so it currently has no effect.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=1e-6,
)

# Epoch-indexed schedule driven by lr_schedule; verbose=1 logs the chosen rate.
lr_callback = LearningRateScheduler(lr_schedule, verbose=1)


# Training the model
# Targets are supplied as dicts keyed by the model's output-layer names; column
# 0 / 1 / 2 of the standardised y arrays is Violin 2 / Viola / Cello respectively.
train_targets = {
    'violin2_output': y_train_v1_std[:, 0],
    'viola_output': y_train_v1_std[:, 1],
    'cello_output': y_train_v1_std[:, 2],
}
validation_targets = {
    'violin2_output': y_validation_v1_std[:, 0],
    'viola_output': y_validation_v1_std[:, 1],
    'cello_output': y_validation_v1_std[:, 2],
}

# NOTE(review): epochs equals the EarlyStopping patience (10), so the early-stop
# condition cannot fire before the final epoch — confirm this is intended.
history = sequence_prediction_model.fit(
    x=x_train_v1_std,
    y=train_targets,
    validation_data=(x_validation_v1_std, validation_targets),
    epochs=10,
    batch_size=32,
    callbacks=[lr_callback, early_stopping],
)

# Model Evaluation

In [None]:
# Score the trained model on the testing split, with targets keyed by output-layer name.
test_targets = {
    'violin2_output': y_test_v1_std[:, 0],
    'viola_output': y_test_v1_std[:, 1],
    'cello_output': y_test_v1_std[:, 2],
}

# The unpacking requires evaluate() to return exactly four values.
# NOTE(review): presumably the total loss followed by one loss per output in model
# output order — confirm against the installed Keras version.
loss, violin2_mse, viola_mse, cello_mse = sequence_prediction_model.evaluate(
    x_test_v1_std, test_targets, verbose=1
)

print(f"Loss: {loss}, Violin2 MSE: {violin2_mse}, Viola MSE: {viola_mse}, Cello MSE: {cello_mse}")

# Standardise Prediction Data

In [None]:
# Load the single recording used for the prediction plots. This file is also one
# of the files in the testing split.
pred_data = pd.read_csv('/content/dp_rep12.csv')

# 'Violin 1' is the model input; 'Violin 2', 'Viola' and 'Cello' are the ground
# truth the predictions are compared against.
x_pred_v1 = pred_data['Violin 1'].values.reshape(-1, 1)
y_pred_v1 = pred_data[['Violin 2', 'Viola', 'Cello']].values

# Standardise with the statistics learned on the TRAINING split. Fitting new
# scalers on this one file would feed the model inputs on a different scale from
# the one it was trained on, and would scale the ground truth differently from
# the model's output space, making the true-vs-predicted comparison misleading.
#
# The prediction scaler names are kept as aliases of the training scalers so any
# cell that refers to them keeps working.
scaler_x_pred_v1 = scaler_x_train_v1
x_pred_v1_std = scaler_x_pred_v1.transform(x_pred_v1)

scaler_y_pred_v1 = scaler_y_train_v1
y_pred_v1_std = scaler_y_pred_v1.transform(y_pred_v1)

x_pred_v1_std.shape, y_pred_v1_std.shape

# Prediction

In [None]:
# Run the model on the standardised prediction input. The result is unpacked into
# exactly three per-instrument arrays (Violin 2, Viola, Cello, matching the order
# of the model's outputs) and is also kept whole under `predictions`.
violin2_prediction, viola_prediction, cello_prediction = predictions = (
    sequence_prediction_model.predict(x_pred_v1_std)
)

# Plot Prediction against True Value

In [None]:
import matplotlib.pyplot as plt

# One figure per predicted instrument: the standardised ground truth (solid blue)
# against the model output (dashed red). The position of each entry in the list is
# the column of y_pred_v1_std that holds that instrument's true values.
for column, (instrument, prediction) in enumerate([
    ('Violin 2', violin2_prediction),
    ('Viola', viola_prediction),
    ('Cello', cello_prediction),
]):
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(y_pred_v1_std[:, column], label='True Values', color='blue')
    ax.plot(prediction, label='Predictions', color='red', linestyle='--')
    ax.set_title(f'{instrument}: True vs Predicted')
    # Both series are plotted against their row position, in standardised units.
    ax.set_xlabel('Sample index')
    ax.set_ylabel('Standardised value')
    ax.legend()
    plt.show()