In [3]:
import json
import os
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

# Define data folder
data_folder = '/content/Data'
output_folder = '/content/Output'

if not os.path.exists(data_folder):
    print("Data folder not found.")
os.makedirs(output_folder, exist_ok=True)
session_records = []

for filename in os.listdir(data_folder):
    if filename.endswith(".json"):
        file_path = os.path.join(data_folder, filename)
        with open(file_path, 'r') as file:
            player_data = json.load(file)
            for session in player_data['sessions']:
                session_record = session.copy()

                target_labels = {
                    "avgCompetence": round(session.get('avgCompetence', 0)),
                    "avgFlow": round(session.get('avgFlow', 0)),
                    "avgTension": round(session.get('avgTension', 0)),
                    "avgChallenge": round(session.get('avgChallenge', 0)),
                    "avgNegativeAffect": round(session.get('avgNegativeAffect', 0)),
                    "avgPositiveAffect": round(session.get('avgPositiveAffect', 0))
                }
                session_record.update(target_labels)

                normalization_factor = session.get('timesRestarted', 0) + 1
                for key, value in session.items():
                    if key not in target_labels and key not in ['timesRestarted', 'levelComplete']:
                        session_record[key] = value / normalization_factor

                walk_speed = 5
                sprint_speed = 10
                time_sprinting = session.get('timeSprinting', 0)
                time_walking = session.get('timeWalking', 0)
                total_distance = (time_sprinting * sprint_speed) + (time_walking * walk_speed)
                session_record['averageSpeed'] = total_distance / session.get('levelTime', 1)

                session_records.append(session_record)

df = pd.DataFrame(session_records)
target_columns = ['avgCompetence', 'avgFlow', 'avgTension', 'avgChallenge', 'avgNegativeAffect', 'avgPositiveAffect']
X = df.drop(columns=target_columns)
y = df[target_columns]

# Standardization
features_to_scale = ['levelTime', 'timesRestarted', 'timesDied', 'timeSprinting', 'timeWalking',
                'damageTakenNormal', 'damageTakenBomb', 'damageTakenBullet',
                'enemyJumpsMissed', 'enemyBulletsMissed', 'enemyBombsMissed',
                'bombsIgnited', 'timesPaused', 'detectionTime', 'collisions', 'averageSpeed']
scaler = StandardScaler()
X[features_to_scale] = scaler.fit_transform(X[features_to_scale])

print("Means:", scaler.mean_)
print("Scales:", scaler.scale_)

Means: [3.97287895e+01 1.49180328e+00 5.12568306e-01 2.72957648e+01
 1.24330243e+01 1.35027322e+00 4.03825137e-01 2.20464481e+00
 5.81748634e+00 1.53251366e+00 2.96010929e+00 3.39125683e+00
 3.33333333e-02 3.32922187e+01 6.92540984e+00 8.48746378e+00]
Scales: [14.8812342   1.27553204  0.37670868  9.99241378  5.91071603  0.75941865
  0.56369676  1.03115022  3.77709088  1.70013775  2.20201778  2.0637089
  0.14074354 25.3835224   2.95719093  0.4086001 ]


In [14]:
# Final Model Training (80/20 Split)
X_train_final, X_test_final, y_train_final, y_test_final = train_test_split(X, y, test_size=0.2)

final_model = Sequential([
    Dense(128, activation=LeakyReLU(), input_shape=(X_train_final.shape[1],), kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(64, activation=LeakyReLU(), kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(32, activation=LeakyReLU(), kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dense(y_train_final.shape[1])
])

final_model.compile(optimizer='adam', loss=tf.keras.losses.Huber(delta=1.0), metrics=['mae'])
early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

history_final = final_model.fit(X_train_final, y_train_final, epochs=150, validation_data=(X_test_final, y_test_final), callbacks=[early_stopping])
#history_final = final_model.fit(X_train_final, y_train_final, epochs=150, validation_data=(X_test_final, y_test_final))

y_final_pred = np.clip(np.round(final_model.predict(X_test_final)), 1, 5)

# Calculate accuracy and save results
rmses = []  # To store RMSE for each target
with open(os.path.join(output_folder, 'final_model_results.txt'), 'w') as rmse_results_file:
    for i, column in enumerate(target_columns):
        actual_values = y_test_final[column].values
        predicted_values = y_final_pred[:, i]

        # Calculate Root Mean Squared Error
        mse = np.mean((actual_values - predicted_values) ** 2)
        rmse = np.sqrt(mse)
        rmses.append(rmse)

        # Calculate Percentage Accuracy based on RMSE
        max_error = 4  # Maximum error for range 1 to 5
        percentage_accuracy = (1 - (rmse / max_error)) * 100

        # Write RMSE and Percentage Accuracy result for each target to file
        rmse_results_file.write(f'Root Mean Squared Error for {column}: {rmse:.2f}\n')
        rmse_results_file.write(f'Percentage Accuracy for {column}: {percentage_accuracy:.2f}%\n')
        print(f'Root Mean Squared Error for {column}: {rmse:.2f}')
        print(f'Percentage Accuracy for {column}: {percentage_accuracy:.2f}%')

    # Calculate and write overall RMSE and Percentage Accuracy
    overall_rmse = np.mean(rmses)
    overall_percentage_accuracy = (1 - (overall_rmse / max_error)) * 100
    rmse_results_file.write(f'Overall Root Mean Squared Error: {overall_rmse:.2f}\n')
    rmse_results_file.write(f'Overall Percentage Accuracy: {overall_percentage_accuracy:.2f}%\n')
    print(f'Overall Root Mean Squared Error: {overall_rmse:.2f}')
    print(f'Overall Percentage Accuracy: {overall_percentage_accuracy:.2f}%\n')

# Final Model Visualization
for i, column in enumerate(target_columns):
    plt.figure(figsize=(8, 6))
    sns.scatterplot(x=y_test_final[column], y=y_final_pred[:, i], color='blue')
    plt.plot([1, 5], [1, 5], 'r--')
    plt.title(f'Final Model: Actual vs Predicted for {column}')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.xlim(1, 5)
    plt.ylim(1, 5)
    plt.grid()
    plt.savefig(os.path.join(output_folder, f'final_model_actual_vs_predicted_{column}.png'))
    plt.close()

# Final Loss and MAE Plots
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history_final.history['loss'], label='Training Loss')
plt.plot(history_final.history['val_loss'], label='Validation Loss')
plt.title('Final Model - Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(history_final.history['mae'], label='Training MAE')
plt.plot(history_final.history['val_mae'], label='Validation MAE')
plt.title('Final Model - MAE')
plt.ylabel('Mean Absolute Error')
plt.xlabel('Epoch')
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(output_folder, 'final_model_loss_mae.png'))
plt.close()

Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4s/step - loss: 2.7660 - mae: 3.0775 - val_loss: 2.7097 - val_mae: 3.0454
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - loss: 2.7747 - mae: 3.0957 - val_loss: 2.7110 - val_mae: 3.0477
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 2.7572 - mae: 3.0827 - val_loss: 2.7153 - val_mae: 3.0532
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 2.7440 - mae: 3.0642 - val_loss: 2.7166 - val_mae: 3.0555
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - loss: 2.7399 - mae: 3.0635 - val_loss: 2.7248 - val_mae: 3.0646
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - loss: 2.7245 - mae: 3.0492 - val_loss: 2.7316 - val_mae: 3.0721
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - loss: 2.7235 - mae: 3.0462 -

In [15]:
output_folder_models = "Models"
os.makedirs(output_folder, exist_ok=True)

# Save model in TensorFlow SavedModel format
saved_model_path = os.path.join(output_folder_models, "final_model_saved")
final_model.export(saved_model_path)
print(f"Model saved in SavedModel format at '{saved_model_path}'")

# Save model in Keras HDF5 format
h5_model_path = os.path.join(output_folder_models, "final_model.h5")
final_model.save(h5_model_path)
print(f"Model saved as Keras .h5 format at '{h5_model_path}'")

# Save model in Keras .keras format
keras_model_path = os.path.join(output_folder_models, "final_model.keras")
final_model.save(keras_model_path)
print(f"Model saved as Keras .keras format at '{keras_model_path}'")

# Save Final Model to TensorFlow Lite format
converter = tf.lite.TFLiteConverter.from_keras_model(final_model)
tflite_model = converter.convert()
tflite_model_path = os.path.join(output_folder_models, 'final_model.tflite')
with open(tflite_model_path, 'wb') as f:
    f.write(tflite_model)
print(f"Model converted to TensorFlow Lite format and saved at '{tflite_model_path}'")

Saved artifact at 'Models/final_model_saved'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 17), dtype=tf.float32, name='keras_tensor_70')
Output Type:
  TensorSpec(shape=(None, 6), dtype=tf.float32, name=None)
Captures:
  138511616558704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616561344: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616562752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616563280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616557824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616558352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616566096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616565920: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616566800: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616568912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616565



Model saved in SavedModel format at 'Models/final_model_saved'
Model saved as Keras .h5 format at 'Models/final_model.h5'
Model saved as Keras .keras format at 'Models/final_model.keras'
Saved artifact at '/tmp/tmp_r4ejq11'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 17), dtype=tf.float32, name='keras_tensor_70')
Output Type:
  TensorSpec(shape=(None, 6), dtype=tf.float32, name=None)
Captures:
  138511616558704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616561344: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616562752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616563280: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616557824: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616558352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616566096: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138511616565920: TensorSpec(shape=(), dtype

In [17]:
!pip install tf2onnx
import tf2onnx

onnx_model_path = os.path.join(output_folder_models, "final_model.onnx")
# Convert from SavedModel to ONNX
!python -m tf2onnx.convert --saved-model ./Models/final_model_saved --output ./Models/final_model.onnx --opset 13
print(f"Model converted to ONNX format and saved at '{onnx_model_path}'")

!pip install onnxruntime
import onnx
import onnxruntime as ort

# Load and check the ONNX model
onnx_model = onnx.load(onnx_model_path)
onnx.checker.check_model(onnx_model)

# Run inference with ONNX Runtime
ort_session = ort.InferenceSession(onnx_model_path)
print("ONNX model loaded and verified successfully.")

I0000 00:00:1730826072.524431   19756 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1730826072.577658   19756 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1730826072.577964   19756 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1730826072.578677   19756 cuda_executor.cc:1015] successful NUMA node read from SysFS ha

In [20]:
# K-Fold Cross Validation
kfold = KFold(n_splits=5, shuffle=True)
overall_accuracies = {column: [] for column in target_columns}
model_results = []

for fold, (train_index, test_index) in enumerate(kfold.split(X)):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Model Setup
    model = Sequential([
        Dense(128, activation=LeakyReLU(), input_shape=(X_train_final.shape[1],), kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.4),
        Dense(64, activation=LeakyReLU(), kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dropout(0.4),
        Dense(32, activation=LeakyReLU(), kernel_regularizer=l2(0.001)),
        BatchNormalization(),
        Dense(y_train_final.shape[1])
    ])

    model.compile(optimizer='adam', loss=tf.keras.losses.Huber(delta=1.0), metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)

    # Train Model
    history = model.fit(X_train, y_train, epochs=150, validation_data=(X_test, y_test), callbacks=[early_stopping])
    #history = model.fit(X_train, y_train, epochs=150, validation_data=(X_test, y_test))

    # Predictions
    y_pred = np.clip(np.round(model.predict(X_test)), 1, 5)

    # Calculate RMSE and Percentage Accuracy
    rmses = []  # To store RMSE for each target
    percentage_accuracies = []  # To store percentage accuracy for each target
    max_error = 4  # Maximum error for range 1 to 5
    print(f'Fold {fold + 1}:')
    for i, column in enumerate(target_columns):
        actual_values = y_test[column].values
        predicted_values = y_pred[:, i]

        # Calculate RMSE
        mse = np.mean((actual_values - predicted_values) ** 2)
        rmse = np.sqrt(mse)
        rmses.append(rmse)

        # Calculate Percentage Accuracy
        percentage_accuracy = (1 - (rmse / max_error)) * 100
        percentage_accuracies.append(percentage_accuracy)

        # Print Results for Each Target Variable
        print(f'Root Mean Squared Error for {column}: {rmse:.2f}')
        print(f'Percentage Accuracy for {column}: {percentage_accuracy:.2f}%')

    # Overall RMSE and Percentage Accuracy
    overall_rmse = np.mean(rmses)
    overall_percentage_accuracy = (1 - (overall_rmse / max_error)) * 100
    model_results.append({"fold": fold + 1, "rmses": rmses, "percentage_accuracies": percentage_accuracies})

    # Print Overall Results
    print(f'Overall Root Mean Squared Error: {overall_rmse:.2f}')
    print(f'Overall Percentage Accuracy: {overall_percentage_accuracy:.2f}%\n')

    # Visualization for Actual vs Predicted
    for i, column in enumerate(target_columns):
        plt.figure(figsize=(8, 6))
        sns.scatterplot(x=y_test[column], y=y_pred[:, i], color='blue')
        plt.plot([1, 5], [1, 5], 'r--')  # Perfect Prediction Line
        plt.title(f'Actual vs Predicted for {column} - Fold {fold + 1}')
        plt.xlabel('Actual Values')
        plt.ylabel('Predicted Values')
        plt.xlim(1, 5)
        plt.ylim(1, 5)
        plt.grid()
        plt.savefig(os.path.join(output_folder, f'fold_{fold + 1}_actual_vs_predicted_{column}.png'))
        plt.close()

    # Loss and MAE Plots
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Fold {fold + 1} - Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    plt.subplot(1, 2, 2)
    plt.plot(history.history['mae'], label='Training MAE')
    plt.plot(history.history['val_mae'], label='Validation MAE')
    plt.title(f'Fold {fold + 1} - MAE')
    plt.ylabel('Mean Absolute Error')
    plt.xlabel('Epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig(os.path.join(output_folder, f'fold_{fold + 1}_loss_mae_fold.png'))
    plt.close()

# Save K-Fold Results with formatted output
with open(os.path.join(output_folder, 'kfold_model_results.txt'), 'w') as results_file:
    overall_rmses = np.zeros(len(target_columns))
    overall_percentage_accuracies = np.zeros(len(target_columns))

    for result in model_results:
        results_file.write(f"Fold {result['fold']}:\n")
        for i, column in enumerate(target_columns):
            rms = result["rmses"][i]
            percentage_acc = result["percentage_accuracies"][i]
            results_file.write(f'Root Mean Squared Error for {column}: {rms:.2f}\n')
            results_file.write(f'Percentage Accuracy for {column}: {percentage_acc:.2f}%\n')
            overall_rmses[i] += rms
            overall_percentage_accuracies[i] += percentage_acc

        overall_rmse_fold = np.mean(result["rmses"])
        overall_percentage_accuracy_fold = (1 - (overall_rmse_fold / max_error)) * 100
        results_file.write(f'Overall Root Mean Squared Error: {overall_rmse_fold:.2f}\n')
        results_file.write(f'Overall Percentage Accuracy: {overall_percentage_accuracy_fold:.2f}%\n\n')

    # Calculate overall metrics across all folds
    num_folds = len(model_results)
    results_file.write("Across all folds:\n")
    print("Across all folds:")
    for i, column in enumerate(target_columns):
        avg_rmse = overall_rmses[i] / num_folds
        results_file.write(f'Root Mean Squared Error for {column}: {avg_rmse:.2f}\n')
        print(f'Root Mean Squared Error for {column}: {avg_rmse:.2f}')
        avg_accuracy = overall_percentage_accuracies[i] / num_folds
        results_file.write(f'Percentage Accuracy for {column}: {avg_accuracy:.2f}%\n')
        print(f'Percentage Accuracy for {column}: {avg_accuracy:.2f}%')

    # Calculate overall RMSE and Percentage Accuracy for all folds
    overall_rmse_final = np.mean(overall_rmses / num_folds)
    overall_percentage_accuracy_final = (1 - (overall_rmse_final / max_error)) * 100
    results_file.write(f'Overall Root Mean Squared Error: {overall_rmse_final:.2f}\n')
    results_file.write(f'Overall Percentage Accuracy: {overall_percentage_accuracy_final:.2f}%\n')
    print(f'Overall Root Mean Squared Error: {overall_rmse_final:.2f}')
    print(f'Overall Percentage Accuracy: {overall_percentage_accuracy_final:.2f}%')

Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3s/step - loss: 2.7331 - mae: 3.0474 - val_loss: 2.7022 - val_mae: 3.0405
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - loss: 2.7648 - mae: 3.0915 - val_loss: 2.6912 - val_mae: 3.0302
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 2.7405 - mae: 3.0611 - val_loss: 2.6868 - val_mae: 3.0263
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 2.6946 - mae: 3.0167 - val_loss: 2.6808 - val_mae: 3.0206
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 2.7299 - mae: 3.0495 - val_loss: 2.6732 - val_mae: 3.0133
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 2.6859 - mae: 3.0067 - val_loss: 2.6603 - val_mae: 3.0007
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 2.7175 - mae: 3.0467 -

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2s/step - loss: 2.7819 - mae: 3.1044 - val_loss: 2.6178 - val_mae: 2.9350
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 2.7639 - mae: 3.0857 - val_loss: 2.6061 - val_mae: 2.9220
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 2.7620 - mae: 3.0850 - val_loss: 2.5991 - val_mae: 2.9149
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 2.7538 - mae: 3.0808 - val_loss: 2.5887 - val_mae: 2.9046
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 2.7495 - mae: 3.0808 - val_loss: 2.5819 - val_mae: 2.9005
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 2.7429 - mae: 3.0743 - val_loss: 2.5743 - val_mae: 2.8953
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 2.6964 - mae: 3.0291 -

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4s/step - loss: 2.8038 - mae: 3.1248 - val_loss: 2.8038 - val_mae: 3.1381
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 2.8210 - mae: 3.1433 - val_loss: 2.7833 - val_mae: 3.1194
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 2.7695 - mae: 3.0881 - val_loss: 2.7669 - val_mae: 3.1047
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 2.7278 - mae: 3.0466 - val_loss: 2.7504 - val_mae: 3.0893
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 2.7219 - mae: 3.0468 - val_loss: 2.7369 - val_mae: 3.0765
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step - loss: 2.7649 - mae: 3.0836 - val_loss: 2.7197 - val_mae: 3.0601
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 2.6890 - mae: 3.0170 -

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3s/step - loss: 2.7997 - mae: 3.1126 - val_loss: 2.8261 - val_mae: 3.1689
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 50ms/step - loss: 2.7771 - mae: 3.0998 - val_loss: 2.8060 - val_mae: 3.1493
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 2.7668 - mae: 3.0931 - val_loss: 2.7893 - val_mae: 3.1329
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step - loss: 2.6858 - mae: 3.0096 - val_loss: 2.7729 - val_mae: 3.1168
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - loss: 2.7139 - mae: 3.0343 - val_loss: 2.7591 - val_mae: 3.1032
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step - loss: 2.7350 - mae: 3.0647 - val_loss: 2.7419 - val_mae: 3.0862
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - loss: 2.6902 - mae: 3.0196 -

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2s/step - loss: 2.8435 - mae: 3.1592 - val_loss: 2.5348 - val_mae: 2.8430
Epoch 2/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 2.8051 - mae: 3.1174 - val_loss: 2.5315 - val_mae: 2.8429
Epoch 3/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 2.7570 - mae: 3.0819 - val_loss: 2.5259 - val_mae: 2.8413
Epoch 4/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 2.7144 - mae: 3.0307 - val_loss: 2.5168 - val_mae: 2.8362
Epoch 5/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 2.7034 - mae: 3.0254 - val_loss: 2.5103 - val_mae: 2.8324
Epoch 6/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 2.6979 - mae: 3.0211 - val_loss: 2.5080 - val_mae: 2.8350
Epoch 7/150
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 2.6955 - mae: 3.0203 -

In [46]:
import shap

X_train_final_tensor = np.asarray(X_train_final).astype('float32')
X_test_final_tensor = np.asarray(X_test_final).astype('float32')

# Initialize SHAP Explainer
explainer = shap.KernelExplainer(final_model.predict, X_train_final_tensor)

# Calculate SHAP values for the test set
shap_values = explainer.shap_values(X_test_final_tensor)

target_columns = ['avgCompetence', 'avgFlow', 'avgTension', 'avgChallenge', 'avgNegativeAffect', 'avgPositiveAffect']
num_targets = len(target_columns)  # Get the number of target variables

# Check if shap_values is a list or an array and get the shape accordingly
if isinstance(shap_values, list):
    shap_values_shape = [shap_values[i].shape for i in range(num_targets)]
else:
    shap_values_shape = shap_values.shape  # Expecting shape (num_samples, num_features, num_targets)

# Ensure the target index does not exceed the available targets
for target_index in range(num_targets):
    # Select shap values for the target variable
    if isinstance(shap_values, list):
        shap_values_target = shap_values[target_index]
    else:
        shap_values_target = shap_values[:, :, target_index]  # Select shap values for the target if it's a 3D array

    # Plot summary of SHAP values for the selected target variable
    shap.summary_plot(shap_values_target,
                      X_test_final.values,
                      feature_names=X_test_final.columns.tolist(),
                      show=False)

    # Save the plot with the corresponding target variable name
    plt.savefig(os.path.join(output_folder, f'shap_feature_importance_{target_columns[target_index]}.png'))
    plt.close()

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 


  0%|          | 0/13 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m3123/3123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3123/3123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m3123/3123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m3123/3123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m3123/3123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m3123/3123[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m3123/3123[0m [32m━━━━━━━━━━━

In [47]:
!zip -r output.zip Output/
!zip -r models.zip Models/

updating: Output/ (stored 0%)
updating: Output/final_model_actual_vs_predicted_avgTension.png (deflated 11%)
updating: Output/fold_3_actual_vs_predicted_avgPositiveAffect.png (deflated 11%)
updating: Output/fold_5_loss_mae_fold.png (deflated 5%)
updating: Output/fold_4_actual_vs_predicted_avgPositiveAffect.png (deflated 11%)
updating: Output/fold_4_actual_vs_predicted_avgCompetence.png (deflated 11%)
updating: Output/fold_1_actual_vs_predicted_avgPositiveAffect.png (deflated 11%)
updating: Output/final_model_results.txt (deflated 71%)
updating: Output/fold_1_actual_vs_predicted_avgChallenge.png (deflated 11%)
updating: Output/fold_1_actual_vs_predicted_avgTension.png (deflated 12%)
updating: Output/fold_1_actual_vs_predicted_avgFlow.png (deflated 11%)
updating: Output/fold_3_actual_vs_predicted_avgNegativeAffect.png (deflated 11%)
updating: Output/fold_3_actual_vs_predicted_avgChallenge.png (deflated 11%)
updating: Output/final_model_actual_vs_predicted_avgCompetence.png (deflated 11%)