# Shuttle

In [1]:
import pandas as pd
from skeLCS import eLCS
from sklearn.metrics import accuracy_score, balanced_accuracy_score, classification_report
from sklearn.metrics import fbeta_score, roc_auc_score, average_precision_score, matthews_corrcoef
import numpy as np
from collections import defaultdict

# Load the training dataset
train_data = pd.read_csv("shuttle_train.csv")

# Load the test dataset
test_data = pd.read_csv("shuttle_test.csv")

# Parameters
num_runs = 5  # Number of iterations

# Initialize accumulators
accuracy_scores = []
balanced_accuracy_scores = []
f2_scores = []
#auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Initialize metrics accumulators for classification report
class_metrics = defaultdict(lambda: defaultdict(float))  # Stores precision/recall/F1 for each class

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    
    # Shuffle data
    train_data = train_data.sample(frac=1).reset_index(drop=True)
    test_data = test_data.sample(frac=1).reset_index(drop=True)
    
    # Replace infinities and fill NaN
    train_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    train_data.fillna(train_data.mean(), inplace=True)
    test_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    test_data.fillna(test_data.mean(), inplace=True)
    
    # Split features and labels
    train_y = train_data['label'].values
    train_X = train_data.drop(['label'], axis=1).values
    test_y = test_data['label'].values
    test_X = test_data.drop(['label'], axis=1).values
    
    # Train the model
    model = eLCS()
    trainedModel = model.fit(train_X,train_y)
    
    # Make predictions
    test_predictions = model.predict(test_X)
    test_probabilities = model.predict_proba(test_X)
    
    # Calculate accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)
    
    # Calculate balanced accuracy
    balanced_accuracy = balanced_accuracy_score(test_y, test_predictions)
    balanced_accuracy_scores.append(balanced_accuracy)
    
    # Calculate F2-Score
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)
    
    # Calculate AUC-ROC (One-vs-Rest)
    #auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    #auc_roc_scores.append(auc_roc)
    
    # Calculate AUC-PR (One-vs-Rest)
    auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    auc_pr_scores.append(auc_pr)
    
    # Calculate MCC
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)
    
    # Classification Report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)
    
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):  # Skip non-class metrics
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages
average_accuracy = np.mean(accuracy_scores)
balanced_average_accuracy = np.mean(balanced_accuracy_scores)
average_f2_score = np.mean(f2_scores)
#average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the classification report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average Balanced Test Accuracy: {balanced_average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
#print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")


Run 1/5
Run 2/5
Run 3/5
Run 4/5
Run 5/5
Average Test Accuracy: 0.8451
Average Balanced Test Accuracy: 0.2866
Average F2-Score (Weighted): 0.8163
Average AUC-PR: 0.9602
Average Matthews Correlation Coefficient: 0.5102

Average Classification Report:
Class 1:
  precision: 0.8363
  recall: 1.0000
  f1-score: 0.9108
  support: 9117.0000
Class 2:
  precision: 0.0000
  recall: 0.0000
  f1-score: 0.0000
  support: 10.0000
Class 3:
  precision: 0.0000
  recall: 0.0000
  f1-score: 0.0000
  support: 34.0000
Class 4:
  precision: 0.2000
  recall: 0.0254
  f1-score: 0.0450
  support: 1781.0000
Class 5:
  precision: 0.9829
  recall: 0.9810
  f1-score: 0.9817
  support: 653.0000
Class 6:
  precision: 0.0000
  recall: 0.0000
  f1-score: 0.0000
  support: 2.0000
Class 7:
  precision: 0.0000
  recall: 0.0000
  f1-score: 0.0000
  support: 3.0000
Class macro avg:
  precision: 0.2885
  recall: 0.2866
  f1-score: 0.2768
  support: 11600.0000
Class weighted avg:
  precision: 0.7433
  recall: 0.8451
  f1-sco

# Covertype

In [2]:
import pandas as pd
from skeLCS import eLCS
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import fbeta_score, roc_auc_score, average_precision_score, matthews_corrcoef
import numpy as np
from collections import defaultdict

# Load the training dataset
train_data = pd.read_csv("covtype_train.csv")

# Load the test dataset
test_data = pd.read_csv("covtype_test.csv")

# Parameters
num_runs = 5  # Number of iterations

# Initialize accumulators
accuracy_scores = []
balanced_accuracy_scores = []
f2_scores = []
#auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Initialize metrics accumulators for classification report
class_metrics = defaultdict(lambda: defaultdict(float))  # Stores precision/recall/F1 for each class

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    
    # Shuffle data
    train_data = train_data.sample(frac=1).reset_index(drop=True)
    test_data = test_data.sample(frac=1).reset_index(drop=True)
    
    # Replace infinities and fill NaN
    train_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    train_data.fillna(train_data.mean(), inplace=True)
    test_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    test_data.fillna(test_data.mean(), inplace=True)
    
    # Split features and labels
    train_y = train_data['label'].values
    train_X = train_data.drop(['label'], axis=1).values
    test_y = test_data['label'].values
    test_X = test_data.drop(['label'], axis=1).values
    
    # Train the model
    model = eLCS()
    trainedModel = model.fit(train_X,train_y)
    
    # Make predictions
    test_predictions = model.predict(test_X)
    test_probabilities = model.predict_proba(test_X)
    
    # Calculate accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)
    
    # Calculate balanced accuracy
    balanced_accuracy = balanced_accuracy_score(test_y, test_predictions)
    balanced_accuracy_scores.append(balanced_accuracy)
    
    # Calculate F2-Score
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)
    
    # Calculate AUC-ROC (One-vs-Rest)
    #auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    #auc_roc_scores.append(auc_roc)
    
    # Calculate AUC-PR (One-vs-Rest)
    auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    auc_pr_scores.append(auc_pr)
    
    # Calculate MCC
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)
    
    # Classification Report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)
    
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):  # Skip non-class metrics
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages
average_accuracy = np.mean(accuracy_scores)
balanced_average_accuracy = np.mean(balanced_accuracy_scores)
average_f2_score = np.mean(f2_scores)
#average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the classification report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average Balanced Test Accuracy: {balanced_average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
#print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")


Run 1/5
Run 2/5
Run 3/5
Run 4/5
Run 5/5
Average Test Accuracy: 0.6833
Average Balanced Test Accuracy: 0.3532
Average F2-Score (Weighted): 0.6722
Average AUC-PR: 0.6916
Average Matthews Correlation Coefficient: 0.4706

Average Classification Report:
Class 1:
  precision: 0.6909
  recall: 0.6171
  f1-score: 0.6506
  support: 42368.0000
Class 2:
  precision: 0.6904
  recall: 0.8279
  f1-score: 0.7525
  support: 56661.0000
Class 3:
  precision: 0.6231
  recall: 0.7174
  f1-score: 0.6615
  support: 7151.0000
Class 4:
  precision: 0.0571
  recall: 0.0058
  f1-score: 0.0106
  support: 549.0000
Class 5:
  precision: 0.0000
  recall: 0.0000
  f1-score: 0.0000
  support: 1899.0000
Class 6:
  precision: 0.4011
  recall: 0.0530
  f1-score: 0.0784
  support: 3473.0000
Class 7:
  precision: 0.7372
  recall: 0.2513
  f1-score: 0.3622
  support: 4102.0000
Class macro avg:
  precision: 0.4571
  recall: 0.3532
  f1-score: 0.3594
  support: 116203.0000
Class weighted avg:
  precision: 0.6652
  recall: 0.

# KDD

In [2]:
import pandas as pd
from skeLCS import eLCS
from sklearn.metrics import accuracy_score, balanced_accuracy_score, classification_report
from sklearn.metrics import fbeta_score, roc_auc_score, average_precision_score, matthews_corrcoef
import numpy as np
from collections import defaultdict

# Define label mapping for KDD dataset labels
label_mapping = {
    'normal.': 0, 'satan.': 1, 'ipsweep.': 2, 'portsweep.': 3, 'nmap.': 4,
    'back.': 5, 'warezclient.': 6, 'teardrop.': 7, 'pod.': 8, 'guess_passwd.': 9,
    'buffer_overflow.': 10, 'land.': 11, 'warezmaster.': 12, 'imap.': 13, 'rootkit.': 14,
    'loadmodule.': 15, 'multihop.': 16, 'ftp_write.': 17, 'phf.': 18, 'perl.': 19, 'spy.': 20
}

# Load the training dataset
train_data = pd.read_csv("kdd_train.csv")
train_data['label'] = train_data['label'].map(label_mapping)  # Convert labels to numeric values

# Load the test dataset
test_data = pd.read_csv("kdd_test.csv")
test_data['label'] = test_data['label'].map(label_mapping)  # Convert labels to numeric values

# Parameters
num_runs = 5  # Number of iterations

# Initialize accumulators
accuracy_scores = []
balanced_accuracy_scores = []
f2_scores = []
#auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Initialize metrics accumulators for classification report
class_metrics = defaultdict(lambda: defaultdict(float))  # Stores precision/recall/F1 for each class

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    
    # Shuffle data
    train_data = train_data.sample(frac=1).reset_index(drop=True)
    test_data = test_data.sample(frac=1).reset_index(drop=True)
    
    # Replace infinities and fill NaN
    train_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    train_data.fillna(train_data.mean(), inplace=True)
    test_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    test_data.fillna(test_data.mean(), inplace=True)
    
    # Split features and labels
    train_y = train_data['label'].values
    train_X = train_data.drop(['label'], axis=1).values
    test_y = test_data['label'].values
    test_X = test_data.drop(['label'], axis=1).values
    
    # Train the model
    model = eLCS()
    trainedModel = model.fit(train_X,train_y)
    
    # Make predictions
    test_predictions = model.predict(test_X)
    test_probabilities = model.predict_proba(test_X)

    print(test_probabilities)  # Inspect the first few probability vectors
    print(np.sum(test_probabilities, axis=1))  # Check if they sum to 1

    test_probabilities = test_probabilities / np.sum(test_probabilities, axis=1, keepdims=True)
    print(test_probabilities)  # Inspect the first few probability vectors
    print(np.sum(test_probabilities, axis=1))  # Check if they sum to 1
    
    # Calculate accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)
    
    # Calculate balanced accuracy
    balanced_accuracy = balanced_accuracy_score(test_y, test_predictions)
    balanced_accuracy_scores.append(balanced_accuracy)
    
    # Calculate F2-Score
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)
    
    # Calculate AUC-ROC (One-vs-Rest)
    #auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    #auc_roc_scores.append(auc_roc)
    
    # Calculate AUC-PR (One-vs-Rest)
    #auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    #auc_pr_scores.append(auc_pr)
    
    # Calculate MCC
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)
    
    # Classification Report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)
    
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):  # Skip non-class metrics
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages
average_accuracy = np.mean(accuracy_scores)
balanced_average_accuracy = np.mean(balanced_accuracy_scores)
average_f2_score = np.mean(f2_scores)
#average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the classification report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average Balanced Test Accuracy: {balanced_average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
#print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")


Run 1/5
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]


  test_probabilities = test_probabilities / np.sum(test_probabilities, axis=1, keepdims=True)


Run 2/5
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]


  test_probabilities = test_probabilities / np.sum(test_probabilities, axis=1, keepdims=True)


Run 3/5
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]


  test_probabilities = test_probabilities / np.sum(test_probabilities, axis=1, keepdims=True)


Run 4/5
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]


  test_probabilities = test_probabilities / np.sum(test_probabilities, axis=1, keepdims=True)


Run 5/5
[[0. 1. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]
[[0. 1. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]]
[1. 1. 1. ... 1. 1. 1.]


  test_probabilities = test_probabilities / np.sum(test_probabilities, axis=1, keepdims=True)


Average Test Accuracy: 0.9898
Average Balanced Test Accuracy: 0.2182
Average F2-Score (Weighted): 0.9888
Average AUC-PR: nan
Average Matthews Correlation Coefficient: 0.8760

Average Classification Report:
Class 0:
  precision: 0.9916
  recall: 0.9996
  f1-score: 0.9956
  support: 194557.0000
Class 1:
  precision: 0.9856
  recall: 0.8712
  f1-score: 0.9248
  support: 3178.0000
Class 2:
  precision: 0.8963
  recall: 0.8801
  f1-score: 0.8866
  support: 2496.0000
Class 3:
  precision: 0.9601
  recall: 0.9247
  f1-score: 0.9418
  support: 2083.0000
Class 4:
  precision: 0.9989
  recall: 0.3806
  f1-score: 0.5506
  support: 463.0000
Class 5:
  precision: 0.0000
  recall: 0.0000
  f1-score: 0.0000
  support: 441.0000
Class 6:
  precision: 0.0000
  recall: 0.0000
  f1-score: 0.0000
  support: 204.0000
Class 7:
  precision: 0.7988
  recall: 0.3102
  f1-score: 0.3832
  support: 196.0000
Class 8:
  precision: 0.4000
  recall: 0.0528
  f1-score: 0.0862
  support: 53.0000
Class 9:
  precision: 0.

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


# Darknet

In [3]:
import pandas as pd
from skeLCS import eLCS
from sklearn.metrics import accuracy_score, balanced_accuracy_score, classification_report
from sklearn.metrics import fbeta_score, roc_auc_score, average_precision_score, matthews_corrcoef
import numpy as np
from collections import defaultdict

# Define label mapping for Darknet dataset labels
label_mapping = {
    'Normal': 0, 'Darknet_Audio-Streaming': 1, 'Darknet_Chat': 2, 'Darknet_File-Transfer': 3, 'Darknet_VOIP': 4,
    'Darknet_Video-Streaming': 5, 'Darknet_Email': 6, 'Darknet_Browsing': 7, 'Darknet_P2P': 8
}

# Load the training dataset
train_data = pd.read_csv("darknet_train.csv")
train_data['label'] = train_data['label'].map(label_mapping)  # Convert labels to numeric values

# Load the test dataset
test_data = pd.read_csv("darknet_test.csv")
test_data['label'] = test_data['label'].map(label_mapping)  # Convert labels to numeric values

# Parameters
num_runs = 5  # Number of iterations

# Initialize accumulators
accuracy_scores = []
balanced_accuracy_scores = []
f2_scores = []
#auc_roc_scores = []
auc_pr_scores = []
mcc_scores = []

# Initialize metrics accumulators for classification report
class_metrics = defaultdict(lambda: defaultdict(float))  # Stores precision/recall/F1 for each class

for run in range(num_runs):
    print(f"Run {run + 1}/{num_runs}")
    
    # Shuffle data
    train_data = train_data.sample(frac=1).reset_index(drop=True)
    test_data = test_data.sample(frac=1).reset_index(drop=True)
    
    # Replace infinities and fill NaN
    train_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    train_data.fillna(train_data.mean(), inplace=True)
    test_data.replace([np.inf, -np.inf], np.nan, inplace=True)
    test_data.fillna(test_data.mean(), inplace=True)
    
    # Split features and labels
    train_y = train_data['label'].values
    train_X = train_data.drop(['label'], axis=1).values
    test_y = test_data['label'].values
    test_X = test_data.drop(['label'], axis=1).values
    
    # Train the model
    model = eLCS()
    trainedModel = model.fit(train_X,train_y)
    
    # Make predictions
    test_predictions = model.predict(test_X)
    test_probabilities = model.predict_proba(test_X)
    
    # Calculate accuracy
    accuracy = accuracy_score(test_y, test_predictions)
    accuracy_scores.append(accuracy)
    
    # Calculate balanced accuracy
    balanced_accuracy = balanced_accuracy_score(test_y, test_predictions)
    balanced_accuracy_scores.append(balanced_accuracy)
    
    # Calculate F2-Score
    f2_score = fbeta_score(test_y, test_predictions, beta=2, average='weighted')
    f2_scores.append(f2_score)
    
    # Calculate AUC-ROC (One-vs-Rest)
    #auc_roc = roc_auc_score(test_y, test_probabilities, multi_class='ovr')
    #auc_roc_scores.append(auc_roc)
    
    # Calculate AUC-PR (One-vs-Rest)
    auc_pr = average_precision_score(test_y, test_probabilities, average='weighted')
    auc_pr_scores.append(auc_pr)
    
    # Calculate MCC
    mcc = matthews_corrcoef(test_y, test_predictions)
    mcc_scores.append(mcc)
    
    # Classification Report
    report = classification_report(test_y, test_predictions, output_dict=True, zero_division=0)
    
    for class_label, metrics in report.items():
        if isinstance(metrics, dict):  # Skip non-class metrics
            for metric_name, metric_value in metrics.items():
                class_metrics[class_label][metric_name] += metric_value

# Calculate averages
average_accuracy = np.mean(accuracy_scores)
balanced_average_accuracy = np.mean(balanced_accuracy_scores)
average_f2_score = np.mean(f2_scores)
#average_auc_roc = np.mean(auc_roc_scores)
average_auc_pr = np.mean(auc_pr_scores)
average_mcc = np.mean(mcc_scores)

# Average the classification report metrics
average_class_metrics = {
    class_label: {metric_name: metric_value / num_runs for metric_name, metric_value in metrics.items()}
    for class_label, metrics in class_metrics.items()
}

# Output results
print(f"Average Test Accuracy: {average_accuracy:.4f}")
print(f"Average Balanced Test Accuracy: {balanced_average_accuracy:.4f}")
print(f"Average F2-Score (Weighted): {average_f2_score:.4f}")
#print(f"Average AUC-ROC (One-vs-Rest): {average_auc_roc:.4f}")
print(f"Average AUC-PR: {average_auc_pr:.4f}")
print(f"Average Matthews Correlation Coefficient: {average_mcc:.4f}")

print("\nAverage Classification Report:")
for class_label, metrics in average_class_metrics.items():
    print(f"Class {class_label}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value:.4f}")


Run 1/5
Run 2/5
Run 3/5
Run 4/5
Run 5/5
Average Test Accuracy: 0.9500
Average Balanced Test Accuracy: 0.4195
Average F2-Score (Weighted): 0.9451
Average AUC-PR: 0.9570
Average Matthews Correlation Coefficient: 0.8045

Average Classification Report:
Class 0:
  precision: 0.9522
  recall: 0.9967
  f1-score: 0.9739
  support: 26862.0000
Class 1:
  precision: 0.9838
  recall: 0.8820
  f1-score: 0.9298
  support: 2657.0000
Class 2:
  precision: 0.9495
  recall: 0.8465
  f1-score: 0.8949
  support: 908.0000
Class 3:
  precision: 0.8665
  recall: 0.3111
  f1-score: 0.4111
  support: 522.0000
Class 4:
  precision: 0.4907
  recall: 0.0894
  f1-score: 0.1455
  support: 293.0000
Class 5:
  precision: 0.7527
  recall: 0.1019
  f1-score: 0.1777
  support: 269.0000
Class 6:
  precision: 0.6189
  recall: 0.1500
  f1-score: 0.2400
  support: 116.0000
Class 7:
  precision: 0.3337
  recall: 0.0566
  f1-score: 0.0914
  support: 53.0000
Class 8:
  precision: 0.1285
  recall: 0.3409
  f1-score: 0.1800
  su