In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np

"Machine learning tools"
import pickle

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import StratifiedKFold, train_test_split


from classification.datasets import Dataset
from classification.utils.audio_student import AudioUtil, Feature_vector_DS

from classification.utils.plots import (
    plot_decision_boundaries,
    plot_specgram,
    show_confusion_matrix,
)
from classification.utils.utils import accuracy

In [2]:
np.random.seed(0)

In [3]:
### TO RUN
dataset = Dataset()
classnames = dataset.list_classes()

print("\n".join(classnames))

chainsaw
fire
fireworks
gunshot


In [4]:
### TO RUN
fm_dir = "data/feature_matrices/"  # where to save the features matrices
new_dataset_dir = "src/classification/datasets/new_dataset/melvecs/"
model_dir = "data/models/xgboost"  # where to save the models
os.makedirs(fm_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

In [5]:
### TO RUN

"Creation of the dataset"
myds = Feature_vector_DS(dataset, Nft=512, nmel=20, duration=950, shift_pct=0.0)

"Some attributes..."
myds.nmel
myds.duration
myds.shift_pct
myds.sr
myds.data_aug
myds.ncol

idx = 0

# XGBOOST PARAMETERS
n_estimators = 231
max_depth = 7
learning_rate = 0.0538
subsample = 0.5679
colsample_bytree = 0.7183

In [6]:
# TRANSFORMATION ON FEATURE VECTOR

def add_noise(feature_vector, snr_db=20):
    """Adds white noise to a feature vector based on the given SNR (Signal-to-Noise Ratio)."""
    power_signal = np.mean(feature_vector ** 2)
    power_noise = power_signal / (10 ** (snr_db / 10))
    noise = np.random.normal(0, np.sqrt(power_noise), feature_vector.shape)
    return feature_vector + noise

def shifting(feature_vector, shift_max=20):
    """Shifts mel spectrogram feature vectors along the time axis by a random shift between 0 and shift_max."""
    shift = np.random.randint(0, shift_max)
    return np.roll(feature_vector, shift, axis=0)  # Rolling along the first axis

In [7]:
### FEATURE EXTRACTION FROM SOUND
"Random split of 70:30 between training and validation"
train_pct = 0.7

featveclen = len(myds["fire", 0, "", ""])  # number of items in a feature vector
nitems = len(myds)  # number of sounds in the dataset
naudio = dataset.naudio  # number of audio files in each class
nclass = dataset.nclass  # number of classes
nlearn = round(naudio * train_pct)  # number of sounds among naudio for training

data_aug_factor = 1
class_ids_aug = np.repeat(classnames, naudio * data_aug_factor)

X = np.zeros((data_aug_factor * nclass * naudio, featveclen))
for s in range(data_aug_factor):
    for class_idx, classname in enumerate(classnames):
        for idx in range(naudio):
            featvec = myds[classname, idx, "", "lowpass"]
            X[s * nclass * naudio + class_idx * naudio + idx, :] = featvec
np.save(fm_dir + "X_basic.npy", X)
y = class_ids_aug.copy()
np.save(fm_dir + "y_basic.npy", y)

print(f"Shape of the basic feature matrix : {X.shape}")
print(f"Number of labels : {len(y)}")




Shape of the basic feature matrix : (304, 400)
Number of labels : 304


We can now create a new augmented dataset and observe if the classification results improve. 

In [8]:
### AUGMENTED DATASET
list_augmentation = ["original", "noise", "shifting"]
myds.mod_data_aug(list_augmentation)
print("Number of transformations : ", myds.data_aug_factor)
y_basic_aug = np.repeat(classnames, dataset.naudio * myds.data_aug_factor)
X_basic_aug = np.zeros((myds.data_aug_factor * nclass * naudio, featveclen))

for s in range(len(list_augmentation)):
    aug = list_augmentation[s]
    for idx in range(dataset.naudio):
        for class_idx, classname in enumerate(classnames):
            featvec = myds[classname, idx, aug, "lowpass"]
            X_basic_aug[s * nclass * naudio + class_idx * naudio + idx, :] = featvec
            y_basic_aug[s * nclass * naudio + class_idx * naudio + idx] = classname

np.save(fm_dir + "X_basic_aug.npy", X_basic_aug)
np.save(fm_dir + "y_basic_aug.npy", y_basic_aug)

print(f"Shape of the feature matrix : {X_basic_aug.shape}")
print(f"------------------------------------------------------------")
print(f"200 of each transformation. Order : chainsaw1, fire1, fireworks1, gun1, chainsaw2, fire2, ...")


Number of transformations :  3
Shape of the feature matrix : (912, 400)
------------------------------------------------------------
200 of each transformation. Order : chainsaw1, fire1, fireworks1, gun1, chainsaw2, fire2, ...


FINAL MODEL SAVE

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder

# Load datasets
X_basic_aug = np.load(os.path.join(fm_dir, "X_basic_aug.npy"))
y_basic_aug = np.load(os.path.join(fm_dir, "y_basic_aug.npy"))

X_basic = np.load(os.path.join(fm_dir, "X_basic.npy"))
y_basic = np.load(os.path.join(fm_dir, "y_basic.npy"))

# Encode labels
label_encoder = LabelEncoder()
y_basic = label_encoder.fit_transform(y_basic)
y_basic_aug = label_encoder.transform(y_basic_aug)

# Split datasets
X_train, X_test, y_train, y_test = train_test_split(X_basic, y_basic, test_size=0.3, stratify=y_basic)
X_train_aug, X_test_aug, y_train_aug, y_test_aug = train_test_split(X_basic_aug, y_basic_aug, test_size=0.3, stratify=y_basic_aug)

# =========================
# SCENARIO A: WITH PCA (no aug)
# =========================
pca = PCA(n_components=0.99)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

pca_filename = os.path.join(model_dir, "pca_noaug_nonorm.pickle")
with open(pca_filename, "wb") as f:
    pickle.dump(pca, f)

xgb_pca = XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.1,
                        subsample=0.8, colsample_bytree=0.8, eval_metric='mlogloss', random_state=42)
xgb_pca.fit(X_train_pca, y_train)

model_filename = os.path.join(model_dir, "xgb_pca_noaug_nonorm.pickle")
with open(model_filename, "wb") as f:
    pickle.dump(xgb_pca, f)

# =========================
# SCENARIO B: WITHOUT PCA (no aug)
# =========================
xgb_no_pca = XGBClassifier(n_estimators=100, max_depth=5, learning_rate=0.1,
                           subsample=0.8, colsample_bytree=0.8, eval_metric='mlogloss', random_state=42)
xgb_no_pca.fit(X_train, y_train)

model_filename = os.path.join(model_dir, "xgb_nopca_noaug_nonorm.pickle")
with open(model_filename, "wb") as f:
    pickle.dump(xgb_no_pca, f)

# =========================
# SCENARIO C: WITH PCA (aug)
# =========================
pca = PCA(n_components=0.99)
X_train_aug_pca = pca.fit_transform(X_train_aug)
X_test_aug_pca = pca.transform(X_test_aug)

pca_filename = os.path.join(model_dir, "pca_aug_nonorm.pickle")
with open(pca_filename, "wb") as f:
    pickle.dump(pca, f)

xgb_model_pca = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
                              subsample=subsample, colsample_bytree=colsample_bytree,
                              eval_metric='mlogloss', random_state=42)
xgb_model_pca.fit(X_train_aug_pca, y_train_aug)

model_filename = os.path.join(model_dir, "xgb_pca_aug_nonorm.pickle")
with open(model_filename, "wb") as f:
    pickle.dump(xgb_model_pca, f)

# =========================
# SCENARIO D: WITHOUT PCA (aug)
# =========================
xgb_model_no_pca = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
                                 subsample=subsample, colsample_bytree=colsample_bytree,
                                 eval_metric='mlogloss', random_state=42)
xgb_model_no_pca.fit(X_train_aug, y_train_aug)

model_filename = os.path.join(model_dir, "xgb_nopca_aug_nonorm.pickle")
with open(model_filename, "wb") as f:
    pickle.dump(xgb_model_no_pca, f)

# =========================
# SCENARIO E: NO DATA TRANSFORMATION (no aug)
# =========================
xgb_model_no_transform = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
                                       subsample=subsample, colsample_bytree=colsample_bytree,
                                       eval_metric='mlogloss', random_state=42)
xgb_model_no_transform.fit(X_train, y_train)

model_filename = os.path.join(model_dir, "xgb_nopca_noaug_nonorm.pickle")
with open(model_filename, "wb") as f:
    pickle.dump(xgb_model_no_transform, f)

# =========================
# SCENARIO F: NORMALIZATION + PCA (no aug)
# =========================
X_train_norm = np.array([x/np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X_train])
X_test_norm = np.array([x/np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X_test])

pca = PCA(n_components=0.99)
X_train_norm_pca = pca.fit_transform(X_train_norm)
X_test_norm_pca = pca.transform(X_test_norm)

pca_filename = os.path.join(model_dir, "pca_noaug_norm.pickle")
with open(pca_filename, "wb") as f:
    pickle.dump(pca, f)

xgb_model_norm_pca = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
                                   subsample=subsample, colsample_bytree=colsample_bytree,
                                   eval_metric='mlogloss', random_state=42)
xgb_model_norm_pca.fit(X_train_norm_pca, y_train)

model_filename = os.path.join(model_dir, "xgb_pca_noaug_norm.pickle")
with open(model_filename, "wb") as f:
    pickle.dump(xgb_model_norm_pca, f)

# =========================
# SCENARIO G: NORMALIZATION + AUG + PCA
# =========================
X_train_aug_norm = np.array([x/np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X_train_aug])
X_test_aug_norm = np.array([x/np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X_test_aug])

pca = PCA(n_components=0.99)
X_train_aug_norm_pca = pca.fit_transform(X_train_aug_norm)
X_test_aug_norm_pca = pca.transform(X_test_aug_norm)

pca_filename = os.path.join(model_dir, "pca_aug_norm.pickle")
with open(pca_filename, "wb") as f:
    pickle.dump(pca, f)

xgb_model_norm_aug_pca = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate,
                                       subsample=subsample, colsample_bytree=colsample_bytree,
                                       eval_metric='mlogloss', random_state=42)
xgb_model_norm_aug_pca.fit(X_train_aug_norm_pca, y_train_aug)

model_filename = os.path.join(model_dir, "xgb_pca_aug_norm.pickle")
with open(model_filename, "wb") as f:
    pickle.dump(xgb_model_norm_aug_pca, f)

# =========================
# EVALUATION FUNCTION
# =========================
def evaluate_model(model, X_test, y_test, description):
    predict = model.predict(X_test)

    classes = np.unique(y_test)
    precision_per_class = precision_score(y_test, predict, average=None, labels=classes)
    recall_per_class = recall_score(y_test, predict, average=None, labels=classes)
    test_accuracy_per_class = []
    conf_matrix = confusion_matrix(y_test, predict, labels=classes)

    for i, cls in enumerate(classes):
        acc = conf_matrix[i, i] / conf_matrix[i, :].sum()
        test_accuracy_per_class.append(acc)

    cv_scores = cross_val_score(model, X_test, y_test, cv=5, scoring='accuracy')
    mean_cv_accuracy = np.mean(cv_scores)

    print(f"\n=== {description} ===")
    print(f"Test Accuracy (Overall): {np.mean(predict == y_test):.4f}")
    print(f"Mean CV Accuracy: {mean_cv_accuracy:.4f}")

    print("\nPer-Class Metrics:")
    for i, cls in enumerate(classes):
        print(f"Class {cls}: Precision={precision_per_class[i]:.4f}, Recall={recall_per_class[i]:.4f}, Accuracy={test_accuracy_per_class[i]:.4f}")

# =========================
# EVALUATE ALL MODELS
# =========================
evaluate_model(xgb_pca, X_test_pca, y_test, "Scenario A: PCA NOAUG NONORM")
evaluate_model(xgb_no_pca, X_test, y_test, "Scenario B: NOPCA NOAUG NONORM")
evaluate_model(xgb_model_pca, X_test_aug_pca, y_test_aug, "Scenario C: PCA AUG NONORM")
evaluate_model(xgb_model_no_pca, X_test, y_test, "Scenario D: NOPCA AUG NONORM")
evaluate_model(xgb_model_no_transform, X_test, y_test, "Scenario E: NOPCA NOAUG NONORM")
evaluate_model(xgb_model_norm_pca, X_test_norm_pca, y_test, "Scenario F: PCA NOAUG NORM")
evaluate_model(xgb_model_norm_aug_pca, X_test_aug_norm_pca, y_test_aug, "Scenario G: PCA AUG NORM")



=== Scenario A: WITH PCA (NO AUG) ===
Test Accuracy (Overall): 0.7065
Mean CV Accuracy: 0.5433

Per-Class Metrics:
Class 0: Precision=0.6538, Recall=0.7391, Accuracy=0.7391
Class 1: Precision=0.7500, Recall=0.7826, Accuracy=0.7826
Class 2: Precision=0.7500, Recall=0.3913, Accuracy=0.3913
Class 3: Precision=0.7000, Recall=0.9130, Accuracy=0.9130

=== Scenario B: WITHOUT PCA (NO AUG) ===
Test Accuracy (Overall): 0.8696
Mean CV Accuracy: 0.7468

Per-Class Metrics:
Class 0: Precision=0.8000, Recall=0.6957, Accuracy=0.6957
Class 1: Precision=0.8750, Recall=0.9130, Accuracy=0.9130
Class 2: Precision=0.8696, Recall=0.8696, Accuracy=0.8696
Class 3: Precision=0.9200, Recall=1.0000, Accuracy=1.0000

=== Scenario C: WITH PCA (AUG) ===
Test Accuracy (Overall): 0.6977
Mean CV Accuracy: 0.5928

Per-Class Metrics:
Class 0: Precision=0.6364, Recall=0.9545, Accuracy=0.9545
Class 1: Precision=0.6538, Recall=0.8095, Accuracy=0.8095
Class 2: Precision=0.8125, Recall=0.5909, Accuracy=0.5909
Class 3: Preci

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



=== Scenario D: WITHOUT PCA (AUG) ===
Test Accuracy (Overall): 0.2500
Mean CV Accuracy: 0.7251

Per-Class Metrics:
Class 0: Precision=0.0000, Recall=0.0000, Accuracy=0.0000
Class 1: Precision=0.0000, Recall=0.0000, Accuracy=0.0000
Class 2: Precision=0.0000, Recall=0.0000, Accuracy=0.0000
Class 3: Precision=0.2500, Recall=1.0000, Accuracy=1.0000

=== Scenario E: NO DATA TRANSFORMATION ===
Test Accuracy (Overall): 0.8370
Mean CV Accuracy: 0.7251

Per-Class Metrics:
Class 0: Precision=0.7826, Recall=0.7826, Accuracy=0.7826
Class 1: Precision=0.9048, Recall=0.8261, Accuracy=0.8261
Class 2: Precision=0.7826, Recall=0.7826, Accuracy=0.7826
Class 3: Precision=0.8800, Recall=0.9565, Accuracy=0.9565

=== Scenario F: NORMALIZATION + PCA (NO AUG) ===
Test Accuracy (Overall): 0.7283
Mean CV Accuracy: 0.6836

Per-Class Metrics:
Class 0: Precision=0.6296, Recall=0.7391, Accuracy=0.7391
Class 1: Precision=0.8947, Recall=0.7391, Accuracy=0.7391
Class 2: Precision=0.6250, Recall=0.6522, Accuracy=0.652

HYPERPARAMETER TUNING

In [10]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt

from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from bayes_opt import BayesianOptimization

# Your custom accuracy function
from classification.utils.utils import accuracy

# --- CONFIG FLAGS ---
NORMALIZATION = True
TRANSFORMATION = True

# --- STEP 1: Load/Select Data ---
if TRANSFORMATION:
    try:
        X = X_basic_aug   # Make sure X_basic_aug is defined in your environment
        y = y_basic_aug   # Make sure y_basic_aug is defined in your environment
    except NameError:
        raise ValueError("X_aug and y_aug must be defined before running this script.")
else:
    try:
        X = X_basic       # Make sure X_basic is defined in your environment
        y = y_basic       # Make sure y_basic is defined in your environment
    except NameError:
        raise ValueError("X and y must be defined before running this script.")

# Optional normalization
if NORMALIZATION:
    X = np.array([
        x / np.linalg.norm(x) if np.linalg.norm(x) != 0 else x
        for x in X
    ])

# --- STEP 2: Define the Objective Function for Bayesian Optimization ---
def xgb_cv(
    n_estimators,
    max_depth,
    learning_rate,
    subsample,
    colsample_bytree
):
    """
    This function trains an XGBClassifier with given hyperparameters
    and returns the mean CV accuracy as the objective to maximize.
    """
    # Convert some parameters to int, as required by XGBoost
    n_estimators = int(n_estimators)
    max_depth = int(max_depth)

    model = XGBClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate, subsample=subsample, colsample_bytree=colsample_bytree,
        eval_metric='mlogloss',
        # remove use_label_encoder (deprecated)
        random_state=42
    )
    
    # 5-fold cross-validation on the *entire dataset* X, y
    cv_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    
    # Return the mean of cross-validation accuracy
    return cv_scores.mean()

# --- STEP 3: Set Up the Bayesian Optimizer ---
# Hyperparameter search space
pbounds = {
    'n_estimators': (50, 400),      # e.g. from 50 to 300
    'max_depth': (2, 15),           # integer between 2 and 12
    'learning_rate': (0.01, 0.3),   # from 0.01 to 0.3
    'subsample': (0.5, 1),        # from 0.5 to 1.0
    'colsample_bytree': (0.5, 1)  # from 0.5 to 1.0
}

optimizer = BayesianOptimization(
    f=xgb_cv,        # The function we want to maximize
    pbounds=pbounds, # The search space
    random_state=42  # Ensures reproducibility
)

# --- STEP 4: Run the Bayesian Optimization Loop ---
# We'll do a few initial random explorations (init_points) 
# and then a certain number of optimization steps (n_iter).
init_points = 3
n_iter = 20

print("Starting Bayesian Optimization...")
best_score_so_far = -1.0
early_stop_threshold = 0.90  # Stop if we exceed 90% cross-val accuracy

optimizer.maximize(init_points=init_points, n_iter=n_iter)

for i, res in enumerate(optimizer.res):
    score = res['target']
    print(f"Iteration {i+1}, CV Accuracy: {score:.4f}, Parameters: {res['params']}")
    
    if score > best_score_so_far:
        best_score_so_far = score
    
    # Early stopping if we found a "good" configuration
    if best_score_so_far > early_stop_threshold:
        print(f"\nEarly stopping: Found cross-validation accuracy above {early_stop_threshold}\n")
        break

# --- STEP 5: Get the Best Found Hyperparameters ---
best_params = optimizer.max['params']
best_n_estimators = int(best_params['n_estimators'])
best_max_depth = int(best_params['max_depth'])
best_learning_rate = best_params['learning_rate']
best_subsample = best_params['subsample']
best_colsample_bytree = best_params['colsample_bytree']

print("\n=== BEST HYPERPARAMETERS FOUND ===")
print(f"n_estimators = {best_n_estimators}")
print(f"max_depth = {best_max_depth}")
print(f"learning_rate = {best_learning_rate:.4f}")
print(f"subsample = {best_subsample:.4f}")
print(f"colsample_bytree = {best_colsample_bytree:.4f}")
print(f"CV Accuracy = {optimizer.max['target']:.4f}")

# --- STEP 6: Train/Validate Model Once More on a Train/Test Split ---
# Final check on a separate holdout set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=999
)

final_model = XGBClassifier(
    n_estimators=best_n_estimators,
    max_depth=best_max_depth,
    learning_rate=best_learning_rate,
    subsample=best_subsample,
    colsample_bytree=best_colsample_bytree,
    eval_metric='mlogloss',
    random_state=999
)

final_model.fit(X_train, y_train)

y_pred = final_model.predict(X_test)
test_acc = accuracy(y_pred, y_test)

print("\n=== FINAL EVALUATION ON HOLDOUT TEST SET ===")
print(f"Test Accuracy: {test_acc:.4f}")


Starting Bayesian Optimization...
|   iter    |  target   | colsam... | learni... | max_depth | n_esti... | subsample |
-------------------------------------------------------------------------------------
| [39m1        [39m | [39m0.6035   [39m | [39m0.6873   [39m | [39m0.2857   [39m | [39m11.52    [39m | [39m259.5    [39m | [39m0.578    [39m |


KeyboardInterrupt: 

MEAN ACCURACY ON 100 ITERATIONS

In [None]:
import os
import numpy as np
import pickle
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from classification.utils.utils import accuracy


NORMALIZATION = True
TRANSFORMATION = True

# Ensure dataset (X_aug, y_aug) exists
if TRANSFORMATION:
    try:
        X = X_basic_aug
        y = y_basic_aug
    except NameError:
        raise ValueError("X_aug and y_aug must be defined before running this script.")
else:
    try:
        X = X_basic
        y = y_basic
    except NameError:
        raise ValueError("X and y must be defined before running this script.")

# Normalize if needed
if NORMALIZATION:
    X = np.array([x / np.linalg.norm(x) if np.linalg.norm(x) != 0 else x for x in X])

# Number of iterations
num_iterations = 3

# Lists to store scores
accuracy_scores = []
cv_accuracy_scores = []

for i in range(num_iterations):
    print(f"\nIteration {i + 1}/{num_iterations}")
    
    # Split the dataset into training and testing subsets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, stratify=y, random_state=i
    )

    # Train the XGBoost model
    model = XGBClassifier(
        n_estimators=200,
        max_depth=5,
        learning_rate=0.1,
        subsample=0.8,
        colsample_bytree=0.8,
        eval_metric='mlogloss',
        random_state=i
    )
    model.fit(X_train, y_train)

    # Make predictions
    y_pred = model.predict(X_test)

    # Compute overall accuracy
    test_accuracy = accuracy(y_pred, y_test)
    accuracy_scores.append(test_accuracy)

    # Perform cross-validation on the training set
    cv_scores = cross_val_score(model, X_train, y_train, cv=10, scoring='accuracy')
    mean_cv_accuracy = np.mean(cv_scores)
    cv_accuracy_scores.append(mean_cv_accuracy)

    print(f"Test Accuracy: {test_accuracy:.4f} | Mean CV Accuracy: {mean_cv_accuracy:.4f}")

# Compute overall statistics
mean_test_accuracy = np.mean(accuracy_scores)
std_test_accuracy = np.std(accuracy_scores)

mean_cv_accuracy = np.mean(cv_accuracy_scores)
std_cv_accuracy = np.std(cv_accuracy_scores)

# Print final results
print("\n=== FINAL RESULTS AFTER 20 ITERATIONS ===")
print(f"Mean Test Accuracy: {mean_test_accuracy:.4f} ± {std_test_accuracy:.4f}")
print(f"Mean Cross-Validation Accuracy: {mean_cv_accuracy:.4f} ± {std_cv_accuracy:.4f}")



Iteration 1/3
Test Accuracy: 0.7674 | Mean CV Accuracy: 0.6834

Iteration 2/3
Test Accuracy: 0.7326 | Mean CV Accuracy: 0.7942

Iteration 3/3
Test Accuracy: 0.6279 | Mean CV Accuracy: 0.7337

=== FINAL RESULTS AFTER 20 ITERATIONS ===
Mean Test Accuracy: 0.7093 ± 0.0593
Mean Cross-Validation Accuracy: 0.7371 ± 0.0453
