In [None]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Load preprocessed data
# The pickle location defaults to the path this notebook was originally written
# against, but can be overridden by setting the PREPROCESSED_DATA_PATH
# environment variable, so the notebook can run on another machine without
# editing this cell. An unset or empty variable falls back to the default.
PREPROCESSED_DATA_PATH = os.environ.get("PREPROCESSED_DATA_PATH") or (
    "C:/Users/syafi/Desktop/TM/Heart_Rate_Classification/new classification task/saved_data/preprocessed_data.pkl"
)

# SECURITY: pickle.load can execute arbitrary code while deserializing. Only
# point PREPROCESSED_DATA_PATH at a file you created yourself or fully trust.
with open(PREPROCESSED_DATA_PATH, "rb") as f:
    # The pickled object is unpacked into exactly four names, in this order;
    # any other number of items raises ValueError here.
    X_train, X_test, y_train, y_test = pickle.load(f)

# Display train and test sizes
print(f"Train size: {len(X_train)}")
print(f"Test size: {len(X_test)}")

In [None]:
# Class labels for reporting
# Eleven human-readable condition names. The training loop passes this list to
# evaluate_adaboost_model(), which hands back the label names used as
# `target_names` in the printed classification report.
# NOTE(review): presumably list position i names integer class id i — confirm
# against the preprocessing step that encoded the targets.
class_labels = [
    "Atherosclerosis",  # position 0
    "Hypertension",  # position 1
    "Cardiovascular Disease (CVD)",  # position 2
    "Chronic Fatigue Syndrome (CFS)",  # position 3
    "Respiratory Disease (COPD or Asthma)",  # position 4
    "Stress-related Disorders",  # position 5
    "Arrhythmias",  # position 6
    "Healthy",  # position 7
    "Autonomic Dysfunction",  # position 8
    "Diabetes",  # position 9
    "Anaemia",  # position 10
]

In [None]:
# Define desired support for each class in the test set
# Maps the integer keys 0-10 (eleven entries, the same count as class_labels)
# to a per-class sample count; the counts add up to 90,000.
# NOTE(review): keys presumably line up with positions in class_labels — confirm.
# NOTE(review): no other cell visible in this notebook reads this dict; check
# whether it is still needed or is consumed by a cell that is not shown here.
desired_support = {
    0: 9090,
    1: 9702,
    2: 3420,
    3: 9117,
    4: 5166,
    5: 8766,
    6: 9630,
    7: 9234,
    8: 7254,
    9: 9171,
    10: 9450,
}

In [None]:
# Accumulators: one flattened record per run, plus the best run seen so far.
# They are reset here so re-running this cell starts from a clean slate.
adaboost_results = []
best_accuracy = 0
best_model = None
best_sample_size = 0

# NOTE(review): sample_sizes, training_sizes, testing_sizes and
# evaluate_adaboost_model are not defined in any cell visible in this notebook;
# they must already exist in the kernel for this cell to run.

# Walk the three size sequences in lock-step (zip stops at the shortest one).
for sample_size, train_size, test_size in zip(
    sample_sizes, training_sizes, testing_sizes
):
    # Take the leading rows of each split for this run.
    X_train_subset = X_train[:train_size]
    y_train_subset = y_train[:train_size]
    X_test_subset = X_test[:test_size]
    y_test_subset = y_test[:test_size]

    # A fresh classifier per run, with a fixed seed, fitted on the training slice.
    adaboost_model = AdaBoostClassifier(n_estimators=50, random_state=42)
    adaboost_model.fit(X_train_subset, y_train_subset)

    # Score the fitted model on the test slice. The helper returns the accuracy,
    # a report mapping, the predictions and the label names used for display.
    accuracy, report, predictions, dynamic_labels = evaluate_adaboost_model(
        adaboost_model, X_test_subset, y_test_subset, class_labels
    )

    # Show the headline accuracy followed by the per-class text report,
    # restricted to the classes that occur in this test slice.
    print(f"\nAdaBoost Sample size {sample_size} - Accuracy: {accuracy:.4f}")
    report_text = classification_report(
        y_test_subset,
        predictions,
        labels=np.unique(y_test_subset),
        target_names=dynamic_labels,
        zero_division=0,
    )
    print(report_text)

    # Flatten the nested report into "<label>_<metric>" columns; entries whose
    # value is not a dict are skipped. The run metadata is added afterwards, so
    # it wins if a flattened key happens to collide with one of its names.
    report_flattened = dict(
        (f"{class_name}_{metric_name}", score)
        for class_name, scores in report.items()
        if isinstance(scores, dict)
        for metric_name, score in scores.items()
    )
    report_flattened.update(
        sample_size=sample_size,
        train_size=train_size,
        test_size=test_size,
        accuracy=accuracy,
    )
    adaboost_results.append(report_flattened)

    # Keep this run only if it strictly beats the best accuracy so far.
    if accuracy > best_accuracy:
        best_accuracy, best_model, best_sample_size = (
            accuracy,
            adaboost_model,
            sample_size,
        )
        print(
            f"New best model found for sample size {sample_size} with accuracy {accuracy:.4f}"
        )

In [None]:
# Persist the winning classifier, if the training loop selected one.
# Nothing is written when best_model is still None.
if best_model is not None:
    # The output file name embeds the sample size of the winning run.
    best_model_file = f"best_adaboost_model_sample_size_{best_sample_size}.pkl"
    with open(best_model_file, mode="wb") as model_file:
        pickle.dump(best_model, model_file)
    print(f"\nBest AdaBoost model saved as {best_model_file} with accuracy {best_accuracy:.4f}")

In [None]:
# Gather every run's flattened record into one table (one row per run) and
# write it to CSV without the index column, for later analysis.
adaboost_results_df = pd.DataFrame(data=adaboost_results)
adaboost_results_df.to_csv(path_or_buf="adaboost_results.csv", index=False)
print("\nResults saved to 'adaboost_results.csv'")