# PRUEBAS CON NAIVE BAYES DISTRIBUIDO

In [10]:
import pandas as pd
from collections import Counter
from river.naive_bayes import GaussianNB
from utils import read_dataset, evaluate_model_online_learning, calculate_metrics
from collections import Counter
from sklearn.metrics import precision_recall_fscore_support
from river.naive_bayes import GaussianNB



In [14]:
# Dictionary of dataset names and file paths
data_name = {
    "elec": "electricity.csv",
}

# Load dataset
name = "elec"
dataset = read_dataset(name, data_name)
dataset = dataset.iloc[:5000]  # Use a subset for faster testing

# Separate features and labels
feature_columns = dataset.columns[:-1]  # All except the last column
label_column = dataset.columns[-1]      # The last column is the label


# Split dataset into training and testing
train_data = dataset.iloc[:3000]  # First 3000 samples for training
test_data = dataset.iloc[3000:4000]  # Next 1000 samples for testing

# Initialize models for each node
node_models = {
    "node_1": GaussianNB(),
    "node_2": GaussianNB(),
    "node_3": GaussianNB(),
}

# Train and evaluate each model using interleaved samples
print("Training and evaluating models with interleaved samples...")
for node_idx, (node_name, model) in enumerate(node_models.items()):
    interleaved_data = train_data.iloc[node_idx::3]  # Interleaved samples
    conf_matrix, elapsed_time = evaluate_model_online_learning(model, interleaved_data)
    metrics = calculate_metrics(conf_matrix)
    print(f"\nMetrics for {node_name}:")
    print(f"  Accuracy: {conf_matrix['TP'] / (conf_matrix['TP'] + conf_matrix['FN']):.3f}")
    print(f"  Precision: {metrics['precision']:.3f}")
    print(f"  Recall: {metrics['recall']:.3f}")
    print(f"  F1-score: {metrics['f1']:.3f}")
    print(f"  Execution time: {elapsed_time:.2f} seconds")

# Predict with the aggregated model using probabilities
def predict_with_proba_aggregation(models, X):
    """
    Predicts classes using aggregated probabilities across all models.

    Args:
        models (list): List of trained GaussianNB models.
        X (list of dict): Test dataset (list of feature dictionaries).

    Returns:
        list: Predicted classes for each sample based on aggregated probabilities.
    """
    predictions = []
    for xi in X:
        # Aggregate probabilities
        avg_proba = Counter()
        for model in models:
            proba = model.predict_proba_one(xi)
            for class_label, p in proba.items():
                avg_proba[class_label] += p
        # Normalize and select the class with the highest probability
        total_models = len(models)
        aggregated_proba = {k: v / total_models for k, v in avg_proba.items()}
        best_class = max(aggregated_proba, key=aggregated_proba.get)
        predictions.append(best_class)
    return predictions

# Evaluate the aggregated model on the test dataset
print("\nEvaluating Aggregated Model...")
conf_matrix_agg = {"TP": 0, "FP": 0, "FN": 0, "TN": 0}

for _, row in test_data.iterrows():
    x = {i: row[col] for i, col in enumerate(feature_columns)}
    y = row[label_column]

    # Predict using aggregated probabilities
    y_pred = predict_with_proba_aggregation(list(node_models.values()), [x])[0]

    # Update confusion matrix
    if y == y_pred:
        if y == 1:
            conf_matrix_agg["TP"] += 1
        else:
            conf_matrix_agg["TN"] += 1
    else:
        if y == 1:
            conf_matrix_agg["FN"] += 1
        else:
            conf_matrix_agg["FP"] += 1

# Calculate metrics for the aggregated model
metrics_agg = calculate_metrics(conf_matrix_agg)
accuracy_agg = conf_matrix_agg["TP"] / (conf_matrix_agg["TP"] + conf_matrix_agg["FN"])
print("\nMetrics for Aggregated Model:")
print(f"  Accuracy: {accuracy_agg:.3f}")
print(f"  Precision: {metrics_agg['precision']:.3f}")
print(f"  Recall: {metrics_agg['recall']:.3f}")
print(f"  F1-score: {metrics_agg['f1']:.3f}")


  dataset.replace({'UP': 1, 'DOWN': 0, 'True': 1, 'False': 0}, inplace=True)


Training and evaluating models with interleaved samples...

Metrics for node_1:
  Accuracy: 0.626
  Precision: 0.784
  Recall: 0.626
  F1-score: 0.696
  Execution time: 0.09 seconds

Metrics for node_2:
  Accuracy: 0.663
  Precision: 0.793
  Recall: 0.663
  F1-score: 0.722
  Execution time: 0.09 seconds

Metrics for node_3:
  Accuracy: 0.611
  Precision: 0.769
  Recall: 0.611
  F1-score: 0.681
  Execution time: 0.10 seconds

Evaluating Aggregated Model...

Metrics for Aggregated Model:
  Accuracy: 0.629
  Precision: 0.888
  Recall: 0.629
  F1-score: 0.736
