In [7]:
import pandas as pd
from river import naive_bayes, tree, metrics, drift
from sklearn.model_selection import train_test_split

In [8]:
def get_data():
    df = pd.read_csv("dataverse/rt_8873985678962563_abrupto.csv")
    return df[['X1', 'X2']], df['class']

In [9]:
# Load the dataset
X, y = get_data()

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [10]:
# Initialize drift detectors
drift_detectors = {
    'ADWIN': drift.ADWIN(),
    'KSWIN': drift.KSWIN(),
    'DDM': drift.binary.DDM(),
    'EDDM': drift.binary.EDDM(),
    'PageHinkley': drift.PageHinkley()
}

# Initialize base learners
base_learners = {
    'Naive Bayes': naive_bayes.GaussianNB(),
    'Hoeffding Tree': tree.HoeffdingTreeClassifier()
}

# Initialize metrics
metrics_dict = {name: {'accuracy': metrics.Accuracy(), 'auc': metrics.ROCAUC()} for name in base_learners}

In [11]:

# Function to train and test models with different drift detectors
def train_and_evaluate(base_learner_name, base_learner, drift_detector):
    # Reset base learner and metrics
    model = base_learner.clone()
    model_metrics = {metric_name: metric.clone() for metric_name, metric in metrics_dict[base_learner_name].items()}

    # Training
    for x, y_true in zip(X_train.to_dict(orient='records'), y_train):
        # Update drift detector and check for drift
        drift_response = drift_detector.update(y_true)

        # Check for drift based on detector type
        in_drift = drift_response[0] if drift_response is not None else False

        if in_drift:
            # Reset model on drift detection
            model = base_learner.clone()

        # Train model
        model.learn_one(x, y_true)

        # Update metrics
        for metric in model_metrics.values():
            metric.update(y_true, model.predict_one(x))

    # Testing
    for x, y_true in zip(X_test.to_dict(orient='records'), y_test):
        for metric in model_metrics.values():
            metric.update(y_true, model.predict_one(x))

    return model_metrics



In [12]:

# Evaluate each combination of base learner and drift detector
for drift_name, drift_detector in drift_detectors.items():
    for learner_name, learner in base_learners.items():
        print(f"Evaluating {learner_name} with {drift_name}:")
        evaluated_metrics = train_and_evaluate(learner_name, learner, drift_detector.clone())
        for metric_name, metric in evaluated_metrics.items():
            print(f"{metric_name}: {metric.get()}")


Evaluating Naive Bayes with ADWIN:
accuracy: 0.656725
auc: 0.654661007428267
Evaluating Hoeffding Tree with ADWIN:
accuracy: 0.67975
auc: 0.6786962489427549
Evaluating Naive Bayes with KSWIN:
accuracy: 0.656725
auc: 0.654661007428267
Evaluating Hoeffding Tree with KSWIN:
accuracy: 0.67975
auc: 0.6786962489427549
Evaluating Naive Bayes with DDM:
accuracy: 0.656725
auc: 0.654661007428267
Evaluating Hoeffding Tree with DDM:
accuracy: 0.67975
auc: 0.6786962489427549
Evaluating Naive Bayes with EDDM:
accuracy: 0.656725
auc: 0.654661007428267
Evaluating Hoeffding Tree with EDDM:
accuracy: 0.67975
auc: 0.6786962489427549
Evaluating Naive Bayes with PageHinkley:
accuracy: 0.656725
auc: 0.654661007428267
Evaluating Hoeffding Tree with PageHinkley:
accuracy: 0.67975
auc: 0.6786962489427549
