In [5]:
from river import drift, metrics, tree, ensemble
from river.evaluate import progressive_val_score
import numpy as np

In [6]:


# Generate synthetic dataset
def generate_data(size=40000, drift_points=(10000, 20000, 30000), width=1000):
    data = []
    for i in range(size):
        # Simulate abrupt drift
        if i in range(drift_points[0], drift_points[0] + width):
            concept = 1
        else:
            concept = 0

        # Generate random data based on the current concept
        X1, X2 = np.random.random(), np.random.random()
        y = 1 if (X1 + X2 > 1) if concept == 0 else (X1 + X2 < 1) else 0
        data.append(({'X1': X1, 'X2': X2}, y))
    return data

SyntaxError: expected 'else' after 'if' expression (718510223.py, line 13)

In [None]:
# Constants for the synthetic dataset
total_instances = 40000
drift_positions = [10000, 20000, 30000]
drift_width = 1000

# Generating synthetic data for X1 and X2
np.random.seed(0)  # For reproducibility
X1 = np.random.rand(total_instances)
X2 = np.random.rand(total_instances)

# Function to apply drift to a variable
def apply_drift(var, drift_pos, width):
    start = max(0, drift_pos - width // 2)
    end = min(total_instances, drift_pos + width // 2)
    var[start:end] = 1 - var[start:end]

# Applying drifts to X1 and X2
for drift_pos in drift_positions:
    apply_drift(X1, drift_pos, drift_width)
    apply_drift(X2, drift_pos, drift_width)

# Generating class labels (0 or 1) with drifts
class_labels = np.zeros(total_instances, dtype=int)
for i, drift_pos in enumerate(drift_positions + [total_instances]):
    start = drift_positions[i - 1] if i > 0 else 0
    end = drift_pos
    class_labels[start:end] = i % 2

In [None]:


data_stream = generate_data()

# Define drift detectors
detectors = {
    'ADWIN': drift.ADWIN(),
    'DDM': drift.DDM(),
    'EDDM': drift.EDDM(),
    'Page-Hinkley': drift.PageHinkley()
}

# Define base learners
learners = {
    'Decision Tree': tree.HoeffdingTreeClassifier(),
    'Random Forest': ensemble.AdaptiveRandomForestClassifier()
}

# Evaluation
for learner_name, learner in learners.items():
    for detector_name, detector in detectors.items():
        metric = metrics.ROCAUC()
        for X, y in data_stream:
            # Predict
            y_pred = learner.predict_one(X)
            metric = metric.update(y, y_pred)
            
            # Train
            learner = learner.learn_one(X, y)
            
            # Check for drift
            in_drift, in_warning = detector.update(y_pred == y)
            if in_drift:
                learner.reset()  # Reset the model in case of drift

        print(f"{learner_name} with {detector_name}: AUC: {metric.get()}")
