# Change Point Detection

This notebook demonstrates the `ChangePointDetector`, which detects anomalies by comparing local statistics, computed over a rolling window, with global statistics.

Change point detection is particularly useful for:
- Detecting level shifts
- Identifying regime changes
- Finding structural breaks in time series


In [None]:
import numpy as np
import pandas as pd
from plotsmith import plot_timeseries
import matplotlib.pyplot as plt

from anomsmith import detect_anomalies, ThresholdRule
from anomsmith.primitives.detectors.change_point import ChangePointDetector

# Seed NumPy's legacy global random state so unseeded draws are reproducible.
np.random.seed(42)


## Creating Data with Change Points

We'll create a synthetic series with three level shifts (at indices 50, 100, and 150) and a single point anomaly at index 180.


In [None]:
def create_change_point_data(n: int = 200, seed: int = 42) -> pd.Series:
    """Create a synthetic daily series with three level shifts and one spike.

    Layout (Gaussian noise with standard deviation 0.5 on every segment):

    - indices 0-49: level 10 (normal operation)
    - indices 50-99: level 13 (shift up by 3)
    - indices 100-149: level 8 (shift down by 5)
    - indices 150 to n-1: level 10, except index 180, which is a spike
      exactly 5 above that level

    Args:
        n: Total number of points. Must be at least 181 so the spike index
            exists; the final segment absorbs every point after the spike.
        seed: Seed for NumPy's global random state (reseeded on every call).

    Returns:
        Series of length ``n`` indexed by consecutive days from 2020-01-01.

    Raises:
        ValueError: If ``n`` is smaller than 181.
    """
    spike_idx = 180
    if n < spike_idx + 1:
        raise ValueError(f"n must be at least {spike_idx + 1}, got {n}")

    np.random.seed(seed)

    noise_scale = 0.5
    base_level = 10.0

    y = np.zeros(n)

    # Segment 1: Normal operation
    y[0:50] = np.random.randn(50) * noise_scale + base_level

    # Change point 1: Level shift up by 3
    y[50:100] = np.random.randn(50) * noise_scale + 13

    # Change point 2: Level shift down by 5
    y[100:150] = np.random.randn(50) * noise_scale + 8

    # Segment 4: Return to normal, interrupted by a single spike
    y[150:spike_idx] = np.random.randn(spike_idx - 150) * noise_scale + base_level
    # Neither neighbouring slice covers the spike index, so assign it
    # explicitly; incrementing the zero-initialised slot would leave the
    # point below the baseline instead of above it.
    y[spike_idx] = base_level + 5
    # Size the tail from n so the function works for any n >= 181.
    y[spike_idx + 1:] = (
        np.random.randn(n - spike_idx - 1) * noise_scale + base_level
    )

    index = pd.date_range("2020-01-01", periods=n, freq="D")
    return pd.Series(y, index=index)

# Build the demo series, then report its length and descriptive statistics.
y = create_change_point_data(n=200)

n_points = len(y)
summary_stats = y.describe()

print(f"Created time series with {n_points} points")
print("\nData statistics:")
print(summary_stats)


In [None]:
# Plot the raw series, mark each known change point, and highlight index 180.
fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(y.index, y.values, 'b-', linewidth=1.5, alpha=0.7, label='Time Series')

# (position in the series, line colour, legend label) for each change point.
change_point_markers = [
    (50, 'r', 'Change Point 1'),
    (100, 'orange', 'Change Point 2'),
    (150, 'green', 'Change Point 3'),
]
for position, line_color, legend_label in change_point_markers:
    ax.axvline(y.index[position], color=line_color, linestyle='--',
               alpha=0.5, label=legend_label)

spike_position = 180
ax.scatter(
    y.index[spike_position],
    y.values[spike_position],
    color='red',
    s=200,
    marker='x',
    linewidths=3,
    label='Spike Anomaly',
    zorder=5,
)

ax.set_xlabel('Date', fontsize=12)
ax.set_ylabel('Value', fontsize=12)
ax.set_title('Time Series with Change Points', fontsize=14, fontweight='bold')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


## Change Point Detection with Different Parameters

Let's test different window sizes and threshold multipliers.


In [None]:
# Three detectors that differ in window size (the smallest also uses a
# lower threshold multiplier).
cp_small_window = ChangePointDetector(window_size=5, threshold_multiplier=2.0)
cp_medium_window = ChangePointDetector(window_size=10, threshold_multiplier=3.0)
cp_large_window = ChangePointDetector(window_size=20, threshold_multiplier=3.0)

# Fit every detector on the raw values before scoring.
for detector in (cp_small_window, cp_medium_window, cp_large_window):
    detector.fit(y.values)

# Note: ChangePointDetector is a detector, so it has its own threshold,
# but a ThresholdRule can be layered on top for additional control.
threshold_rule = ThresholdRule(method="quantile", value=0.9, quantile=0.9)

# Run detection once per detector with the shared threshold rule.
result_small = detect_anomalies(y, cp_small_window, threshold_rule)
result_medium = detect_anomalies(y, cp_medium_window, threshold_rule)
result_large = detect_anomalies(y, cp_large_window, threshold_rule)

# One column per detector; rows are the summary metrics listed in `index`.
labelled_results = {
    'Small Window (5)': result_small,
    'Medium Window (10)': result_medium,
    'Large Window (20)': result_large,
}
comparison = pd.DataFrame(
    {
        label: [
            res['flag'].sum(),
            res['flag'].mean(),
            res['score'].mean(),
            res['score'].std(),
        ]
        for label, res in labelled_results.items()
    },
    index=['Anomalies Detected', 'Anomaly Rate', 'Mean Score', 'Std Score'],
)

print("Change Point Detector Comparison:")
print(comparison.round(4))


In [None]:
# One panel per detector: raw series, known change points, flagged points.
fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)

# (panel title, detection result, colour) for each detector configuration.
detectors = [
    ('Small Window (5)', result_small, 'blue'),
    ('Medium Window (10)', result_medium, 'green'),
    ('Large Window (20)', result_large, 'orange')
]
known_change_points = (50, 100, 150)

for ax, (name, result, color) in zip(axes, detectors):
    ax.plot(y.index, y.values, 'b-', linewidth=1.5, alpha=0.7, label='Time Series')

    # Faint guides at the positions where the level shifts were injected.
    for change_point in known_change_points:
        ax.axvline(y.index[change_point], color='gray', linestyle='--', alpha=0.3)

    # Points the detector flagged as anomalous.
    anomaly_mask = result['flag'] == 1
    flagged_count = anomaly_mask.sum()
    ax.scatter(
        y.index[anomaly_mask],
        y.values[anomaly_mask],
        color='red',
        s=100,
        marker='x',
        linewidths=2,
        label=f'Detected ({flagged_count})',
        zorder=5,
    )

    ax.set_ylabel('Value', fontsize=12)
    ax.set_title(f'Change Point Detection: {name}', fontsize=12, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

axes[-1].set_xlabel('Date', fontsize=12)
plt.tight_layout()
plt.show()


In [None]:
# One panel per detector: score trace, 0.9-quantile line, flagged points.
fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)

for ax, (name, result, color) in zip(axes, detectors):
    scores = result['score']
    anomaly_mask = result['flag'] == 1

    ax.plot(y.index, scores, color=color, linewidth=1.5, alpha=0.7,
            label=f'{name} Score')

    # Reference line at the 0.9 quantile of this detector's scores.
    threshold_value = np.quantile(scores, 0.9)
    ax.axhline(
        threshold_value,
        color='r',
        linestyle='--',
        linewidth=2,
        label=f'Threshold ({threshold_value:.2f})',
    )

    ax.scatter(
        y.index[anomaly_mask],
        scores[anomaly_mask],
        color='red',
        s=50,
        marker='x',
        linewidths=1.5,
        zorder=5,
    )

    ax.set_ylabel('Score', fontsize=12)
    ax.set_title(f'{name} Anomaly Scores', fontsize=12, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

axes[-1].set_xlabel('Date', fontsize=12)
plt.tight_layout()
plt.show()


## Summary

In this notebook, we've explored:
1. **ChangePointDetector** with different window sizes
2. How window size affects detection sensitivity
3. Detection of level shifts and regime changes

Key takeaways:
- Smaller windows are more sensitive to local changes
- Larger windows provide smoother detection but may miss quick changes
- Change point detection is excellent for identifying structural breaks and regime changes
