# Anomaly Detection Example

This notebook demonstrates how to use Time Series RAG for anomaly detection in sensor data.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from timeseries_rag.models import TimeSeriesEmbedder
from timeseries_rag.rag import TimeSeriesRAG, TimeSeriesDocument
from timeseries_rag.analytics import TimeSeriesAnalytics

## Load Sensor Data

In [None]:
# Read one CSV export per sensor into its own data frame
temperature_df = pd.read_csv('../../../data/sensor/temperature.csv')
vibration_df = pd.read_csv('../../../data/sensor/vibration.csv')
pressure_df = pd.read_csv('../../../data/sensor/pressure.csv')

# Replace each frame's raw 'timestamp' column with parsed pandas datetimes
for sensor_frame in (temperature_df, vibration_df, pressure_df):
    sensor_frame['timestamp'] = pd.to_datetime(sensor_frame['timestamp'])

## Detect Anomalies

In [None]:
# Build one analytics object per sensor from that sensor's raw readings
temp_analytics = TimeSeriesAnalytics(temperature_df['value'].values)
vib_analytics = TimeSeriesAnalytics(vibration_df['value'].values)
press_analytics = TimeSeriesAnalytics(pressure_df['value'].values)

# Run detection with the same window size on every sensor
temp_anomalies = temp_analytics.detect_anomalies(window_size=24)
vib_anomalies = vib_analytics.detect_anomalies(window_size=24)
press_anomalies = press_analytics.detect_anomalies(window_size=24)

# Draw three stacked panels, one per sensor: the full series as a line,
# with any detected anomalies overlaid as red markers.
plt.figure(figsize=(15, 10))

panels = [
    ('Temperature', 'Temperature Sensor', temperature_df, temp_anomalies),
    ('Vibration', 'Vibration Sensor', vibration_df, vib_anomalies),
    ('Pressure', 'Pressure Sensor', pressure_df, press_anomalies),
]
for row, (series_label, panel_title, frame, found) in enumerate(panels, start=1):
    plt.subplot(3, 1, row)
    plt.plot(frame['timestamp'], frame['value'], label=series_label)
    if found:
        # The first element of every anomaly entry is used as a row position
        anomaly_idx = [entry[0] for entry in found]
        plt.scatter(frame['timestamp'].iloc[anomaly_idx],
                    frame['value'].iloc[anomaly_idx],
                    color='red', label='Anomalies')
    plt.title(panel_title)
    plt.legend()

plt.tight_layout()
plt.show()

## Analyze Anomaly Patterns

In [None]:
# Initialize RAG system
# Embedder with default settings; its embed() method is called further down
# on each anomaly window and on the query window.
embedder = TimeSeriesEmbedder()
# Store with default settings; anomaly documents are added to it and it is
# searched with the query embedding further down.
rag = TimeSeriesRAG()

# Function to extract windows around anomalies
def extract_anomaly_windows(data, anomalies, window_size=12):
    """Slice a window of ``data`` around each anomaly position.

    Each window starts ``window_size // 2`` samples before the anomaly and
    spans ``window_size`` samples in total. Windows that would run past
    either end of ``data`` are clipped to the available range, so windows
    near the edges can be shorter than ``window_size``.

    Args:
        data: Sliceable sequence of readings (anything supporting ``len``
            and ``data[start:end]``).
        anomalies: Iterable of anomaly entries. Only the first element of
            each entry is read, and it is used as the anomaly's position in
            ``data``; any further elements are ignored.
        window_size: Number of samples per window.

    Returns:
        A list with one slice of ``data`` per anomaly, in input order.
    """
    half = window_size // 2
    total = len(data)
    windows = []
    for anomaly in anomalies:
        # Read the position by index rather than by 2-tuple unpacking so
        # entries carrying extra fields are accepted too.
        idx = anomaly[0]
        first = idx - half
        # Derive the end from the start so that odd sizes still yield
        # exactly ``window_size`` samples; for even sizes this is the same
        # as ``idx + window_size // 2``.
        start = max(0, first)
        end = min(total, first + window_size)
        windows.append(data[start:end])
    return windows

# Index every anomaly window in the RAG store, one sensor at a time.
# Each document id combines the sensor name with the window's position in
# that sensor's anomaly list.
sensor_runs = {
    'temperature': (temp_anomalies, temperature_df['value'].values),
    'vibration': (vib_anomalies, vibration_df['value'].values),
    'pressure': (press_anomalies, pressure_df['value'].values),
}
for sensor_name, (found, readings) in sensor_runs.items():
    segments = extract_anomaly_windows(readings, found)
    for position, segment in enumerate(segments):
        rag.add_document(TimeSeriesDocument(
            id=f'{sensor_name}_anomaly_{position}',
            data=segment,
            metadata={'sensor': sensor_name, 'type': 'anomaly'},
            embedding=embedder.embed(segment),
        ))

# Look up the stored patterns closest to the first temperature anomaly.
# NOTE(review): this same window was indexed above as 'temperature_anomaly_0',
# so it presumably shows up among its own search results - confirm.
if temp_anomalies:
    temperature_values = temperature_df['value'].values
    first_anomaly = temp_anomalies[0]
    query_window = extract_anomaly_windows(temperature_values, [first_anomaly])[0]
    query_embedding = embedder.embed(query_window)
    results = rag.search(query_embedding, k=3)

    plt.figure(figsize=(12, 4))

    # Left panel: the window used as the query
    plt.subplot(1, 2, 1)
    plt.plot(query_window)
    plt.title('Query Anomaly Pattern')

    # Right panel: every returned window, labelled with its source sensor
    # and its reported distance
    plt.subplot(1, 2, 2)
    for hit in results:
        source_sensor = hit['metadata']['sensor']
        hit_label = f'{source_sensor} (dist: {hit["distance"]:.2f})'
        plt.plot(hit['data'], label=hit_label)
    plt.title('Similar Anomaly Patterns')
    plt.legend()

    plt.tight_layout()
    plt.show()