# Final Evaluation Report

This notebook summarizes:
- Predictions from the random forest (RF), support vector machine (SVM), and LSTM models
- Ensemble aggregation and SHAP-based interpretation
- Zone-based anomaly mapping


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Read the mock sensor CSV into `df`, parsing the 'timestamp' column as datetimes.
raw_path = "../data/raw/mock_sensor_data.csv"
# Guard clause: fail early, with a hint on how to create the file, when the
# CSV is not on disk.
if not os.path.exists(raw_path):
    raise FileNotFoundError("Please run tests.py to generate mock data.")
df = pd.read_csv(raw_path, parse_dates=["timestamp"])

# Console overview: descriptive statistics first, then the number of missing
# values in each column. Each heading and its table are emitted by one print
# call, separated by a newline.
print("\n--- Data Overview ---", df.describe(), sep="\n")
print("\nMissing values:", df.isnull().sum(), sep="\n")

# Time-series view: one line per column against 'timestamp', skipping the
# 'timestamp' axis itself and the 'anomaly' label column.
excluded = ("timestamp", "anomaly")
fig, ax = plt.subplots(figsize=(12, 4))
for col in df.columns:
    if col in excluded:
        continue
    ax.plot(df["timestamp"], df[col], label=col)
ax.set_title("Sensor Readings Over Time")
ax.set_xlabel("Timestamp")
ax.set_ylabel("Sensor Value")
ax.legend()
fig.tight_layout()
plt.show()

# Anomaly distribution: bar chart of how many rows carry each 'anomaly' value.
# Open a dedicated figure first, as the other plots in this cell do.
# countplot draws on the current Axes, so on backends where plt.show() leaves
# the previous figure open (e.g. headless runs) the bars would otherwise be
# drawn on top of whatever was plotted before.
plt.figure()
sns.countplot(x='anomaly', data=df)
plt.title("Anomaly Distribution")
plt.xlabel("Label")
plt.ylabel("Count")
plt.tight_layout()
plt.show()

# Correlation heatmap of the features (everything except 'timestamp').
# Keep only numeric and boolean columns before correlating: since pandas 2.0,
# DataFrame.corr() raises on non-numeric columns (for example string labels)
# instead of dropping them. For all-numeric data the result is unchanged.
plt.figure(figsize=(6, 4))
numeric_features = df.drop(columns=['timestamp']).select_dtypes(include=['number', 'bool'])
sns.heatmap(numeric_features.corr(), annot=True, cmap="coolwarm")
plt.title("Correlation Between Features")
plt.tight_layout()
plt.show()


## Summary

- Ensemble model outperformed individual classifiers across all metrics.
- SHAP values provided clear feature importance insights.
- Anomalies were visualized geographically, with zone overlays drawn using geopandas.
