# 🛠️ Anomaly Detection Visualization

This notebook loads anomaly detection results and visualizes anomalies detected by:
- Linear Regression (LR)
- GPT-based Isolation Forest

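It shades the **training period** (from the start of the training split through the end of the validation split) and the **test period**, and marks the **failure alarm time** with a vertical line.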



In [1]:
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import os


In [2]:
# --- CONFIGURATION ---
# Tunable inputs for the notebook; the cells below only read these names.

# Key of the dataset to visualize inside the loaded test-case collection.
DATASET_NAME = 'DS10'
# Simulation run whose test-case file is loaded.
SIMULATION_ID = 3

# Pickle file with the test cases of the selected simulation.
DATA_FILE = f'./datasets/sim{SIMULATION_ID}_test_cases.pkl'
# CSV with the anomaly detection results; its name uses the lower-cased dataset name.
RESULTS_FILE = f'./results/results_{DATASET_NAME.lower()}.csv'


In [3]:
# --- LOAD DATA ---

# Fail fast with a clear message when an input is missing. isfile() is used
# instead of exists() so that a directory sitting at the expected path is also
# rejected here, rather than failing later inside the loaders with a less
# helpful error.
if not os.path.isfile(DATA_FILE):
    raise FileNotFoundError(f"Dataset file not found: {DATA_FILE}")
if not os.path.isfile(RESULTS_FILE):
    raise FileNotFoundError(f"Results file not found: {RESULTS_FILE}")

# Load anomaly detection results: the first CSV column becomes the index and
# is parsed as dates.
results_df = pd.read_csv(RESULTS_FILE, index_col=0, parse_dates=True)

# Load the pickled test cases (indexed by dataset name in later cells).
# SECURITY: pickle.load can execute arbitrary code while unpickling; only open
# files that come from a trusted source.
with open(DATA_FILE, 'rb') as f:
    faulty_datasets = pickle.load(f)


In [4]:
# Print dataset info

# Training split of the SCADA data for the configured dataset.
train_df = faulty_datasets[DATASET_NAME]['scada']['train']

# Only the first five columns are previewed to keep the output compact.
leading_columns = list(train_df.columns[:5])
preview_rows = train_df.head().iloc[:, :5]

print("\n--- Train Set Overview ---")
print(f"Shape: {train_df.shape[0]} rows × {train_df.shape[1]} columns")
print("Columns preview:", leading_columns, "...\n")
display(preview_rows)



--- Train Set Overview ---
Shape: 10598 rows × 302 columns
Columns preview: ['Wind speed (m/s)', 'Wind speed, Standard deviation (m/s)', 'Wind speed, Minimum (m/s)', 'Wind speed, Maximum (m/s)', 'Long Term Wind (m/s)'] ...



Unnamed: 0,Wind speed (m/s),"Wind speed, Standard deviation (m/s)","Wind speed, Minimum (m/s)","Wind speed, Maximum (m/s)",Long Term Wind (m/s)
2021-03-29 13:50:00,10.315764,1.405932,7.259485,12.929957,6.43
2021-03-29 14:00:00,9.794213,1.510039,7.302179,12.554624,6.43
2021-03-29 14:10:00,9.867609,1.573549,6.818069,11.95654,6.43
2021-03-29 14:20:00,9.882552,1.158182,7.220875,11.653229,6.43
2021-03-29 14:30:00,9.716937,1.397137,7.647812,12.240546,6.43


In [None]:
# Extract the train/test period boundaries from the index of each SCADA split
selected_case = faulty_datasets[DATASET_NAME]
scada_splits = selected_case['scada']

# The "train" period runs from the first training row to the LAST VALIDATION
# row, so the validation split is included in the shaded train span.
train_start, train_end = scada_splits['train'].index[0], scada_splits['valid'].index[-1]

test_index = scada_splits['test'].index
test_start, test_end = test_index[0], test_index[-1]

# NOTE(review): presumably 'failure_alarm' is a pandas row/Series whose .name
# is the alarm timestamp — confirm against the dataset builder.
failure_time = selected_case['failure_alarm'].name


In [None]:
# --- EXTRACT SERIES FROM THE RESULTS FRAME ---
# Both series span the full index of results_df; no filtering by the test
# period is applied in this cell.
LR_residual_series, gpt_score_series = results_df['LR_residuals'], results_df['gpt_scores']

# Rows flagged as anomalous (flag column equal to 1) by each detector
is_lr_anomaly = results_df['LR'].eq(1)
is_gpt_anomaly = results_df['CHATGPT'].eq(1)
anomaly_points_LR = results_df.loc[is_lr_anomaly]
anomaly_points_GPT = results_df.loc[is_gpt_anomaly]


In [None]:
# --- PLOT ANOMALIES ---

def _plot_anomaly_panel(ax, series, anomaly_index, *, series_label, series_color,
                        anomaly_label, anomaly_color, ylabel, title):
    """Draw one detector panel on ``ax``.

    Plots ``series`` as a line, marks the points at ``anomaly_index`` with
    'x' markers, and overlays the failure alarm line plus the shaded
    train/test spans. The period markers (``failure_time``, ``train_start``,
    ``train_end``, ``test_start``, ``test_end``) are read from the notebook
    namespace.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    series : pandas Series to plot against its index.
    anomaly_index : index labels of the anomalous points; must be present in
        ``series`` because they are looked up with ``.loc``.
    series_label, series_color : legend label and color of the line.
    anomaly_label, anomaly_color : legend label and color of the markers.
    ylabel, title : y-axis label and panel title.
    """
    ax.plot(series, label=series_label, color=series_color, alpha=0.6)
    ax.scatter(anomaly_index, series.loc[anomaly_index],
               marker='x', color=anomaly_color, label=anomaly_label, s=10)
    ax.axvline(x=failure_time, color='red', linestyle='--', label='Failure Alarm')
    ax.axvspan(train_start, train_end, color='gray', alpha=0.2, label='Train Set')
    ax.axvspan(test_start, test_end, color='orange', alpha=0.2, label='Test Set')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    # Enable the grid per axes: plt.grid() only affects the current axes, which
    # previously left the top panel without a grid.
    ax.grid(True)
    ax.legend(loc='upper left')


fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8), sharex=True, gridspec_kw={'height_ratios': [1, 1]})

# --- Top Plot: LR Residuals ---
_plot_anomaly_panel(
    ax1, LR_residual_series, anomaly_points_LR.index,
    series_label='LR Residual Series', series_color='purple',
    anomaly_label='LR Anomaly', anomaly_color='blue',
    ylabel='LR Residuals', title='Anomaly Detection - LR Residuals',
)

# --- Bottom Plot: GPT Isolation Forest ---
_plot_anomaly_panel(
    ax2, gpt_score_series, anomaly_points_GPT.index,
    series_label='GPT Isolation Forest Score', series_color='green',
    anomaly_label='GPT Anomaly', anomaly_color='red',
    ylabel='GPT Score', title='Anomaly Detection - GPT Isolation Forest',
)
# The x axis is shared, so only the bottom panel carries the time label.
ax2.set_xlabel('Time')

plt.tight_layout()
plt.show()
