In [None]:
# ======================
# Isolation Forest
# ======================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import (classification_report, confusion_matrix,
                             accuracy_score, precision_score,
                             recall_score, f1_score,
                             roc_curve, auc)

# ----------------------
# Load Dataset
# ----------------------
data = pd.read_csv('/content/14bus_fdia_combined_dataset_overall.csv')

# Feature selection: 'attack' is the target; it and the five columns listed
# below are removed from the model inputs, and every remaining column is
# used as a feature.
# NOTE(review): the removed columns look like scenario descriptors rather
# than measurements -- confirm against the dataset description.
y = data['attack']
X = data.drop(columns=['attack', 'bus', 'load_percentage',
                       'varied_load', 'voltage_increase',
                       'angle_increase'])

# ----------------------
# Train-Test Split
# ----------------------
# Hold out 30% of the rows for evaluation. Stratifying on the label keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)

# ----------------------
# Standardization
# ----------------------
# The scaler's statistics are learned from the training partition only and
# then applied to both partitions, so nothing from the test set influences
# the scaling.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# ----------------------
# Train Isolation Forest
# ----------------------
print("\nTraining Isolation Forest...")

# The forest is fitted without labels on all scaled training rows (y_train is
# not used here). contamination='auto' leaves the anomaly threshold to
# scikit-learn's built-in rule instead of a fixed expected outlier fraction.
iso_forest = IsolationForest(
    random_state=42,
    contamination='auto',
    n_estimators=200,
).fit(X_train_scaled)

# ----------------------
# Predictions
# ----------------------
# predict() returns 1 for inliers and -1 for anomalies. Remap in one step so
# that 0 = normal and 1 = attack: anything that is not an inlier becomes 1.
iso_pred = (iso_forest.predict(X_test_scaled) != 1).astype(int)

# ----------------------
# Classification Metrics
# ----------------------
# zero_division=0 is passed explicitly to every precision/recall/F1 based
# call. If the forest flags no test sample as an attack (or a class is absent
# from the test labels) those metrics are undefined; scikit-learn's default
# then reports 0 and emits an UndefinedMetricWarning for each call. Passing 0
# yields the same values without the warning noise.
print("\nIsolation Forest Results:")
print(classification_report(y_test, iso_pred, zero_division=0))

print("Confusion Matrix:")
print(confusion_matrix(y_test, iso_pred))

# Scalar summary metrics; precision/recall/F1 use scikit-learn's default
# binary averaging, i.e. they describe the positive class (label 1).
accuracy = accuracy_score(y_test, iso_pred)
precision = precision_score(y_test, iso_pred, zero_division=0)
recall = recall_score(y_test, iso_pred, zero_division=0)
f1 = f1_score(y_test, iso_pred, zero_division=0)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-Score:  {f1:.4f}")

# ----------------------
# ROC-AUC Calculation
# ----------------------
# decision_function() is lower for more abnormal samples, so the sign is
# flipped to obtain a score where larger means more anomalous -- the
# orientation roc_curve expects for the positive (attack) class.
iso_scores = np.negative(iso_forest.decision_function(X_test_scaled))

# Area under the ROC curve, integrated from the (fpr, tpr) points.
fpr, tpr, thresholds = roc_curve(y_true=y_test, y_score=iso_scores)
roc_auc = auc(x=fpr, y=tpr)

print(f"ROC-AUC Score: {roc_auc:.4f}")

# ----------------------
# Plot ROC Curve
# ----------------------
plt.figure()

# Model curve, labelled with its AUC for the legend.
plt.plot(fpr, tpr, label=f'Isolation Forest (AUC = {roc_auc:.4f})')
# Dashed diagonal: the reference line of a chance-level classifier.
plt.plot([0, 1], [0, 1], 'k--')

plt.title("ROC Curve - Isolation Forest")
plt.ylabel("True Positive Rate")
plt.xlabel("False Positive Rate")
plt.legend(loc="lower right")
plt.show()

# ----------------------
# Store Results in DataFrame
# ----------------------
# One-row summary table: each column holds a single value for this model.
results = pd.DataFrame({
    'Model': ['Isolation Forest'],
    'Accuracy': [accuracy],
    'Precision': [precision],
    'Recall': [recall],
    'F1-Score': [f1],
    'ROC AUC': [roc_auc],
})

print("\nFinal Results Table:")
print(results)
