In [2]:
import os
import json
import yaml
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# ---------------------------------------------------------------------
# 🧭 Paths
# ---------------------------------------------------------------------
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Pathogen Mutation Predictor"
PRED_PATH = os.path.join(BASE_DIR, "resistance_prediction.json")
META_PATH = os.path.join(BASE_DIR, "biomind_metadata.yaml")
OUT_DIR = os.path.join(BASE_DIR, "outputs")
os.makedirs(OUT_DIR, exist_ok=True)

# ---------------------------------------------------------------------
# 📥 Load Predictions
# ---------------------------------------------------------------------
print("[INFO] Loading prediction data...")
with open(PRED_PATH, "r") as f:
    pred_data = json.load(f)
pred_df = pd.DataFrame(pred_data)
print(f"[INFO] Loaded predictions: {pred_df.shape}")

# ---------------------------------------------------------------------
# 🧾 Safe YAML Loader
# ---------------------------------------------------------------------
def safe_yaml_load(path):
    try:
        with open(path, "r") as f:
            return yaml.safe_load(f)
    except yaml.constructor.ConstructorError:
        print("[WARN] Non-standard YAML tags found (e.g. !!python/tuple). Cleaning and retrying...")
        with open(path, "r") as f:
            raw = f.read()
        # remove !!python/tuple [ ... ] patterns
        raw = re.sub(r"!!python/tuple", "", raw)
        return yaml.safe_load(raw)
    except Exception as e:
        print(f"[WARN] Could not parse YAML ({e}); using defaults.")
        return {"models": {"RandomForest": 0, "DeepLearning": 0}}

if os.path.exists(META_PATH):
    meta = safe_yaml_load(META_PATH)
else:
    meta = {"models": {"RandomForest": 0, "DeepLearning": 0}}

# ---------------------------------------------------------------------
# 📊 Prepare Data
# ---------------------------------------------------------------------
true = pred_df["TrueLabel"].astype(int)
rf_pred = pred_df["Pred_RF"].astype(int)
dl_pred = pred_df["Pred_DL"].astype(int)

# ---------------------------------------------------------------------
# 1️⃣ Accuracy Graph
# ---------------------------------------------------------------------
acc_rf = meta.get("models", {}).get("RandomForest", np.mean(true == rf_pred))
acc_dl = meta.get("models", {}).get("DeepLearning", np.mean(true == dl_pred))

plt.figure(figsize=(6,4))
plt.bar(["RandomForest","DeepLearning"], [acc_rf, acc_dl], color=["skyblue","orange"])
plt.ylabel("Accuracy")
plt.title("Model Accuracy Comparison")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "accuracy_graph.png"))
plt.close()

# ---------------------------------------------------------------------
# 2️⃣ Confusion Heatmap
# ---------------------------------------------------------------------
cm = confusion_matrix(true, dl_pred)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix (DL)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "confusion_heatmap.png"))
plt.close()

# ---------------------------------------------------------------------
# 3️⃣ Result Distribution
# ---------------------------------------------------------------------
plt.figure(figsize=(6,4))
sns.countplot(x=true, palette="Set2")
plt.title("Resistance Distribution")
plt.xlabel("Label (0=Susceptible, 1=Resistant)")
plt.ylabel("Count")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "result_distribution.png"))
plt.close()

# ---------------------------------------------------------------------
# 4️⃣ Prediction Trend Graph
# ---------------------------------------------------------------------
plt.figure(figsize=(10,4))
plt.plot(range(len(dl_pred)), dl_pred, 'r-', lw=1.5, label="DeepLearning")
plt.plot(range(len(rf_pred)), rf_pred, 'b--', lw=1.5, label="RandomForest")
plt.xlabel("Sample Index")
plt.ylabel("Predicted Label")
plt.title("Predictions Across Samples")
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "prediction_graph.png"))
plt.close()

# ---------------------------------------------------------------------
# 5️⃣ Comparison Graph
# ---------------------------------------------------------------------
plt.figure(figsize=(6,6))
sns.scatterplot(x=rf_pred, y=dl_pred, hue=true, palette="coolwarm", s=60)
plt.title("RF vs DL Prediction Comparison")
plt.xlabel("RandomForest")
plt.ylabel("DeepLearning")
plt.tight_layout()
plt.savefig(os.path.join(OUT_DIR, "comparison_graph.png"))
plt.close()

print("\n✅ All visualization graphs saved to:", OUT_DIR)
for f in ["accuracy_graph.png", "confusion_heatmap.png", "result_distribution.png", "prediction_graph.png", "comparison_graph.png"]:
    print(" ├──", f)


[INFO] Loading prediction data...
[INFO] Loaded predictions: (5, 3)
[WARN] Non-standard YAML tags found (e.g. !!python/tuple). Cleaning and retrying...



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=true, palette="Set2")



✅ All visualization graphs saved to: C:\Users\NXTWAVE\Downloads\Pathogen Mutation Predictor\outputs
 ├── accuracy_graph.png
 ├── confusion_heatmap.png
 ├── result_distribution.png
 ├── prediction_graph.png
 ├── comparison_graph.png
