# 🧬 Evaluate DNA Classifier & Export Metrics

### 📊 Evaluation of trained DNA classifier

### 📦 Install & Import 🔧

In [None]:
!pip install scikit-learn matplotlib seaborn

import pandas as pd
import numpy as np
import joblib
import os
import json
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix

### 📥 Load Model & Data

In [None]:
# --- Trained artifacts ---------------------------------------------------
# Both files are joblib pickles: the classifier used for prediction below
# and the encoder (`le`) that maps raw labels to the encoded targets.
model = joblib.load("src/ml/xgboost_dna_classifier.pkl")
le = joblib.load("src/ml/label_encoder.pkl")

# --- Feature table ---------------------------------------------------------
df = pd.read_csv("data/processed/fasta_kmer_6mer.csv")
# 🔁 Placeholder 3-class labels assigned round-robin by row position
# (0, 1, 2, 0, 1, 2, ...); per the original note this mirrors the training
# notebook and is for testing only.
df["label"] = np.arange(len(df), dtype=np.int64) % 3

# --- Features / targets ------------------------------------------------------
# Targets are passed through the encoder; features are every remaining column.
y_true = le.transform(df["label"])
X = df.drop(columns="label")

### 🎯 Predict & Report

In [None]:
# Score every row of the feature matrix with the loaded model.
y_pred = model.predict(X)

# Build the report as a nested dict (output_dict=True) so it can be both
# tabulated here and serialized later; rows are named after the encoder's
# classes.
report = classification_report(
    y_true,
    y_pred,
    target_names=le.classes_,
    output_dict=True,
)

# Transpose so each report entry becomes a row and each metric a column.
df_report = pd.DataFrame(report).T

print("📈 Classification Report:")
display(df_report)  # `display` is the notebook's rich-output helper

### 💾 Save Report to CSV & JSON

In [None]:
# Every artifact in this cell is written under data/outputs, so that is the
# directory that must exist. (Creating "outputs" instead left the real target
# missing and made the writes below fail on a fresh checkout.)
output_dir = "data/outputs"
os.makedirs(output_dir, exist_ok=True)

# Tabular copy of the report.
df_report.to_csv(f"{output_dir}/classification_report.csv")

# Normalize the top-level keys to str before serializing: json.dump only
# accepts basic key types, and the per-class keys come from the encoder's
# classes (presumably non-string, e.g. NumPy integers — hence this step).
cleaned_report = {str(k): v for k, v in report.items()}

with open(f"{output_dir}/classification_report.json", "w") as f:
    json.dump(cleaned_report, f, indent=4)

print("✅ Report saved to data/outputs/classification_report.{csv,json}")

### 📊 Confusion Matrix

In [None]:
# The figure is saved under data/outputs; make sure that directory exists so
# this cell does not depend on another cell having created it.
os.makedirs("data/outputs", exist_ok=True)

# y_true is produced by le.transform, i.e. encoded as 0..n_classes-1. Pinning
# `labels` to that full range keeps the matrix n_classes x n_classes even if
# some class never appears in y_true or y_pred, so it always lines up with
# the le.classes_ tick labels used below.
class_ids = np.arange(len(le.classes_))
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",  # counts are integers
    cmap="Blues",
    xticklabels=le.classes_,
    yticklabels=le.classes_,
)
plt.title("🧬 Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
# Save before show(): some backends clear the figure once it is displayed.
plt.savefig("data/outputs/confusion_matrix.png")
plt.show()

print("✅ Confusion matrix saved to data/outputs/confusion_matrix.png")