# 06. Model Explanation

This notebook explains the model using:
- Feature Importance
- SHAP values (if available)
- Sample predictions

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Import project modules
import sys
from pathlib import Path

# Locate the project root: the nearest directory, starting from the current
# working directory, that contains both a `config/` and a `src/` folder.
def find_project_root(start, max_levels=5):
    """Return the project root directory for *start*.

    Args:
        start: Directory where the search begins (normally the notebook's
            working directory).
        max_levels: How many directories to inspect, counting *start* itself
            and then its ancestors in order.

    Returns:
        The first inspected directory containing both ``config/`` and
        ``src/``. If none qualifies, the parent of the nearest path component
        named ``notebooks``; if there is no such component, *start* itself.
    """
    start = Path(start)
    lineage = (start, *start.parents)

    for candidate in lineage[:max_levels]:
        if (candidate / 'config').exists() and (candidate / 'src').exists():
            return candidate

    # Nothing matched. The fallback must apply whether the walk ran out of
    # levels or hit the filesystem root, so it is not tied to how the search
    # ended. Match on an exact path component rather than a substring so that
    # e.g. "my_notebooks_backup" is not mistaken for the notebooks folder.
    for candidate in lineage:
        if candidate.name == 'notebooks':
            return candidate.parent

    # Stay where we started instead of drifting to an unrelated ancestor.
    return start


current_path = Path().resolve()
max_levels = 5
project_root = find_project_root(current_path, max_levels)

# Add src to Python path (only once, so re-running this cell does not keep
# growing sys.path with duplicate entries).
src_path = project_root / 'src'
if src_path.exists() and str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path))

from models.random_forest import load_model, get_feature_importance
from evaluation.visualization import plot_feature_importance

## 1. Load Model and Data

In [None]:
# Load the model stored as 'random_forest.pkl' through the project's
# `load_model` helper (how the file name is resolved is up to that helper).
rf_model = load_model('random_forest.pkl')

# Load the test features and labels from <project_root>/data/processed.
processed_dir = project_root / "data" / "processed"
X_test = pd.read_csv(processed_dir / "X_test.csv")
# Squeeze only the column axis: a single-column file becomes a Series even
# when it holds just one row (a bare squeeze() would collapse that case to a
# scalar).
y_test = pd.read_csv(processed_dir / "y_test.csv").squeeze("columns")

print("Model and data loaded!")

## 2. Feature Importance

In [None]:
# Ask the project helper for a {feature name: importance} mapping, using the
# column names of the test features.
feature_names = X_test.columns.tolist()
importance_dict = get_feature_importance(rf_model, feature_names)

# Re-key the mapping from most to least important (ties keep their original
# relative order because the sort is stable).
importance_sorted = {
    name: importance_dict[name]
    for name in sorted(importance_dict, key=importance_dict.get, reverse=True)
}

# Report the ten highest-ranked features, numbered from 1.
print("Top 10 Most Important Features:")
for i, (feat, imp) in zip(range(1, 11), importance_sorted.items()):
    print(f"{i}. {feat}: {imp:.4f}")

# Plot: importances are passed in the same order as `feature_names`.
importances = [importance_dict[column] for column in feature_names]
plot_feature_importance(
    importances,
    feature_names,
    top_n=20,
    title='Random Forest - Feature Importance',
)

## 3. SHAP Values (Optional)

In [None]:
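# Optional: uncomment the lines below to compute SHAP values.
# Requires the `shap` package to be installed.
# import shap
# 
# # Create SHAP explainer
# explainer = shap.TreeExplainer(rf_model)
# shap_values = explainer.shap_values(X_test[:100])  # Use the first 100 rows for speed
# 
# # Summary plot
# # NOTE(review): `shap_values[1]` assumes a list with one array per class
# # (index 1 = second class). Some SHAP releases appear to return a single
# # 3-D array instead, in which case `shap_values[..., 1]` would be needed;
# # confirm against the installed version.
# shap.summary_plot(shap_values[1], X_test[:100], show=False)
# plt.title('SHAP Summary Plot')
# plt.show()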