# 📈 ارزیابی جامع مدل‌های ماشین لرنینگ - خانه‌های بوستون

این نوت‌بوک شامل ارزیابی جامع و تحلیل عمیق عملکرد مدل‌های آموزش دیده است.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from pathlib import Path
import sys

# Put the sibling ``src`` directory (one level above the current working
# directory) on the import path so the project modules imported by the
# following cells can be resolved.
src_dir = Path.cwd().parent / 'src'
sys.path.append(str(src_dir))

# Silence all warnings for the remainder of the session.
warnings.filterwarnings('ignore')

# Plot appearance: apply the matplotlib style sheet first, then the seaborn
# colour palette (the order is kept because both touch the colour cycle).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

print("✅ Libraries imported successfully!")

## 📥 بارگذاری مدل‌ها و داده‌ها

In [None]:
# Project-local modules; they resolve through the ``src`` directory that the
# setup cell appended to sys.path.
from data_loader import BostonHousingDataLoader
from preprocessing import DataPreprocessor
from models import ModelTrainer
from evaluation import ModelEvaluator

# --- Data -------------------------------------------------------------------
loader = BostonHousingDataLoader()
features, target, feature_names = loader.load_data()

# --- Preprocessing ----------------------------------------------------------
# Outliers are handled on the full feature set (strategy 'clip') before the
# train/test split; scaling is then applied to the two splits.
preprocessor = DataPreprocessor(scaler_type='standard')
features_clean = preprocessor.handle_outliers(features, strategy='clip')
X_train, X_test, y_train, y_test = preprocessor.split_data(features_clean, target)
X_train_scaled, X_test_scaled = preprocessor.scale_features(X_train, X_test)

# --- Training ---------------------------------------------------------------
# ``results`` holds one entry per trained model and is consumed by every
# evaluation cell below.
trainer = ModelTrainer()
results = trainer.train_models(X_train_scaled, y_train, X_test_scaled, y_test)

print("📊 Data and models loaded successfully!")
print(f"Models trained: {len(results)}")

# Report the class of the selected best model, or the literal 'None'.
if trainer.best_model:
    best_model_label = trainer.best_model.__class__.__name__
else:
    best_model_label = 'None'
print(f"Best model: {best_model_label}")

## 📊 ارزیابی جامع مدل‌ها

In [None]:
# One evaluator instance is shared by all evaluation cells in this notebook.
evaluator = ModelEvaluator()

print("📊 Evaluating all models...")
print("=" * 50)

# For every trained model, pass the predictions stored under 'y_pred' to the
# evaluator together with the held-out targets, then print that model's metrics.
for model_name in results:
    y_pred = results[model_name]['y_pred']
    metrics = evaluator.calculate_metrics(y_test, y_pred, model_name)
    evaluator.print_metrics(model_name)

print(f"✅ Evaluation completed for {len(results)} models!")

## 📈 نمودارهای پیش‌بینی vs واقعی

In [None]:
# Plot predictions vs actual for the best model.
#
# ``best_model_name`` is bound unconditionally: later cells read it, and
# leaving it undefined when the trainer selected no best model would make
# them fail with NameError.
best_model_name = None

# ``is not None`` instead of truthiness: estimator objects may define
# ``__len__``, so their truth value is not a reliable "is set" test.
if trainer.best_model is not None:
    # The trainer exposes the best model as an object; recover the name it is
    # registered under by looking for that very object in the model registry.
    best_model_name = next(
        (name for name, model in trainer.models.items()
         if model is trainer.best_model),
        None,
    )

    if best_model_name:
        print(f"📈 Creating prediction plots for best model: {best_model_name}")
        y_pred_best = results[best_model_name]['y_pred']
        evaluator.plot_predictions_vs_actual(y_test, y_pred_best, best_model_name)
    else:
        print("❌ Could not identify best model name!")
else:
    print("❌ No best model available!")

## 🔍 تحلیل Residuals

In [None]:
# Residual diagnostics for the best model. The check short-circuits on
# ``trainer.best_model`` first, so ``best_model_name`` is only read when a
# best model exists.
if not (trainer.best_model and best_model_name):
    print("❌ No best model available for residual analysis!")
else:
    print(f"🔍 Performing residual analysis for {best_model_name}...")
    y_pred_best = results[best_model_name]['y_pred']
    evaluator.plot_residual_analysis(y_test, y_pred_best, best_model_name)

## 📊 نمودارهای Learning Curves

In [None]:
# Learning curves for the best model, computed from the scaled training data.
# Same guard as the residual cell: ``best_model_name`` is only evaluated when
# ``trainer.best_model`` is truthy.
if not (trainer.best_model and best_model_name):
    print("❌ No best model available for learning curves!")
else:
    print(f"📊 Plotting learning curves for {best_model_name}...")
    evaluator.plot_learning_curves(trainer.best_model, X_train_scaled, y_train)

## ⚙️ Validation Curves برای هیپرپارامترها

In [None]:
# Validation curves for two Random Forest hyperparameters, using the scaled
# training data. Skipped with a notice when no model is registered under the
# name 'Random Forest'.
if 'Random Forest' not in trainer.models:
    print("⚠️ Random Forest not available for validation curves!")
else:
    print("⚙️ Plotting validation curves for Random Forest...")

    # (hyperparameter name, candidate values) — one curve is plotted per entry,
    # in this order.
    # NOTE(review): the max_depth grid ends with None; confirm that
    # plot_validation_curves can place a non-numeric value on its x-axis.
    rf_param_grid = [
        ('n_estimators', [10, 25, 50, 100, 200]),
        ('max_depth', [3, 5, 10, 15, 20, None]),
    ]

    for param_name, param_range in rf_param_grid:
        evaluator.plot_validation_curves(
            trainer.models['Random Forest'],
            X_train_scaled, y_train,
            param_name, param_range,
        )

## 📊 مقایسه جامع مدل‌ها

In [None]:
# Pass the complete ``results`` mapping (one entry per trained model) to
# evaluator.compare_models.
# NOTE(review): presumably this renders comparison plots/tables; its output
# is not visible from this notebook — confirm in the evaluation module.
print("📊 Creating comprehensive model comparison...")
evaluator.compare_models(results)

## 🎯 تحلیل عملکرد بر اساس معیارهای مختلف

In [None]:
# Build a per-model metric table from ``results``.
# Maps the column label used in the table to the key under which each
# results entry stores the value.
metric_keys = {
    'R²': 'r2',
    'RMSE': 'rmse',
    'MAE': 'mae',
    'CV_R²_Mean': 'cv_mean',
    'CV_R²_Std': 'cv_std',
}

performance_analysis = {
    name: {label: entry[key] for label, key in metric_keys.items()}
    for name, entry in results.items()
}

# Transposed so that models are the rows and metrics are the columns.
performance_df = pd.DataFrame(performance_analysis).T

print("🎯 Detailed Performance Analysis:")
print("=" * 50)
display(performance_df.round(4))


def _print_ranking(heading, ranked):
    """Print *heading*, then a 1-based numbered line per (model, score) pair."""
    print(heading)
    for position, (name, value) in enumerate(ranked.items(), start=1):
        print(f"  {position}. {name}: {value:.4f}")


print("\n🏆 Performance Rankings:")
print("-" * 30)

# Test-set R²: higher is better, so sort descending.
r2_ranking = performance_df['R²'].sort_values(ascending=False)
_print_ranking("R² Score Ranking:", r2_ranking)

# RMSE: lower is better, so the default ascending sort is used.
rmse_ranking = performance_df['RMSE'].sort_values()
_print_ranking("\nRMSE Ranking (Lower is better):", rmse_ranking)

# Mean cross-validation R²: higher is better.
cv_r2_ranking = performance_df['CV_R²_Mean'].sort_values(ascending=False)
_print_ranking("\nCross-Validation R² Ranking:", cv_r2_ranking)

## 📈 نمودارهای عملکرد

In [None]:
# 2x2 grid of bar charts, one metric per panel, one bar per model.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
axes = axes.flatten()

models = list(performance_df.index)

# One row per panel, in plotting order:
# (metric column, bar colour, title, y-axis label, fix y-range to [0, 1],
#  column supplying error bars or None)
# NOTE(review): the fixed [0, 1] y-range on the two R² panels puts any
# negative R² bar outside the visible area.
panel_specs = [
    ('R²', 'skyblue', 'R² Scores Comparison', 'R² Score', True, None),
    ('RMSE', 'lightcoral', 'RMSE Scores Comparison', 'RMSE', False, None),
    ('CV_R²_Mean', 'lightgreen', 'Cross-Validation R² Scores', 'CV R² Score',
     True, 'CV_R²_Std'),
    ('MAE', 'orange', 'MAE Scores Comparison', 'MAE', False, None),
]

for ax, spec in zip(axes, panel_specs):
    column, colour, title, y_label, unit_range, error_column = spec

    values = performance_df[column].values
    bar_options = {'color': colour, 'alpha': 0.7}
    if error_column is not None:
        # Only the cross-validation panel carries error bars (the CV std).
        bar_options['yerr'] = performance_df[error_column].values
        bar_options['capsize'] = 5

    bars = ax.bar(models, values, **bar_options)
    ax.set_title(title)
    ax.set_ylabel(y_label)
    if unit_range:
        ax.set_ylim(0, 1)
    ax.tick_params(axis='x', rotation=45)

    # Write each value just above the top of its bar, centred horizontally.
    for bar, value in zip(bars, values):
        x_centre = bar.get_x() + bar.get_width()/2.
        ax.text(x_centre, bar.get_height() + 0.01,
                f'{value:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

## 🔍 تحلیل ویژگی‌های مهم

In [None]:
# Collect feature importances for the tree-based models that were trained.
# ``feature_importance_summary`` maps model name -> {feature: score} and is
# reused by the plotting, final-analysis and saving cells.
tree_models = ['Random Forest', 'XGBoost', 'Gradient Boosting', 'Decision Tree']
feature_importance_summary = {}

for model_name in tree_models:
    if model_name not in trainer.models:
        print(f"⚠️ {model_name} not found in trained models")
        continue

    importance = trainer.get_feature_importance(model_name, feature_names)
    if importance:
        feature_importance_summary[model_name] = importance
        print(f"✅ Feature importance obtained for {model_name}")
    else:
        print(f"❌ Could not get feature importance for {model_name}")


def _top_features(importance, k):
    """Return the *k* highest-scoring (feature, score) pairs, best first.

    Sorting explicitly means the result does not depend on the order in which
    the importance mapping happens to store its entries; the sort is stable,
    so an already-sorted mapping keeps its order.
    """
    ranked = sorted(importance.items(), key=lambda item: item[1], reverse=True)
    return ranked[:k]


# Compare feature importance across models
if feature_importance_summary:
    print(f"\n🔍 Feature Importance Comparison:")
    print("=" * 50)

    # Top 5 features of each model
    for model_name, importance in feature_importance_summary.items():
        print(f"\n{model_name} - Top 5 Features:")
        for i, (feature, score) in enumerate(_top_features(importance, 5), 1):
            print(f"  {i}. {feature}: {score:.4f}")

    # Features that are in the top 5 of *every* model. This is an
    # intersection: a union would list features that are important to only a
    # single model, which is not what "common" reports.
    top5_per_model = [
        {feature for feature, _ in _top_features(importance, 5)}
        for importance in feature_importance_summary.values()
    ]
    common_features = set.intersection(*top5_per_model)

    print(f"\n🔗 Common important features across models: {len(common_features)}")
    for feature in sorted(common_features):
        print(f"  - {feature}")
else:
    print("❌ No feature importance data available!")

## 📊 نمودارهای Feature Importance

In [None]:
# Horizontal bar chart of the ten most important features for each model in
# ``feature_importance_summary``, laid out on a two-column grid.
if feature_importance_summary:
    n_models = len(feature_importance_summary)
    n_cols = 2
    n_rows = (n_models + n_cols - 1) // n_cols  # ceiling division

    # squeeze=False always returns a 2-D array of axes, so flatten() is valid
    # for any grid shape.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 6*n_rows), squeeze=False)
    axes = axes.flatten()

    for ax, (model_name, importance) in zip(axes, feature_importance_summary.items()):
        # Ten highest scores, best first. Sorted explicitly so the chart does
        # not depend on the storage order of the importance mapping.
        top_items = sorted(importance.items(), key=lambda item: item[1],
                           reverse=True)[:10]
        # Deliberately not named ``features``: that name holds the dataset
        # loaded at the start of the notebook and must not be overwritten.
        top_names = [name for name, _ in top_items]
        top_scores = [score for _, score in top_items]

        y_pos = np.arange(len(top_names))
        ax.barh(y_pos, top_scores, color='lightgreen', alpha=0.7)
        ax.set_yticks(y_pos)
        ax.set_yticklabels(top_names)
        ax.set_xlabel('Feature Importance')
        ax.set_title(f'Top 10 Feature Importance - {model_name}')
        ax.invert_yaxis()  # most important feature at the top
        ax.grid(True, alpha=0.3)

        # Value label just to the right of each bar
        for row, score in enumerate(top_scores):
            ax.text(score + 0.001, row, f'{score:.3f}', va='center')

    # Hide the unused trailing subplot when the number of models is odd
    for ax in axes[n_models:]:
        ax.set_visible(False)

    plt.tight_layout()
    plt.show()
else:
    print("❌ No feature importance plots to show!")

## 📋 گزارش ارزیابی جامع

In [None]:
# Ask the evaluator to write its report to ../results/evaluation_report.txt
# (the path is relative to the notebook's working directory).
# NOTE(review): presumably the report is built from the metrics recorded by
# the earlier calculate_metrics calls, and the ../results directory may need
# to exist beforehand unless the evaluator creates it — confirm both in the
# evaluation module.
print("📋 Creating comprehensive evaluation report...")
evaluator.create_evaluation_report('../results/evaluation_report.txt')
print("✅ Evaluation report created successfully!")

## 🎯 تحلیل نهایی و توصیه‌ها

In [None]:
# Final analysis and recommendations
print("🎯 Final Analysis and Recommendations:")
print("=" * 60)

# --- Best model analysis ------------------------------------------------------
# Both names are bound unconditionally: the saving and summary cells read
# them, and leaving them undefined when no best model exists would make
# those cells fail with NameError.
best_model_name = None
best_result = None

# ``is not None`` instead of truthiness: estimator objects may define
# ``__len__``, so their truth value is not a reliable "is set" test.
if trainer.best_model is not None:
    # Recover the registry name of the object the trainer marked as best.
    best_model_name = next(
        (name for name, model in trainer.models.items()
         if model is trainer.best_model),
        None,
    )

    if best_model_name:
        best_result = results[best_model_name]
        print(f"🏆 Best Model: {best_model_name}")
        print(f"   R² Score: {best_result['r2']:.4f}")
        print(f"   RMSE: {best_result['rmse']:.4f}")
        print(f"   MAE: {best_result['mae']:.4f}")
        print(f"   CV R²: {best_result['cv_mean']:.4f} ± {best_result['cv_std']:.4f}")

        # Map the test-set R² to a coarse quality band. The value is kept in
        # ``best_r2`` rather than ``r2_score`` to avoid shadowing the
        # scikit-learn metric function of that name.
        best_r2 = best_result['r2']
        if best_r2 >= 0.8:
            performance = "Excellent 🎯"
            recommendation = "Model is ready for production use"
        elif best_r2 >= 0.6:
            performance = "Good 👍"
            recommendation = "Model performs well, consider feature engineering"
        elif best_r2 >= 0.4:
            performance = "Fair ⚠️"
            recommendation = "Model needs improvement, try different algorithms"
        else:
            performance = "Poor ❌"
            recommendation = "Model needs significant improvement"

        print(f"   Performance: {performance}")
        print(f"   Recommendation: {recommendation}")
    else:
        print("❌ Could not identify best model!")
else:
    print("❌ No best model available!")

# --- Model stability analysis -------------------------------------------------
# Classify every model by the spread of its cross-validation scores.
# ``cv_stability`` is stored in the JSON summary by the saving cell.
print(f"\n🔍 Model Stability Analysis:")
print("-" * 40)

cv_stability = {}
for model_name, result in results.items():
    cv_std = result['cv_std']
    if cv_std < 0.05:
        stability = "Very Stable 🟢"
    elif cv_std < 0.1:
        stability = "Stable 🟡"
    else:
        stability = "Unstable 🔴"

    cv_stability[model_name] = {
        'cv_std': cv_std,
        'stability': stability
    }

    print(f"{model_name}: {stability} (CV Std: {cv_std:.4f})")

# --- Feature importance insights ----------------------------------------------
if feature_importance_summary:
    print(f"\n🔍 Feature Importance Insights:")
    print("-" * 40)

    # Gather each feature's score from every model that reports it.
    feature_scores = {}
    for importance in feature_importance_summary.values():
        for feature, score in importance.items():
            feature_scores.setdefault(feature, []).append(score)

    # The average is taken over the models that report the feature; a model
    # that does not list a feature does not contribute a zero.
    avg_importance = {
        feature: np.mean(scores) for feature, scores in feature_scores.items()
    }

    # Show top 10 most important features
    top_features_avg = sorted(avg_importance.items(), key=lambda x: x[1], reverse=True)[:10]
    print("Top 10 Most Important Features (Average across models):")
    for i, (feature, avg_score) in enumerate(top_features_avg, 1):
        print(f"  {i}. {feature}: {avg_score:.4f}")

# --- Final recommendations ----------------------------------------------------
print(f"\n💡 Final Recommendations:")
print("-" * 30)
print("1. Use the best performing model for predictions")
print("2. Consider ensemble methods for improved performance")
print("3. Focus on the most important features for feature engineering")
print("4. Monitor model performance on new data")
print("5. Regular retraining with updated data")
print("6. Consider business context when interpreting results")

## 💾 ذخیره نتایج ارزیابی

In [None]:
# Save evaluation results as JSON
import json
from pathlib import Path

# ``best_model_name`` is assigned by earlier cells only when the trainer
# selected a best model; treat an unbound name the same as "no best model"
# instead of failing with NameError.
try:
    best_name = best_model_name
except NameError:
    best_name = None

# Read the best model's metrics straight from ``results`` so this cell does
# not depend on a variable left behind by the final-analysis cell.
best_metrics = results[best_name] if best_name else None

evaluation_summary = {
    'best_model': {
        'name': best_name if best_name else 'None',
        # float() turns numpy scalars into plain floats for json
        'r2_score': float(best_metrics['r2']) if best_metrics is not None else 0.0,
        'rmse': float(best_metrics['rmse']) if best_metrics is not None else 0.0,
        'mae': float(best_metrics['mae']) if best_metrics is not None else 0.0
    },
    'model_performance': performance_analysis,
    'cv_stability': cv_stability,
    'feature_importance_summary': {}
}

# Add feature importance data
if feature_importance_summary:
    for model_name, importance in feature_importance_summary.items():
        evaluation_summary['feature_importance_summary'][model_name] = {
            feature: float(score) for feature, score in importance.items()
        }

# Create the output directory on demand: open() does not create missing
# parent directories and would raise FileNotFoundError.
summary_path = Path('../results/evaluation_summary.json')
summary_path.parent.mkdir(parents=True, exist_ok=True)

with summary_path.open('w', encoding='utf-8') as f:
    json.dump(evaluation_summary, f, indent=2)

print("💾 Evaluation results saved to 'results/evaluation_summary.json'")

## 📋 خلاصه نهایی

In [None]:
# Closing summary of what the notebook produced.
print("📋 Final Evaluation Summary:")
print("=" * 50)
print(f"Models evaluated: {len(results)}")
print(f"Best model: {best_model_name if best_model_name else 'None'}")

# The conditional has to be evaluated outside the replacement field: inside
# an f-string everything after ':' is the format specification, so writing
# "{score:.4f if score else 'N/A'}" raises "Invalid format specifier".
# NOTE(review): the truthiness test is kept as written, so a score of exactly
# 0 is reported as N/A — confirm which value ModelTrainer uses for "no score".
best_score_text = f"{trainer.best_score:.4f}" if trainer.best_score else 'N/A'
print(f"Best R² score: {best_score_text}")

print(f"Feature importance analyzed: {len(feature_importance_summary)} models")
print(f"Evaluation report created: ✅")
print(f"Residual analysis performed: ✅")
print(f"Learning curves generated: ✅")
print(f"Model comparison completed: ✅")

print(f"\n📁 Generated Files:")
print(f"  - results/evaluation_report.txt")
print(f"  - results/evaluation_summary.json")
print(f"  - Various evaluation plots and visualizations")

print(f"\n🎉 Comprehensive evaluation completed successfully!")