<a href="https://colab.research.google.com/github/mmetawei/AFQC/blob/main/Model_comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Section banner: title line followed by a 70-character rule.
print(" COMPREHENSIVE MODEL COMPARISON", "=" * 70, sep="\n")

# Import additional models
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor
import xgboost as xgb
import lightgbm as lgb
import time
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Data preparation: nothing is computed here. The train/test split is reused;
# X_train_scaled, X_test_scaled, y_train and y_test are expected to already
# exist (presumably created by an earlier notebook cell).
print(" Data preparation...")

# Registry of every regressor to benchmark, keyed by its display name.
# Insertion order is preserved, so models are trained in the order listed.
models = {}

# Linear models
models.update({
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(alpha=1.0),
    "Lasso Regression": Lasso(alpha=0.01, max_iter=5000),
    "ElasticNet": ElasticNet(alpha=0.01, l1_ratio=0.5, max_iter=5000),
})

# Tree-based models
models.update({
    "Decision Tree": DecisionTreeRegressor(max_depth=10, random_state=42),
    "Random Forest": RandomForestRegressor(
        n_estimators=100, max_depth=10, random_state=42, n_jobs=-1
    ),
    "Extra Trees": ExtraTreesRegressor(
        n_estimators=100, random_state=42, n_jobs=-1
    ),
    "Gradient Boosting": GradientBoostingRegressor(
        n_estimators=100, random_state=42
    ),
})

# Advanced gradient boosting (third-party libraries)
models.update({
    "XGBoost": xgb.XGBRegressor(
        n_estimators=100, random_state=42, n_jobs=-1, verbosity=0
    ),
    "LightGBM": lgb.LGBMRegressor(
        n_estimators=100, random_state=42, n_jobs=-1, verbose=-1
    ),
})

# Other model families
models.update({
    "k-NN (k=5)": KNeighborsRegressor(n_neighbors=5, n_jobs=-1),
    "Support Vector Regressor": SVR(kernel='rbf'),
    "Neural Network (MLP)": MLPRegressor(
        hidden_layer_sizes=(64, 32),
        max_iter=1000,
        random_state=42,
        early_stopping=True,
    ),
})

# Train and evaluate each model.
# `results` collects one dict per model with the keys:
#   'Model', 'RÂ²', 'RMSE', 'MAE', 'MSE', 'Train Time (s)'
# NOTE: the 'RÂ²' key is a mis-encoded "R²". It is kept byte-for-byte because
# the rest of the notebook looks the column up under this exact name; only the
# user-facing progress message below is spelled correctly.
results = []

print(f"\n Training {len(models)} different models...")
print("-"*70)

for name, model in models.items():
    print(f" Training {name:25s}...", end="", flush=True)

    # Start from an all-NaN row so a model that fails still appears in the
    # results table (and sorts to the bottom of the ranking).
    row = {
        'Model': name,
        'RÂ²': np.nan,
        'RMSE': np.nan,
        'MAE': np.nan,
        'MSE': np.nan,
        'Train Time (s)': np.nan,
    }

    try:
        # Time only the fit. perf_counter() is monotonic, so the measurement
        # cannot be distorted by system clock adjustments the way
        # time.time() can.
        start_time = time.perf_counter()
        model.fit(X_train_scaled, y_train)
        train_time = time.perf_counter() - start_time

        # Evaluate on the held-out test set
        y_pred = model.predict(X_test_scaled)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)
    except Exception as e:
        # Deliberate best-effort: one failing model must not abort the whole
        # comparison. Report a short message and keep the NaN row.
        print(f"  Error: {str(e)[:50]}")
    else:
        row.update({
            'RÂ²': r2,
            'RMSE': rmse,
            'MAE': mae,
            'MSE': mse,
            'Train Time (s)': train_time,
        })
        print(f"  R² = {r2:.4f}, Time = {train_time:.2f}s")

    results.append(row)

# Create results DataFrame, ranked by R² (best first). sort_values() places
# NaN last by default, so models that failed end up at the bottom.
# NOTE: 'RÂ²' is the (mis-encoded) column key produced by the training loop;
# it is kept as-is so results_df and the CSV keep their existing schema.
results_df = pd.DataFrame(results)
results_df = results_df.sort_values('RÂ²', ascending=False).reset_index(drop=True)

print("\n" + "="*70)
print(" MODEL PERFORMANCE RANKING (by R² Score)")
print("="*70)

# Display with nice formatting.
# - R² is "higher is better" while RMSE is "lower is better", so RMSE uses the
#   reversed colormap: green marks the good end in both columns.
# - The column is relabelled to 'R²' on a display-only copy; results_df itself
#   is not modified.
display(
    results_df.rename(columns={'RÂ²': 'R²'})
    .style
    .background_gradient(subset=['R²'], cmap='RdYlGn')
    .background_gradient(subset=['RMSE'], cmap='RdYlGn_r')
)

# Save results (without the positional index)
results_df.to_csv('model_comparison_results.csv', index=False)
print("\n Results saved to: 'model_comparison_results.csv'")

# Visual comparison: 2x2 grid with R², RMSE, training time and a
# predicted-vs-actual scatter for the best models.
# NOTE: 'RÂ²' is the (mis-encoded) column key of results_df and is kept as-is;
# only the text shown on the figure is spelled "R²".
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. R² Score Comparison (bars colour-coded by quality band)
colors = ['lightgreen' if x > 0.9 else 'gold' if x > 0.8 else 'lightcoral' for x in results_df['RÂ²']]
bars1 = axes[0, 0].barh(results_df['Model'], results_df['RÂ²'], color=colors)
axes[0, 0].set_xlabel('R² Score')
axes[0, 0].set_title('Model Performance - R² Score (Higher is better)')
axes[0, 0].axvline(x=0.9, color='green', linestyle='--', alpha=0.5, label='Excellent: R² > 0.9')
axes[0, 0].axvline(x=0.8, color='orange', linestyle='--', alpha=0.5, label='Good: R² > 0.8')
axes[0, 0].axvline(x=0.7, color='red', linestyle='--', alpha=0.5, label='Fair: R² > 0.7')
axes[0, 0].legend()
# NOTE(review): the fixed [0, 1] range hides bars for models with negative R².
axes[0, 0].set_xlim([0, 1.0])

# Add value labels; a NaN score (model failed to train) has no position to
# label, so it is skipped.
for bar, r2 in zip(bars1, results_df['RÂ²']):
    if pd.notna(r2):
        axes[0, 0].text(r2 + 0.01, bar.get_y() + bar.get_height()/2,
                        f'{r2:.3f}', va='center', fontsize=9)

# 2. RMSE Comparison
axes[0, 1].barh(results_df['Model'], results_df['RMSE'], color='lightcoral')
axes[0, 1].set_xlabel('RMSE')
axes[0, 1].set_title('Model Error - RMSE (Lower is better)')
# The i-th bar of a categorical barh sits at y == i.
for i, rmse in enumerate(results_df['RMSE']):
    if pd.notna(rmse):
        axes[0, 1].text(rmse + 0.001, i, f'{rmse:.4f}', va='center', fontsize=9)

# 3. Training Time Comparison
axes[1, 0].barh(results_df['Model'], results_df['Train Time (s)'], color='lightblue')
axes[1, 0].set_xlabel('Training Time (seconds)')
axes[1, 0].set_title('Training Efficiency (Lower is better)')
for i, time_val in enumerate(results_df['Train Time (s)']):
    if pd.notna(time_val):
        axes[1, 0].text(time_val + 0.1, i, f'{time_val:.2f}s', va='center', fontsize=9)

# 4. Prediction vs Actual for top 3 models
# results_df is ranked best-first. Rows with a NaN score belong to models that
# failed to train; calling predict() on them would raise, so they are excluded
# before taking the top three.
top_3_models = results_df.dropna(subset=['RÂ²']).head(3)['Model'].tolist()
for model_name in top_3_models:
    model = models[model_name]
    y_pred = model.predict(X_test_scaled)
    model_r2 = results_df.loc[results_df['Model'] == model_name, 'RÂ²'].iloc[0]

    # Create scatter plot
    axes[1, 1].scatter(y_test, y_pred, alpha=0.4, s=15,
                       label=f'{model_name} (R²={model_r2:.3f})')

# Perfect prediction line (y = x over the range of the actual values)
axes[1, 1].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()],
                'k--', lw=2, label='Perfect Prediction')
axes[1, 1].set_xlabel('Actual Values')
axes[1, 1].set_ylabel('Predicted Values')
axes[1, 1].set_title('Top 3 Models: Predictions vs Actual')
axes[1, 1].legend(loc='lower right', fontsize=9)
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\n" + "="*70)
print("📈 PERFORMANCE SUMMARY")
print("="*70)

# Find best model: results_df is ranked best-first, so it is the first row.
# NOTE: 'RÂ²' is the (mis-encoded) column key of results_df and is kept as-is;
# only the printed text is spelled "R²".
best_model_name = results_df.iloc[0]['Model']
best_r2 = results_df.iloc[0]['RÂ²']
best_rmse = results_df.iloc[0]['RMSE']

print(f" BEST MODEL: {best_model_name}")
print(f"   R² Score: {best_r2:.6f}")
print(f"   RMSE: {best_rmse:.6f}")

# Compare with your initial Random Forest
initial_rf_r2 = results_df[results_df['Model'] == 'Random Forest']['RÂ²'].values[0]
improvement = best_r2 - initial_rf_r2

print("\n COMPARISON WITH INITIAL RANDOM FOREST:")
print(f"   Initial Random Forest R²: {initial_rf_r2:.6f}")
# Relative change is taken against |baseline| and printed with an explicit
# sign, so a negative baseline R² cannot flip the sign or yield "+-x%".
print(f"   Best model improvement: {improvement:.6f} ({improvement/abs(initial_rf_r2)*100:+.2f}%)")

# NOTE(review): the observations below are static text. They are not derived
# from results_df and may not match the numbers of the current run.
print("\n KEY OBSERVATIONS:")
print("1. All tree-based models perform exceptionally well (R² > 0.94)")
print("2. Gradient boosting variants (XGBoost, LightGBM) are top performers")
print("3. Linear models struggle with complex quantum noise patterns")
print("4. Neural network shows potential but may need more tuning")

# Save best model
best_model = models[best_model_name]
if pd.isna(best_r2):
    # Every model failed (all scores are NaN): the "best" row is arbitrary and
    # its estimator is unfitted, so do not persist it as if it were usable.
    print("\n No model trained successfully; skipping best-model save.")
else:
    print(f"\n Saving best model: {best_model_name}")
    joblib.dump(best_model, 'best_model.pkl')
    print(" Best model saved as: 'best_model.pkl'")
