DIFFERENT COMBINATION MODELS 

In [None]:
# Combination models
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Define model combinations
#
# Each entry maps a human-readable label to a StackingRegressor: a set of
# named base estimators plus a final estimator that combines their outputs.
#
# Every estimator that accepts a seed gets the same one so that repeated runs
# produce the same fitted ensembles and therefore the same metric ranking.
# LinearRegression and SVR take no seed.
RANDOM_STATE = 42

model_combinations = {
    # Tree ensembles only, combined by a gradient-boosted meta-learner.
    "RF+GBR+XGB": StackingRegressor(
        estimators=[
            ("rf", RandomForestRegressor(n_estimators=150, random_state=RANDOM_STATE)),
            ("gbr", GradientBoostingRegressor(n_estimators=150, random_state=RANDOM_STATE)),
            ("xgb", XGBRegressor(n_estimators=150, random_state=RANDOM_STATE)),
        ],
        final_estimator=XGBRegressor(n_estimators=100, random_state=RANDOM_STATE),
    ),
    # Three gradient-boosting libraries, combined by XGBoost.
    "LGBM+XGB+CatBoost": StackingRegressor(
        estimators=[
            ("lgbm", LGBMRegressor(n_estimators=200, random_state=RANDOM_STATE)),
            ("xgb", XGBRegressor(n_estimators=200, random_state=RANDOM_STATE)),
            (
                "catboost",
                CatBoostRegressor(n_estimators=200, verbose=0, random_seed=RANDOM_STATE),
            ),
        ],
        final_estimator=XGBRegressor(n_estimators=100, random_state=RANDOM_STATE),
    ),
    # Heterogeneous mix (linear, single tree, boosting, kernel), combined by a
    # random forest.
    # NOTE(review): SVR and LinearRegression are used without any scaling step
    # here; SVR in particular is sensitive to feature scale — confirm the
    # features are already standardized upstream.
    "LR+DT+XGB+SVR": StackingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            ("dt", DecisionTreeRegressor(random_state=RANDOM_STATE)),
            ("xgb", XGBRegressor(n_estimators=200, random_state=RANDOM_STATE)),
            ("svr", SVR()),
        ],
        final_estimator=RandomForestRegressor(n_estimators=100, random_state=RANDOM_STATE),
    ),
}

# Train and evaluate models
#
# For each stacked ensemble: fit on the training split, predict on the test
# split, record RMSE / MAE / R2 together with the fitted model, and draw a
# predicted-vs-actual scatter plot.
# NOTE(review): X_train, y_train, X_test and y_test are not defined in this
# cell — presumably created by an earlier cell; verify before running.
results = {}
for name, model in model_combinations.items():
    print(f"Training {name}...")
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Error metrics on the held-out split; RMSE is the square root of the MSE.
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    rmse = mean_squared_error(y_test, y_pred) ** 0.5

    # Keep the fitted model next to its scores so the best one can be picked
    # and saved after the loop.
    results[name] = dict(RMSE=rmse, MAE=mae, R2=r2, model=model)
    print(f" {name} , {rmse} , {mae} ,{r2}")

    # Scatter plot of predictions against ground truth, one figure per model.
    fig, ax = plt.subplots(figsize=(6, 6))
    ax.scatter(y_test, y_pred, alpha=0.5, label=name)
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title(f"Scatter Plot: {name}")
    ax.legend()
    plt.show()

# Find the best model based on RMSE
# The entry with the smallest RMSE wins; on a tie the first one in insertion
# order is kept.
best_model_name, best_entry = min(
    results.items(),
    key=lambda item: item[1]["RMSE"],
)
best_model = best_entry["model"]
print(f"Best Model: {best_model_name}")

# Save the best model
# Serializes the fitted estimator with joblib into the working directory.
joblib.dump(best_model, "good_model.pkl")