In [None]:
! pip install catboost



In [None]:
from sklearn.linear_model import (
    LinearRegression, Ridge, Lasso, ElasticNet, HuberRegressor, SGDRegressor, PassiveAggressiveRegressor
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import (
    RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor, BaggingRegressor
)
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.neural_network import MLPRegressor
import joblib
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error


In [None]:
unscaled_data = pd.read_csv('/content/unscaled_processed_data.csv')
X_train_scaled, X_test_scaled, y_train_scaled, y_test_scaled=joblib.load("/content/sacled_train_test_split.pkl")

In [None]:
X_unscaled = unscaled_data.drop(columns='price')
y_unscaled = unscaled_data['price']

In [None]:
X_train_unscaled, X_test_unscaled, y_train_unscaled, y_test_unscaled = train_test_split(X_unscaled, y_unscaled, test_size=0.2, random_state=42)

# models

In [30]:
scaled_models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "ElasticNet Regression": ElasticNet(),
    "KNN Regressor": KNeighborsRegressor(),
    "SVR": SVR()
}


In [31]:
unscaled_models = {
    "Decision Tree": DecisionTreeRegressor(),
    "Random Forest": RandomForestRegressor(),
    "Gradient Boosting": GradientBoostingRegressor(),
    "AdaBoost": AdaBoostRegressor(),
    "Bagging Regressor": BaggingRegressor(),
    "XGBoost": XGBRegressor()
}


In [32]:
results = []

In [33]:
for model_name, model in scaled_models.items():
    model.fit(X_train_scaled, y_train_scaled)
    y_pred = model.predict(X_test_scaled)

    rmse = mean_squared_error(y_test_scaled, y_pred)
    r2 = r2_score(y_test_scaled, y_pred)
    mae = mean_absolute_error(y_test_scaled, y_pred)


    results.append({'Model': model_name, 'RMSE': rmse, 'R^2': r2, 'MAE': mae})


In [34]:
for model_name, model in unscaled_models.items():
    model.fit(X_train_unscaled, y_train_unscaled)
    y_pred = model.predict(X_test_unscaled)

    rmse = mean_squared_error(y_test_unscaled, y_pred)
    r2 = r2_score(y_test_unscaled, y_pred)
    mae = mean_absolute_error(y_test_unscaled, y_pred)

    results.append({'Model': model_name, 'RMSE': rmse, 'R^2': r2, 'MAE': mae})


In [35]:
performance_df = pd.DataFrame(results)

In [36]:
print(performance_df)

                    Model          RMSE       R^2         MAE
0       Linear Regression  2.037055e-02  0.980573    0.112211
1        Ridge Regression  2.037078e-02  0.980573    0.112216
2        Lasso Regression  1.048901e+00 -0.000292    0.887064
3   ElasticNet Regression  5.142507e-01  0.509581    0.613561
4           KNN Regressor  3.502805e-02  0.966595    0.141613
5                     SVR  1.259423e-02  0.987989    0.087444
6           Decision Tree  6.043915e+05  0.963940  395.609285
7           Random Forest  3.177971e+05  0.981039  296.150605
8       Gradient Boosting  3.819329e+05  0.977213  346.029614
9                AdaBoost  1.319463e+06  0.921277  848.910321
10      Bagging Regressor  3.448466e+05  0.979425  310.133779
11                XGBoost  3.266887e+05  0.980509  294.112823


In [37]:
performance_df.to_csv('performance_df.csv')