In [1]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl (98.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.5


In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_wine
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures,StandardScaler
from sklearn.model_selection import train_test_split, RandomizedSearchCV,GridSearchCV, KFold
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from scipy.stats import randint

from sklearn.ensemble import StackingRegressor
from lightgbm import LGBMRegressor

In [3]:
# بارگذاری داده‌ها
from sklearn.datasets import load_wine
data = load_wine()
df = pd.DataFrame(data.data, columns=data.feature_names)

In [4]:
X = df.drop('alcohol', axis=1)
y = df['alcohol']


In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [20]:
# تعریف مدل‌ها
models = {
    'Linear Regression': LinearRegression(),
    'Random Forest': RandomForestRegressor(random_state=42),
    'XGBoost': XGBRegressor(random_state=42),
    'CatBoost': CatBoostRegressor(verbose=0, random_state=42)
    # ,
    # 'LightGBM': LGBMRegressor(random_state=42)
}


In [21]:
# بررسی و تنظیم هایپرپارامترها
param_grid = {
    'Random Forest': {
        'n_estimators': [100, 500, 1000],
        'max_depth': [None, 10, 20],
    },
    'XGBoost': {
        'n_estimators': [100, 500, 1000],
        'learning_rate': [0.01, 0.1, 0.5],
        'max_depth': [3, 5, 10],
    },
    'CatBoost': {
        'iterations': [500, 1000],
        'learning_rate': [0.01, 0.1],
        'depth': [6, 10],
     }
    #,
    # 'LightGBM': {
    #     'n_estimators': [100, 500, 1000],
    #     'learning_rate': [0.01, 0.1],
    #     'max_depth': [3, 5, 10],
    # }
}

In [16]:

best_models = {}

# ارزیابی مدل‌ها
for name, model in models.items():
    print(f"Training {name}...")
    if name in param_grid:
        grid_search = GridSearchCV(model, param_grid[name], cv=5, scoring='neg_mean_squared_error', verbose=1)
        grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
        best_models[name] = best_model
    else:
        model.fit(X_train, y_train)
        best_models[name] = model

    # پیش‌بینی و ارزیابی مدل
    y_pred = best_models[name].predict(X_test)
    print(f"{name} R2 Score: {r2_score(y_test, y_pred)}")
    print(f"{name} Mean Squared Error: {mean_squared_error(y_test, y_pred)}")
    print(f"{name} Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}\n")

# انتخاب بهترین مدل بر اساس R2 Score
best_model_name = max(best_models, key=lambda name: r2_score(y_test, best_models[name].predict(X_test)))
print(f"Best Model: {best_model_name}")



Training Linear Regression...
Linear Regression R2 Score: 0.688936431784382
Linear Regression Mean Squared Error: 0.18571608706852427
Linear Regression Mean Absolute Error: 0.36754935046008863

Training Random Forest...
Fitting 5 folds for each of 9 candidates, totalling 45 fits
Random Forest R2 Score: 0.7373219305233214
Random Forest Mean Squared Error: 0.15682821200105224
Random Forest Mean Absolute Error: 0.3167603295259027

Training XGBoost...
Fitting 5 folds for each of 27 candidates, totalling 135 fits
XGBoost R2 Score: 0.6772571773482137
XGBoost Mean Squared Error: 0.1926890201130633
XGBoost Mean Absolute Error: 0.36241332901848683

Training CatBoost...
Fitting 5 folds for each of 8 candidates, totalling 40 fits
CatBoost R2 Score: 0.7466674837867322
CatBoost Mean Squared Error: 0.1512485821089138
CatBoost Mean Absolute Error: 0.31714946133646593

Best Model: CatBoost


In [22]:
#randomsearch
best_models = {}

# ارزیابی مدل‌ها
for name, model in models.items():
    print(f"Training {name}...")
    if name in param_grid:

        random_search = RandomizedSearchCV(
            model,
            param_distributions=param_grid[name],
            n_iter=len(param_grid[name]),
            cv=5,
            scoring='neg_mean_squared_error',
            verbose=1,
            random_state=42
        )
        random_search.fit(X_train, y_train)
        best_model = random_search.best_estimator_
        best_models[name] = best_model
    else:
        model.fit(X_train, y_train)
        best_models[name] = model

    # پیش‌بینی و ارزیابی مدل
    y_pred = best_models[name].predict(X_test)
    print(f"{name} R2 Score: {r2_score(y_test, y_pred)}")
    print(f"{name} Mean Squared Error: {mean_squared_error(y_test, y_pred)}")
    print(f"{name} Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}\n")

# انتخاب بهترین مدل بر اساس R2 Score
best_model_name = max(best_models, key=lambda name: r2_score(y_test, best_models[name].predict(X_test)))
print(f"Best Model: {best_model_name}")

Training Linear Regression...
Linear Regression R2 Score: 0.688936431784382
Linear Regression Mean Squared Error: 0.18571608706852427
Linear Regression Mean Absolute Error: 0.36754935046008863

Training Random Forest...
Fitting 5 folds for each of 2 candidates, totalling 10 fits
Random Forest R2 Score: 0.7389932595704225
Random Forest Mean Squared Error: 0.1558303687222263
Random Forest Mean Absolute Error: 0.3176083333333408

Training XGBoost...
Fitting 5 folds for each of 3 candidates, totalling 15 fits
XGBoost R2 Score: 0.6356567193968075
XGBoost Mean Squared Error: 0.21752598290916464
XGBoost Mean Absolute Error: 0.38175689167446564

Training CatBoost...
Fitting 5 folds for each of 3 candidates, totalling 15 fits
CatBoost R2 Score: 0.7466674837867322
CatBoost Mean Squared Error: 0.1512485821089138
CatBoost Mean Absolute Error: 0.31714946133646593

Best Model: CatBoost
