In [4]:
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split
import pandas as pd
import h2o
from sklearn.metrics import mean_squared_error
from h2o.estimators import H2OGradientBoostingEstimator

In [19]:
data = pd.read_csv('DS.csv')
X = data.drop('central', axis=1)
y = data['central']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = CatBoostRegressor(iterations=10000, learning_rate=0.01, depth=10, loss_function='RMSE')
model.fit(X_train, y_train, verbose=False, plot =True)
predictions = model.predict(X_test)

mse = mean_squared_error(y_test, predictions)
print(f'Mean Squared Error: {mse}')

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

Mean Squared Error: 1.4611696376563687e-06


In [5]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# Инициализация и обучение модели градиентного бустинга
data = pd.read_csv('DS.csv')
X = data.drop('central', axis=1)
y = data['central']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
gb_model = GradientBoostingRegressor()
gb_model.fit(X_train, y_train)

# Предсказание на тестовом наборе
y_pred = gb_model.predict(X_test)

# Оценка качества модели
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 2.7201277396213935e-06


In [6]:
from sklearn.model_selection import GridSearchCV

# Определение набора параметров для настройки
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7]
}

# Инициализация модели
gb_model = GradientBoostingRegressor()

# Поиск оптимальных параметров с использованием перекрестной проверки
grid_search = GridSearchCV(estimator=gb_model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Вывод результатов
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

Best parameters: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 100}
Best score: 0.9917463216532179


In [7]:
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=5)
gb_model.fit(X_train, y_train)
y_pred = gb_model.predict(X_test)

# Оценка качества модели
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 2.7108823647777534e-06


In [1]:
pip install scikit-optimize


Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.





Collecting scikit-optimize
  Downloading scikit_optimize-0.10.1-py2.py3-none-any.whl (107 kB)
     -------------------------------------- 107.7/107.7 kB 1.6 MB/s eta 0:00:00
Collecting pyaml>=16.9
  Downloading pyaml-23.12.0-py3-none-any.whl (23 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-23.12.0 scikit-optimize-0.10.1


In [10]:
from skopt import BayesSearchCV
from sklearn.ensemble import GradientBoostingRegressor

# Определение пространства поиска гиперпараметров
param_space = {
    'n_estimators': (50, 200),
    'learning_rate': (0.01, 0.5),
    'max_depth': (3, 7)
}

# Инициализация модели
gb_model = GradientBoostingRegressor()

# Оптимизация гиперпараметров с использованием оптимизации байесовской оптимизации
opt = BayesSearchCV(gb_model, param_space, n_iter=30, cv=5)
opt.fit(X_train, y_train)

# Вывод результатов
print("Best parameters:", opt.best_params_)
print("Best score:", opt.best_score_)


Best parameters: OrderedDict([('learning_rate', 0.5), ('max_depth', 3), ('n_estimators', 155)])
Best score: 0.992070408021154
