In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import export_graphviz
import graphviz

# Load the dataset
data = pd.read_csv('diabetes.csv')

# Separate the feature columns from the target column
X = data.drop('Outcome', axis=1)
y = data['Outcome']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Baseline model: random forest. random_state pins the bootstrap sampling and
# per-split feature selection, so the accuracy printed below is the same on
# every fresh run of the notebook.
model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Accuracy on the held-out 20%
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))


Accuracy: 0.7727272727272727


In [2]:
import xgboost as xgb
from sklearn.metrics import accuracy_score
import time

# Split X and y into train/test sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring short durations; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
model = xgb.XGBClassifier(
    n_estimators=150,
    max_depth=5,
    learning_rate=0.06,
    subsample=1,
    gamma=1,
    reg_alpha=0,
    reg_lambda=200
)
model.fit(X_train, y_train)
end_time = time.perf_counter()

# Accuracy on the held-out set, plus the elapsed seconds for construction + fit
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Training time:", end_time - start_time)


Accuracy: 0.7727272727272727
Training time: 0.058968544006347656


In [3]:
from sklearn.model_selection import RandomizedSearchCV

# Discrete candidate values for each XGBoost hyperparameter
param_dist = {
    'n_estimators': [50, 100, 150, 200],
    'max_depth': [3, 5, 7, 9],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0],
    'gamma': [0, 0.5, 1],
    'reg_alpha': [0, 0.5, 1],
    'reg_lambda': [0.5, 1, 1.5]
}

model = xgb.XGBClassifier()

# Evaluate 25 randomly sampled parameter combinations with 5-fold CV.
# random_state fixes which combinations are sampled; without it every run
# explores a different subset and reports different "best" parameters.
random_search = RandomizedSearchCV(
    model,
    param_distributions=param_dist,
    n_iter=25,
    scoring='accuracy',
    n_jobs=-1,
    cv=5,
    verbose=3,
    random_state=42,
)
random_search.fit(X_train, y_train)

print("Best parameters found: ", random_search.best_params_)
print("Best accuracy found: ", random_search.best_score_)


Fitting 5 folds for each of 25 candidates, totalling 125 fits
Best parameters found:  {'subsample': 0.6, 'reg_lambda': 1, 'reg_alpha': 1, 'n_estimators': 50, 'max_depth': 5, 'learning_rate': 0.1, 'gamma': 1, 'colsample_bytree': 1.0}
Best accuracy found:  0.7785152605624417


In [10]:
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score

# Hyperopt search space. Tree count and depth use quniform with step 1;
# the remaining parameters are drawn uniformly from their ranges.
space = {
    label: hp.quniform(label, low, high, 1)
    for label, low, high in (
        ('n_estimators', 50, 200),
        ('max_depth', 3, 9),
    )
}
space.update(
    (label, hp.uniform(label, low, high))
    for label, low, high in (
        ('learning_rate', 0.01, 0.2),
        ('subsample', 0.6, 1),
        ('colsample_bytree', 0.6, 1),
        ('gamma', 0, 1),
        ('reg_alpha', 0, 1),
        ('reg_lambda', 0.5, 1.5),
    )
)

def objective(space):
    """Score one sampled hyperparameter set for hyperopt.

    Args:
        space: mapping holding the sampled values for 'n_estimators',
            'max_depth', 'learning_rate', 'subsample', 'colsample_bytree',
            'gamma', 'reg_alpha' and 'reg_lambda'.

    Returns:
        dict with 'loss' set to the negated mean 5-fold cross-validation
        score on X_train / y_train (hyperopt minimizes, so the score is
        negated) and 'status' set to STATUS_OK.
    """
    # These two are cast to int before being handed to the model.
    integer_keys = ('n_estimators', 'max_depth')
    passthrough_keys = (
        'learning_rate',
        'subsample',
        'colsample_bytree',
        'gamma',
        'reg_alpha',
        'reg_lambda',
    )

    params = {key: int(space[key]) for key in integer_keys}
    params.update({key: space[key] for key in passthrough_keys})

    candidate = xgb.XGBClassifier(**params)
    fold_scores = cross_val_score(candidate, X_train, y_train, cv=5)
    mean_score = fold_scores.mean()

    return {'loss': -mean_score, 'status': STATUS_OK}

# NOTE(review): the search is unseeded, so results differ between runs. fmin
# accepts an `rstate` argument for this; the expected generator type depends on
# the installed hyperopt version - confirm before adding it.
trials = Trials()
best = fmin(
    fn=objective,  # function to minimize
    space=space,  # search space
    algo=tpe.suggest,  # search algorithm (Tree-structured Parzen Estimator)
    max_evals=100,  # maximum number of evaluations
    trials=trials  # Trials object that stores the evaluation history
)

# fmin reports quniform-sampled values as floats (e.g. 9.0, 74.0). Cast the
# integer-valued hyperparameters so the reported dict matches what the
# objective actually trained with and can be reused as model arguments.
best_params = {
    name: int(value) if name in ('n_estimators', 'max_depth') else value
    for name, value in best.items()
}

print("Лучшие гиперпараметры: ", best_params)

100%|█████████████████████████████████████████████| 100/100 [00:36<00:00,  2.77trial/s, best loss: -0.7817406370785018]
Лучшие гиперпараметры:  {'colsample_bytree': 0.8508104129014503, 'gamma': 0.4498388751175088, 'learning_rate': 0.08121508202868899, 'max_depth': 9.0, 'n_estimators': 74.0, 'reg_alpha': 0.35690174108889133, 'reg_lambda': 1.464916348060583, 'subsample': 0.6852148426014755}
