# Деревья решений с градиентным бустингом (GBDT)

#### Подготовка датасета

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine
import pandas as pd

In [2]:
# Load the wine dataset that ships with scikit-learn.
dataset = load_wine()

# Tabular view for inspection: the feature columns plus the numeric label
# ("class") and its human-readable name ("class_name").
df = pd.DataFrame(dataset.data, columns=dataset.feature_names).assign(
    **{
        "class": dataset.target,
        "class_name": dataset.target_names[dataset.target],
    }
)

# Hold out 20% of the samples for evaluation; the seed is fixed so the split
# is identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    dataset.data, dataset.target, test_size=0.2, random_state=42
)

# Last expression of the cell, so the notebook renders the DataFrame.
df

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,class,class_name
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0,class_0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,0,class_0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,0,class_0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,0,class_0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0,class_0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,2,class_2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,2,class_2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,2,class_2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,2,class_2


#### Обучение модели

In [3]:
from sklearn.ensemble import GradientBoostingClassifier

In [4]:
# Train a gradient-boosted tree classifier with scikit-learn's default
# hyperparameters. `fit` returns the estimator itself, so binding its result
# keeps the cell from echoing the estimator repr.
# NOTE: no random_state is passed, so the fitted model (and the score
# reported below) may differ slightly between runs.
model = GradientBoostingClassifier().fit(x_train, y_train)

#### Измерение точности

In [5]:
# Mean accuracy of the fitted classifier on the held-out test split; being
# the last expression of the cell, the value is echoed as the cell output.
model.score(x_test, y_test)

0.9444444444444444

#### Получение предсказания

In [6]:
# Classify one hand-written sample. The 13 values follow the column order of
# `dataset.feature_names` (alcohol, malic_acid, ..., proline). `predict`
# expects a 2-D input, hence the nested list; `[0]` extracts the single
# predicted label.
predict_class = model.predict(
    [[
        14.83, 0.71, 2.83, 14.6, 123.0, 2.90, 3.66,
        0.58, 2.09, 5.04, 1.24, 4.12, 1085.0,
    ]]
)[0]
# The data set is wine, so the printed label names a wine class.
print(f"Класс вина: {dataset.target_names[predict_class]}")

Вид ириса: class_0


### Деревья решений с градиентным бустингом (через цикл `for`)

#### Создание синтетического датасета для регрессии

In [7]:
from sklearn.datasets import make_regression

In [22]:
# Synthetic regression problem: 100 samples, 2 features, fixed seed.
X, y = make_regression(n_samples=100, n_features=2, random_state=42)

# Hold out 20% of the samples; shuffling before the split is requested
# explicitly and seeded so the partition is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)

In [23]:
from sklearn.tree import DecisionTreeRegressor
import numpy as np

In [24]:
# Settings for the hand-written boosting loop:
#   learning_rate - factor applied to each tree's output before it is added
#                   to the running prediction
#   n_trees       - number of boosting rounds
learning_rate, n_trees = 0.1, 5

# Fitted trees are collected here, one per round, in training order.
trees = []

#### Начальное предсказание: среднее значение целевой переменной

In [25]:
# Initial prediction: every training sample starts at the mean of the
# training targets. `baseline` and `y_pred` are bound to the same array.
baseline = y_pred = np.full(len(y_train), y_train.mean())

In [26]:
# Hand-written boosting loop: each round fits a depth-1 tree to the current
# residuals and moves the running prediction a fraction (`learning_rate`) of
# the way along the tree's output. The first 10 entries of every
# intermediate array are printed for inspection.
for i in range(n_trees):
    print("iter:", i)
    # Residuals of the current running prediction on the training set.
    error = y_train - y_pred
    print("Error:", error[:10])
    print()
    # The tree is trained on the residuals, not on the original targets.
    tree = DecisionTreeRegressor(max_depth=1, random_state=42)
    tree.fit(x_train, error)
    predictions = tree.predict(x_train)
    print("predictions:", predictions[:10], "\n")
    print("(learning_rate * predictions):", (learning_rate * predictions)[:10], "\n")
    # Builds a new array instead of updating in place, so the array that
    # `baseline` refers to is left untouched.
    y_pred = y_pred + (learning_rate * predictions)
    print("y_pred", y_pred[:10], "\n\n")
    # Keep the fitted tree so the ensemble can be reused after the loop.
    trees.append(tree)

iter: 0
Error: [ -26.1971918   -38.53188106    8.19435461  -54.87233406 -205.32801059
   41.87248149   81.28549976 -141.80111208  -26.18952571 -154.3269071 ]

predictions: [-79.68089134 -79.68089134  53.12059423 -79.68089134 -79.68089134
  53.12059423  53.12059423 -79.68089134  53.12059423 -79.68089134] 

(learning_rate * predictions): [-7.96808913 -7.96808913  5.31205942 -7.96808913 -7.96808913  5.31205942
  5.31205942 -7.96808913  5.31205942 -7.96808913] 

y_pred [-8.98208506 -8.98208506  4.2980635  -8.98208506 -8.98208506  4.2980635
  4.2980635  -8.98208506  4.2980635  -8.98208506] 


iter: 1
Error: [ -18.22910266  -30.56379193    2.88229519  -46.90424492 -197.35992146
   36.56042207   75.97344034 -133.83302295  -31.50158513 -146.35881796]

predictions: [ 80.91495608 -48.54897365 -48.54897365  80.91495608 -48.54897365
 -48.54897365  80.91495608 -48.54897365 -48.54897365 -48.54897365] 

(learning_rate * predictions): [ 8.09149561 -4.85489736 -4.85489736  8.09149561 -4.85489736 -4.854