In [20]:
import warnings
warnings.filterwarnings('ignore')

In [21]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV

## **Загрузка данных**

In [22]:
iris = load_iris()
x, y = iris.data, iris.target

In [23]:
y.shape

(150,)

In [24]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=1)

In [25]:
y_train.shape

(75,)

## **Решающее дерево**

In [26]:
model = DecisionTreeClassifier()

In [27]:
param_grid = {
    'max_depth': np.arange(1, 11),
    'min_samples_split': np.arange(2, 11)
}

In [28]:
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(x_train, y_train)

In [29]:
best_max_depth = grid_search.best_params_['max_depth']
best_min_samples_split = grid_search.best_params_['min_samples_split']

print(f"best_max_depth: {best_max_depth}")
print(f"best_min_samples_split: {best_min_samples_split}")

best_max_depth: 3
best_min_samples_split: 2


In [30]:
best_model = DecisionTreeClassifier(max_depth=best_max_depth, min_samples_split=best_min_samples_split)
best_model.fit(x_train, y_train)

In [31]:
y_pred = best_model.predict(x_test)

confusion = confusion_matrix(y_test, y_pred)
print("confusion_matrix:")
print(confusion)
print()
report = classification_report(y_test, y_pred)
print("classification_report:")
print(report)

confusion_matrix:
[[24  0  0]
 [ 0 23  1]
 [ 0  1 26]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       0.96      0.96      0.96        24
           2       0.96      0.96      0.96        27

    accuracy                           0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.97      0.97      0.97        75



## **Случайный лес**

In [46]:
from sklearn.ensemble import RandomForestClassifier

In [47]:
model = RandomForestClassifier()

In [48]:
param_grid = {
    'max_depth': np.arange(1, 11),
    'min_samples_split': np.arange(2, 11),
    'n_estimators': [1, 2, 3, 4, 5],
    'criterion': ["gini", "entropy", "log_loss"]
}

In [49]:
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(x_train, y_train)

In [50]:
best_max_depth = grid_search.best_params_['max_depth']
best_min_samples_split = grid_search.best_params_['min_samples_split']
best_n_estimators = grid_search.best_params_['n_estimators']

print(f"best_max_depth: {best_max_depth}")
print(f"best_min_samples_split: {best_min_samples_split}")
print(f"best_n_estimators: {best_n_estimators}")

best_max_depth: 2
best_min_samples_split: 5
best_n_estimators: 3


In [51]:
best_model = RandomForestClassifier(
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    n_estimators=best_n_estimators
)

best_model.fit(x_train, y_train)

In [52]:
y_pred = best_model.predict(x_test)

confusion = confusion_matrix(y_test, y_pred)
print("confusion_matrix:")
print(confusion)
print()
report = classification_report(y_test, y_pred)
print("classification_report:")
print(report)

confusion_matrix:
[[24  0  0]
 [ 0 23  1]
 [ 0  1 26]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       0.96      0.96      0.96        24
           2       0.96      0.96      0.96        27

    accuracy                           0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.97      0.97      0.97        75

