In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import GridSearchCV

## **Загрузка данных**

In [3]:
# Load the Iris dataset and pull out the feature matrix and the target vector
iris = load_iris()
x = iris.data
y = iris.target

In [4]:
# Display the shape of the target array taken from iris.target
y.shape

(150,)

In [5]:
# Hold out half of the samples for testing; the fixed seed keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.5,
    random_state=1,
)

In [6]:
# Display the shape of the training targets returned by train_test_split
y_train.shape

(75,)

## **Решающее дерево**

In [7]:
# Base estimator for the grid search.
# Fix the seed: the tree permutes features at every split, so ties between
# equally good splits are otherwise broken differently from run to run.
# The value 1 matches the seed already used for train_test_split.
model = DecisionTreeClassifier(random_state=1)

In [8]:
# Search space for the tree: depths 1..10 and minimum split sizes 2..10
param_grid = dict(
    max_depth=np.arange(1, 11),
    min_samples_split=np.arange(2, 11),
)

In [9]:
# Exhaustive search over param_grid, scored with 5-fold cross-validation
# on the training half only
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(x_train, y_train)

In [10]:
# Pull the selected hyperparameters out of the fitted search
best_max_depth, best_min_samples_split = (
    grid_search.best_params_[key]
    for key in ('max_depth', 'min_samples_split')
)

print(
    f"best_max_depth: {best_max_depth}",
    f"best_min_samples_split: {best_min_samples_split}",
    sep="\n",
)

best_max_depth: 3
best_min_samples_split: 2


In [11]:
# Rebuild the tree with the hyperparameters selected by the grid search and
# fit it on the training half.
# The fixed seed makes the fit, and therefore the test metrics computed from
# this model, repeatable across kernel restarts. The value 1 matches the seed
# used for train_test_split.
best_model = DecisionTreeClassifier(
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    random_state=1,
)
best_model.fit(x_train, y_train)

In [12]:
# Predict on the held-out half
y_pred = best_model.predict(x_test)

# Compute both summaries first, then print them
confusion = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# The trailing empty string reproduces the blank line between the two sections
print("confusion_matrix:", confusion, "", sep="\n")
print("classification_report:", report, sep="\n")

confusion_matrix:
[[24  0  0]
 [ 0 23  1]
 [ 0  1 26]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       0.96      0.96      0.96        24
           2       0.96      0.96      0.96        27

    accuracy                           0.97        75
   macro avg       0.97      0.97      0.97        75
weighted avg       0.97      0.97      0.97        75



## **Случайный лес**

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
# Base estimator for the grid search.
# Fix the seed: bootstrap sampling and per-split feature sub-sampling are
# random, so without it the cross-validation scores (and the selected
# hyperparameters) change on every run. The value 1 matches the seed already
# used for train_test_split.
model = RandomForestClassifier(random_state=1)

In [15]:
# Search space for the forest: depths 1..10, minimum split sizes 2..10,
# and a handful of small ensemble sizes
param_grid = dict(
    max_depth=np.arange(1, 11),
    min_samples_split=np.arange(2, 11),
    n_estimators=[1, 2, 3, 5, 7, 10],
)

In [16]:
# Exhaustive search over param_grid, scored with 5-fold cross-validation
# on the training half only
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
grid_search.fit(x_train, y_train)

In [17]:
# Pull the selected hyperparameters out of the fitted search
best_max_depth, best_min_samples_split, best_n_estimators = (
    grid_search.best_params_[key]
    for key in ('max_depth', 'min_samples_split', 'n_estimators')
)

print(
    f"best_max_depth: {best_max_depth}",
    f"best_min_samples_split: {best_min_samples_split}",
    f"best_n_estimators: {best_n_estimators}",
    sep="\n",
)

best_max_depth: 2
best_min_samples_split: 2
best_n_estimators: 2


In [18]:
# Rebuild the forest with the hyperparameters selected by the grid search and
# fit it on the training half.
# The fixed seed makes the bootstrap samples, and therefore the test metrics
# computed from this model, repeatable across kernel restarts. The value 1
# matches the seed used for train_test_split.
best_model = RandomForestClassifier(
    max_depth=best_max_depth,
    min_samples_split=best_min_samples_split,
    n_estimators=best_n_estimators,
    random_state=1,
)

best_model.fit(x_train, y_train)

In [19]:
# Predict on the held-out half
y_pred = best_model.predict(x_test)

# Compute both summaries first, then print them
confusion = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# The trailing empty string reproduces the blank line between the two sections
print("confusion_matrix:", confusion, "", sep="\n")
print("classification_report:", report, sep="\n")

confusion_matrix:
[[24  0  0]
 [ 0 23  1]
 [ 0  5 22]]

classification_report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       0.82      0.96      0.88        24
           2       0.96      0.81      0.88        27

    accuracy                           0.92        75
   macro avg       0.93      0.92      0.92        75
weighted avg       0.93      0.92      0.92        75

