Описание признаков
> 1. age 
> 2. sex 
> 3. chest pain type (4 values) 
> 4. resting blood pressure 
> 5. serum cholestoral in mg/dl 
> 6. fasting blood sugar > 120 mg/dl
> 7. resting electrocardiographic results (values 0,1,2)
> 8. maximum heart rate achieved 
> 9. exercise induced angina 
> 10. oldpeak = ST depression induced by exercise relative to rest 
> 11. the slope of the peak exercise ST segment 
> 12. number of major vessels (0-3) colored by flourosopy 
> 13. thal: 3 = normal; 6 = fixed defect; 7 = reversable defect

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score, recall_score, f1_score
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [2]:
data = pd.read_csv('data/heart.csv', sep=',')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
data.isnull().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [4]:
X = data.loc[:, data.columns != 'target']
X.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2


In [5]:
Y = data['target']
Y.head()

0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64

In [6]:
X_train, X_test, y_train, y_test = train_test_split(
X, Y, test_size = 0.2, random_state = 1)
X_train.shape, y_train.shape

((242, 13), (242,))

In [7]:
X_test.shape, y_test.shape

((61, 13), (61,))

In [8]:
logistic = LogisticRegression()
logistic.fit(X_train, y_train)
logistic_y_test = logistic.predict(X_test)
classification_report(y_test, logistic_y_test, output_dict=True)["0"], \
classification_report(y_test, logistic_y_test, output_dict=True)["1"]



({'precision': 0.8,
  'recall': 0.6666666666666666,
  'f1-score': 0.7272727272727272,
  'support': 30},
 {'precision': 0.7222222222222222,
  'recall': 0.8387096774193549,
  'f1-score': 0.7761194029850746,
  'support': 31})

In [9]:
svc = SVC(kernel="rbf", C=0.5)
svc.fit(X_train, y_train)



SVC(C=0.5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [10]:
svc_y_test = svc.predict(X_test)
svc_y_test

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [11]:
classification_report(y_test, svc_y_test, output_dict=True)["0"], \
classification_report(y_test, svc_y_test, output_dict=True)["1"]

  'precision', 'predicted', average, warn_for)


({'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 30},
 {'precision': 0.5081967213114754,
  'recall': 1.0,
  'f1-score': 0.673913043478261,
  'support': 31})

In [12]:
# при помощи решетчатого поиска и кросс-валидации найдем оптимальное значение гиперпараметра C
scoring = {
    'recall': make_scorer(recall_score), 
    'f1': make_scorer(f1_score), 
    'accuracy': make_scorer(accuracy_score)
}
svc_n_range = [i/10 for i in np.array(range(1, 10, 1))]
svc_tuned_parameters = [{'C': svc_n_range}]
svc_tuned_parameters

[{'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}]

In [13]:
svc_gs = GridSearchCV(SVC(kernel="rbf"), svc_tuned_parameters, cv=5, scoring=scoring, refit='f1')
svc_gs.fit(X_train, y_train)





GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}],
       pre_dispatch='2*n_jobs', refit='f1', return_train_score='warn',
       scoring={'recall': make_scorer(recall_score), 'f1': make_scorer(f1_score), 'accuracy': make_scorer(accuracy_score)},
       verbose=0)

In [14]:
# лучшая модель
best_svc = svc_gs.best_estimator_
best_svc

SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [15]:
# лучшее f1
svc_gs.best_score_

0.7127554565117651

In [16]:
# лучшее k
svc_gs.best_params_

{'C': 0.1}

In [17]:
# на начальном разбиении проверим метрики при новом значении c
best_svc.fit(X_train, y_train)
predicted_best_svc = best_svc.predict(X_test)
predicted_best_svc



array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)

In [18]:
classification_report(y_test, predicted_best_svc, output_dict=True)["0"], \
classification_report(y_test, predicted_best_svc, output_dict=True)["1"]

  'precision', 'predicted', average, warn_for)


({'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 30},
 {'precision': 0.5081967213114754,
  'recall': 1.0,
  'f1-score': 0.673913043478261,
  'support': 31})

In [19]:
tree = DecisionTreeClassifier(random_state=1, max_depth=5)
tree.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best')

In [20]:
tree_y_test = tree.predict(X_test)
tree_y_test

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0], dtype=int64)

In [21]:
classification_report(y_test, tree_y_test, output_dict=True)["0"], \
classification_report(y_test, tree_y_test, output_dict=True)["1"]

({'precision': 0.7, 'recall': 0.7, 'f1-score': 0.7, 'support': 30},
 {'precision': 0.7096774193548387,
  'recall': 0.7096774193548387,
  'f1-score': 0.7096774193548389,
  'support': 31})

In [22]:
# при помощи решетчатого поиска и кросс-валидации найдем оптимальное значение гиперпараметра C
tree_n_range = np.array(range(1, 20))
tree_tuned_parameters = [{'max_depth': tree_n_range}]
tree_tuned_parameters

[{'max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19])}]

In [23]:
tree_gs = GridSearchCV(DecisionTreeClassifier(random_state=1), tree_tuned_parameters, cv=5, scoring=scoring, refit='f1')
tree_gs.fit(X_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'max_depth': array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19])}],
       pre_dispatch='2*n_jobs', refit='f1', return_train_score='warn',
       scoring={'recall': make_scorer(recall_score), 'f1': make_scorer(f1_score), 'accuracy': make_scorer(accuracy_score)},
       verbose=0)

In [24]:
# лучшая модель
best_tree = tree_gs.best_estimator_
best_tree

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=4,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best')

In [25]:
# лучшее значение f1
tree_gs.best_score_

0.8615494578662056

In [26]:
# на начальном разбиении проверим метрики при новом значении c
best_tree.fit(X_train, y_train)
predicted_best_tree = best_tree.predict(X_test)
predicted_best_tree

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0], dtype=int64)

In [27]:
classification_report(y_test, predicted_best_tree, output_dict=True)["0"], \
classification_report(y_test, predicted_best_tree, output_dict=True)["1"]

({'precision': 0.7666666666666667,
  'recall': 0.7666666666666667,
  'f1-score': 0.7666666666666667,
  'support': 30},
 {'precision': 0.7741935483870968,
  'recall': 0.7741935483870968,
  'f1-score': 0.7741935483870968,
  'support': 31})

In [28]:
# таким образом из трех моделей лучший результат показал метод Логистической регрессии
classification_report(y_test, logistic_y_test, output_dict=True)["0"], \
classification_report(y_test, logistic_y_test, output_dict=True)["1"]

({'precision': 0.8,
  'recall': 0.6666666666666666,
  'f1-score': 0.7272727272727272,
  'support': 30},
 {'precision': 0.7222222222222222,
  'recall': 0.8387096774193549,
  'f1-score': 0.7761194029850746,
  'support': 31})

# ЛР 6

In [29]:
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier

In [30]:
bagging_tree = BaggingClassifier(DecisionTreeClassifier(random_state=1), n_estimators=100)
bagging_tree.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=100, n_jobs=None, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [31]:
bagging_tree_y_test = bagging_tree.predict(X_test)
bagging_tree_y_test

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0], dtype=int64)

In [32]:
classification_report(y_test, bagging_tree_y_test, output_dict=True)["0"], \
classification_report(y_test, bagging_tree_y_test, output_dict=True)["1"]

({'precision': 0.7307692307692307,
  'recall': 0.6333333333333333,
  'f1-score': 0.6785714285714285,
  'support': 30},
 {'precision': 0.6857142857142857,
  'recall': 0.7741935483870968,
  'f1-score': 0.7272727272727272,
  'support': 31})

In [33]:
bagging_tree_n_range = np.array(range(10, 200, 10))
bagging_tree_tuned_parameters = [{'n_estimators': bagging_tree_n_range}]
bagging_tree_tuned_parameters

[{'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
         140, 150, 160, 170, 180, 190])}]

In [34]:
bagging_tree_gs = GridSearchCV(BaggingClassifier(DecisionTreeClassifier(random_state=1)), bagging_tree_tuned_parameters, cv=5, scoring=scoring, refit='f1')
bagging_tree_gs.fit(X_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            ...stimators=10, n_jobs=None, oob_score=False,
         random_state=None, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190])}],
       pre_dispatch='2*n_jobs', refit='f1', return_train_score='warn',
       scoring={'recall': make_scorer(recall_score), 'f1': make_scorer(f1_score), 'accuracy': make_scorer(accuracy_score)},
       verbose=0)

In [35]:
best_bagging = bagging_tree_gs.best_estimator_
best_bagging

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=110, n_jobs=None, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [36]:
bagging_tree_gs.best_score_

0.8778237616860984

In [37]:
bagging_tree_gs.best_params_

{'n_estimators': 110}

In [38]:
best_bagging.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=1,
            splitter='best'),
         bootstrap=True, bootstrap_features=False, max_features=1.0,
         max_samples=1.0, n_estimators=110, n_jobs=None, oob_score=False,
         random_state=None, verbose=0, warm_start=False)

In [39]:
best_bagging_y_test = best_bagging.predict(X_test)
best_bagging_y_test

array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0], dtype=int64)

In [40]:
classification_report(y_test, best_bagging_y_test, output_dict=True)["0"], \
classification_report(y_test, best_bagging_y_test, output_dict=True)["1"]

({'precision': 0.7692307692307693,
  'recall': 0.6666666666666666,
  'f1-score': 0.7142857142857142,
  'support': 30},
 {'precision': 0.7142857142857143,
  'recall': 0.8064516129032258,
  'f1-score': 0.7575757575757576,
  'support': 31})

In [41]:
extra_trees = ExtraTreesClassifier(random_state=1, n_estimators=100)
extra_trees.fit(X_train, y_train)

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
           oob_score=False, random_state=1, verbose=0, warm_start=False)

In [42]:
extra_trees_y_test = extra_trees.predict(X_test)
extra_trees_y_test

array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0], dtype=int64)

In [43]:
classification_report(y_test, extra_trees_y_test, output_dict=True)["0"], \
classification_report(y_test, extra_trees_y_test, output_dict=True)["1"]

({'precision': 0.7692307692307693,
  'recall': 0.6666666666666666,
  'f1-score': 0.7142857142857142,
  'support': 30},
 {'precision': 0.7142857142857143,
  'recall': 0.8064516129032258,
  'f1-score': 0.7575757575757576,
  'support': 31})

In [44]:
extra_tree_n_range = np.array(range(10, 200, 10))
extra_tree_tuned_parameters = [{'n_estimators': extra_tree_n_range}]
extra_tree_tuned_parameters

[{'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
         140, 150, 160, 170, 180, 190])}]

In [45]:
extra_tree_gs = GridSearchCV(ExtraTreesClassifier(random_state=1), extra_tree_tuned_parameters, cv=5, scoring=scoring, refit='f1')
extra_tree_gs.fit(X_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
           oob_score=False, random_state=1, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid=[{'n_estimators': array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100, 110, 120, 130,
       140, 150, 160, 170, 180, 190])}],
       pre_dispatch='2*n_jobs', refit='f1', return_train_score='warn',
       scoring={'recall': make_scorer(recall_score), 'f1': make_scorer(f1_score), 'accuracy': make_scorer(accuracy_score)},
       verbose=0)

In [46]:
best_extra_tree = extra_tree_gs.best_estimator_
best_extra_tree

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=140, n_jobs=None,
           oob_score=False, random_state=1, verbose=0, warm_start=False)

In [47]:
extra_tree_gs.best_params_

{'n_estimators': 140}

In [48]:
extra_tree_gs.best_score_

0.8781143731089153

In [49]:
best_extra_tree.fit(X_train, y_train)

ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
           max_depth=None, max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=140, n_jobs=None,
           oob_score=False, random_state=1, verbose=0, warm_start=False)

In [50]:
best_extra_tree_y_test = best_extra_tree.predict(X_test)
best_extra_tree_y_test

array([0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0], dtype=int64)

In [51]:
classification_report(y_test, best_extra_tree_y_test, output_dict=True)["0"], \
classification_report(y_test, best_extra_tree_y_test, output_dict=True)["1"]

({'precision': 0.7916666666666666,
  'recall': 0.6333333333333333,
  'f1-score': 0.7037037037037038,
  'support': 30},
 {'precision': 0.7027027027027027,
  'recall': 0.8387096774193549,
  'f1-score': 0.7647058823529411,
  'support': 31})

In [52]:
# для сравнения лучший результат беггинга
classification_report(y_test, best_bagging_y_test, output_dict=True)["0"], \
classification_report(y_test, best_bagging_y_test, output_dict=True)["1"]

({'precision': 0.7692307692307693,
  'recall': 0.6666666666666666,
  'f1-score': 0.7142857142857142,
  'support': 30},
 {'precision': 0.7142857142857143,
  'recall': 0.8064516129032258,
  'f1-score': 0.7575757575757576,
  'support': 31})

In [53]:
# таким образом, с небольшим перевесом себя лучше показал беггинг
# однако, лучший результат среди всех методов из 5 и 6 ЛР показал метод логистической регрессии с параметрами по умолчанию
classification_report(y_test, logistic_y_test, output_dict=True)["0"], \
classification_report(y_test, logistic_y_test, output_dict=True)["1"]

({'precision': 0.8,
  'recall': 0.6666666666666666,
  'f1-score': 0.7272727272727272,
  'support': 30},
 {'precision': 0.7222222222222222,
  'recall': 0.8387096774193549,
  'f1-score': 0.7761194029850746,
  'support': 31})