# Оптимизация гиперпараметров

In [65]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

In [34]:
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [35]:
features = pd.DataFrame(X, columns=iris.feature_names)
features.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [36]:
target = pd.DataFrame(y, columns=['Class'])
target.head()

Unnamed: 0,Class
0,0
1,0
2,0
3,0
4,0


In [37]:
features.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [38]:
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

In [39]:
pd.DataFrame(X_scaler, columns=iris.feature_names).describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,0.428704,0.440556,0.467458,0.458056
std,0.230018,0.181611,0.299203,0.317599
min,0.0,0.0,0.0,0.0
25%,0.222222,0.333333,0.101695,0.083333
50%,0.416667,0.416667,0.567797,0.5
75%,0.583333,0.541667,0.694915,0.708333
max,1.0,1.0,1.0,1.0


In [40]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=5
)

In [18]:
svc = SVC()
svc.fit(X_train, y_train)

SVC()

In [19]:
y_pred = svc.predict(X_test)

In [20]:
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred,average='macro')
recall = metrics.recall_score(y_test, y_pred,average='macro')
f1 = metrics.f1_score(y_test, y_pred,average='macro')
print(f'Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1: {f1}\n')

Accuracy: 0.9
Precision: 0.9111111111111111
Recall: 0.9090909090909092
F1: 0.9089026915113871



##### Оптимизация гиперпараметров

In [21]:
svc.get_params().keys()

dict_keys(['C', 'break_ties', 'cache_size', 'class_weight', 'coef0', 'decision_function_shape', 'degree', 'gamma', 'kernel', 'max_iter', 'probability', 'random_state', 'shrinking', 'tol', 'verbose'])

In [22]:
svc_params = {
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'C': np.logspace(0, 4, 10)
}

In [24]:
svc_grid_search = GridSearchCV(
    estimator = svc,
    param_grid = svc_params,
    cv = 5
)

In [26]:
svc_grid_result = svc_grid_search.fit(X_train, y_train)

In [27]:
svc_grid_result.best_params_

{'C': 7.742636826811269, 'kernel': 'linear'}

In [28]:
best_svc = SVC(**svc_grid_result.best_params_)
best_svc.fit(X_train, y_train)
y_pred = best_svc.predict(X_test)

In [29]:
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred,average='macro')
recall = metrics.recall_score(y_test, y_pred,average='macro')
f1 = metrics.f1_score(y_test, y_pred,average='macro')
print(f'Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1: {f1}\n')

Accuracy: 0.9333333333333333
Precision: 0.9393939393939394
Recall: 0.9393939393939394
F1: 0.9393939393939394



##### Logistic Regression

In [41]:
lr = LogisticRegression()
lr.fit(X_train, y_train)

LogisticRegression()

In [42]:
y_pred = lr.predict(X_test)

In [44]:
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred,average='macro')
recall = metrics.recall_score(y_test, y_pred,average='macro')
f1 = metrics.f1_score(y_test, y_pred,average='macro')
print(f'Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1: {f1}\n')

Accuracy: 0.9
Precision: 0.9111111111111111
Recall: 0.9090909090909092
F1: 0.9089026915113871



##### Оптимизация гиперпараметров

In [45]:
lr.get_params().keys()

dict_keys(['C', 'class_weight', 'dual', 'fit_intercept', 'intercept_scaling', 'l1_ratio', 'max_iter', 'multi_class', 'n_jobs', 'penalty', 'random_state', 'solver', 'tol', 'verbose', 'warm_start'])

In [59]:
lr_params = {
    'solver': ['linear', 'liblinear', 'newton-cg'],
    'C': np.logspace(0, 4, 10),
    'penalty': ['l2', 'l1']
}

In [60]:
lr_grid_search = GridSearchCV(
    estimator = lr,
    param_grid = lr_params,
    cv = 5
)

In [61]:
import warnings
warnings.filterwarnings('ignore')
lr_grid_result = lr_grid_search.fit(X_train, y_train)

In [62]:
lr_grid_result.best_params_

{'C': 21.544346900318832, 'penalty': 'l2', 'solver': 'newton-cg'}

In [63]:
best_lr = LogisticRegression(**lr_grid_result.best_params_)
best_lr.fit(X_train, y_train)
y_pred = best_lr.predict(X_test)

In [64]:
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred,average='macro')
recall = metrics.recall_score(y_test, y_pred,average='macro')
f1 = metrics.f1_score(y_test, y_pred,average='macro')
print(f'Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1: {f1}\n')

Accuracy: 0.9
Precision: 0.9111111111111111
Recall: 0.9090909090909092
F1: 0.9089026915113871



#### Рандомизированный поиск гиперпараметров

In [66]:
svc_params = {
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'C': np.logspace(0, 4, 10)
}

In [68]:
svc_random_search = RandomizedSearchCV(
    estimator = svc,
    param_distributions = svc_params,
    cv = 5
)

In [69]:
svc_random_result = svc_random_search.fit(X_train, y_train)

In [70]:
svc_random_result.best_params_

{'kernel': 'rbf', 'C': 2.7825594022071245}

In [71]:
best_svc = SVC(**svc_random_result.best_params_)
best_svc.fit(X_train, y_train)
y_pred = best_svc.predict(X_test)

In [72]:
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred,average='macro')
recall = metrics.recall_score(y_test, y_pred,average='macro')
f1 = metrics.f1_score(y_test, y_pred,average='macro')
print(f'Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1: {f1}\n')

Accuracy: 0.9333333333333333
Precision: 0.9393939393939394
Recall: 0.9393939393939394
F1: 0.9393939393939394

