In [None]:
import pandas as pd
import numpy as np

from sklearn.datasets import load_iris
iris = load_iris()

iris.data

In [14]:
from sklearn.preprocessing import MinMaxScaler
trans = MinMaxScaler()
data = trans.fit_transform(iris.data)
# Посмотрим на данные
from pandas import DataFrame
dataset = DataFrame(data)
print(dataset.describe())

                0           1           2           3
count  150.000000  150.000000  150.000000  150.000000
mean     0.428704    0.440556    0.467458    0.458056
std      0.230018    0.181611    0.299203    0.317599
min      0.000000    0.000000    0.000000    0.000000
25%      0.222222    0.333333    0.101695    0.083333
50%      0.416667    0.416667    0.567797    0.500000
75%      0.583333    0.541667    0.694915    0.708333
max      1.000000    1.000000    1.000000    1.000000


In [25]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split( 
    iris.data, iris.target, random_state=0) 
 
mlp = MLPClassifier() 
mlp.fit(X_train, y_train) 
 
print("Правильность на обучающем наборе: {:.2f}".format(mlp.score(X_train, y_train))) 
print("Правильность на тестовом наборе: {:.2f}".format(mlp.score(X_test, y_test)))

Правильность на обучающем наборе: 0.98
Правильность на тестовом наборе: 0.95




In [35]:
from sklearn import linear_model
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold


X = iris.data[:150]
y = iris.target[:150]

skf = StratifiedKFold(n_splits=5)
results = cross_val_score(mlp, X, y, cv=5)



In [37]:
print(np.mean(results))

0.9733333333333334


In [None]:
activations = ['identity', 'logistic', 'tanh', 'relu']
best_score = 0

for layers in range(1, 200):
  for activation in activations:
    mlp = MLPClassifier(hidden_layer_sizes=(layers), activation=activation)
    mlp.fit(X_train, y_train)
    results = cross_val_score(mlp, X, y, cv=5)
    if np.mean(results) > best_score:
      best_score = np.mean(results)
      best_parametrs = {'layers': layers, 'activation': activation}

print("Наилучшее значение правильности: {:.2f}".format(best_score))
print("Наилучшее значение параметров: {:.2f}".format(best_parametrs))

In [41]:
print("Наилучшее значение правильности: {:.2f}".format(best_score))
print("Наилучшее значение параметров: {}".format(best_parametrs))

Наилучшее значение правильности: 0.99
Наилучшее значение параметров: {'layers': 27, 'activation': 'tanh'}


In [39]:
param_grid = {'layers': [i for i in range(100)], 
'activations': ['identity', 'logistic', 'tanh', 'relu']}

print('Сетка параметров: \n{}'.format(param_grid))

Сетка параметров: 
{'layers': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], 'activations': ['identity', 'logistic', 'tanh', 'relu']}


In [42]:
from sklearn.model_selection import GridSearchCV

params = {"alpha": [0.1, 0.01, 0.0001],
          "hidden_layer_sizes": [(10,10,10),
                                 (100,100,100),
                                 (100,100)],
          "solver" : ["lbfgs","adam","sgd"],
          "activation": ["relu","logistic"]}

mlpc_cv_model = GridSearchCV(mlp, params, 
                         cv = 5, # To make a 5-fold CV
                         n_jobs = -1, # Number of jobs to be run in parallel (-1: means to use all processors)
                         verbose = 2)

In [43]:
X_train, X_test, y_train, y_test = train_test_split( 
    iris.data, iris.target, random_state=0) 
 
mlp = MLPClassifier() 

mlpc_cv_model.fit(X_train, y_train)

print("Правильность на тестовом наборе: {}".format(mlpc_cv_model.score(X_test, y_test)))

Fitting 5 folds for each of 54 candidates, totalling 270 fits
Правильность на тестовом наборе: 0.9736842105263158




In [44]:
print("Наилучшее значение параметров: {}".format(mlpc_cv_model.best_params_))
print("Наилучшее значение кросс-валидачии правильности: {:.2f}".format(mlpc_cv_model.best_score_))

Наилучшее значение параметров: {'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (100, 100), 'solver': 'adam'}
Наилучшее значение кросс-валидачии правильности: 0.98


In [45]:
print("Наилучшая модель {}".format(mlpc_cv_model.best_estimator_))

Наилучшая модель MLPClassifier(alpha=0.1, hidden_layer_sizes=(100, 100))


In [47]:
results2 = pd.DataFrame(mlpc_cv_model.cv_results_)
display(results2.head())

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_activation,param_alpha,param_hidden_layer_sizes,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.261291,0.076053,0.002068,0.001164,relu,0.1,"(10, 10, 10)",lbfgs,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l...",1.0,0.956522,1.0,0.909091,0.954545,0.964032,0.033918,17
1,0.250102,0.010302,0.001833,0.000448,relu,0.1,"(10, 10, 10)",adam,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l...",1.0,0.782609,0.954545,0.909091,0.772727,0.883794,0.09135,35
2,0.217943,0.008028,0.001783,0.000997,relu,0.1,"(10, 10, 10)",sgd,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l...",0.130435,0.73913,0.909091,0.772727,0.909091,0.692095,0.289258,41
3,1.983681,0.36503,0.003363,0.003087,relu,0.1,"(100, 100, 100)",lbfgs,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l...",1.0,0.956522,0.954545,0.909091,0.954545,0.954941,0.028759,28
4,1.066662,0.157669,0.005865,0.002802,relu,0.1,"(100, 100, 100)",adam,"{'activation': 'relu', 'alpha': 0.1, 'hidden_l...",1.0,0.956522,1.0,0.954545,0.954545,0.973123,0.021957,5


In [None]:
results2.mean_test_score

In [None]:
!pip install scikit-learn==1.1.1 -i https://pypi.tuna.tsinghua.edu.cn/simple

In [None]:
!pip install mglearn
!pip install --upgrade joblib==1.1.0

In [59]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
import mglearn

AttributeError: ignored

In [55]:
scores = np.array(results2.mean_test_score).reshape(6, 9)

mglearn.tools.heatmap(scores, xlabel='layers', xtticklabels=param_grid['layers'], 
                      ylabes='activations', ytticklabels=param_grid['activations'], cmap="viridis")

NameError: ignored