In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix, accuracy_score, roc_auc_score, roc_curve, classification_report
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [2]:
from warnings import filterwarnings
filterwarnings('ignore')

In [3]:
df = pd.read_csv(".\diabetes.csv")
y = df["Outcome"]
X = df.drop(["Outcome"], axis = 1)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [50]:
scaler = StandardScaler()

In [51]:
scaler.fit(X_train)
X_train = scaler.transform(X_train)

In [52]:
scaler.fit(X_test)
X_test = scaler.transform(X_test)

In [58]:
mlpc_model = MLPClassifier(activation = "logistic", solver = "lbfgs").fit(X_train, y_train)

In [59]:
mlpc_params = {"alpha": [1, 2, 3, 0.1, 0.01, 0.5, 0.001, 0.005], "hidden_layer_sizes": [(10,10), (100,100,100), (100,100), (3,5)] }

In [60]:
mlpc_cv_model = GridSearchCV(mlpc_model, mlpc_params, cv = 10, n_jobs = -1, verbose = 2).fit(X_train, y_train)

Fitting 10 folds for each of 32 candidates, totalling 320 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    1.4s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:   24.3s
[Parallel(n_jobs=-1)]: Done 320 out of 320 | elapsed:  1.3min finished


In [61]:
mlpc_cv_model.best_params_

{'alpha': 1, 'hidden_layer_sizes': (3, 5)}

In [62]:
mlpc_cv_model.get_params()

{'cv': 10,
 'error_score': nan,
 'estimator__activation': 'logistic',
 'estimator__alpha': 0.0001,
 'estimator__batch_size': 'auto',
 'estimator__beta_1': 0.9,
 'estimator__beta_2': 0.999,
 'estimator__early_stopping': False,
 'estimator__epsilon': 1e-08,
 'estimator__hidden_layer_sizes': (100,),
 'estimator__learning_rate': 'constant',
 'estimator__learning_rate_init': 0.001,
 'estimator__max_fun': 15000,
 'estimator__max_iter': 200,
 'estimator__momentum': 0.9,
 'estimator__n_iter_no_change': 10,
 'estimator__nesterovs_momentum': True,
 'estimator__power_t': 0.5,
 'estimator__random_state': None,
 'estimator__shuffle': True,
 'estimator__solver': 'lbfgs',
 'estimator__tol': 0.0001,
 'estimator__validation_fraction': 0.1,
 'estimator__verbose': False,
 'estimator__warm_start': False,
 'estimator': MLPClassifier(activation='logistic', solver='lbfgs'),
 'iid': 'deprecated',
 'n_jobs': -1,
 'param_grid': {'alpha': [1, 2, 3, 0.1, 0.01, 0.5, 0.001, 0.005],
  'hidden_layer_sizes': [(10, 10)

In [66]:
mlpc_tuned = MLPClassifier(activation = "logistic", solver = "lbfgs", alpha = 1, hidden_layer_sizes = (3,5)).fit(X_train, y_train)

In [67]:
y_pred = mlpc_tuned.predict(X_test)

In [68]:
accuracy_score(y_test, y_pred)

0.7575757575757576