# Basic models trained using features from protein and AA descriptors

In [27]:
import pandas as pd
import numpy as np

In [None]:
X_train_data = pd.read_feather("data/X_train_data.ftr").drop("Ab_ID", axis=1)
X_test_data = pd.read_feather("data/X_test_data.ftr").drop("Ab_ID", axis=1)
X_valid_data = pd.read_feather("data/X_valid_data.ftr").drop("Ab_ID", axis=1)

In [3]:
data_train = pd.read_csv("data/chen_train_data.csv", sep=";")
data_valid = pd.read_csv("data/chen_valid_data.csv", sep=";")
data_test = pd.read_csv("data/chen_test_data.csv", sep=";")

In [4]:
y_train_data = data_train["Y"]
y_valid_data = data_valid["Y"]
y_test_data = data_test["Y"]

### Used models and hyperparameters:
#### Logistic regression
- regularization: {L2}
- C: loguniform(0.001, 1000)

#### Support Vector Machine
- C: loguniform(0.001, 100)
- kernel: {linear, rbf}
- &gamma: loguniform(0.001, 1)

#### Random Forest
- number of estimators: {1, 10, ..., 200}
- max depth: {0, 2, ..., 50}
- max fraction features ∈ {0.1, 0.15, ..., 0.75}

#### Gradient Boosting
- learning rate: loguniform(0.01, 0.5)
- number of estimators: {1, 10, ..., 200}
- max depth: {0, 2, ..., 20}
- max fraction features ∈ {0.1, 0.2, ..., 0.6}

#### Multilayer Perceptron
- hidden layer sizes: {(100,), (50,), (100, 100)}
- activation: {"relu", "logistic"} (not in paper)

## Logistic Regression

In [5]:
from sklearn.linear_model import LogisticRegression
#from scipy.stats import loguniform
from sklearn.utils.fixes import loguniform    
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [6]:
from numpy.ma import MaskedArray
import sklearn.utils.fixes

sklearn.utils.fixes.MaskedArray = MaskedArray
from skopt import BayesSearchCV

In [11]:
def logistic_regression_test(X_train, y_train):
    lr = LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)
    parameters = {'C':loguniform(0.001, 1000), 'penalty': ["l2"], "solver": ["lbfgs", "sag"]}
    # parameters = {'C': [0.5, 1], 'penalty': ["l2"]}
    # "solver": ["lbfgs", "sag"]
    grid = RandomizedSearchCV(lr, parameters, verbose=2)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    return grid

In [12]:
grid_lr = logistic_regression_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

{'C': 1.6141735908406478, 'penalty': 'l2', 'solver': 'sag'}




In [19]:
grid_lr.best_score_

0.792437624005759

In [7]:
def logistic_regression_bayes(X_train, y_train):
    lr = LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)
    opt = BayesSearchCV(
        lr,
        { 
            'penalty': ["l2"],
            'C': (1e-3, 1e+3, 'log-uniform'),
        },
        n_iter=32,
        cv=3,
        scoring='f1'
    )

    opt.fit(X_train, y_train)

In [8]:
logistic_regression_bayes(X_train_data, y_train_data)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

ValueError: too many values to unpack (expected 5)

## SVM

In [14]:
from sklearn.svm import LinearSVC, SVC

In [15]:
def svm_test(X_train, y_train):
    svc = SVC(max_iter=8000, probability=True, class_weight='balanced')
    parameters = {'C': loguniform(0.001, 100), 'kernel':["linear", "rbf"], 'gamma': loguniform(1e-3, 1e0)}
    grid = RandomizedSearchCV(svc, parameters, scoring="f1", verbose=2)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    return grid

In [17]:
grid = svm_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits




{'C': 0.00764503251236516, 'gamma': 0.007377326944015009, 'kernel': 'linear'}




In [28]:
grid.best_score_

0.4850692690211048

## Random Forest

In [20]:
from sklearn.ensemble import RandomForestClassifier

In [37]:
def rf_test(X_train, y_train):
    rf = RandomForestClassifier(random_state=42, n_jobs=-1, class_weight='balanced')
    n = len(X_train)
    parameters = {'n_estimators': np.arange(1, 200, 10), 'max_depth': np.arange(1, min(50,n), 2), 
                  'max_features': np.arange(0.1, 0.75, 0.05)}
    grid = RandomizedSearchCV(rf, parameters, scoring="f1", verbose=2)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    return grid

In [30]:
grid_rf = rf_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=28, max_features=0.6000000000000002, n_estimators=101; total time=  34.2s
[CV] END max_depth=28, max_features=0.6000000000000002, n_estimators=101; total time=  34.8s
[CV] END max_depth=28, max_features=0.6000000000000002, n_estimators=101; total time=  33.5s
[CV] END max_depth=28, max_features=0.6000000000000002, n_estimators=101; total time=  32.7s
[CV] END max_depth=28, max_features=0.6000000000000002, n_estimators=101; total time=  31.9s
[CV] END max_depth=6, max_features=0.5000000000000001, n_estimators=141; total time=  26.6s
[CV] END max_depth=6, max_features=0.5000000000000001, n_estimators=141; total time=  26.7s
[CV] END max_depth=6, max_features=0.5000000000000001, n_estimators=141; total time=  26.4s
[CV] END max_depth=6, max_features=0.5000000000000001, n_estimators=141; total time=  26.0s
[CV] END max_depth=6, max_features=0.5000000000000001, n_estimators=141; total time=  25.9s
[CV] END max_d

5 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
joblib.externals.loky.process_executor._RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/home/brazdilv/.conda/envs/ml/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 431, in _process_worker
    r = call_item()
  File "/home/brazdilv/.conda/envs/ml/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 285, in __call__
    return self.fn(*self.args, **self.kwargs)
  File "/home/brazdilv/.conda/envs/ml/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 595, in __call__
    return self.func(*args, **kwargs)
  File "/home/brazdilv/.conda/en

{'n_estimators': 141, 'max_features': 0.5000000000000001, 'max_depth': 6}


In [31]:
grid_rf.best_score_

0.49924565617229266

## Gradient Boosting

In [32]:
from sklearn.ensemble import GradientBoostingClassifier

In [38]:
def boost_test(X_train, y_train):
    gb = GradientBoostingClassifier(random_state=42, n_iter_no_change=70)
    n = len(X_train)
    parameters = {'learning_rate': loguniform(0.01, 0.5), 
                  'n_estimators': np.arange(1, 200, 10), 
                  'max_depth': np.arange(1, min(20,n), 2), 
                  'max_features': np.arange(0.1, 0.6, 0.1)}
    grid = RandomizedSearchCV(gb, parameters, scoring="f1", verbose=2)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    return grid

In [39]:
grid_gb = boost_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


  "X does not have valid feature names, but"


[CV] END learning_rate=0.13705207137212586, max_depth=11, max_features=0.2, n_estimators=141; total time=  49.2s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.13705207137212586, max_depth=11, max_features=0.2, n_estimators=141; total time=  48.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.13705207137212586, max_depth=11, max_features=0.2, n_estimators=141; total time=  49.3s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.13705207137212586, max_depth=11, max_features=0.2, n_estimators=141; total time=  48.4s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.13705207137212586, max_depth=11, max_features=0.2, n_estimators=141; total time=  47.6s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.02897370312138987, max_depth=7, max_features=0.2, n_estimators=171; total time= 1.1min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.02897370312138987, max_depth=7, max_features=0.2, n_estimators=171; total time= 1.2min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.02897370312138987, max_depth=7, max_features=0.2, n_estimators=171; total time= 1.2min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.02897370312138987, max_depth=7, max_features=0.2, n_estimators=171; total time=  54.6s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.02897370312138987, max_depth=7, max_features=0.2, n_estimators=171; total time=  50.1s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2601101447797948, max_depth=5, max_features=0.30000000000000004, n_estimators=141; total time=  37.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2601101447797948, max_depth=5, max_features=0.30000000000000004, n_estimators=141; total time=  40.6s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2601101447797948, max_depth=5, max_features=0.30000000000000004, n_estimators=141; total time=  50.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2601101447797948, max_depth=5, max_features=0.30000000000000004, n_estimators=141; total time=  36.0s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2601101447797948, max_depth=5, max_features=0.30000000000000004, n_estimators=141; total time=  36.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1127960800202123, max_depth=3, max_features=0.1, n_estimators=191; total time=  18.2s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1127960800202123, max_depth=3, max_features=0.1, n_estimators=191; total time=  21.3s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1127960800202123, max_depth=3, max_features=0.1, n_estimators=191; total time=  21.2s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1127960800202123, max_depth=3, max_features=0.1, n_estimators=191; total time=  20.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1127960800202123, max_depth=3, max_features=0.1, n_estimators=191; total time=  16.1s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2029808739428929, max_depth=3, max_features=0.5, n_estimators=11; total time=   6.2s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2029808739428929, max_depth=3, max_features=0.5, n_estimators=11; total time=   6.2s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2029808739428929, max_depth=3, max_features=0.5, n_estimators=11; total time=   6.1s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2029808739428929, max_depth=3, max_features=0.5, n_estimators=11; total time=   6.2s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2029808739428929, max_depth=3, max_features=0.5, n_estimators=11; total time=   6.1s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25079757619409754, max_depth=7, max_features=0.5, n_estimators=171; total time= 1.3min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25079757619409754, max_depth=7, max_features=0.5, n_estimators=171; total time= 1.3min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25079757619409754, max_depth=7, max_features=0.5, n_estimators=171; total time= 1.4min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25079757619409754, max_depth=7, max_features=0.5, n_estimators=171; total time= 1.3min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25079757619409754, max_depth=7, max_features=0.5, n_estimators=171; total time= 1.3min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.21955845222245407, max_depth=7, max_features=0.1, n_estimators=51; total time=  12.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.21955845222245407, max_depth=7, max_features=0.1, n_estimators=51; total time=  13.1s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.21955845222245407, max_depth=7, max_features=0.1, n_estimators=51; total time=  12.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.21955845222245407, max_depth=7, max_features=0.1, n_estimators=51; total time=  12.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.21955845222245407, max_depth=7, max_features=0.1, n_estimators=51; total time=  12.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.16133897997574362, max_depth=19, max_features=0.5, n_estimators=131; total time= 2.7min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.16133897997574362, max_depth=19, max_features=0.5, n_estimators=131; total time= 2.7min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.16133897997574362, max_depth=19, max_features=0.5, n_estimators=131; total time= 2.8min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.16133897997574362, max_depth=19, max_features=0.5, n_estimators=131; total time= 2.8min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.16133897997574362, max_depth=19, max_features=0.5, n_estimators=131; total time= 2.7min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25316353139131303, max_depth=11, max_features=0.30000000000000004, n_estimators=41; total time=  41.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25316353139131303, max_depth=11, max_features=0.30000000000000004, n_estimators=41; total time=  41.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25316353139131303, max_depth=11, max_features=0.30000000000000004, n_estimators=41; total time=  41.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25316353139131303, max_depth=11, max_features=0.30000000000000004, n_estimators=41; total time=  41.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.25316353139131303, max_depth=11, max_features=0.30000000000000004, n_estimators=41; total time=  40.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.01600317304315852, max_depth=11, max_features=0.4, n_estimators=171; total time= 3.4min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.01600317304315852, max_depth=11, max_features=0.4, n_estimators=171; total time= 3.4min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.01600317304315852, max_depth=11, max_features=0.4, n_estimators=171; total time= 3.5min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.01600317304315852, max_depth=11, max_features=0.4, n_estimators=171; total time= 3.0min


  "X does not have valid feature names, but"


[CV] END learning_rate=0.01600317304315852, max_depth=11, max_features=0.4, n_estimators=171; total time= 2.6min


  "X does not have valid feature names, but"


{'learning_rate': 0.1127960800202123, 'max_depth': 3, 'max_features': 0.1, 'n_estimators': 191}


In [40]:
grid_gb.best_score_

0.43644058155254173

# Multilayer Perceptron

In [41]:
from sklearn.neural_network import MLPClassifier

In [44]:
def mlp_test(X_train, y_train):
    mlp = MLPClassifier(random_state=42, max_iter=int(1000))
    parameters = {'hidden_layer_sizes': [(100,), (50,), (100, 100)], "activation": ["relu", "logistic"]}
    grid = RandomizedSearchCV(mlp, parameters, scoring="f1", verbose=2)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    return grid

In [45]:
grid_mlp = mlp_test(X_train_data, y_train_data)



Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time= 1.9min
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time= 1.3min
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time= 1.2min
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time=  59.6s
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time=  35.1s
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time= 2.2min
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time= 1.5min
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time= 1.6min
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time= 2.8min
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time= 1.2min
[CV] END .....activation=relu, hidden_layer_sizes=(100, 100); total time=  51.7s
[CV] END .....activation=relu, hidden_layer_sizes

In [46]:
grid_mlp.best_score_

0.47018467335418074

# Same with Embeddings

In [47]:
model = "original_3_1"

In [49]:
X_train_data = pd.read_feather(f"data/embeddings/embeddings_train_{model}.ftr").drop("Ab_ID", axis=1)
X_test_data = pd.read_feather(f"data/embeddings/embeddings_test_{model}.ftr").drop("Ab_ID", axis=1)
X_valid_data = pd.read_feather(f"data/embeddings/embeddings_valid_{model}.ftr").drop("Ab_ID", axis=1)

In [50]:
grid_lr = logistic_regression_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits




{'C': 102.39817251044737, 'penalty': 'l2', 'solver': 'sag'}




In [51]:
grid = svm_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits




{'C': 1.7203971312116122, 'gamma': 0.002806246987347818, 'kernel': 'linear'}


In [52]:
grid_rf = rf_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END max_depth=29, max_features=0.6500000000000001, n_estimators=171; total time=   4.4s
[CV] END max_depth=29, max_features=0.6500000000000001, n_estimators=171; total time=   3.4s
[CV] END max_depth=29, max_features=0.6500000000000001, n_estimators=171; total time=   3.5s
[CV] END max_depth=29, max_features=0.6500000000000001, n_estimators=171; total time=   3.2s
[CV] END max_depth=29, max_features=0.6500000000000001, n_estimators=171; total time=   3.3s
[CV] END max_depth=3, max_features=0.7000000000000002, n_estimators=111; total time=   1.0s
[CV] END max_depth=3, max_features=0.7000000000000002, n_estimators=111; total time=   1.0s
[CV] END max_depth=3, max_features=0.7000000000000002, n_estimators=111; total time=   1.0s
[CV] END max_depth=3, max_features=0.7000000000000002, n_estimators=111; total time=   1.0s
[CV] END max_depth=3, max_features=0.7000000000000002, n_estimators=111; total time=   1.0s
[CV] END max_d

In [53]:
grid_gb = boost_test(X_train_data, y_train_data)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


  "X does not have valid feature names, but"


[CV] END learning_rate=0.051661368758675746, max_depth=7, max_features=0.2, n_estimators=81; total time=   1.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.051661368758675746, max_depth=7, max_features=0.2, n_estimators=81; total time=   1.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.051661368758675746, max_depth=7, max_features=0.2, n_estimators=81; total time=   1.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.051661368758675746, max_depth=7, max_features=0.2, n_estimators=81; total time=   1.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.051661368758675746, max_depth=7, max_features=0.2, n_estimators=81; total time=   1.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.06340578584829724, max_depth=17, max_features=0.2, n_estimators=191; total time=   3.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.06340578584829724, max_depth=17, max_features=0.2, n_estimators=191; total time=   3.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.06340578584829724, max_depth=17, max_features=0.2, n_estimators=191; total time=   3.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.06340578584829724, max_depth=17, max_features=0.2, n_estimators=191; total time=   3.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.06340578584829724, max_depth=17, max_features=0.2, n_estimators=191; total time=   3.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1854256230380312, max_depth=15, max_features=0.5, n_estimators=61; total time=   5.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1854256230380312, max_depth=15, max_features=0.5, n_estimators=61; total time=   5.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1854256230380312, max_depth=15, max_features=0.5, n_estimators=61; total time=   5.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1854256230380312, max_depth=15, max_features=0.5, n_estimators=61; total time=   5.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.1854256230380312, max_depth=15, max_features=0.5, n_estimators=61; total time=   5.9s
[CV] END learning_rate=0.020465361197334168, max_depth=1, max_features=0.5, n_estimators=11; total time=   0.2s


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


[CV] END learning_rate=0.020465361197334168, max_depth=1, max_features=0.5, n_estimators=11; total time=   0.1s
[CV] END learning_rate=0.020465361197334168, max_depth=1, max_features=0.5, n_estimators=11; total time=   0.1s
[CV] END learning_rate=0.020465361197334168, max_depth=1, max_features=0.5, n_estimators=11; total time=   0.1s


  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"
  "X does not have valid feature names, but"


[CV] END learning_rate=0.020465361197334168, max_depth=1, max_features=0.5, n_estimators=11; total time=   0.2s
[CV] END learning_rate=0.020961315656236316, max_depth=9, max_features=0.4, n_estimators=131; total time=   6.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.020961315656236316, max_depth=9, max_features=0.4, n_estimators=131; total time=   6.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.020961315656236316, max_depth=9, max_features=0.4, n_estimators=131; total time=   6.7s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.020961315656236316, max_depth=9, max_features=0.4, n_estimators=131; total time=   6.4s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.020961315656236316, max_depth=9, max_features=0.4, n_estimators=131; total time=   5.6s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.022727277954015104, max_depth=13, max_features=0.1, n_estimators=21; total time=   0.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.022727277954015104, max_depth=13, max_features=0.1, n_estimators=21; total time=   0.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.022727277954015104, max_depth=13, max_features=0.1, n_estimators=21; total time=   0.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.022727277954015104, max_depth=13, max_features=0.1, n_estimators=21; total time=   0.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.022727277954015104, max_depth=13, max_features=0.1, n_estimators=21; total time=   0.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.07780559321276094, max_depth=1, max_features=0.4, n_estimators=151; total time=   1.0s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.07780559321276094, max_depth=1, max_features=0.4, n_estimators=151; total time=   1.0s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.07780559321276094, max_depth=1, max_features=0.4, n_estimators=151; total time=   1.0s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.07780559321276094, max_depth=1, max_features=0.4, n_estimators=151; total time=   1.0s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.07780559321276094, max_depth=1, max_features=0.4, n_estimators=151; total time=   1.0s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.24645150094675355, max_depth=7, max_features=0.30000000000000004, n_estimators=11; total time=   0.3s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.24645150094675355, max_depth=7, max_features=0.30000000000000004, n_estimators=11; total time=   0.3s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.24645150094675355, max_depth=7, max_features=0.30000000000000004, n_estimators=11; total time=   0.3s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.24645150094675355, max_depth=7, max_features=0.30000000000000004, n_estimators=11; total time=   0.3s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.24645150094675355, max_depth=7, max_features=0.30000000000000004, n_estimators=11; total time=   0.3s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2703925406140571, max_depth=19, max_features=0.1, n_estimators=91; total time=   1.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2703925406140571, max_depth=19, max_features=0.1, n_estimators=91; total time=   1.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2703925406140571, max_depth=19, max_features=0.1, n_estimators=91; total time=   1.9s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2703925406140571, max_depth=19, max_features=0.1, n_estimators=91; total time=   1.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.2703925406140571, max_depth=19, max_features=0.1, n_estimators=91; total time=   1.8s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.012957637868390427, max_depth=9, max_features=0.5, n_estimators=151; total time=   9.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.012957637868390427, max_depth=9, max_features=0.5, n_estimators=151; total time=   9.4s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.012957637868390427, max_depth=9, max_features=0.5, n_estimators=151; total time=   9.5s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.012957637868390427, max_depth=9, max_features=0.5, n_estimators=151; total time=   9.4s


  "X does not have valid feature names, but"


[CV] END learning_rate=0.012957637868390427, max_depth=9, max_features=0.5, n_estimators=151; total time=   9.5s


  "X does not have valid feature names, but"


{'learning_rate': 0.1854256230380312, 'max_depth': 15, 'max_features': 0.5, 'n_estimators': 61}


In [54]:
grid_mlp = mlp_test(X_train_data, y_train_data)

Fitting 5 folds for each of 6 candidates, totalling 30 fits




[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time=   6.0s
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time=   5.5s
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time=   5.0s
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time=   5.4s
[CV] END .........activation=relu, hidden_layer_sizes=(100,); total time=   5.6s
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time=   5.7s
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time=   5.6s
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time=   5.0s
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time=   4.8s
[CV] END ..........activation=relu, hidden_layer_sizes=(50,); total time=   5.0s
[CV] END .....activation=relu, hidden_layer_sizes=(100, 100); total time=   2.1s
[CV] END .....activation=relu, hidden_layer_sizes=(100, 100); total time=   2.0s
[CV] END .....activation=rel

In [55]:
grid_mlp.best_score_

0.46262692553094525

In [56]:
grid_lr.best_score_

0.7289905711727214

In [57]:
grid.best_score_

0.4777825549830821

In [58]:
grid_rf.best_score_

0.4163996136874255

In [59]:
grid_gb.best_score_

0.21846680139467728

In [60]:
import json

In [62]:
lr = LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)
str(type(lr))

"<class 'sklearn.linear_model._logistic.LogisticRegression'>"