In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from sklearn.utils.validation import column_or_1d
from  warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [2]:
datasets = ["a_affirmative", "a_conditional", "a_doubt_question", "a_emphasis", "a_negative", "a_relative", "a_topics", "a_wh_question", "a_yn_question",
         "b_affirmative", "b_conditional", "b_doubt_question", "b_emphasis", "b_negative", "b_relative", "b_topics", "b_wh_question", "b_yn_question"]

alphas = {"a_affirmative": 0.0001, "a_conditional": 0.0001, "a_doubt_question": 0.0001, "a_emphasis": 0.0001, "a_negative": 0.0003, "a_relative": 0.0003, "a_topics": 0.0003, "a_wh_question": 0.0003, "a_yn_question": 0.0001,
         "b_affirmative": 0.01, "b_conditional": 0.01, "b_doubt_question": 0.001, "b_emphasis": 0.001, "b_negative": 0.003, "b_relative": 0.003, "b_topics": 0.01, "b_wh_question": 0.0001, "b_yn_question": 0.003}

lr = {"a_affirmative": 0.01, "a_conditional": 0.01, "a_doubt_question": 0.01, "a_emphasis": 0.01, "a_negative": 0.01, "a_relative": 0.01, "a_topics": 0.01, "a_wh_question": 0.01, "a_yn_question": 0.003,
         "b_affirmative": 0.01, "b_conditional": 0.01, "b_doubt_question": 0.01, "b_emphasis": 0.01, "b_negative": 0.01, "b_relative": 0.01, "b_topics": 0.01, "b_wh_question": 0.01, "b_yn_question": 0.01}

param_dist = {
    "warm_start": [True, False],
    "oob_score": [True, False],
    "n_estimators": [10, 25, 50, 100],
    "bootstrap": [True, False]
}

warm_start = [True, False]
oob_score = [True, False]
n_estimators = [10, 25, 50, 100]
boostrap = [True, False]

In [5]:
bestsParams = {}


for data in datasets:
    X_train = pd.read_csv("./SplitData/" + data + "_X_train.csv")
    X_test = pd.read_csv("./SplitData/" + data + "_X_test.csv")
    y_train = pd.read_csv("./SplitData/" + data + "_y_train.csv")
    y_train = column_or_1d(y_train, warn=True)
    y_test = pd.read_csv("./SplitData/" + data + "_y_test.csv")
    y_test = column_or_1d(y_test, warn=True)
    bestRocAucScore = 0
    # for warm in warm_start:
    # for oob in oob_score:
    for estimators in n_estimators:
        # for boot in boostrap:
        mlp = MLPClassifier(random_state=42, hidden_layer_sizes=(30, 30), max_iter=50, activation="relu", solver="adam", alpha=alphas[data], learning_rate_init=lr[data])
        bag = BaggingClassifier(mlp, n_estimators=estimators)
        y_train_proba = cross_val_predict(bag, X_train, y_train, cv=10, method="predict_proba")
        y_scores = y_train_proba[:, 1]
        score = roc_auc_score(y_train, y_scores)
        if(score > bestRocAucScore):
            bestsParams[data] = {
                # "warm_start":warm,
                # "oob_score": oob,
                "n_estimators": estimators,
                # "bootstrap": boot,
                "RocAucScore": score
            }
            bestRocAucScore = score

NameError: name 'column_or_1d' is not defined

In [4]:
for i in bestsParams:
    print(i, " :", bestsParams[i])

{'a_affirmative': {'n_estimators': 100, 'RocAucScore': 0.6968628928110203}, 'a_conditional': {'n_estimators': 25, 'RocAucScore': 0.7039636635819272}, 'a_doubt_question': {'n_estimators': 100, 'RocAucScore': 0.6911292623769439}, 'a_emphasis': {'n_estimators': 100, 'RocAucScore': 0.616163304459043}, 'a_negative': {'n_estimators': 100, 'RocAucScore': 0.6650587927075466}, 'a_relative': {'n_estimators': 50, 'RocAucScore': 0.7358844477047863}, 'a_topics': {'n_estimators': 50, 'RocAucScore': 0.720038647705527}, 'a_wh_question': {'n_estimators': 10, 'RocAucScore': 0.6763131026659104}, 'a_yn_question': {'n_estimators': 25, 'RocAucScore': 0.6448552587923027}, 'b_affirmative': {'n_estimators': 25, 'RocAucScore': 0.6961860670194004}, 'b_conditional': {'n_estimators': 50, 'RocAucScore': 0.7658500530651438}, 'b_doubt_question': {'n_estimators': 25, 'RocAucScore': 0.796199345582683}, 'b_emphasis': {'n_estimators': 100, 'RocAucScore': 0.7774051622427451}, 'b_negative': {'n_estimators': 100, 'RocAucSco