In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

In [2]:
datasets = ["a_affirmative", "a_conditional", "a_doubt_question", "a_emphasis", "a_negative", "a_relative", "a_topics", "a_wh_question", "a_yn_question",
         "b_affirmative", "b_conditional", "b_doubt_question", "b_emphasis", "b_negative", "b_relative", "b_topics", "b_wh_question", "b_yn_question"]

alphas = {"a_affirmative": 0.0001, "a_conditional": 0.0001, "a_doubt_question": 0.0001, "a_emphasis": 0.0001, "a_negative": 0.0003, "a_relative": 0.0003, "a_topics": 0.0003, "a_wh_question": 0.0003, "a_yn_question": 0.0001,
         "b_affirmative": 0.01, "b_conditional": 0.01, "b_doubt_question": 0.001, "b_emphasis": 0.001, "b_negative": 0.003, "b_relative": 0.003, "b_topics": 0.01, "b_wh_question": 0.0001, "b_yn_question": 0.003}

lr = {"a_affirmative": 0.01, "a_conditional": 0.01, "a_doubt_question": 0.01, "a_emphasis": 0.01, "a_negative": 0.01, "a_relative": 0.01, "a_topics": 0.01, "a_wh_question": 0.01, "a_yn_question": 0.003,
         "b_affirmative": 0.01, "b_conditional": 0.01, "b_doubt_question": 0.01, "b_emphasis": 0.01, "b_negative": 0.01, "b_relative": 0.01, "b_topics": 0.01, "b_wh_question": 0.01, "b_yn_question": 0.01}

param_dist = {
    "warm_start": [True, False],
    "oob_score": [True, False],
    "n_estimators": [10, 25, 50, 100],
    "bootstrap": [True, False]
}

warm_start = [True, False]
oob_score = [True, False]
n_estimators = [10, 25, 50, 100]
boostrap = [True, False]

In [9]:
bestsParams = {}


for data in datasets:
    df = pd.read_csv("./PreprocessedDataset/" + data + ".csv", sep=" ")
    bestRocAucScore = 0
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42, test_size=0.3)
    # for warm in warm_start:
    # for oob in oob_score:
    for estimators in n_estimators:
        # for boot in boostrap:
        mlp = MLPClassifier(random_state=42, hidden_layer_sizes=(30, 30), max_iter=50, activation="relu", solver="adam", alpha=alphas[data], learning_rate_init=lr[data])
        bag = BaggingClassifier(mlp, n_estimators=estimators)
        y_train_proba = cross_val_predict(bag, X_train, y_train, cv=10, method="predict_proba")
        y_scores = y_train_proba[:, 1]
        score = roc_auc_score(y_train, y_scores)
        if(score > bestRocAucScore):
            bestsParams[data] = {
                # "warm_start":warm,
                # "oob_score": oob,
                "n_estimators": estimators,
                # "bootstrap": boot,
                "RocAucScore": score
            }
            bestRocAucScore = score

ValueError: Out of bag estimation only available if bootstrap=True

In [6]:
print(bestsParams)

{'a_affirmative': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap': True, 'RocAucScore': 0.7098041325871717}, 'a_conditional': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap': True, 'RocAucScore': 0.7602000544365813}, 'a_doubt_question': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap': True, 'RocAucScore': 0.746338041565606}, 'a_emphasis': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap': True, 'RocAucScore': 0.6677010057554793}, 'a_negative': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap': True, 'RocAucScore': 0.7080861162908968}, 'a_relative': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap': True, 'RocAucScore': 0.8113244777907263}, 'a_topics': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap': True, 'RocAucScore': 0.7605155528873213}, 'a_wh_question': {'warm_start': True, 'oob_score': True, 'n_estimators': 10, 'bootstrap