In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt
from sklearn.utils.validation import column_or_1d
from  warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [7]:
datasets = ["a_affirmative", "a_conditional", "a_doubt_question", "a_emphasis", "a_negative", "a_relative", "a_topics", "a_wh_question", "a_yn_question",
         "b_affirmative", "b_conditional", "b_doubt_question", "b_emphasis", "b_negative", "b_relative", "b_topics", "b_wh_question", "b_yn_question"]

params = {"a_affirmative"  : {'alpha': 0.01, 'learning_rate_init': 0.001, 'RocAucScore': 0.6707128099173553}
"a_conditional"  : {'alpha': 0.0001, 'learning_rate_init': 0.001, 'RocAucScore': 0.9008955309287199}
"a_doubt_question"  : {'alpha': 0.01, 'learning_rate_init': 0.001, 'RocAucScore': 0.8716886548152294}
"a_emphasis"  : {'alpha': 0.003, 'learning_rate_init': 0.003, 'RocAucScore': 0.6775686154474032}
"a_negative"  : {'alpha': 0.001, 'learning_rate_init': 0.001, 'RocAucScore': 0.6369907830462201}
"a_relative"  : {'alpha': 0.001, 'learning_rate_init': 0.003, 'RocAucScore': 0.8694264172552014}
"a_topics"  : {'alpha': 0.01, 'learning_rate_init': 0.01, 'RocAucScore': 0.7813671076538724}
"a_wh_question"  : {'alpha': 0.01, 'learning_rate_init': 0.001, 'RocAucScore': 0.7247282608695651}
"a_yn_question"  : {'alpha': 0.001, 'learning_rate_init': 0.003, 'RocAucScore': 0.8674487082066871}
"b_affirmative"  : {'alpha': 0.001, 'learning_rate_init': 0.003, 'RocAucScore': 0.6021795665634676}
"b_conditional"  : {'alpha': 0.0001, 'learning_rate_init': 0.001, 'RocAucScore': 0.6692768397578848}
"b_doubt_question"  : {'alpha': 0.01, 'learning_rate_init': 0.003, 'RocAucScore': 0.7455791363242859}
"b_emphasis"  : {'alpha': 0.003, 'learning_rate_init': 0.01, 'RocAucScore': 0.768807541356417}
"b_negative"  : {'alpha': 0.003, 'learning_rate_init': 0.01, 'RocAucScore': 0.7529255906381099}
"b_relative"  : {'alpha': 0.001, 'learning_rate_init': 0.01, 'RocAucScore': 0.6786704877173586}
"b_topics"  : {'alpha': 0.003, 'learning_rate_init': 0.01, 'RocAucScore': 0.8481867131205953}
"b_wh_question"  : {'alpha': 0.0001, 'learning_rate_init': 0.01, 'RocAucScore': 0.872823148661088}
"b_yn_question"  : {'alpha': 0.003, 'learning_rate_init': 0.003, 'RocAucScore': 0.719996476650469}}

n_estimators = [10, 25, 50, 100]

In [8]:
bestsParams = {}


for data in datasets:
    X_train = pd.read_csv("./SplitData/" + data + "_X_train.csv")
    X_test = pd.read_csv("./SplitData/" + data + "_X_test.csv")
    y_train = pd.read_csv("./SplitData/" + data + "_y_train.csv")
    y_train = column_or_1d(y_train, warn=True)
    y_test = pd.read_csv("./SplitData/" + data + "_y_test.csv")
    y_test = column_or_1d(y_test, warn=True)
    bestRocAucScore = 0
    for estimators in n_estimators:
        mlp = MLPClassifier(random_state=42, hidden_layer_sizes=(30, 30, 30), max_iter=50, activation="relu", solver="adam", alpha=params[data]['alpha'], learning_rate_init=params[data]['learning_rate_init'])
        bag = BaggingClassifier(mlp, n_estimators=estimators)
        y_train_proba = cross_val_predict(bag, X_train, y_train, cv=10, method="predict_proba")
        y_scores = y_train_proba[:, 1]
        score = roc_auc_score(y_train, y_scores)
        if(score > bestRocAucScore):
            bestsParams[data] = {
                "n_estimators": estimators,
                "RocAucScore": score
            }
            bestRocAucScore = score

  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.path while we load stuff.
  
  # Remove the CWD from sys.

In [9]:
for i in bestsParams:
    print(i, " :", bestsParams[i])

a_affirmative  : {'n_estimators': 50, 'RocAucScore': 0.7386098749735113}
a_conditional  : {'n_estimators': 100, 'RocAucScore': 0.9189205943502394}
a_doubt_question  : {'n_estimators': 25, 'RocAucScore': 0.909683825587906}
a_emphasis  : {'n_estimators': 100, 'RocAucScore': 0.7705387205387205}
a_negative  : {'n_estimators': 100, 'RocAucScore': 0.7380501122505386}
a_relative  : {'n_estimators': 50, 'RocAucScore': 0.8933368327288923}
a_topics  : {'n_estimators': 100, 'RocAucScore': 0.8192390131728366}
a_wh_question  : {'n_estimators': 100, 'RocAucScore': 0.740139751552795}
a_yn_question  : {'n_estimators': 100, 'RocAucScore': 0.8903123416919961}
b_affirmative  : {'n_estimators': 100, 'RocAucScore': 0.6790712074303406}
b_conditional  : {'n_estimators': 100, 'RocAucScore': 0.7896272698311565}
b_doubt_question  : {'n_estimators': 100, 'RocAucScore': 0.8417337169628762}
b_emphasis  : {'n_estimators': 100, 'RocAucScore': 0.8361434096679667}
b_negative  : {'n_estimators': 100, 'RocAucScore': 0.8