In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.utils.validation import column_or_1d
import matplotlib.pyplot as plt

In [2]:
datasets = ["a_affirmative", "a_conditional", "a_doubt_question", "a_emphasis", "a_negative", "a_relative", "a_topics", "a_wh_question", "a_yn_question",
         "b_affirmative", "b_conditional", "b_doubt_question", "b_emphasis", "b_negative", "b_relative", "b_topics", "b_wh_question", "b_yn_question"]

alphas = {"a_affirmative": 0.0001, "a_conditional": 0.0001, "a_doubt_question": 0.0001, "a_emphasis": 0.0001, "a_negative": 0.0003, "a_relative": 0.001, "a_topics": 0.0001, "a_wh_question": 0.0003, "a_yn_question": 0.0001,
         "b_affirmative": 0.0001, "b_conditional": 0.0003, "b_doubt_question": 0.001, "b_emphasis": 0.01, "b_negative": 0.0003, "b_relative": 0.0001, "b_topics": 0.0001, "b_wh_question": 0.003, "b_yn_question": 0.0001}

lr = {"a_affirmative": 0.001, "a_conditional": 0.003, "a_doubt_question": 0.003, "a_emphasis": 0.003, "a_negative": 0.003, "a_relative": 0.003, "a_topics": 0.003, "a_wh_question": 0.003, "a_yn_question": 0.001,
         "b_affirmative": 0.01, "b_conditional": 0.003, "b_doubt_question": 0.003, "b_emphasis": 0.003, "b_negative": 0.001, "b_relative": 0.003, "b_topics": 0.003, "b_wh_question": 0.003, "b_yn_question": 0.003}

estimators = {"a_affirmative": 100, "a_conditional": 25, "a_doubt_question": 50, "a_emphasis": 100, "a_negative": 100, "a_relative": 50, "a_topics": 50, "a_wh_question": 50, "a_yn_question": 50,
         "b_affirmative": 10, "b_conditional": 50, "b_doubt_question": 25, "b_emphasis": 100, "b_negative": 50, "b_relative": 50, "b_topics": 10, "b_wh_question": 100, "b_yn_question": 100}


In [10]:
MLPScores = {}
BaggingScores = {}

for name in datasets:
    X = pd.read_csv("./dataset/" + name +"_datapoints.csv", sep=' ')
    y = pd.read_csv("./dataset/"+ name + "_targets.csv")
    y = column_or_1d(y, warn=True)
    X.drop([X.columns[0]], axis=1, inplace=True)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=42, test_size=0.3)
    
    mlp = MLPClassifier(random_state=42, hidden_layer_sizes=(250, 250, 250), max_iter=50, activation="relu", solver="adam", alpha=alphas[name], learning_rate_init=lr[name])
    y_train_proba_mlp = cross_val_predict(mlp, X_train, y_train, cv=10, method="predict_proba")
    y_scores_mlp = y_train_proba_mlp[:, 1]
    MLPScores[name] = roc_auc_score(y_train, y_scores_mlp)

    bag = BaggingClassifier(mlp, n_estimators=estimators[name])
    y_train_proba_bag = cross_val_predict(bag, X_train, y_train, cv=10, method="predict_proba")
    y_scores_bag = y_train_proba_bag[:, 1]
    BaggingScores[name] = roc_auc_score(y_train, y_scores_bag)




In [11]:
print(MLPScores)
print(BaggingScores)

{'a_affirmative': 0.6592750468589395, 'a_conditional': 0.6668667677988604, 'a_doubt_question': 0.8531369708837824, 'a_emphasis': 0.5169733576813222, 'a_negative': 0.7181307440002598, 'a_relative': 0.8800303405385823, 'a_topics': 0.680384871476601, 'a_wh_question': 0.6520483193277311, 'a_yn_question': 0.8480455499919237, 'b_affirmative': 0.606063840384807, 'b_conditional': 0.706531768893885, 'b_doubt_question': 0.7884840803528256, 'b_emphasis': 0.8010412176277482, 'b_negative': 0.7742024673153134, 'b_relative': 0.6237572479670253, 'b_topics': 0.866579948619557, 'b_wh_question': 0.8733278227492113, 'b_yn_question': 0.6924317497328281}
{'a_affirmative': 0.7643826712618659, 'a_conditional': 0.8757642376011661, 'a_doubt_question': 0.9115410044565124, 'a_emphasis': 0.7743070187760454, 'a_negative': 0.771292176793427, 'a_relative': 0.9323734301412875, 'a_topics': 0.8249295114713401, 'a_wh_question': 0.7564313461233549, 'a_yn_question': 0.8985265349342235, 'b_affirmative': 0.6728642787004226, 