In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils.validation import column_or_1d

In [2]:
# Names of the datasets to evaluate; each name is also the key into the
# per-dataset hyper-parameter tables below and the CSV file-name prefix.
datasets = [
    "a_affirmative",
    "a_conditional",
    "a_doubt_question",
    "a_emphasis",
    "a_negative",
    "a_relative",
    "a_topics",
    "a_wh_question",
    "a_yn_question",
    "b_affirmative",
    "b_conditional",
    "b_doubt_question",
    "b_emphasis",
    "b_negative",
    "b_relative",
    "b_topics",
    "b_wh_question",
    "b_yn_question",
]

# Per-dataset value passed as MLPClassifier(alpha=...).
alphas = {
    "a_affirmative": 0.0001,
    "a_conditional": 0.0001,
    "a_doubt_question": 0.0001,
    "a_emphasis": 0.0001,
    "a_negative": 0.0003,
    "a_relative": 0.0003,
    "a_topics": 0.0003,
    "a_wh_question": 0.0003,
    "a_yn_question": 0.0001,
    "b_affirmative": 0.01,
    "b_conditional": 0.01,
    "b_doubt_question": 0.001,
    "b_emphasis": 0.001,
    "b_negative": 0.003,
    "b_relative": 0.003,
    "b_topics": 0.01,
    "b_wh_question": 0.0001,
    "b_yn_question": 0.003,
}

# Per-dataset value passed as MLPClassifier(learning_rate_init=...).
# Every dataset uses 0.01 except "a_yn_question".
lr = {**dict.fromkeys(datasets, 0.01), "a_yn_question": 0.003}

# Per-dataset value passed as BaggingClassifier(n_estimators=...).
estimators = {
    "a_affirmative": 100,
    "a_conditional": 25,
    "a_doubt_question": 100,
    "a_emphasis": 100,
    "a_negative": 100,
    "a_relative": 50,
    "a_topics": 50,
    "a_wh_question": 10,
    "a_yn_question": 25,
    "b_affirmative": 25,
    "b_conditional": 50,
    "b_doubt_question": 25,
    "b_emphasis": 100,
    "b_negative": 100,
    "b_relative": 100,
    "b_topics": 50,
    "b_wh_question": 100,
    "b_yn_question": 100,
}

In [3]:
# Accuracy tables keyed by dataset name, one per (data variant, model) pair.
# Each name is bound to its own separate empty dict.
accDefaultMLP, accDefaultBagging, accPreMLP, accPreBagging = {}, {}, {}, {}

In [4]:
def trainDefault(name):
    """Fit an MLP and a bagged-MLP ensemble on the ./SplitData/ split of one dataset.

    Reads ``<name>_X_train.csv``, ``<name>_X_test.csv``, ``<name>_y_train.csv``
    and ``<name>_y_test.csv`` from ``./SplitData/``, trains both models on the
    training split and records their test-set accuracy.

    Parameters
    ----------
    name : str
        Dataset name. Used as the CSV file-name prefix and as the key into the
        module-level ``alphas``, ``lr`` and ``estimators`` tables.

    Returns
    -------
    None
        Results are stored in ``accDefaultMLP[name]`` and
        ``accDefaultBagging[name]``.

    Raises
    ------
    KeyError
        If ``name`` is missing from ``alphas``, ``lr`` or ``estimators``.
    """
    prefix = "./SplitData/" + name
    X_train = pd.read_csv(prefix + "_X_train.csv")
    X_test = pd.read_csv(prefix + "_X_test.csv")
    # Labels are read as a DataFrame; column_or_1d flattens them to the 1-D
    # shape the estimators and accuracy_score expect.
    y_train = column_or_1d(pd.read_csv(prefix + "_y_train.csv"), warn=True)
    y_test = column_or_1d(pd.read_csv(prefix + "_y_test.csv"), warn=True)

    # Look hyper-parameters up by the `name` argument, not by a global loop
    # variable, so the function is correct no matter how it is called.
    mlp = MLPClassifier(
        random_state=42,
        hidden_layer_sizes=(30, 30),
        max_iter=50,
        activation="relu",
        solver="adam",
        alpha=alphas[name],
        learning_rate_init=lr[name],
    )
    # Seed the ensemble as well: without it the bootstrap samples (and the
    # seeds handed to each member) change on every run.
    bag = BaggingClassifier(mlp, n_estimators=estimators[name], random_state=42)

    mlp.fit(X_train, y_train)
    bag.fit(X_train, y_train)

    accDefaultMLP[name] = accuracy_score(y_test, mlp.predict(X_test))
    accDefaultBagging[name] = accuracy_score(y_test, bag.predict(X_test))

def trainPre(name):
    """Fit an MLP and a bagged-MLP ensemble on the ./SplitPreprocessedData/ split of one dataset.

    Reads ``<name>_X_train.csv``, ``<name>_X_test.csv``, ``<name>_y_train.csv``
    and ``<name>_y_test.csv`` from ``./SplitPreprocessedData/``, trains both
    models on the training split and records their test-set accuracy.

    Parameters
    ----------
    name : str
        Dataset name. Used as the CSV file-name prefix and as the key into the
        module-level ``alphas``, ``lr`` and ``estimators`` tables.

    Returns
    -------
    None
        Results are stored in ``accPreMLP[name]`` and ``accPreBagging[name]``.

    Raises
    ------
    KeyError
        If ``name`` is missing from ``alphas``, ``lr`` or ``estimators``.
    """
    prefix = "./SplitPreprocessedData/" + name
    X_train = pd.read_csv(prefix + "_X_train.csv")
    X_test = pd.read_csv(prefix + "_X_test.csv")
    # Labels are read as a DataFrame; column_or_1d flattens them to the 1-D
    # shape the estimators and accuracy_score expect.
    y_train = column_or_1d(pd.read_csv(prefix + "_y_train.csv"), warn=True)
    y_test = column_or_1d(pd.read_csv(prefix + "_y_test.csv"), warn=True)

    # Look hyper-parameters up by the `name` argument, not by a global loop
    # variable, so the function is correct no matter how it is called.
    mlp = MLPClassifier(
        random_state=42,
        hidden_layer_sizes=(30, 30),
        max_iter=50,
        activation="relu",
        solver="adam",
        alpha=alphas[name],
        learning_rate_init=lr[name],
    )
    # Seed the ensemble as well: without it the bootstrap samples (and the
    # seeds handed to each member) change on every run.
    bag = BaggingClassifier(mlp, n_estimators=estimators[name], random_state=42)

    mlp.fit(X_train, y_train)
    bag.fit(X_train, y_train)

    accPreMLP[name] = accuracy_score(y_test, mlp.predict(X_test))
    accPreBagging[name] = accuracy_score(y_test, bag.predict(X_test))

In [5]:
# Train and score both data variants for every dataset name; the results are
# collected in the accDefault* / accPre* dictionaries.
# NOTE(review): keep the loop variable named `i` unless trainDefault/trainPre
# are confirmed not to read a module-level `i` — verify before renaming.
for i in datasets:
    trainDefault(i)
    trainPre(i)



In [6]:
# Dump the four accuracy tables, one per line, in the order:
# default MLP, default bagging, preprocessed MLP, preprocessed bagging.
print(accDefaultMLP, accDefaultBagging, accPreMLP, accPreBagging, sep="\n")

{'a_affirmative': 0.6165413533834586, 'a_conditional': 0.8490566037735849, 'a_doubt_question': 0.8719512195121951, 'a_emphasis': 0.7863247863247863, 'a_negative': 0.49110320284697506, 'a_relative': 0.7650085763293311, 'a_topics': 0.7683741648106904, 'a_wh_question': 0.46273291925465837, 'a_yn_question': 0.7614942528735632, 'b_affirmative': 0.5576208178438662, 'b_conditional': 0.7721021611001965, 'b_doubt_question': 0.7146666666666667, 'b_emphasis': 0.6369047619047619, 'b_negative': 0.6818181818181818, 'b_relative': 0.7142857142857143, 'b_topics': 0.838074398249453, 'b_wh_question': 0.7981927710843374, 'b_yn_question': 0.6183908045977011}
{'a_affirmative': 0.7255639097744361, 'a_conditional': 0.870020964360587, 'a_doubt_question': 0.8567073170731707, 'a_emphasis': 0.7492877492877493, 'a_negative': 0.7117437722419929, 'a_relative': 0.8593481989708405, 'a_topics': 0.7973273942093542, 'a_wh_question': 0.6801242236024845, 'a_yn_question': 0.8477011494252874, 'b_affirmative': 0.6728624535315