In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils.validation import column_or_1d
from time import perf_counter
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning, DataConversionWarning
simplefilter("ignore", category=ConvergenceWarning)
simplefilter("ignore", category=DataConversionWarning)

In [6]:
datasets = ["a_affirmative", "a_conditional", "a_doubt_question", "a_emphasis", "a_negative", "a_relative", "a_topics", "a_wh_question", "a_yn_question",
         "b_affirmative", "b_conditional", "b_doubt_question", "b_emphasis", "b_negative", "b_relative", "b_topics", "b_wh_question", "b_yn_question"]

alphas = {"a_affirmative": 0.0001, "a_conditional": 0.0001, "a_doubt_question": 0.0001, "a_emphasis": 0.0001, "a_negative": 0.0003, "a_relative": 0.0003, "a_topics": 0.0003, "a_wh_question": 0.0003, "a_yn_question": 0.0001,
         "b_affirmative": 0.01, "b_conditional": 0.01, "b_doubt_question": 0.001, "b_emphasis": 0.001, "b_negative": 0.003, "b_relative": 0.003, "b_topics": 0.01, "b_wh_question": 0.0001, "b_yn_question": 0.003}

lr = {"a_affirmative": 0.01, "a_conditional": 0.01, "a_doubt_question": 0.01, "a_emphasis": 0.01, "a_negative": 0.01, "a_relative": 0.01, "a_topics": 0.01, "a_wh_question": 0.01, "a_yn_question": 0.003,
         "b_affirmative": 0.01, "b_conditional": 0.01, "b_doubt_question": 0.01, "b_emphasis": 0.01, "b_negative": 0.01, "b_relative": 0.01, "b_topics": 0.01, "b_wh_question": 0.01, "b_yn_question": 0.01}

estimators = {"a_affirmative": 100, "a_conditional": 25, "a_doubt_question": 100, "a_emphasis": 100, "a_negative": 100, "a_relative": 50, "a_topics": 50, "a_wh_question": 10, "a_yn_question": 25,
         "b_affirmative": 25, "b_conditional": 50, "b_doubt_question": 25, "b_emphasis": 100, "b_negative": 100, "b_relative": 100, "b_topics": 50, "b_wh_question": 100, "b_yn_question": 100}

In [15]:
accDefaultMLP = {}
accDefaultBagging = {}
accPreMLP = {}
accPreBagging = {}
timePreMLP = {}
timePreBag = {}

In [16]:
def trainDefault(name):
    X_train = pd.read_csv("./SplitData/" + name + "_X_train.csv")
    X_test = pd.read_csv("./SplitData/" + name + "_X_test.csv")
    y_train = pd.read_csv("./SplitData/" + name + "_y_train.csv")
    y_train = column_or_1d(y_train, warn=True)
    y_test = pd.read_csv("./SplitData/" + name + "_y_test.csv")
    y_test = column_or_1d(y_test, warn=True)
    mlp = MLPClassifier(random_state=42, hidden_layer_sizes=(30, 30), max_iter=50, activation="relu", solver="adam", alpha=alphas[name], learning_rate_init=lr[name])
    bag = BaggingClassifier(mlp, n_estimators=estimators[name])

    mlp.fit(X_train, y_train)
    bag.fit(X_train, y_train)
    predictMLP = mlp.predict(X_test)
    predictBagging = bag.predict(X_test)

    accDefaultMLP[name] = accuracy_score(y_test, predictMLP)
    accDefaultBagging[name] = accuracy_score(y_test, predictBagging)

def trainPre(name):
    X_train = pd.read_csv("./SplitPreprocessedData/" + name + "_X_train.csv")
    X_test = pd.read_csv("./SplitPreprocessedData/" + name + "_X_test.csv")
    y_train = pd.read_csv("./SplitPreprocessedData/" + name + "_y_train.csv")
    y_train = column_or_1d(y_train, warn=True)
    y_test = pd.read_csv("./SplitPreprocessedData/" + name + "_y_test.csv")
    y_test = column_or_1d(y_test, warn=True)
    seconds = perf_counter()
    mlp = MLPClassifier(random_state=42, hidden_layer_sizes=(30, 30), max_iter=50, activation="relu", solver="adam", alpha=alphas[name], learning_rate_init=lr[name])
    bag = BaggingClassifier(mlp, n_estimators=estimators[name])

    mlp.fit(X_train, y_train)
    bag.fit(X_train, y_train)
    predictMLP = mlp.predict(X_test)
    predictBagging = bag.predict(X_test)

    # accPreMLP[name] = accuracy_score(y_test, predictMLP)
    # accPreBagging[name] = accuracy_score(y_test, predictBagging)
    # timePreMLP[name] = perf_counter() - seconds
    timePreBag[name] = perf_counter() - seconds 

In [17]:
for i in datasets:
    # trainDefault(i)
    trainPre(i)

In [18]:
# print(accDefaultMLP)
# print(accDefaultBagging)
# print(accPreMLP)
# print(accPreBagging)
# print(timePreMLP)
print(timePreBag)

{'a_affirmative': 21.678808600000025, 'a_conditional': 9.012337700000046, 'a_doubt_question': 27.021156899999994, 'a_emphasis': 31.37855909999996, 'a_negative': 21.517219599999976, 'a_relative': 20.676687200000003, 'a_topics': 20.462511099999972, 'a_wh_question': 3.635906299999988, 'a_yn_question': 8.085643600000026, 'b_affirmative': 5.332987499999945, 'b_conditional': 19.795343000000003, 'b_doubt_question': 8.102418499999999, 'b_emphasis': 28.953179200000022, 'b_negative': 34.51589100000001, 'b_relative': 47.26289059999999, 'b_topics': 23.826833100000044, 'b_wh_question': 33.90175629999999, 'b_yn_question': 41.08881409999992}
