In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score
import matplotlib.pyplot as plt

In [2]:
# Dataset names follow the pattern "<prefix>_<sentence type>"; each one is
# later used to build a CSV file name.
_sentence_types = (
    "affirmative",
    "conditional",
    "doubt_question",
    "emphasis",
    "negative",
    "relative",
    "topics",
    "wh_question",
    "yn_question",
)
datasets = [
    f"{prefix}_{sentence_type}"
    for prefix in ("a", "b")
    for sentence_type in _sentence_types
]

# Candidate values for each MLP hyper-parameter.
activation = ["logistic", "tanh", "relu"]
solver = ["sgd", "adam"]
# Previously tried hidden-layer grid: [(5, 5), (10, 10), (20, 20)]
layers = [(30, 30)]
alpha = [0.0001, 0.0003, 0.001, 0.003, 0.01]
learning_rate_init = [0.001, 0.003, 0.01, 0.03]

# Same candidates packed into one dict. Each value is an independent copy,
# so mutating a standalone list does not silently change this dict.
param_dist = {
    "activation": list(activation),
    "solver": list(solver),
    "alpha": list(alpha),
    "learning_rate_init": list(learning_rate_init),
}

In [4]:
# Hyper-parameter search: for every dataset, score each
# (hidden layers, alpha, initial learning rate) combination by ROC AUC over
# 10-fold cross-validated predictions on the training split, and keep the
# best-scoring combination in `bestsParams[<dataset name>]`.
bestsParams = {}

# Flatten the grid once. Iteration order is identical to nesting the loops
# as layers -> alpha -> learning_rate_init.
search_grid = [
    (lay, a, lri)
    for lay in layers
    for a in alpha
    for lri in learning_rate_init
]

for data in datasets:
    df = pd.read_csv(f"./PreprocessedDataset/{data}.csv", sep=" ")

    # The last column is used as the target; all preceding columns are features.
    X, y = df.iloc[:, :-1], df.iloc[:, -1]

    # The test part is held out and is not used anywhere in this search.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    bestRocAucScore = 0
    for lay, a, lri in search_grid:
        mlp = MLPClassifier(
            hidden_layer_sizes=lay,
            activation="relu",
            solver="adam",
            alpha=a,
            learning_rate_init=lri,
            max_iter=50,
            random_state=42,
        )
        y_train_proba = cross_val_predict(
            mlp, X_train, y_train, cv=10, method="predict_proba"
        )
        # Column 1 of the probability matrix is used as the ranking score
        # (presumably the positive class of a binary target — confirm).
        y_scores = y_train_proba[:, 1]
        score = roc_auc_score(y_train, y_scores)

        # Only a strictly better score replaces the current best.
        if not (score > bestRocAucScore):
            continue

        bestRocAucScore = score
        bestsParams[data] = {
            "layers": lay,
            "alpha": a,
            "learning_rate_init": lri,
            "RocAucScore": score,
        }



In [5]:
# Print the best hyper-parameter combination recorded for each dataset.
# NOTE(review): the saved output below reports layers (20, 20), which is not
# part of the current `layers` grid ([(30, 30)]) — the output looks stale;
# re-run the notebook from a fresh kernel to confirm.
print(bestsParams)

{'a_affirmative': {'layers': (20, 20), 'alpha': 0.0003, 'learning_rate_init': 0.01, 'RocAucScore': 0.6187715238915196}, 'a_conditional': {'layers': (20, 20), 'alpha': 0.01, 'learning_rate_init': 0.01, 'RocAucScore': 0.6719991834512793}, 'a_doubt_question': {'layers': (20, 20), 'alpha': 0.001, 'learning_rate_init': 0.01, 'RocAucScore': 0.629601226993865}, 'a_emphasis': {'layers': (20, 20), 'alpha': 0.0003, 'learning_rate_init': 0.01, 'RocAucScore': 0.5929161095285158}, 'a_negative': {'layers': (20, 20), 'alpha': 0.0003, 'learning_rate_init': 0.01, 'RocAucScore': 0.6326418961611366}, 'a_relative': {'layers': (20, 20), 'alpha': 0.001, 'learning_rate_init': 0.01, 'RocAucScore': 0.6450625165403027}, 'a_topics': {'layers': (20, 20), 'alpha': 0.0001, 'learning_rate_init': 0.01, 'RocAucScore': 0.6545861318524633}, 'a_wh_question': {'layers': (20, 20), 'alpha': 0.003, 'learning_rate_init': 0.01, 'RocAucScore': 0.6604878048780488}, 'a_yn_question': {'layers': (20, 20), 'alpha': 0.003, 'learning_