BEFORE FEATURE SELECTION


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate

In [None]:
# Read the training dataset from disk
stance_data = pd.read_csv('cop_training_dataset.csv')

# y is the 'class' column; X is every remaining column except
# 'subject', 'task' and 'window_id'
y = stance_data['class']
X = stance_data.drop(['subject', 'task', 'window_id', 'class'], axis=1)

In [None]:
# Utility function to report best scores
def report(results, n_top=3):
    """Print the best-ranked candidates of a hyper-parameter search.

    Parameters
    ----------
    results : mapping
        Must provide 'rank_test_score', 'mean_test_score', 'std_test_score'
        and 'params', each indexable by candidate position (the notebook
        passes ``GridSearchCV.cv_results_``).
    n_top : int, default 3
        Highest rank to print. Every candidate whose rank is in 1..n_top is
        shown, so tied candidates can make the listing longer than n_top.

    Returns
    -------
    None
        The summary is written to stdout.
    """
    for rank in range(1, n_top + 1):
        # Positions of all candidates sharing this rank (ties included)
        tied = np.flatnonzero(results['rank_test_score'] == rank)
        for idx in tied:
            print("Model with rank: {0}".format(rank))
            mean_score = results['mean_test_score'][idx]
            std_score = results['std_test_score'][idx]
            print("Mean validation score: {0:.3f} (std: {1:.3f})"
                  .format(mean_score, std_score))
            print("Parameters: {0}".format(results['params'][idx]))
            print("")


In [None]:
# Wider search space (currently disabled):
# hidden_layer_sizes = [(50,), (100,), (50, 50)]
# activation = ['tanh', 'relu']
# solver = ['sgd', 'adam']
# alpha = [0.0001, 0.001, 0.01]
# learning_rate = ['constant', 'adaptive']

# Search space actually used by the grid search
hidden_layer_sizes = [(50,), (80,), (50, 50)]
activation = ['relu']
solver = ['adam']
alpha = [0.0001, 0.001]
# MLPClassifier only consults `learning_rate` when solver='sgd'. With 'adam' as
# the sole solver, 'constant' and 'adaptive' train identical models, so listing
# both only doubles the number of fits and fills the ranking with tied
# duplicates. Add 'adaptive' back if 'sgd' is re-enabled above.
learning_rate = ['constant']

In [None]:
# Assemble the search space for the grid search; keys are inserted in the
# same order as before so the printed dict is unchanged
param_grid = dict(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate,
)
print(param_grid)

{'hidden_layer_sizes': [(50,), (80,), (50, 50)], 'activation': ['relu'], 'solver': ['adam'], 'alpha': [0.0001, 0.001], 'learning_rate': ['constant', 'adaptive']}


In [None]:
# Exhaustive 5-fold cross-validated search over param_grid (n_jobs=-1 runs the
# fits in parallel on all available processors)
# NOTE(review): no scaling step is applied in this notebook before fitting,
# although StandardScaler is imported — confirm whether X is already standardised
# NOTE(review): folds are not grouped by 'subject'; if a subject contributes
# several windows they may fall in both train and test folds — confirm this is intended
mlp = MLPClassifier(max_iter=1000, random_state=42)
grid_search = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
).fit(X, y)

# Print the top-ranked parameter combinations
report(grid_search.cv_results_)


Model with rank: 1
Mean validation score: 0.627 (std: 0.087)
Parameters: {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (80,), 'learning_rate': 'constant', 'solver': 'adam'}

Model with rank: 1
Mean validation score: 0.627 (std: 0.087)
Parameters: {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (80,), 'learning_rate': 'adaptive', 'solver': 'adam'}

Model with rank: 3
Mean validation score: 0.627 (std: 0.096)
Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'solver': 'adam'}

Model with rank: 3
Mean validation score: 0.627 (std: 0.096)
Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'adaptive', 'solver': 'adam'}





Model with rank: 1
Mean validation score: 0.627 (std: 0.096)
Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'solver': 'adam'}

Model with rank: 1
Mean validation score: 0.627 (std: 0.096)
Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'adaptive', 'solver': 'adam'}

Model with rank: 3
Mean validation score: 0.620 (std: 0.100)
Parameters: {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'solver': 'adam'}

Model with rank: 3
Mean validation score: 0.620 (std: 0.100)
Parameters: {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (50,), 'learning_rate': 'adaptive', 'solver': 'adam'}


In [None]:
# Re-evaluate the best configuration from the grid search with 5-fold CV
# NOTE: despite its name, best_rfc is an MLPClassifier
best_params = grid_search.best_params_
best_rfc = MLPClassifier(max_iter=1000, random_state=42, **best_params)
scores = cross_validate(best_rfc, X, y, cv=5)

# Per-fold accuracy followed by its mean and standard deviation
print("Accuracy per fold: ", scores['test_score'])
print("Average Accuracy: %0.2f" % scores['test_score'].mean())
print("Standard Deviation of Accuracy: +/- %0.2f" % scores['test_score'].std())



Accuracy per fold:  [0.77173913 0.50549451 0.64835165 0.58241758 0.62637363]
Average Accuracy: 0.63
Standard Deviation of Accuracy: +/- 0.09




P-VALUE

In [None]:
from scipy.stats import binomtest

# One-sided binomial test: is the cross-validated accuracy above chance level?
n_samples = len(stance_data)  # total number of samples
# Estimate the number of correct predictions from the measured fold accuracies
# rather than a hand-typed, rounded 0.63: rounding the accuracy first can shift
# the success count, and a literal goes stale whenever the scores change.
successful_pred = int(round(n_samples * np.mean(scores['test_score'])))
# NOTE(review): p=0.5 is the chance level only for two balanced classes — confirm
p_value = binomtest(successful_pred, n_samples, p=0.5, alternative='greater')
p_value

BinomTestResult(k=287, n=456, alternative='greater', statistic=0.6293859649122807, pvalue=1.8064520852962256e-08)

AFTER FEATURE SELECTION

In [None]:
# Feature matrix for the "after feature selection" run
# NOTE(review): this drops exactly the same columns as X, so X1 holds the same
# features as X and no selection is applied — confirm which columns were
# meant to be removed here
X1 = stance_data.drop(['subject', 'task', 'window_id', 'class'], axis=1)

In [None]:
# Search space for the second grid search
hidden_layer_sizes1 = [(50,), (80,), (50, 50), (100, 50)]
activation1 = ['relu', 'tanh', 'logistic']
solver1 = ['adam']
alpha1 = [0.0001, 0.001]
# MLPClassifier only consults `learning_rate` when solver='sgd'. With 'adam' as
# the sole solver, 'constant' and 'adaptive' train identical models, so listing
# both only doubles the number of fits and fills the ranking with tied
# duplicates. Add 'adaptive' back if 'sgd' is added to solver1.
learning_rate1 = ['constant']

In [None]:
# Assemble the search space for the second grid search; keys are inserted in
# the same order as before so the printed dict is unchanged
param_grid1 = dict(
    hidden_layer_sizes=hidden_layer_sizes1,
    activation=activation1,
    solver=solver1,
    alpha=alpha1,
    learning_rate=learning_rate1,
)
print(param_grid1)

{'hidden_layer_sizes': [(50,), (80,), (50, 50), (100, 50)], 'activation': ['relu', 'tanh', 'logistic'], 'solver': ['adam'], 'alpha': [0.0001, 0.001], 'learning_rate': ['constant', 'adaptive']}


In [None]:
# Exhaustive 5-fold cross-validated search over param_grid1 on X1 (n_jobs=-1
# runs the fits in parallel on all available processors)
# NOTE(review): no scaling step is applied in this notebook before fitting,
# although StandardScaler is imported — confirm whether X1 is already standardised
mlp = MLPClassifier(max_iter=1000, random_state=42)
grid_search1 = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid1,
    cv=5,
    n_jobs=-1,
).fit(X1, y)

# Print the top-ranked parameter combinations
report(grid_search1.cv_results_)


Model with rank: 1
Mean validation score: 0.627 (std: 0.087)
Parameters: {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (80,), 'learning_rate': 'constant', 'solver': 'adam'}

Model with rank: 1
Mean validation score: 0.627 (std: 0.087)
Parameters: {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': (80,), 'learning_rate': 'adaptive', 'solver': 'adam'}

Model with rank: 3
Mean validation score: 0.627 (std: 0.096)
Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'solver': 'adam'}

Model with rank: 3
Mean validation score: 0.627 (std: 0.096)
Parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (50,), 'learning_rate': 'adaptive', 'solver': 'adam'}





In [None]:
# Re-evaluate the best configuration from the second grid search with 5-fold CV
# NOTE: despite its name, best_rfc1 is an MLPClassifier
best_params1 = grid_search1.best_params_
best_rfc1 = MLPClassifier(max_iter=1000, random_state=42, **best_params1)
scores1 = cross_validate(best_rfc1, X1, y, cv=5)

# Per-fold accuracy followed by its mean and standard deviation
print("Accuracy per fold: ", scores1['test_score'])
print("Average Accuracy: %0.2f" % scores1['test_score'].mean())
print("Standard Deviation of Accuracy: +/- %0.2f" % scores1['test_score'].std())



Accuracy per fold:  [0.77173913 0.50549451 0.64835165 0.58241758 0.62637363]
Average Accuracy: 0.63
Standard Deviation of Accuracy: +/- 0.09




In [None]:
from scipy.stats import binomtest

# One-sided binomial test for the second run: is the cross-validated accuracy
# above chance level?
n_samples = len(stance_data)  # total number of samples
# Estimate the number of correct predictions from this run's fold accuracies
# rather than a hand-typed, rounded 0.63: rounding the accuracy first can shift
# the success count, and a literal goes stale whenever the scores change.
successful_pred1 = int(round(n_samples * np.mean(scores1['test_score'])))
# NOTE(review): p=0.5 is the chance level only for two balanced classes — confirm
p_value = binomtest(successful_pred1, n_samples, p=0.5, alternative='greater')
p_value

BinomTestResult(k=287, n=456, alternative='greater', statistic=0.6293859649122807, pvalue=1.8064520852962256e-08)