<a href="https://colab.research.google.com/github/nspyrop03/bci-challenge-ner15/blob/main/nn_templates.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/bci-challenge-ner15/

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/bci-challenge-ner15


In [25]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_curve, auc, make_scorer
from imblearn.over_sampling import SMOTE, ADASYN

from detection import SubjectData
from utils import train_subjects, test_subjects

In [26]:
RAND_STATE = 42

In [27]:
labels = pd.read_csv('./data/TrainLabels.csv')
y = labels['Prediction'].to_numpy()
print(y.shape)

(5440,)


In [18]:
X = []
for subject in train_subjects:
  sd = SubjectData(subject, train=True)
  X.append(np.hstack((sd.is_short.reshape(-1, 1), sd.get_green_similarity(), sd.get_feedback_similarity())))
X = np.vstack(X)
print(X.shape)

(5440, 26)


In [21]:
def get_session_and_trial(verbose=False):
    features = []
    for i in range(1, 5):
        for j in range(1, 61):
            features.append([i, j])
    for j in range(1, 101):
        features.append([5, j])
    features = np.array(features)
    if verbose: print(f'session_and_trial: {features.shape}')
    return features

def transform_data(X, verbose=False):
  extra_features = get_session_and_trial(verbose=verbose)
  extra_cols = []
  for i in range(X.shape[0] // extra_features.shape[0]):
    extra_cols.append(extra_features)
  extra_cols = np.array(extra_cols)
  extra_cols = extra_cols.reshape(extra_cols.shape[0] * extra_cols.shape[1], extra_cols.shape[2])
  if verbose: print(f'extra_cols: {extra_cols.shape}')
  final_X = np.hstack((extra_cols, X))
  return final_X

In [22]:
final_X = transform_data(X, verbose=True)

session_and_trial: (340, 2)
extra_cols: (5440, 2)


In [24]:
print(final_X.shape)
print(final_X[42])

(5440, 28)
[ 1.         43.          1.          0.0543408   0.07846813  0.59062705
  0.17124898  0.53101865  0.52969934  0.57644477  0.17124898  0.10183663
  0.12761402  0.21723377  0.13522701  0.44782589  0.45304061  0.30480136
  0.13522701  0.05373682  0.05618085  0.31781987  0.10753104  0.47171442
  0.476525    0.31740053  0.10753104  0.        ]


In [28]:
X_train, X_val, y_train, y_val = train_test_split(final_X, y, test_size=0.3, random_state=RAND_STATE)
print(f'X_train: {X_train.shape}\nX_val: {X_val.shape}')

X_train: (3808, 28)
X_val: (1632, 28)


In [30]:
smote = SMOTE(random_state=RAND_STATE)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
print(f'X_train_smote: {X_train_smote.shape}')

adasyn = ADASYN(random_state=RAND_STATE)
X_train_adasyn, y_train_adasyn = adasyn.fit_resample(X_train, y_train)
print(f'X_train_adasyn: {X_train_adasyn.shape}')

X_train_smote: (5356, 28)
X_train_adasyn: (5426, 28)


In [29]:
# Create custom scorer for GridSearchCV
# y_scores is the predicted probabilities
def auc_scorer(y_true, y_scores):
    fpr, tpr, _ = roc_curve(y_true, y_scores)
    return auc(fpr, tpr)
auc_score = make_scorer(auc_scorer, needs_proba=True)

In [31]:
def grid_train_mlp(X_train, y_train, X_val, y_val, grid, cv=5):
  gscv = GridSearchCV(estimator=MLPClassifier(random_state=RAND_STATE),
                      param_grid=grid,
                      cv=cv,
                      scoring=auc_score,
                      n_jobs=-1,
                      verbose=1)
  model = gscv.fit(X_train, y_train)
  print('Grid Train:')
  print(f'| Best parameters: {model.best_params_}')
  print(f'| Best score: {model.best_score_}')
  # Predict probabilities on the test set (for the positive class)
  y_proba = model.predict_proba(X_val)[:, 1]
  # Compute FPR, TPR
  fpr, tpr, thresholds = roc_curve(y_val, y_proba)
  # Compute AUC
  final_auc = auc(fpr, tpr)
  print("| Final AUC score on val set:", final_auc)
  print('-' * 30)


In [33]:
mlp_simple_grid = {
    'hidden_layer_sizes': [(20,), (50,), (100,), (120,)],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.001, 0.002, 0.01],
    'alpha': [1e-05, 4e-05, 1e-04]
}
print("-----[ SMOTE ]-----")
grid_train_mlp(X_train_smote, y_train_smote, X_val, y_val, mlp_simple_grid)

print("-----[ ADASYN ]-----")
grid_train_mlp(X_train_adasyn, y_train_adasyn, X_val, y_val, mlp_simple_grid)

-----[ SMOTE ]-----
Fitting 5 folds for each of 72 candidates, totalling 360 fits


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (20,), 'learning_rate_init': 0.001}
| Best score: nan
| Final AUC score on val set: 0.6516972102685858
------------------------------
-----[ ADASYN ]-----
Fitting 5 folds for each of 72 candidates, totalling 360 fits


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (20,), 'learning_rate_init': 0.001}
| Best score: nan
| Final AUC score on val set: 0.6394179403472325
------------------------------
