<a href="https://colab.research.google.com/github/nspyrop03/bci-challenge-ner15/blob/main/nn_templates.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/bci-challenge-ner15/

Mounted at /content/drive
/content/drive/MyDrive/bci-challenge-ner15


In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_curve, auc, make_scorer
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE, ADASYN

from detection import SubjectData
from utils import train_subjects, test_subjects

In [3]:
RAND_STATE = 42

In [4]:
labels = pd.read_csv('./data/TrainLabels.csv')
y = labels['Prediction'].to_numpy()
print(y.shape)

(5440,)


In [2]:
X = []
for subject in train_subjects:
  sd = SubjectData(subject, train=True)
  X.append(np.hstack((sd.is_short.reshape(-1, 1), sd.get_green_similarity(), sd.get_feedback_similarity())))
X = np.vstack(X)
print(X.shape)

(5440, 26)


In [3]:
def get_session_and_trial(verbose=False):
    features = []
    for i in range(1, 5):
        for j in range(1, 61):
            features.append([i, j])
    for j in range(1, 101):
        features.append([5, j])
    features = np.array(features)
    if verbose: print(f'session_and_trial: {features.shape}')
    return features

def transform_data(X, verbose=False):
  extra_features = get_session_and_trial(verbose=verbose)
  extra_cols = []
  for i in range(X.shape[0] // extra_features.shape[0]):
    extra_cols.append(extra_features)
  extra_cols = np.array(extra_cols)
  extra_cols = extra_cols.reshape(extra_cols.shape[0] * extra_cols.shape[1], extra_cols.shape[2])
  if verbose: print(f'extra_cols: {extra_cols.shape}')
  final_X = np.hstack((extra_cols, X))
  return final_X

In [4]:
final_X = transform_data(X, verbose=True)

session_and_trial: (340, 2)
extra_cols: (5440, 2)


In [5]:
print(final_X.shape)
print(final_X[42])

(5440, 28)
[ 1.         43.          1.          0.0543408   0.07846813  0.59062705
  0.17124898  0.53101865  0.52969934  0.57644477  0.17124898  0.10183663
  0.12761402  0.21723377  0.13522701  0.44782589  0.45304061  0.30480136
  0.13522701  0.05373682  0.05618085  0.31781987  0.10753104  0.47171442
  0.476525    0.31740053  0.10753104  0.        ]


In [7]:
np.save("./cache/rnn_data/handcrafted_features_train.npy", final_X)

In [8]:
X_train, X_val, y_train, y_val = train_test_split(final_X, y, test_size=0.3, random_state=RAND_STATE)
print(f'X_train: {X_train.shape}\nX_val: {X_val.shape}')

X_train: (3808, 28)
X_val: (1632, 28)


In [9]:
smote = SMOTE(random_state=RAND_STATE)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)
print(f'X_train_smote: {X_train_smote.shape}')

adasyn = ADASYN(random_state=RAND_STATE)
X_train_adasyn, y_train_adasyn = adasyn.fit_resample(X_train, y_train)
print(f'X_train_adasyn: {X_train_adasyn.shape}')

X_train_smote: (5356, 28)
X_train_adasyn: (5426, 28)


In [8]:
test_X = []
for subject in test_subjects:
  sd = SubjectData(subject, train=False)
  test_X.append(np.hstack((sd.is_short.reshape(-1, 1), sd.get_green_similarity(), sd.get_feedback_similarity())))
test_X = np.vstack(test_X)
print(f'test_X: {test_X.shape}')
test_X = transform_data(test_X, verbose=True)
print(f'test_X: {test_X.shape}')

test_X: (3400, 26)
session_and_trial: (340, 2)
extra_cols: (3400, 2)
test_X: (3400, 28)


In [9]:
np.save("./cache/rnn_data/handcrafted_features_test.npy", test_X)

In [None]:
# Create custom scorer for GridSearchCV
# y_scores is the predicted probabilities
def auc_scorer(y_true, y_scores):
    fpr, tpr, _ = roc_curve(y_true, y_scores)
    return auc(fpr, tpr)
auc_score = make_scorer(auc_scorer, needs_proba=True)

def grid_train_mlp(X_train, y_train, X_val, y_val, grid, cv=5, verbose=1):
  gscv = GridSearchCV(estimator=MLPClassifier(random_state=RAND_STATE),
                      param_grid=grid,
                      cv=cv,
                      scoring='roc_auc',
                      n_jobs=-1,
                      verbose=verbose)
  model = gscv.fit(X_train, y_train)
  print('Grid Train:')
  print(f'| Best parameters: {model.best_params_}')
  print(f'| Best score: {model.best_score_}')
  # Predict probabilities on the test set (for the positive class)
  y_proba = model.predict_proba(X_val)[:, 1]
  # Compute FPR, TPR
  fpr, tpr, thresholds = roc_curve(y_val, y_proba)
  # Compute AUC
  final_auc = auc(fpr, tpr)
  print("| Final AUC score on val set:", final_auc)
  print('-' * 30)


In [None]:
mlp_simple_grid = {
    'hidden_layer_sizes': [(20,), (50,), (100,), (120,)],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.001, 0.002, 0.01],
    'alpha': [1e-05, 4e-05, 1e-04]
}
print("-----[ SMOTE ]-----")
grid_train_mlp(X_train_smote, y_train_smote, X_val, y_val, mlp_simple_grid)

print("-----[ ADASYN ]-----")
grid_train_mlp(X_train_adasyn, y_train_adasyn, X_val, y_val, mlp_simple_grid)

-----[ SMOTE ]-----
Fitting 5 folds for each of 72 candidates, totalling 360 fits


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (20,), 'learning_rate_init': 0.001}
| Best score: nan
| Final AUC score on val set: 0.6516972102685858
------------------------------
-----[ ADASYN ]-----
Fitting 5 folds for each of 72 candidates, totalling 360 fits


 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]


Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (20,), 'learning_rate_init': 0.001}
| Best score: nan
| Final AUC score on val set: 0.6394179403472325
------------------------------


In [None]:
print(np.unique(y_train), np.unique(y_val))

[0 1] [0 1]


Fitting 5 folds for each of 12 candidates, totalling 60 fits
Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (10,), 'learning_rate_init': 0.1}
| Best score: 0.6379013253968352
| Final AUC score on val set: 0.6417643567294851

In [None]:
mlp_simpler_grid = {
    'hidden_layer_sizes': [(10,), (15,), (20,), (30,)],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.0001, 0.0005, 0.001],
    'alpha': [1e-05, 4e-05, 7e-05]
}
grid_train_mlp(X_train, y_train, X_val, y_val, mlp_simpler_grid, verbose=10)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 7e-05, 'hidden_layer_sizes': (15,), 'learning_rate_init': 0.001}
| Best score: 0.6445663266250831
| Final AUC score on val set: 0.640669980709304
------------------------------


In [None]:
mlp_smoter_grid = {
    'hidden_layer_sizes': [(15,), (17,), (18,)],
    'activation': ['relu', 'identity'],
    'learning_rate_init': [0.007, 0.005, 0.001],
    'alpha': [1e-04, 7e-05, 3e-04]
}
grid_train_mlp(X_train_smote, y_train_smote, X_val, y_val, mlp_smoter_grid, verbose=10)

Fitting 5 folds for each of 54 candidates, totalling 270 fits
Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (15,), 'learning_rate_init': 0.001}
| Best score: 0.6807830225702288
| Final AUC score on val set: 0.6437620566849681
------------------------------




In [None]:
mlp_grid1 = {
    'hidden_layer_sizes': [(15,), (16,), (20,)],
    'activation': ['relu', 'tanh'],
    'learning_rate_init': [0.0007, 0.0005, 0.0003],
    'alpha': [5e-04, 8e-04, 1e-05, 3e-05],
    'max_iter': [250, 300, 400],
    'solver': ['adam', 'sgd']
}
grid_train_mlp(X_train_smote, y_train_smote, X_val, y_val, mlp_grid1, verbose=10)
grid_train_mlp(X_train_adasyn, y_train_adasyn, X_val, y_val, mlp_grid1, verbose=10)

Fitting 5 folds for each of 432 candidates, totalling 2160 fits
Grid Train:
| Best parameters: {'activation': 'relu', 'alpha': 1e-05, 'hidden_layer_sizes': (15,), 'learning_rate_init': 0.0007, 'max_iter': 400, 'solver': 'adam'}
| Best score: 0.6856220970055567
| Final AUC score on val set: 0.6454926546965425
------------------------------
Fitting 5 folds for each of 432 candidates, totalling 2160 fits


KeyboardInterrupt: 

In [None]:
model = MLPClassifier(
    hidden_layer_sizes=(15,),
    activation='relu',
    max_iter=400,
    learning_rate_init=0.0007,
    solver='adam',
    alpha=1e-05
)
model.fit(X_train_smote, y_train_smote)
pred = model.predict_proba(test_X)[:, 1]
print(pred.shape)

(3400,)


In [None]:
ids = []
for i in range(len(test_subjects)):
    name = f'S{test_subjects[i]}'
    for session in range(1, 5):
        for fid in range(1, 61):
            ids.append(f'{name}_Sess{session:02}_FB{fid:03}')
    for fid in range(1, 101):
        ids.append(f'{name}_Sess05_FB{fid:03}')
ids = np.array(ids)
print(f'ids: {ids.shape}')
print(ids)

ids: (3400,)
['S01_Sess01_FB001' 'S01_Sess01_FB002' 'S01_Sess01_FB003' ...
 'S25_Sess05_FB098' 'S25_Sess05_FB099' 'S25_Sess05_FB100']


In [None]:
result = pd.DataFrame({
    'IdFeedback': ids,
    'Prediction': pred
})
result.to_csv('./submissions/mlp_smote_templates1.csv', sep=',', index=False)

Let's try some SVMs...

In [None]:
def grid_train_scv(X_train, y_train, X_val, y_val, grid, cv=5, verbose=1):
  gscv = GridSearchCV(estimator=SVC(random_state=RAND_STATE, probability=True),
                      param_grid=grid,
                      cv=cv,
                      scoring='roc_auc',
                      n_jobs=-1,
                      verbose=verbose)
  model = gscv.fit(X_train, y_train)
  print('Grid Train:')
  print(f'| Best parameters: {model.best_params_}')
  print(f'| Best score: {model.best_score_}')
  # Predict probabilities on the test set (for the positive class)
  y_proba = model.predict_proba(X_val)[:, 1]
  # Compute FPR, TPR
  fpr, tpr, thresholds = roc_curve(y_val, y_proba)
  # Compute AUC
  final_auc = auc(fpr, tpr)
  print("| Final AUC score on val set:", final_auc)
  print('-' * 30)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Grid Train:

| Best parameters: {'C': 0.1, 'degree': 3, 'kernel': 'poly'}

| Best score: 0.5702042687480635

| Final AUC score on val set: 0.6095266359994065

------------------------------

96 fits -> 5+ hours...

In [None]:
svm_small_grid = {
    'C': [0.001, 0.01, 0.1],
    'kernel': ['sigmoid', 'poly'],
    'degree': [2, 3],
    'gamma': ['scale', 'auto']
}
grid_train_scv(X_train, y_train, X_val, y_val, svm_small_grid, cv=4)


Fitting 4 folds for each of 24 candidates, totalling 96 fits
Grid Train:
| Best parameters: {'C': 0.1, 'degree': 3, 'gamma': 'auto', 'kernel': 'poly'}
| Best score: 0.6031417306622532
| Final AUC score on val set: 0.6047521887520404
------------------------------


In [None]:
first_svm = SVC(random_state=RAND_STATE, probability=True, C=0.1, kernel='poly', degree=3, gamma='auto')
first_svm.fit(X_train_smote, y_train_smote)
pred = first_svm.predict_proba(X_val)[:, 1]
fpr, tpr, thresholds = roc_curve(y_val, pred)
final_auc = auc(fpr, tpr)
print("SMOTE: ", final_auc)

SMOTE:  0.6431147054459119


In [17]:
def scale_and_train_sgd(X_train, y_train, X_val, y_val, grid, n_iter=30, cv=3, verbose=1):
  scaler = StandardScaler()
  X_scaled = scaler.fit_transform(X_train)
  X_val_scaled = scaler.transform(X_val)
  search = RandomizedSearchCV(
    SGDClassifier(random_state=RAND_STATE),
    grid,
    n_iter=n_iter,
    cv=cv,
    n_jobs=-1,
    random_state=RAND_STATE,
    verbose=verbose,
    scoring='roc_auc'
  )
  model = search.fit(X_scaled, y_train)
  print('RandomGrid Train:')
  print(f'| Best parameters: {model.best_params_}')
  print(f'| Best score: {model.best_score_}')
  y_proba = model.predict_proba(X_val_scaled)[:, 1]
  fpr, tpr, thresholds = roc_curve(y_val, y_proba)
  final_auc = auc(fpr, tpr)
  print("| Final AUC score on val set:", final_auc)
  print('-' * 30)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
RandomGrid Train:

| Best parameters: {'alpha': 0.01, 'eta0': 0.01, 'l1_ratio': np.float64(0.06489224710898156), 'learning_rate': 'optimal', 'loss': 'log_loss', 'max_iter': 1000, 'penalty': 'l2'}

| Best score: 0.6418813886230889

| Final AUC score on val set: 0.6463217836474254

------------------------------

In [25]:
from scipy.stats import uniform, loguniform

sgd_small_grid = {
    'loss': ['hinge', 'log_loss', 'modified_huber'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [1e-5, 1e-4, 1e-3, 1e-2, 0.1],  # Small values work better
    'learning_rate': ['constant', 'optimal', 'invscaling'],
    'eta0': [0.01, 0.1, 1],  # Samples between 0.01 and 1.01
    'max_iter': [1000, 2000],  # Fixed options
    'l1_ratio': uniform(0, 1)  # Only used if penalty='elasticnet'
}

sgd_bigger_grid = {
    'loss': ['log_loss', 'modified_huber'],  # Drop 'hinge' since 'log_loss' won
    'penalty': ['l2', 'elasticnet'],        # 'l1' less likely to outperform 'l2'
    'alpha': loguniform(1e-4, 1e-1),        # Wider range around 0.01 (your best)
    'learning_rate': ['optimal', 'adaptive', 'invscaling'],  # Added 'adaptive'
    'eta0': [0.001, 0.01, 0.1],            # Finer granularity near 0.01
    'max_iter': [1000, 2000, 3000],        # Test longer training
    'l1_ratio': uniform(0.01, 0.3),        # Focus near 0.06 (your best)
    'early_stopping': [True],               # Prevent overfitting
    'validation_fraction': [0.1],           # For early stopping
    'tol': [1e-4, 1e-3]                    # Stopping tolerance
}

sgd_final_grid = {
    # Fixed based on best results
    'loss': ['log_loss'],
    'penalty': ['elasticnet'],
    'early_stopping': [True],
    'validation_fraction': [0.1],

    # Optimized ranges around best values
    'alpha': loguniform(1e-5, 1e-2),  # Tight around 0.00095
    'l1_ratio': uniform(0.1, 0.35),    # Best was 0.223 → test [0.1, 0.45]
    'eta0': [0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.15, 0.2],         # Best was 0.1 → test nearby
    'learning_rate': ['invscaling', 'adaptive'],  # 'adaptive' is a wildcard
    'power_t': uniform(0.1, 0.5),      # Only for 'invscaling'
    'max_iter': [2000, 3000, 5000],    # Test longer runs
    'tol': [1e-4, 3e-4, 5e-4, 5e-5, 3e-5, 7e-5, 1e-5],               # Best was 1e-4 → try stricter
    'shuffle': [True, False]           # Sometimes matters
}

print("-----> SMOTE:")
scale_and_train_sgd(X_train_smote, y_train_smote, X_val, y_val, sgd_final_grid, n_iter=100, cv=5)

print("-----> ADASYN:")
scale_and_train_sgd(X_train_adasyn, y_train_adasyn, X_val, y_val, sgd_final_grid, n_iter=100, cv=5)

-----> SMOTE:
Fitting 5 folds for each of 100 candidates, totalling 500 fits
RandomGrid Train:
| Best parameters: {'alpha': np.float64(0.004289592532297353), 'early_stopping': True, 'eta0': 0.08, 'l1_ratio': np.float64(0.24628111110045256), 'learning_rate': 'invscaling', 'loss': 'log_loss', 'max_iter': 2000, 'penalty': 'elasticnet', 'power_t': np.float64(0.37398594162404364), 'shuffle': True, 'tol': 0.0001, 'validation_fraction': 0.1}
| Best score: 0.6576164359712109
| Final AUC score on val set: 0.6531143344709898
------------------------------
-----> ADASYN:
Fitting 5 folds for each of 100 candidates, totalling 500 fits
RandomGrid Train:
| Best parameters: {'alpha': np.float64(0.0018773201057654923), 'early_stopping': True, 'eta0': 0.15, 'l1_ratio': np.float64(0.21965788335074055), 'learning_rate': 'adaptive', 'loss': 'log_loss', 'max_iter': 2000, 'penalty': 'elasticnet', 'power_t': np.float64(0.14707849413428006), 'shuffle': False, 'tol': 7e-05, 'validation_fraction': 0.1}
| Best sc

In [20]:
first_sgd = SGDClassifier(
    random_state=RAND_STATE,
    alpha=np.float64(0.0009503349811016555),
    early_stopping=True,
    eta0=0.1,
    l1_ratio=np.float64(0.22334485973140536),
    learning_rate='invscaling',
    loss='log_loss',
    max_iter=3000,
    penalty='elasticnet',
    tol=0.0001,
    validation_fraction=0.1
)
smote_scaler = StandardScaler()
X_scaled = smote_scaler.fit_transform(X_train_smote)
X_val_scaled = smote_scaler.transform(X_val)
first_sgd.fit(X_scaled, y_train_smote)
pred = first_sgd.predict_proba(X_val_scaled)[:, 1]
fpr, tpr, thresholds = roc_curve(y_val, pred)
final_auc = auc(fpr, tpr)
print("SMOTE: ", final_auc)

SMOTE:  0.6527674729188307


In [27]:
final_smote_sgd = SGDClassifier(
    random_state=RAND_STATE,
    alpha=np.float64(0.004289592532297353),
    early_stopping=True,
    eta0=0.15,
    l1_ratio=np.float64(0.24628111110045256),
    learning_rate='invscaling',
    loss='log_loss',
    max_iter=2000,
    penalty='elasticnet',
    tol=0.0001,
    validation_fraction=0.1,
    shuffle=True,
    power_t=np.float64(0.37398594162404364)
)
smote_scaler = StandardScaler()
X_scaled = smote_scaler.fit_transform(X_train_smote)
X_val_scaled = smote_scaler.transform(X_val)
X_test_scaled = smote_scaler.transform(test_X)

final_smote_sgd.fit(X_scaled, y_train_smote)
pred = final_smote_sgd.predict_proba(X_test_scaled)[:, 1]

#fpr, tpr, thresholds = roc_curve(y_val, pred)
#final_auc = auc(fpr, tpr)
#print("SMOTE: ", final_auc) #-> SMOTE:  0.6528101350348717

In [30]:
print(pred.shape)
# Construct IdFeedback column
ids = []
for i in range(len(test_subjects)):
    name = f'S{test_subjects[i]}'
    for session in range(1, 5):
        for fid in range(1, 61):
            ids.append(f'{name}_Sess{session:02}_FB{fid:03}')
    for fid in range(1, 101):
        ids.append(f'{name}_Sess05_FB{fid:03}')
ids = np.array(ids)
print(f'ids: {ids.shape}')
print(ids)

result = pd.DataFrame({
    'IdFeedback': ids,
    'Prediction': pred
})
result.to_csv('./submissions/sgd_smote_final1.csv', sep=',', index=False)

(3400,)
ids: (3400,)
['S01_Sess01_FB001' 'S01_Sess01_FB002' 'S01_Sess01_FB003' ...
 'S25_Sess05_FB098' 'S25_Sess05_FB099' 'S25_Sess05_FB100']
