In [21]:
from detection import SubjectData
from utils import train_subjects
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix, f1_score

In [18]:
RAND_STATE = 42

In [22]:
labels = pd.read_csv('data/TrainLabels.csv')
y = labels['Prediction'].to_numpy()
print(y.shape)

(5440,)


In [2]:
X = []
for i in range(len(train_subjects)):
    sd = SubjectData(train_subjects[i])
    X.append(sd.errp_features)

In [6]:
X = np.array(X).reshape(X.shape[0]*X.shape[1], X.shape[2])
X.shape


(5440, 80)

ErrP features shape: (n_samples, n_features)

n_features: ch1_check, ch1_mean, ch1_peak, ch1_latency, ch1_amplitude, ch2_check, ch2_mean, ..., ch16_amplitude

Idea is to get the mean for each feature across all channels

Check will be 1 if most checks are 1 otherwise 0


In [16]:
X_reshaped = X.reshape(X.shape[0], 16, 5)
print(f'X_reshaped: {X_reshaped.shape}')

# Apply majority vote: 1 if sum > 8, else 0
check = X_reshaped[:, :, 0]  # shape (n_samples, 16)
majority_check = (check.sum(axis=1) > 8).astype(int)  # shape (n_samples,)

mean_mean = X_reshaped[:, :, 1].mean(axis=1)
mean_peak = X_reshaped[:, :, 2].mean(axis=1)
mean_latency = X_reshaped[:, :, 3].mean(axis=1)
mean_amplitude = X_reshaped[:, :, 4].mean(axis=1)

final_X = np.stack([majority_check, mean_mean, mean_peak, mean_latency, mean_amplitude], axis=1)
print(f'final_X: {final_X.shape}')

X_reshaped: (5440, 16, 5)
final_X: (5440, 5)


In [23]:
X_train, X_val, y_train, y_val = train_test_split(final_X, y, test_size=0.3, random_state=RAND_STATE)

In [24]:
mlp_grid = {
    'hidden_layer_sizes': [(10,), (20,), (50,), (100,), (200,)],
    'alpha': [0.0001, 0.001, 0.01],
    'learning_rate_init': [0.001, 0.01]
}

In [25]:
gscv = GridSearchCV(estimator=MLPClassifier(random_state=RAND_STATE), param_grid=mlp_grid, cv=5, n_jobs=-1, verbose=4)
model = gscv.fit(X_train, y_train)
print(f'Best parameters: {model.best_params_}')
print(f'Best score: {model.best_score_}')

score = f1_score(y_val, model.predict(X_val))
print(f'F1 score: {score}')

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV 3/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.01;, score=0.702 total time=   0.3s
[CV 2/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.01;, score=0.703 total time=   0.4s
[CV 1/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.01;, score=0.703 total time=   0.4s
[CV 4/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.01;, score=0.696 total time=   0.4s
[CV 5/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.01;, score=0.703 total time=   0.4s
[CV 2/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.001;, score=0.703 total time=   1.3s
[CV 3/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.001;, score=0.703 total time=   1.5s
[CV 5/5] END alpha=0.0001, hidden_layer_sizes=(10,), learning_rate_init=0.001;, score=0.703 total time=   1.6s
[CV 1/5] END alpha=0.0001, hidden_layer_sizes=(10,), le