In [3]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.metrics import accuracy_score, roc_auc_score
import numpy as np
from detection import SubjectData
from utils import train_subjects, test_subjects
from sklearn.metrics import roc_curve, auc, make_scorer
from sklearn.model_selection import train_test_split, GridSearchCV, LeaveOneOut,StratifiedKFold
import pandas as pd
from imblearn.over_sampling import SMOTE

In [2]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/bci-challenge-ner15/

ModuleNotFoundError: No module named 'google.colab'

In [4]:
RAND_STATE = 42

In [5]:
labels = pd.read_csv('./data/TrainLabels.csv')
global_y = labels['Prediction'].to_numpy()
print(global_y.shape)

(5440,)


In [6]:
X = []
for subject in train_subjects:
  sd = SubjectData(subject, train=True)
  X.append(np.hstack((sd.is_short.reshape(-1, 1), sd.get_green_similarity(), sd.get_feedback_similarity())))
X = np.vstack(X)
print(X.shape)

(5440, 26)


In [7]:
test_set = []
for i in range(len(test_subjects)):
    sd = SubjectData(test_subjects[i], train=False)
    test_set.append(np.hstack((sd.is_short.reshape(-1, 1), sd.get_green_similarity(), sd.get_feedback_similarity())))

test_set = np.vstack(test_set)
print(f'test_set: {test_set.shape}')

test_set: (3400, 26)


In [8]:
def get_session_and_trial(verbose=False):
    features = []
    for i in range(1, 5):
        for j in range(1, 61):
            features.append([i, j])
    for j in range(1, 101):
        features.append([5, j])
    features = np.array(features)
    if verbose: print(f'session_and_trial: {features.shape}')
    return features

def transform_data(X, verbose=False):
  extra_features = get_session_and_trial(verbose=verbose)
  extra_cols = []
  for i in range(X.shape[0] // extra_features.shape[0]):
    extra_cols.append(extra_features)
  extra_cols = np.array(extra_cols)
  extra_cols = extra_cols.reshape(extra_cols.shape[0] * extra_cols.shape[1], extra_cols.shape[2])
  if verbose: print(f'extra_cols: {extra_cols.shape}')
  final_X = np.hstack((extra_cols, X))
  return final_X

In [9]:
X = transform_data(X)
test_set = transform_data(test_set)

In [10]:
def remove_high_correlation_columns(X, verbose=False):
  cols_to_remove = [10, 8, 18, 16, 26, 24]
  X = np.delete(X, cols_to_remove, axis=1)
  if verbose: print(f'X after removing columns: {X.shape}')
  return X

In [11]:
X = remove_high_correlation_columns(X)
test_set = remove_high_correlation_columns(test_set)

In [12]:
scaler = StandardScaler()
X = scaler.fit_transform(X)
test_set = scaler.transform(test_set)

In [13]:
print(X.shape)
print(test_set.shape)

(5440, 22)
(3400, 22)


In [14]:
X_train, X_val, y_train, y_val = train_test_split(X, global_y, test_size=0.3, random_state=RAND_STATE)
print(f'X_train: {X_train.shape}\nX_val: {X_val.shape}')

X_train: (3808, 22)
X_val: (1632, 22)


In [15]:
def loocv_with_smote_svm(X, y, best_params):
    loo = LeaveOneOut()
    y_true, y_prob = [], []

    for train_idx, test_idx in loo.split(X):
        X_train, X_test = X[train_idx], X[test_idx]
        y_train, y_test = y[train_idx], y[test_idx]

        smote = SMOTE(random_state=42)
        X_res, y_res = smote.fit_resample(X_train, y_train)

        model = SVC(**best_params, probability=True)
        model.fit(X_res, y_res)

        prob = model.predict_proba(X_test)[:, 1]
        y_true.append(y_test[0])
        y_prob.append(prob[0])

    return np.array(y_true), np.array(y_prob)

param_grid = {
    'C': [0.01, 0.1, 1, 10],  # Ευρύ φάσμα κανονικοποίησης
    'kernel': ['rbf', 'linear', 'poly'],
    'gamma': ['scale', 'auto'] + [0.01, 0.1, 1],  # Για μη γραμμικούς πυρήνες
    'degree': [2, 3],  # Μόνο για πολυωνυμικό πυρήνα
    'class_weight': [None, 'balanced',{0: 1, 1: 2}],  # Ισορροπία κλάσεων
    'shrinking': [True],  # Ενεργοποίηση βελτιστοποίησης shrinking
}


smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X_train, y_train)

grid = GridSearchCV(
    estimator=SVC(probability=True, random_state=RAND_STATE),
    param_grid=param_grid,
    scoring='roc_auc',
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
    n_jobs=-1,         # Παράλληλος υπολογισμός
    verbose=3,         # Λεπτομερής έξοδος
    refit=True
)
model = grid.fit(X_res, y_res)
best_params = grid.best_params_

print(f"✅ Best Params: {best_params}")
best_model = grid.best_estimator_
y_proba = model.predict_proba(X_val)[:, 1]
fpr, tpr, thresholds = roc_curve(y_val, y_proba)
final_auc = auc(fpr, tpr)
print("| Final AUC score on val set:", final_auc)
print('-' * 30)


Fitting 5 folds for each of 360 candidates, totalling 1800 fits
[CV 2/5] END C=0.01, class_weight=None, degree=2, gamma=scale, kernel=linear, shrinking=True;, score=0.669 total time=   3.0s
[CV 1/5] END C=0.01, class_weight=None, degree=2, gamma=scale, kernel=linear, shrinking=True;, score=0.647 total time=   3.3s
[CV 3/5] END C=0.01, class_weight=None, degree=2, gamma=scale, kernel=linear, shrinking=True;, score=0.637 total time=   3.2s
[CV 1/5] END C=0.01, class_weight=None, degree=2, gamma=scale, kernel=rbf, shrinking=True;, score=0.662 total time=   5.8s
[CV 3/5] END C=0.01, class_weight=None, degree=2, gamma=scale, kernel=rbf, shrinking=True;, score=0.679 total time=   5.9s
[CV 5/5] END C=0.01, class_weight=None, degree=2, gamma=scale, kernel=rbf, shrinking=True;, score=0.686 total time=   5.9s
[CV 2/5] END C=0.01, class_weight=None, degree=2, gamma=scale, kernel=rbf, shrinking=True;, score=0.659 total time=   6.0s
[CV 4/5] END C=0.01, class_weight=None, degree=2, gamma=scale, ker

KeyboardInterrupt: 

In [None]:
best_model=SVC(
    C = 0.1,
    kernel = 'rbf',
    gamma = 0.1,
    class_weight = None,
    shrinking = True,
    probability = True,
    random_state = RAND_STATE
)

In [36]:
best_model.fit(X_res, y_res)

In [37]:
test_probs = best_model.predict_proba(test_set)[:, 1]

In [25]:
# Construct IdFeedback column
ids = []
for i in range(len(test_subjects)):
    name = f'S{test_subjects[i]}'
    for session in range(1, 5):
        for fid in range(1, 61):
            ids.append(f'{name}_Sess{session:02}_FB{fid:03}')
    for fid in range(1, 101):
        ids.append(f'{name}_Sess05_FB{fid:03}')
ids = np.array(ids)
print(f'ids: {ids.shape}')
print(ids)

ids: (3400,)
['S01_Sess01_FB001' 'S01_Sess01_FB002' 'S01_Sess01_FB003' ...
 'S25_Sess05_FB098' 'S25_Sess05_FB099' 'S25_Sess05_FB100']


In [38]:
# Αποθήκευση των αποτελεσμάτων
svm_result = pd.DataFrame({
    'IdFeedback': ids,
    'Prediction': test_probs,
})
svm_result.to_csv('submissions/svm4.csv', sep=',', index=False)