In [None]:
%run -i utils.py

<h3><left>I. Data Wrangling</left></h3>

In [None]:
# Bad channels per recording: maps a recording name to the list of channel
# labels flagged as bad for it.
# Use the median for each of the bad channels.
# NOTE(review): 'Rew_701_rest' originally listed 'T3' twice; the duplicate is
# dropped here. Confirm that no other channel (e.g. 'T6') was intended.
stephanie_bad_chan = {
    'Rew_605_rest': ['Fp1', 'Fp2'],
    'Rew_609_rest': ['F3', 'F7', 'Fp1'],
    'Rew_611_rest': ['Fp2', 'T3'],
    'Rew_613_rest': ['F7', 'Fp2'],
    'Rew_614_rest': ['C3', 'CZ', 'F3', 'F7', 'F8', 'FZ', 'Fp1', 'O1', 'P3', 'PZ', 'T3', 'T5'],
    'Rew_615_rest': ['C4', 'F4', 'F8', 'Fp2', 'O2', 'P4', 'T4', 'T6'],
    'Rew_619_rest': ['F4'],
    'Rew_622_rest': ['F7'],
    'Rew_624_rest': ['F3', 'F7', 'Fp1', 'T3', 'T4', 'T5', 'T6'],
    'Rew_626_rest': ['F3', 'F4', 'T3', 'T4', 'T5'],
    'Rew_701_rest': ['C4', 'F7', 'F8', 'O1', 'O2', 'T3', 'T4', 'T5'],
    'Rew_702_rest': ['C3', 'F3'],
    'Rew_703_rest': ['F4', 'F7', 'F8', 'Fp2', 'T3', 'T4', 'T6'],
    'Rew_704_rest': ['C3'],
    'Rew_706_rest': ['T4']
}

In [None]:
# Load researcher and subject folders
# `subjects` starts empty; it is filled with one entry per subject folder later on.
subjects = dict()

# Root folder holding one sub-folder per subject. Set the EEG_STEPHANIE_DIR
# environment variable to run the notebook on another machine; the original
# author's local path is kept as the fallback so existing setups keep working.
stephanie_folder = Path(os.environ.get(
    "EEG_STEPHANIE_DIR",
    "C:\\Users\\peter\\git\\EEG-artifact-rejection\\artifact-rejection\\eeg-data\\Stephanie",
))

# One [subject_id, subject_path] pair per sub-directory of the root folder.
# - sorted(): os.listdir returns entries in arbitrary order, so sort to make the
#   subject order (and anything derived from it) reproducible.
# - isdir filter: each entry is later passed to os.listdir, which fails on plain
#   files that may sit next to the subject folders.
sub_folders = [
    [name, os.path.join(stephanie_folder, name)]
    for name in sorted(os.listdir(stephanie_folder))
    if os.path.isdir(os.path.join(stephanie_folder, name))
]

<h4><left>Data Architecture</left></h4>

In [None]:
# Fill `subjects`: subject id -> dict with up to three entries ('epoch',
# 'reject', 'stage'), each holding the full path of a file in that subject's
# folder. Files are classified by substrings of their names; when several
# files match the same kind, the one listed last wins.
for sub_id, path_ in sub_folders:
    sub_dir = Path(path_)
    sub_files = {}
    for file in os.listdir(sub_dir):
        full_path = os.path.join(sub_dir, file)

        # 'epoch' is checked independently of the other two kinds.
        if 'epoch' in file:
            sub_files['epoch'] = full_path

        # 'reject' and 'stages' are mutually exclusive, with 'reject' taking
        # precedence when a name contains both substrings.
        if 'reject' in file:
            kind = 'reject'
        elif 'stages' in file:
            kind = 'stage'
        else:
            kind = None
        if kind is not None:
            sub_files[kind] = full_path
    subjects[sub_id] = sub_files

<h4><left>Data Processing</left></h4>

In [None]:
# For each subject, load its files and reduce the epochs to one row per epoch
# (mean over all samples of that epoch).
# NOTE(review): `df_epochs` and `rejects` are overwritten on every pass, so
# after the loop only the subject processed last is still available.
for sub_, sub_paths in subjects.items():
    file_path = sub_paths['epoch']
    mat_reject = sub_paths['reject']
    mat_stage = sub_paths['stage']

    files = load_subject_dir(file_path, mat_reject, mat_stage)
    epochs, rejects = files['epochs'], files['rejects']

    # Clean data
    index = ['epoch', 'time']
    scaling_time = 1e3
    scalings = dict(grad=1e13)
    df = epochs.to_data_frame(
        picks=None,
        index=index,
        scaling_time=scaling_time,
        scalings=scalings,
    )
    df_epochs = df.groupby('epoch').mean()

    # Attach the stage information as an extra column when it is available;
    # any failure is only printed and processing continues.
    try:
        stages = files['stages']
        df_epochs['stage'] = stages
    except Exception as err:
        print(err)

    # NOTE(review): this recomputes the per-epoch means from `df`, so the
    # 'stage' column added above is NOT part of the final `df_epochs`.
    # Confirm whether the stage was meant to be a feature.
    df_epochs = df.groupby('epoch').mean()

<h3><left>II. Model Selection & Training</left></h3>

In [None]:
# Linear support-vector classifier that the cross-validation cell fits and the
# testing cell uses for prediction.
clfSVC = LinearSVC(
    # Objective: L2-penalised hinge loss, solved in the dual formulation
    penalty='l2',
    loss='hinge',
    dual=True,
    C=10.0,
    # Model structure
    multi_class='ovr',
    fit_intercept=True,
    intercept_scaling=1,
    class_weight=None,
    # Solver settings (fixed seed for reproducible fits)
    tol=0.0001,
    max_iter=1000,
    random_state=42,
    # Emit solver output while fitting
    verbose=1,
)

<h4><left>Cross Validation</left></h4>

In [None]:
# Time-ordered cross-validation of `clfSVC` on the per-epoch features.
# Each split trains on an initial segment of the epochs and is scored on the
# segment that follows it, so the model is never evaluated on epochs that
# precede its training data.
X, y = df_epochs.values, rejects
tscv = TimeSeriesSplit(n_splits=3)
clf_score = []  # one held-out accuracy per split, in split order

for train_index, test_index in tscv.split(X):
    # Features and labels must come from the SAME rows: the previous version
    # paired the training features with the labels of the test rows.
    X_train, y_train = X[train_index], y[train_index]
    X_val, y_val = X[test_index], y[test_index]

    clfSVC.fit(X_train, y_train)

    # Score on the held-out fold (scoring on the training fold only measures
    # fit quality), and append: `list += float` raises TypeError.
    clf_score.append(clfSVC.score(X_val, y_val))

<h3><left>III. Model Testing & Evaluation</left></h3>

- **Precision** _(how many selected items are relevant?)_:

$$\frac{\text{True Positives}}{\text{True Positives + False Positives}}$$

- **Recall** _(how many relevant items are selected?)_:

$$\frac{\text{True Positives}}{\text{True Positives + False Negatives}}$$

- **F1 Score** _(harmonic mean of Precision and Recall)_:

$$\frac{2 \cdot (\text{Recall} \cdot \text{Precision})}{\text{Recall} + \text{Precision}}$$

In [None]:
# Model testing
# For every held-out subject: rebuild the per-epoch features the same way as
# in the processing cell, predict with the fitted `clfSVC`, and print recall
# and F1 against the subject's reject labels.
# NOTE(review): `x_test` is not defined in the cells shown here; it is
# presumably an iterable of dicts with 'epoch' / 'reject' / 'stage' paths.
for sub__ in x_test:
    file_path, mat_reject, mat_stage = sub__['epoch'], sub__['reject'], sub__['stage']

    files = load_subject_dir(file_path, mat_reject, mat_stage)
    epochs, rejects = files['epochs'], files['rejects']

    # Clean data
    index = ['epoch', 'time']
    scaling_time = 1e3
    scalings = dict(grad=1e13)
    df = epochs.to_data_frame(
        picks=None,
        index=index,
        scaling_time=scaling_time,
        scalings=scalings,
    )
    df_epochs = df.groupby('epoch').mean()

    # Attach the stage information when available; failures are only printed.
    try:
        stages = files['stages']
        df_epochs['stage'] = stages
    except Exception as err:
        print(err)

    # NOTE(review): recomputed from `df`, so the 'stage' column added above is
    # not part of the features passed to the classifier.
    df_epochs = df.groupby('epoch').mean()

    X = df_epochs.values
    y = y_true = rejects
    y_pred = clfSVC.predict(X)

    print("\tRecall: %1.3f" % recall_score(y_true, y_pred))
    print("\tF1: %1.3f\n" % f1_score(y_true, y_pred))