For each subject, the DE and PSD features are each a 4-dimensional matrix of shape VideoNum × ElecNum × TrialDur × FreqBand.

-   VideoNum:
    -   negative 1-12
    -   neutral 13-16
    -   positive 17-28
-   ElecNum: 32 electrodes
-   TrialDur: 30s
-   FreqBand: delta, theta, alpha, beta and gamma


In [1]:
import pandas as pd
import pickle
import numpy as np
import os

# https://www.youtube.com/watch?v=Bmt89hHyxuM
# https://www.youtube.com/watch?v=x-jk8qDaTsc

DIR_PATH = "data/raw/synapse/EEG_Features/EEG_Features/PSD/"


def get_subject_data(path: str) -> np.ndarray:
    """Load one subject's pickled feature array from *path*.

    Args:
        path: Location of the pickle file to read.

    Returns:
        The unpickled object, guaranteed to be a ``numpy.ndarray``.

    Raises:
        OSError: If the file cannot be opened.
        TypeError: If the unpickled object is not a ``numpy.ndarray``.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserialising, so this must only be pointed at trusted files.
    """
    with open(path, "rb") as f:
        data = pickle.load(f)
    # Raise explicitly instead of asserting: asserts are stripped under
    # ``python -O``, which would let a wrong payload through silently.
    if not isinstance(data, np.ndarray):
        raise TypeError(
            f"expected a numpy.ndarray in {path!r}, got {type(data).__name__}"
        )
    return data

In [2]:
def get_subjects(dir):
    """Yield the feature array of every ``.pkl`` file directly inside *dir*.

    Args:
        dir: Directory to scan (not recursive). The name shadows the
            builtin ``dir`` but is kept so keyword callers keep working.

    Yields:
        The result of ``get_subject_data`` for each matching file, in
        ascending file-name order.
    """
    # os.listdir returns entries in arbitrary order. Sorting keeps the
    # subject order identical on every pass, which matters when several
    # passes are later lined up row by row.
    for f in sorted(os.listdir(dir)):
        if f.endswith(".pkl"):
            yield get_subject_data(os.path.join(dir, f))

In [3]:
import numpy as np
import random


def compute_rms_per_video(psd_matrix, video_indices, band_indices):
    """Return the RMS of PSD values over electrodes and time.

    Args:
        psd_matrix: 4-D array laid out as (video, electrode, time, band).
        video_indices: One video index, or a sequence of video indices.
        band_indices: One band index, or a sequence of band indices.

    Returns:
        The root mean square taken over the electrode and time axes.
        For an integer video and a list of bands the shape is
        ``(n_bands,)``; for a sequence of videos and a list of bands it
        is ``(n_videos, n_bands)``, i.e. every video with every band.

    Raises:
        IndexError: If *psd_matrix* is not 4-dimensional or an index is
            out of range.
    """
    psd = np.asarray(psd_matrix)
    if psd.ndim != 4:
        raise IndexError(
            f"psd_matrix must be 4-dimensional, got {psd.ndim} dimension(s)"
        )

    # Index one axis at a time. Putting both index lists in a single
    # subscript makes NumPy broadcast them against each other, which
    # pairs videos with bands element-wise and moves the result axis.
    per_video = psd[video_indices]

    # Electrode and time are always the 3rd- and 2nd-to-last axes here,
    # whether or not the video axis was dropped by an integer index.
    mean_square = np.mean(np.square(per_video), axis=(-3, -2))

    return np.sqrt(mean_square[..., band_indices])


def get_waves_by_subject(path):
    """Yield alpha/beta RMS values of one randomly chosen video per subject.

    Args:
        path: Directory handed to ``get_subjects``.

    Yields:
        For each subject, the result of ``compute_rms_per_video`` for a
        single random video and band indices 2 and 3 (alpha and beta).

    Note:
        The video is drawn from the module-level ``random`` generator, so
        results differ between runs unless the caller seeds it first.
    """
    band_indices = [2, 3]  # alpha and beta
    for subject in get_subjects(path):
        # Draw from the videos this subject actually has rather than
        # assuming exactly 28; for 28 videos this is the same draw as
        # random.randint(0, 27).
        video_index = random.randrange(subject.shape[0])
        yield compute_rms_per_video(subject, video_index, band_indices)

In [4]:
import pandas as pd

# Output column names, one [alpha, beta] pair per two-column group.
column_sets = [
    ["neo_alpha", "neo_beta"],
    ["modern_alpha", "modern_beta"],
    ["tech_alpha", "tech_beta"],
    ["brutal_alpha", "brutal_beta"],
    ["bio_alpha", "bio_beta"],
    ["park_alpha", "park_beta"],
]

# Each pair gets its own pass over the subject files, so every group
# holds an independent random video draw per subject.
df_list = [
    pd.DataFrame(get_waves_by_subject(DIR_PATH), columns=pair_names)
    for pair_names in column_sets
]

# Place the two-column frames side by side: one row per subject.
df = pd.concat(df_list, axis="columns")

df

Unnamed: 0,neo_alpha,neo_beta,modern_alpha,modern_beta,tech_alpha,tech_beta,brutal_alpha,brutal_beta,bio_alpha,bio_beta,park_alpha,park_beta
0,1.176962e-06,3.405693e-07,1.611911e-06,3.130609e-07,1.486361e-06,3.890940e-07,1.593370e-06,4.070130e-07,1.506408e-06,4.092633e-07,1.135341e-06,4.789880e-07
1,5.412308e-07,2.819967e-07,1.025812e-06,2.931775e-07,8.747063e-07,2.723659e-07,7.013455e-07,2.555077e-07,5.891026e-07,2.313443e-07,5.312480e-07,1.922545e-07
2,8.493576e-07,4.309609e-07,1.551664e-06,7.672581e-07,1.551664e-06,7.672581e-07,9.820764e-07,3.083286e-07,9.209090e-07,5.204173e-07,1.229376e-06,5.241795e-07
3,8.790391e-07,4.042979e-07,7.212114e-07,5.264786e-07,7.559133e-07,4.030793e-07,7.559133e-07,4.030793e-07,1.012030e-06,6.897990e-07,8.453494e-07,3.573112e-07
4,1.126781e-06,3.130186e-07,7.274571e-07,2.256217e-07,7.031826e-07,2.893262e-07,9.289262e-07,4.947304e-07,1.166122e-06,4.986102e-07,1.297219e-06,3.775486e-07
...,...,...,...,...,...,...,...,...,...,...,...,...
118,1.360272e-06,3.446002e-07,9.484516e-07,5.581920e-07,1.600921e-06,5.139729e-07,1.340577e-06,4.039993e-07,1.221537e-06,6.342885e-07,1.251112e-06,3.926074e-07
119,1.042708e-06,3.003167e-07,1.207726e-06,5.434621e-07,1.366101e-06,6.717767e-07,9.942229e-07,3.023960e-07,1.366377e-06,5.980510e-07,1.042708e-06,3.003167e-07
120,9.635022e-07,8.041358e-07,8.386717e-07,4.912636e-07,7.499176e-07,3.433120e-07,7.722135e-07,5.674426e-07,8.263021e-07,6.537708e-07,7.997910e-07,7.773748e-07
121,1.869571e-06,4.935757e-07,1.815276e-06,5.241075e-07,1.256483e-06,3.532194e-07,1.381162e-06,4.704950e-07,1.858352e-06,5.146385e-07,1.568232e-06,5.235078e-07


In [7]:
# Write the combined table to disk; index=False leaves the row index out.
df.to_csv(path_or_buf="data/processed/alpha_beta.csv", index=False)