In [1]:
import pandas as pd
import os
import numpy as np
from scipy import signal
from util_eprime_convert import _text_to_df

def filter_signal(dat:np.ndarray, 
                  band=(1, 40), 
                  sampling_rate:int=500,
                  notch_freq:float=60,
                  notch_q:float=30,
                  axis:int=-1):
    r'''
    Remove power-line noise with a notch filter, then band-pass the signal.

    Both filters are run through ``signal.filtfilt`` (forward-backward
    filtering), so the output keeps the shape of the input.

    Args:
        dat (np.ndarray): signal to filter. May be 1D or multi-dimensional
            (e.g. channels x samples); filtering runs along ``axis``.
        band (sequence of two floats): low and high cut-off of the band-pass in Hz.
        sampling_rate (int): sampling frequency of ``dat`` in Hz.
        notch_freq (float): centre frequency of the notch filter in Hz.
        notch_q (float): quality factor of the notch filter.
        axis (int): axis of ``dat`` that holds the time samples.

    Returns:
        np.ndarray: the filtered signal, same shape as ``dat``.
    '''
    # Power-line noise removal.
    b, a = signal.iirnotch(notch_freq, notch_q, sampling_rate)
    dat = signal.filtfilt(b, a, dat, axis=axis)
    # 3rd-order Butterworth band-pass.
    b, a = signal.butter(3, band, 'bandpass', fs=sampling_rate)
    dat = signal.filtfilt(b, a, dat, axis=axis)
    return dat

def preprocess_d2_data(path, num_trials:int = 96, overlap:bool = True):
    r'''
    Preprocess the d2 recordings of every subject and save one score per trial window.

    For every entry of ``path`` the function reads two files from
    ``<path>/<entry>/d2/``: a tab-separated recording and a behavioural log
    parsed by ``_text_to_df``. The first 8 rows of the recording are filtered
    with ``filter_signal`` and resampled from 500 Hz to 125 Hz. One score per
    sliding window of trials is written to ``./d2_data/S<idx>/conc.npy``.

    Args:
        path (str): root folder that holds one sub-folder per subject.
        num_trials (int): number of trials per window.
        overlap (bool): True - consecutive windows share half of their trials
            (step of ``num_trials//2``), False - windows do not overlap.

    Notes:
        - The score stored per window is the number of trials whose
          ``d2test.RESP`` differs from ``CorrectKey``. The normalised CONC
          formula is kept as a comment inside the loop.
        - Saving the signal segments is currently disabled (commented out), so
          the filtered signal and the event indices are computed but not written.
    '''
    fs_origin = 500             # sampling rate the recording is treated as (Hz)
    fs = 125                    # sampling rate after resampling (Hz)
    num_total_trials = 1320     # upper bound used for the sliding window
    # NOTE(review): the original hard-coded 4 here; it equals fs_origin // fs and
    # presumably rescales event sample indices to the resampled signal - confirm.
    idx_scale = fs_origin // fs

    # NOTE(review): os.listdir() returns entries in arbitrary order, yet both the
    # subject numbering (S{idx}) and the positional file picks below depend on
    # it - confirm the order is stable on the machine this runs on.
    subj_list = os.listdir(path)
    for idx, subj in enumerate(subj_list):
        # os.path.join works whether or not `path` ends with a separator.
        d2_dir = os.path.join(path, subj, 'd2')
        dat_list = os.listdir(d2_dir)
        save_path = f'./d2_data/S{idx}/'
        os.makedirs(save_path, exist_ok=True)
        # Positions of the recording (num1) and of the behavioural log (num2) in
        # the folder listing; they shift when the folder holds 7 or more files.
        num1, num2 = (2, 5) if len(dat_list) < 7 else (3, 7)

        # After the transpose each row is one column of the file; row 8 is
        # compared against the marker codes 4 and 5 below.
        easy_ = np.asarray(pd.read_csv(os.path.join(d2_dir, dat_list[num1]), delimiter='\t')).T
        easy = easy_[:,0::2]    # keep every second sample
        # Marker 4 can fall on an even or an odd sample, so both halves are searched.
        event = np.append(np.where(easy[8]==4)[0], np.where(easy_[8,1::2]==4)[0])
        event.sort()
        # Append the position of the last marker 5, then rescale all indices.
        event = np.append(event, np.where(easy_[8]==5)[0][-1]//2)//idx_scale
        end = easy.shape[1]//fs_origin      # number of whole seconds available
        easy = easy[:8,:end*fs_origin]      # first 8 rows, trimmed to whole seconds
        easy = filter_signal(easy)
        easy = signal.resample(easy, fs*end, axis=1)

        # compute CONC
        txt = _text_to_df(os.path.join(d2_dir, dat_list[num2]))
        txt = np.asarray(txt.loc[:,['CorrectKey', 'd2test.RESP']])[:-1]
        conc = []
        num_overlap = num_trials//2 if overlap else num_trials   # window step
        for seg, i in enumerate(range(0, num_total_trials-num_trials, num_overlap)):
            tmp = txt[i:i+num_trials]
            # Normalised CONC variant, currently disabled:
            # conc.append((tmp[(tmp[:,0]==tmp[:,1]) & (tmp[:,0] == '1')].shape[0] - np.sum(tmp[:,0]!=tmp[:,1])) / num_trials)
            conc.append(np.sum(tmp[:,0]!=tmp[:,1]))
            # Segment export, currently disabled:
            # np.save(save_path + f'segment{seg}.npy', easy[:,event[i]:event[i+num_trials]])
        np.save(save_path + 'conc.npy',np.array(conc,float))
        

# Root folder of the "Day 2" recordings. The default is a machine-specific
# local path; set the D2_DATA_ROOT environment variable to run elsewhere.
D2_DATA_ROOT = os.environ.get(
    'D2_DATA_ROOT',
    r'D:/One_한양대학교/private object minsu/coding/data/samsung_2024/Day 2/',
)

# 48-trial windows without overlap.
preprocess_d2_data(D2_DATA_ROOT, num_trials=48, overlap=False)

In [3]:
# Reload the per-window scores saved under subject index 0.
conc_file = './d2_data/S0/conc.npy'
conc = np.load(conc_file)
# Loading a segment file is left disabled:
# segment0 = np.load('./d2_data/S0/segment1.npy')

In [1]:
"""
PATH: d2_data/S{subject_num}/
    - conc.npy: size = (26,), CONC values of 96 trials with 50 % overlap
    - segment{num}.npy: size = (8,x) where x = length of 96 trials
        - 8 electrode channels: Fp(AF7, FPZ, AF8), Central (C3, CZ, C4), Ear (Left, Right) 
        - 125 Hz downsampled, 1-40 Hz bandpass filtered, 60 Hz notch filtered
    - There are a total of 26 segments corresponding to each of the 26 CONC values.
TODO
    - Divide each segment into epochs (you can choose your own epoch size).
    - The label for each epoch is the CONC value of the segment it came from.
    - You can choose a threshold on the CONC values to turn this into a binary classification task.
""" 

'\nPATH: d2_data/S{subject_num}/\n    - conc.npy: size = (26,), CONC values of 96 trials with 50 % overlap\n    - segment{num}.npy: size = (8,x) where x = length of 96 trials\n        - 8 electrode channels: Fp(AF7, FPZ, AF8), Central (C3, CZ, C4), Ear (Left, Right) \n        - 125 Hz downsampled, 1-40 Hz bandpass filtered, 60 Hz notch filtered\n    - There are a total of 26 segments corresponding to each of the 26 CONC values.\nTODO\n    - Divide each segment into epochs (you can choose your own epoch size).\n    - The label for each epoch is the CONC value of the segment it came from.\n    - You can choose a threshold on the CONC values to turn this into a binary classification task.\n'