In [1]:
import pandas as pd
import os, time, glob, sys
from collections import Counter
import re
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# 경로 내 모든 response data 파일 처리
path = 'C:/Users/USER/Downloads/eeg-fnirs-face-word-eeg/response-data/'
fnames = os.listdir(path)
total_df = pd.DataFrame()

#각 피험자별 데이터 반복 처리
for fname in fnames:   
    # 데이터 파일에서 필요한 열만 선택
    df = pd.read_csv(path+fname).iloc[:,:7]
    df.columns = ["time", "type", "component", "response", "type_stim",'scroll','stimuli']
    
    # 자극(stimuli) 행만 추출; 이미지 자극(.png) 포함된 행으로 필터링
    stim_df = df[df['stimuli'].str.contains('.png', na=False)].copy().reset_index().drop(['index'],axis=1)
    # 자극 파일명에서 자극 코드 추출
    stim_df['filename'] = stim_df['stimuli'].str.extract(r"([a-z]+-[a-z]+-\d+\.png)")

    # congruency (자극의 정합성 조건) 추출
    stim_df['congruency'] = stim_df['filename'].str.extract(r'^([a-z]+)-')

    # emotion 추출
    stim_df['emotion'] = stim_df['filename'].str.extract(r'-([a-z]+)-\d+\.png')[0]
    stim_df['emotion'] = stim_df['emotion'].map({'ha': 'happy', 'sa': 'sad'})

    # response 데이터에서 response 라벨링 (이미 scroll='down' 조건 반영됨)
    resp_df = df[df['component'].str.contains('Mouse:', na=False)].copy()
    resp_df = resp_df[resp_df['scroll'].str.contains('down', na=False)].copy()
    resp_df = resp_df[resp_df['response'].isin(['Right', 'Left'])].copy()
    resp_df['response_label'] = resp_df['response'].map({'Right': 'incong', 'Left': 'cong'})

    # stim과 resp를 시간 및 정답여부로 1:1 매칭
    responses, rts, corrects = [], [], []
    resp_idx = 0

    for _, stim_row in stim_df.iterrows():
        stim_time = stim_row['time']
        
        # stim 이후의 첫 응답 찾기
        while resp_idx < len(resp_df) and resp_df.iloc[resp_idx]['time'] <= stim_time:
            resp_idx += 1
        
        if resp_idx < len(resp_df):
            resp_row = resp_df.iloc[resp_idx]
            response = resp_row['response']
            rt = resp_row['time'] - stim_time
            # 정답 여부 판단 (위 조건대로)
            if stim_row['congruency'] == 'cong' and response == 'Left':
                correct = True
            elif stim_row['congruency'] == 'incong' and response == 'Right':
                correct = True
            else:
                correct = False

            responses.append(response)
            rts.append(rt)
            corrects.append(correct)
            resp_idx += 1
        else:
            responses.append(None)
            rts.append(None)
            corrects.append(None)

    # 6. 결과 열 추가
    stim_df['response'] = responses
    stim_df['response_time'] = rts
    stim_df['correct'] = corrects

    sub_name = fname.split('_')[0]
    sub_group = pd.DataFrame([sub_name for i in range(len(stim_df))],columns=['subject'])
    stim_df['subject'] = sub_group
    total_df = pd.concat([total_df,stim_df],axis=0)
total_df = total_df.reset_index().drop(['index'],axis=1)

In [48]:
total_df.isnull().sum()

time             0
type             0
component        0
response         0
type_stim        0
scroll           0
stimuli          0
filename         0
congruency       0
emotion          0
response_time    0
correct          0
subject          0
dtype: int64

In [49]:
list(total_df['subject'].unique())

['cey', 'kjy', 'lyj']

In [50]:
total_df.to_csv('C:/Users/USER/Downloads/eeg-fnirs-face-word-eeg/response-data/total_response_data.csv')

# RT data analysis

In [12]:
response_df = pd.read_csv('C:/Users/USER/Downloads/eeg-fnirs-face-word-eeg/response-data/total_response_data.csv').drop(['Unnamed: 0'],axis=1)
print(response_df.head())
# 중복된 행 개수
num_duplicates = response_df.duplicated().sum()
print(f"중복 행 개수: {num_duplicates}")

      time type component response type_stim scroll  \
0  63.5401  EXP   unnamed     Left     image      =   
1  70.4303  EXP   unnamed    Right     image      =   
2  76.1352  EXP   unnamed    Right     image      =   
3  81.1578  EXP   unnamed    Right     image      =   
4  85.9116  EXP   unnamed     Left     image      =   

                                             stimuli          filename  \
0    'stimuli-ha-sa\\incong-cong-HA\\cong-ha-39.png'    cong-ha-39.png   
1  'stimuli-ha-sa\\incong-cong-HA\\incong-ha-19.png'  incong-ha-19.png   
2  'stimuli-ha-sa\\incong-cong-HA\\incong-ha-23.png'  incong-ha-23.png   
3  'stimuli-ha-sa\\incong-cong-SA\\incong-sa-19.png'  incong-sa-19.png   
4    'stimuli-ha-sa\\incong-cong-SA\\cong-sa-46.png'    cong-sa-46.png   

  congruency emotion  response_time  correct subject  
0       cong   happy         3.5813     True     cey  
1     incong   happy         1.4268     True     cey  
2     incong   happy         0.9624     True     cey  
3   

In [7]:
# 전체 응답 수와 정답 수 함께
summary = response_df.groupby('subject')['correct'].agg(['sum', 'count'])
summary.columns = ['n_correct', 'n_total']
summary['accuracy'] = summary['n_correct'] / summary['n_total']
print(summary)


         n_correct  n_total  accuracy
subject                              
cey            107      107  1.000000
kjy             97      107  0.906542
lyj            103      107  0.962617


In [16]:
# 피험자별 평균 반응시간
rt_summary = response_df.groupby('subject')['response_time'].mean()
print(rt_summary)
rt_correct_only = response_df[response_df['correct'] == True].groupby('subject')['response_time'].mean()
print(rt_summary)
rt_correct_only = response_df[response_df['correct'] == False].groupby('subject')['response_time'].mean()
print(rt_summary)

subject
cey    1.417844
kjy    1.547814
lyj    1.504321
Name: response_time, dtype: float64
subject
cey    1.417844
kjy    1.547814
lyj    1.504321
Name: response_time, dtype: float64
subject
cey    1.417844
kjy    1.547814
lyj    1.504321
Name: response_time, dtype: float64


In [17]:
cong_acc = response_df.groupby(['subject', 'congruency'])['correct'].mean().unstack()
print(cong_acc)

congruency      cong    incong
subject                       
cey         1.000000  1.000000
kjy         0.830189  0.981481
lyj         0.947368  0.980000


In [18]:
# 피험자별 happy/sad trial 수
trial_counts = response_df.groupby(['subject', 'emotion']).size().unstack(fill_value=0)
print(trial_counts)

emotion  happy  sad
subject            
cey         52   55
kjy         56   51
lyj         50   57


In [19]:
# 개수 + 정답 수 + 정확도까지 정리
summary = response_df.groupby(['subject', 'emotion'])['correct'].agg(['count', 'sum', 'mean'])
summary.columns = ['n_trials', 'n_correct', 'accuracy']
print(summary)


                 n_trials  n_correct  accuracy
subject emotion                               
cey     happy          52         52  1.000000
        sad            55         55  1.000000
kjy     happy          56         51  0.910714
        sad            51         46  0.901961
lyj     happy          50         48  0.960000
        sad            57         55  0.964912
