In [1]:
import pandas as pd
from scipy import stats
import glob
import os
import numpy as np

### Import Raw Data .csv 

In [None]:
def load_csv_data_from_directory(directory_path):
    """Read every ``*.csv`` file directly inside a directory into one DataFrame.

    Files are read in sorted path order so the row order of the combined
    frame is reproducible; ``glob.glob`` on its own returns paths in an
    arbitrary, filesystem-dependent order.

    Parameters
    ----------
    directory_path : str or path-like
        Directory to search (sub-directories are not searched).

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files with a fresh ``0..n-1`` index,
        or an empty DataFrame when the directory contains no CSV files.
    """
    csv_files = sorted(glob.glob(os.path.join(directory_path, '*.csv')))
    print(f'Found {len(csv_files)} CSV files.')

    if not csv_files:
        print("No CSV files found in the directory.")
        return pd.DataFrame()

    dataframes_list = []
    for csv_file in csv_files:
        print(f'Reading {csv_file}...')
        dataframes_list.append(pd.read_csv(csv_file))

    # ignore_index=True discards the per-file row labels.
    return pd.concat(dataframes_list, ignore_index=True)

In [None]:
import os
import pandas as pd

def load_csv_files(path, prefix='nback'):
    """Load the CSV files in ``path`` whose names start with ``prefix``.

    Parameters
    ----------
    path : str or path-like
        Directory to scan (sub-directories are not searched).
    prefix : str, optional
        Only files whose name starts with this string are loaded.
        Defaults to ``'nback'``.

    Returns
    -------
    dict[str, pandas.DataFrame]
        Maps each matching file name, without its extension, to the
        DataFrame read from it. Keys are inserted in sorted file-name order.
    """
    dataframes = {}
    # os.listdir returns names in arbitrary order; sort so the dict's
    # insertion order is the same on every run and platform.
    for file in sorted(os.listdir(path)):
        if file.endswith('.csv') and file.startswith(prefix):
            key = os.path.splitext(file)[0]
            dataframes[key] = pd.read_csv(os.path.join(path, file))
    return dataframes

# Directory the CSV files are loaded from.
path = "ParticipantData"
csv_dataframes = load_csv_files(path)

# Keys of the n-back frames in lexicographic order
# (so e.g. 'nback10...' sorts before 'nback2...').
nback_dataframekeys = sorted(name for name in csv_dataframes if name.startswith('nback'))

# The n-back frames only, inserted in the sorted key order above.
nback_dfs = {name: csv_dataframes[name] for name in nback_dataframekeys}

print("nback dataframe keys:", nback_dataframekeys)

nback dataframe keys: ['nback10_untitled_2023-12-07_19h15.11.770', 'nback15_untitled_2023-12-11_14h03.28.052', 'nback16_untitled_2023-12-11_16h21.30.750', 'nback20_untitled_2023-12-11_18h21.31.407', 'nback21_untitled_2023-12-12_11h18.41.563', 'nback22_untitled_2023-12-12_14h54.13.735', 'nback23_untitled_2023-12-12_17h51.46.199', 'nback24_untitled_2023-12-13_13h29.05.845', 'nback25_untitled_2023-12-13_18h22.37.815', 'nback26_untitled_2023-12-14_14h15.22.840', 'nback27_untitled_2023-12-14_16h05.24.918', 'nback28_untitled_2023-12-14_17h46.02.251', 'nback29_untitled_2023-12-15_09h57.44.101', 'nback2_untitled_2023-12-07_12h20.10.465', 'nback30_untitled_2023-12-15_13h21.04.144', 'nback31_untitled_2023-12-15_15h56.47.539', 'nback32_untitled_2023-12-18_09h49.59.030', 'nback33_untitled_2023-12-18_12h03.06.949', 'nback34_untitled_2023-12-18_13h55.48.507', 'nback35_untitled_2023-12-18_15h48.45.427', 'nback36_untitled_2023-12-18_18h19.26.486', 'nback37_untitled_2023-12-19_09h44.14.666', 'nback38_u

### N-back Task Features

In [None]:
# Keys of every loaded n-back frame, in lexicographic order.
nback_dataframekeys = sorted(key for key in csv_dataframes if key.startswith('nback'))

if not nback_dataframekeys:
    # Stop with an explicit message rather than the bare IndexError that
    # indexing an empty key list would raise on the next statement.
    raise ValueError("No dataframes whose key starts with 'nback' were loaded.")

# First n-back frame in sorted key order.
psychopydf = csv_dataframes[nback_dataframekeys[0]]

# Build the lookup once, following the sorted key list, so its iteration
# order does not depend on the order in which the files were read.
nback_dfs = {key: csv_dataframes[key] for key in nback_dataframekeys}

In [None]:
# Per-participant summary metrics; each metric is a list with one entry per
# n-back level (index 0, 1, 2).
nback_participants_data = {}

# The participant number is the run of digits between the 'nback' prefix and
# the first underscore of each dataframe key.
participants_indexes = sorted(int(key[5:].split('_')[0]) for key in nback_dfs.keys())

for idx in participants_indexes:
    key = [key for key in nback_dfs.keys() if key.startswith(f'nback{idx}_')][0]
    psychopydf = nback_dfs[key]

    # A response_<level>.rt column can be absent from a file (presumably when
    # no response time was recorded for that level - TODO confirm). Add it as
    # all-NaN for every level, not only level 0, so the column selection below
    # cannot raise KeyError. The column is added in place on the stored frame
    # because later cells read 'response_<level>.rt' from the same frame.
    for i in range(3):
        if f'response_{i}.rt' not in psychopydf.columns:
            psychopydf[f'response_{i}.rt'] = np.nan

    psychopydf_selectedcols = psychopydf[['response_0.keys', 'response_0.corr', 'response_1.keys', 'response_1.corr', 'response_2.keys', 'response_2.corr',
                                          'response_0.rt', 'response_1.rt', 'response_2.rt', 'corrAns', 'trials_0.thisN', 'trials_1.thisN', 'trials_2.thisN']]

    participant_data = {
        'hits': [],
        'hit_rate': [],
        'correct_rejections': [],
        'rt_target': [],
        'rt_non_target': [],
        'composite_score': [],
    }

    for i in range(3):  # For nback 0, nback 1, nback 2
        corr_col = f'response_{i}.corr'
        rt_col = f'response_{i}.rt'

        # Rows belonging to this level are those with a trial counter value.
        level_trials = psychopydf_selectedcols[psychopydf_selectedcols[f'trials_{i}.thisN'].notna()]

        # Non-target trials carry the answer 'None'. pandas >= 2.0 reads the
        # text "None" in a CSV as a missing value, so a missing corrAns on a
        # trial row is treated as a non-target too; otherwise every trial
        # would be counted as a target under newer pandas.
        # NOTE(review): assumes a trial row never has a genuinely missing
        # corrAns for any other reason - confirm against the raw files.
        is_non_target = (level_trials['corrAns'] == 'None') | level_trials['corrAns'].isna()
        is_target = ~is_non_target
        is_correct = level_trials[corr_col] == 1
        is_incorrect = level_trials[corr_col] == 0

        # Calculate Hits and Hit Rate (percentage of target trials scored correct)
        total_targets = int(is_target.sum())
        hits = int((is_target & is_correct).sum())
        hit_rate = (hits / total_targets * 100) if total_targets > 0 else 0

        # Calculate Correct Rejections (percentage of non-target trials scored correct)
        total_non_targets = int(is_non_target.sum())
        correct_rejections = int((is_non_target & is_correct).sum())
        correct_rejections_percentage = (correct_rejections / total_non_targets) * 100 if total_non_targets > 0 else 0

        # Calculate RT Target: mean RT over correctly answered target trials,
        # multiplied by 1000. NaN when there are no such trials.
        rt_target = level_trials.loc[is_target & is_correct, rt_col].mean() * 1000

        # Calculate RT Non-Target: mean RT over non-target trials scored as
        # incorrect, multiplied by 1000. NaN when there are no such trials.
        rt_non_target = level_trials.loc[is_non_target & is_incorrect, rt_col].mean() * 1000

        # Calculate Composite Score: hit proportion per unit of target RT.
        # A NaN rt_target fails the `> 0` test, so the score falls back to 0.
        composite_score = ((hit_rate / 100) / rt_target) * 100 if rt_target > 0 else 0

        participant_data['hits'].append(hits)
        participant_data['hit_rate'].append(hit_rate)
        participant_data['correct_rejections'].append(correct_rejections_percentage)
        participant_data['rt_target'].append(rt_target)
        participant_data['rt_non_target'].append(rt_non_target)
        participant_data['composite_score'].append(composite_score)

    nback_participants_data[idx] = participant_data

print(nback_participants_data)

{2: {'hits': [8, 9, 7], 'hit_rate': [100.0, 90.0, 87.5], 'correct_rejections': [100.0, 100.0, 93.75], 'rt_target': [429.662737500621, 468.9398778007469, 395.24815713853707], 'rt_non_target': [nan, nan, 1204.8808499821462], 'composite_score': [0.23274068536105127, 0.191922257544156, 0.2213799063187806]}, 3: {'hits': [8, 10, 8], 'hit_rate': [100.0, 100.0, 100.0], 'correct_rejections': [100.0, 100.0, 96.875], 'rt_target': [432.0889249938773, 444.56208002520725, 543.1930249906145], 'rt_non_target': [nan, nan, 2013.5157000040635], 'composite_score': [0.2314338420069827, 0.2249404627455627, 0.18409662016872885]}, 4: {'hits': [8, 4, 8], 'hit_rate': [100.0, 40.0, 100.0], 'correct_rejections': [100.0, 100.0, 96.875], 'rt_target': [404.70643748994917, 649.7102749708574, 789.0615499636624], 'rt_non_target': [nan, nan, 1578.8971000583842], 'composite_score': [0.2470926843175888, 0.06156590335868429, 0.12673282585446619]}, 10: {'hits': [8, 10, 8], 'hit_rate': [100.0, 100.0, 100.0], 'correct_rejecti

In [None]:
# Raw per-trial responses for every participant, keyed by participant number.
participants_nback_data_per_trial = {}

# Positional (start, stop) row ranges read for the 0-, 1- and 2-back levels;
# stop is exclusive, so each range spans 40 rows.
nback_row_ranges = ((1, 41), (42, 82), (83, 123))

# Output-key suffix paired with the matching 'response_<level>.' column suffix.
per_trial_fields = (('correct', 'corr'), ('key', 'keys'), ('rt', 'rt'))

for idx in participants_indexes:
    matching_keys = [name for name in nback_dfs.keys() if name.startswith(f'nback{idx}_')]
    psychopydf = nback_dfs[matching_keys[0]]

    # Pre-seed every key so the dict keeps the order
    # '<level>_correct' x3, '<level>_key' x3, '<level>_rt' x3.
    participant_data = {
        f'{level}_{field}': []
        for field, _ in per_trial_fields
        for level in range(3)
    }

    for i, (first_row, stop_row) in enumerate(nback_row_ranges):  # nback 0, 1, 2
        # Select the rows for the current nback level by position.
        level_rows = psychopydf.iloc[first_row:stop_row]

        for field, column_suffix in per_trial_fields:
            participant_data[f'{i}_{field}'] = level_rows[f'response_{i}.{column_suffix}'].tolist()

    participants_nback_data_per_trial[idx] = participant_data

### Mental Rotation Test (MRT) Features

In [None]:
import pandas as pd

mrt_df = pd.read_csv('ParticipantData/MRT_spatial_ability.csv')
mrt_median = mrt_df['MRTScore'].median()

# Rank-based split: the len(mrt_df) // 2 participants with the highest
# MRTScore are labelled 'High', everyone else 'Low'. Participants tied at the
# cut-off can therefore end up in different groups; a stable sort is used so
# that this tie-break is deterministic instead of depending on the sort
# implementation.
mrt_df['MRTGroup'] = 'Low'
high_group_index = (
    mrt_df['MRTScore']
    .sort_values(ascending=False, kind='mergesort')
    .index[:len(mrt_df) // 2]
)
mrt_df.loc[high_group_index, 'MRTGroup'] = 'High'

# MRT values for each participant that also has n-back data.
mrt_participants_data = {}

for idx in participants_indexes:
    # Fetch the data for the participant and reset the index
    mrt_participant_data = mrt_df[mrt_df['ParticipantID'] == idx].reset_index(drop=True)

    if mrt_participant_data.empty:
        print(f"No data found for participant {idx}")
        continue

    # Only the first matching row is used.
    mrt_participants_data[idx] = {
        'MRTScore': mrt_participant_data['MRTScore'].iloc[0],
        'MRTAcc': mrt_participant_data['MRTAcc'].iloc[0],
        'MRTGroup': mrt_participant_data['MRTGroup'].iloc[0]
    }

# Count the number of 'Low' and 'High' values in the 'MRTGroup' column
mrt_group_counts = mrt_df['MRTGroup'].value_counts()

print(mrt_group_counts)

Low     30
High    30
Name: MRTGroup, dtype: int64


### Combine N-back and MRT Features into a DataFrame for .csv Export

In [None]:
import numpy as np
import pandas as pd

# One output row per (participant, subexperiment); the MRT and n-back features
# are per-participant, so they repeat unchanged across that participant's rows.
data = []

# Subexperiments are numbered 1..N_SUBEXPERIMENTS.
N_SUBEXPERIMENTS = 8

# Values used when a participant has n-back data but no MRT record, so the
# n-back features are still exported instead of the cell failing with KeyError.
missing_mrt_data = {'MRTScore': np.nan, 'MRTAcc': np.nan, 'MRTGroup': np.nan}

for participant_id in participants_indexes:
    participant_nback_data = nback_participants_data[participant_id]
    mrt_participant_data = mrt_participants_data.get(participant_id, missing_mrt_data)

    # Features that do not depend on the subexperiment are built once per
    # participant. Insertion order defines the column order of the output.
    participant_features = {
        'MRTScore': mrt_participant_data['MRTScore'],
        'MRTAcc': mrt_participant_data['MRTAcc'],
        'MRTGroup': mrt_participant_data['MRTGroup'],
    }

    for level in range(3):  # nback 0, nback 1, nback 2
        participant_features[f'nback_hits_{level}'] = participant_nback_data['hits'][level]
        participant_features[f'nback_hit_rate_{level}'] = participant_nback_data['hit_rate'][level]
        participant_features[f'nback_correct_rejections_{level}'] = participant_nback_data['correct_rejections'][level]
        participant_features[f'nback_rt_target_{level}'] = participant_nback_data['rt_target'][level]
        # Per-level columns are spelled 'nontarget' while the average below is
        # 'non_target'; both spellings are kept so the CSV schema is unchanged.
        participant_features[f'nback_rt_nontarget_{level}'] = participant_nback_data['rt_non_target'][level]
        participant_features[f'nback_composite_{level}'] = participant_nback_data['composite_score'][level]

    # Averages across the three levels. np.mean propagates NaN, so an average
    # is NaN whenever any single level's value is NaN.
    participant_features['nback_hits_avg'] = np.mean([participant_nback_data['hits'][i] for i in range(3)])
    participant_features['nback_hit_rate_avg'] = np.mean([participant_nback_data['hit_rate'][i] for i in range(3)])
    participant_features['nback_rt_target_avg'] = np.mean([participant_nback_data['rt_target'][i] for i in range(3)])
    participant_features['nback_rt_non_target_avg'] = np.mean([participant_nback_data['rt_non_target'][i] for i in range(3)])
    participant_features['nback_composite_avg'] = np.mean([participant_nback_data['composite_score'][i] for i in range(3)])

    for subexperiment in range(1, N_SUBEXPERIMENTS + 1):
        participant_data = {
            'participant_number': participant_id,
            'subexperiment_number': subexperiment,
            **participant_features,
        }
        data.append(participant_data)

pre_post_test_analysis_df = pd.DataFrame(data)

print(pre_post_test_analysis_df)

     participant_number  subexperiment_number  MRTScore  MRTAcc MRTGroup  \
0                     2                     1         0    0.00      Low   
1                     2                     2         0    0.00      Low   
2                     2                     3         0    0.00      Low   
3                     2                     4         0    0.00      Low   
4                     2                     5         0    0.00      Low   
..                  ...                   ...       ...     ...      ...   
475                  81                     4         4   16.67      Low   
476                  81                     5         4   16.67      Low   
477                  81                     6         4   16.67      Low   
478                  81                     7         4   16.67      Low   
479                  81                     8         4   16.67      Low   

     nback_hits_0  nback_hit_rate_0  nback_correct_rejections_0  \
0               8   

### Write the DataFrame to .csv

In [None]:
# Write the combined feature table to disk; index=False leaves the row index
# out of the file.
output_csv_path = 'pre_post_test_analysis.csv'
pre_post_test_analysis_df.to_csv(output_csv_path, index=False)