# Calculation of Behavioral Parameters

In [22]:
import pandas as pd
import numpy as np
from scipy.stats import binomtest
from openpyxl import load_workbook

Behavioral parameters: 
* Response Time (RT);
* Performance Accuracy;
* Memory Capacity;

These parameters are evaluated for the right visual field (RVF) and left visual field (LVF). 
Initially, Hit, Miss, False Alarm, and Correct Rejection are determined, followed by the calculation of the behavioral parameters.

## **Import and Inspect the Data**

In [23]:
# Import raw data
data_raw = pd.read_excel("raw_data/Data_behav_metric_DA.xlsx", usecols = "A:G")

In [24]:
# Replace RT < 150 ms, but not when RT = 0 with NaN values
data_raw.loc[(data_raw['MemoryTest.RT'] < 150) & (data_raw['MemoryTest.RT'] > 0), 'MemoryTest.RT'] = np.nan

In [25]:
# Replace values and names with English ones

data_raw.rename(columns = {'StimKiek': 'Stimulus Count', 'Kryptis': 'Visual Field'}, inplace = True)

data_raw['Visual Field'] = data_raw['Visual Field'].replace({'Dešinė': 'R', 'Kairė': 'L'})

In [26]:
data_raw.head()

Unnamed: 0,Subject,MemoryTest.ACC,MemoryTest.CRESP,MemoryTest.RESP,MemoryTest.RT,Stimulus Count,Visual Field
0,2,1,5,5.0,703.0,3,R
1,2,1,5,5.0,723.0,4,R
2,2,0,4,5.0,783.0,3,R
3,2,1,5,5.0,722.0,3,L
4,2,1,5,5.0,582.0,3,L


## **Hit, Miss, False Alarm, Correct Rejection, Performance Accuracy, Memory Capacity** 

* **ACC:** indicates correct/incorrect response (0 or 1)
* **CRESP:** marks correct answer 
* **RESP:** participant response
* **RT:** response time

---

Hit, Miss, False Alarm, and Correct Rejection of each subject is calculated in each condition (3 or 4 stimuli in left or right visual field - 3L, 3R, 4L, 4R)

**Calculations:**

* **Hit:** MemoryTest.CRESP = 4 + MemoryTest.RESP = 4
* **Miss:** MemoryTest.CRESP = 4 + MemoryTest.RESP = 5
* **False alarm:** MemoryTest.CRESP = 5 + MemoryTest.RESP = 4
* **Correct rejection:** MemoryTest.CRESP = 5 + MemoryTest.RESP = 5

* **Percentage** = (Hit\Miss\False alarm\Correct rejection) / n
  * Where n is repetition count for condition:
    * n = 60, when calculating RVF/LVF or 3/4 stimuli. Calculation is 240 / 2 (one of the conditions) / 2 (change did / did not happen)
    * n = 30, when calculating each of the conditions: 3R, 3L, 4R, 4L

* **Performance accuracy (%, ACC)** = (((p(hit) + p(correct rejection)) / 2) – p(false alarm)) × 100%
* **Memory capacity** = (((p(hit) - p(false alarm)) / (1 – p(false alarm))) × set size

For RVF / LVF:
  * **Performance** -> (performance_3_x + performance_4_x) / 2
  * **Capacity** -> (capacity_3_x + capacity_4_x) / 2


In [27]:
import pandas as pd
import numpy as np

# Define the calculation function
def calculate_metrics(df):
    """
    This function processes raw behavioral data from visual working memory (VWM) tasks.
    It calculates the rates of Hit, Miss, False Alarm, Correct Rejection, Task Performance,
    and Memory Capacity for all unique conditions specified in the DataFrame.

    - df (DataFrame): The raw behavioral data.

    Returns a DataFrame containing the computed metrics for each subject under all conditions.
    """
    
    # DataFrame to store all metrics
    all_metrics = pd.DataFrame()

    # Define the conditions
    conditions = [
        ('R', 3, '_3R', 30), ('L', 3, '_3L', 30), ('R', 4, '_4R', 30), ('L', 4, '_4L', 30),
        ('L', None, '_L', 60), ('R', None, '_R', 60), (None, 3, '_3', 60), (None, 4, '_4', 60)
    ]

    for visual_field, stimulus_count, condition_suffix, n in conditions:
        if visual_field is not None and stimulus_count is not None:
            filtered_df = df[(df['Visual Field'] == visual_field) & (df['Stimulus Count'] == stimulus_count)]
            set_size = stimulus_count
        elif visual_field is not None:
            filtered_df = df[df['Visual Field'] == visual_field]
            set_size = None  # Not used for combined visual field conditions
        elif stimulus_count is not None:
            filtered_df = df[df['Stimulus Count'] == stimulus_count]
            set_size = stimulus_count

        if not filtered_df.empty:
            # Calculate hits, misses, false alarms, and correct rejections
            hit = filtered_df[(filtered_df['MemoryTest.CRESP'] == 4) & (filtered_df['MemoryTest.RESP'] == 4)].groupby('Subject').size().reset_index(name=f'Hit{condition_suffix}')
            miss = filtered_df[(filtered_df['MemoryTest.CRESP'] == 4) & (filtered_df['MemoryTest.RESP'] == 5)].groupby('Subject').size().reset_index(name=f'Miss{condition_suffix}')
            false_alarm = filtered_df[(filtered_df['MemoryTest.CRESP'] == 5) & (filtered_df['MemoryTest.RESP'] == 4)].groupby('Subject').size().reset_index(name=f'False_Alarm{condition_suffix}')
            correct_rejection = filtered_df[(filtered_df['MemoryTest.CRESP'] == 5) & (filtered_df['MemoryTest.RESP'] == 5)].groupby('Subject').size().reset_index(name=f'Correct_Rejection{condition_suffix}')
        
            # Merge metrics into one dataframe
            metrics = pd.merge(hit, miss, on='Subject', how='outer')
            metrics = pd.merge(metrics, false_alarm, on='Subject', how='outer')
            metrics = pd.merge(metrics, correct_rejection, on='Subject', how='outer')
            metrics.fillna(0, inplace=True)

            # Calculate percentages
            for col in ['Hit', 'Miss', 'False_Alarm', 'Correct_Rejection']:
                col_name = f'{col}{condition_suffix}'
                metrics[f'P_{col_name}'] = metrics[col_name] / n

            # Calculate performance and memory capacity
            performance_col_name = f'Performance{condition_suffix}'
            if set_size is not None:
                memory_capacity_col_name = f'Memory_Capacity{condition_suffix}'
                metrics[performance_col_name] = (((metrics[f'P_Hit{condition_suffix}'] + metrics[f'P_Correct_Rejection{condition_suffix}']) / 2) - metrics[f'P_False_Alarm{condition_suffix}']) * 100
                metrics[memory_capacity_col_name] = (((metrics[f'P_Hit{condition_suffix}'] - metrics[f'P_False_Alarm{condition_suffix}']) / (1 - metrics[f'P_False_Alarm{condition_suffix}'])) * set_size)
            else:
                metrics[performance_col_name] = (((metrics[f'P_Hit{condition_suffix}'] + metrics[f'P_Correct_Rejection{condition_suffix}']) / 2) - metrics[f'P_False_Alarm{condition_suffix}']) * 100

            if all_metrics.empty:
                all_metrics = metrics
            else:
                all_metrics = pd.merge(all_metrics, metrics, on='Subject', how='outer')

    # Add combined metrics for visual field and stimulus count conditions
    def calculate_combined_metrics(metrics_df, visual_field=None, stimulus_count=None):
        if visual_field is not None:
            combined_suffix = f'_{visual_field}'
            performance_col_name = f'Performance{combined_suffix}'
            memory_capacity_col_name = f'Memory_Capacity{combined_suffix}'
            metrics_df[performance_col_name] = (metrics_df[f'Performance_3{visual_field}'] + metrics_df[f'Performance_4{visual_field}']) / 2
            metrics_df[memory_capacity_col_name] = (metrics_df[f'Memory_Capacity_3{visual_field}'] + metrics_df[f'Memory_Capacity_4{visual_field}']) / 2
        elif stimulus_count is not None:
            combined_suffix = f'_{stimulus_count}'
            performance_col_name = f'Performance{combined_suffix}'
            memory_capacity_col_name = f'Memory_Capacity{combined_suffix}'
            metrics_df[performance_col_name] = (metrics_df[f'Performance_{stimulus_count}L'] + metrics_df[f'Performance_{stimulus_count}R']) / 2
            metrics_df[memory_capacity_col_name] = (metrics_df[f'Memory_Capacity_{stimulus_count}L'] + metrics_df[f'Memory_Capacity_{stimulus_count}R']) / 2
        return metrics_df

    for vf in ['L', 'R']:
        all_metrics = calculate_combined_metrics(all_metrics, visual_field=vf)

    for sc in [3, 4]:
        all_metrics = calculate_combined_metrics(all_metrics, stimulus_count=sc)

    all_metrics.fillna(0, inplace=True)
    return all_metrics


In [28]:
all_metrics = calculate_metrics(data_raw)

In [29]:
all_metrics.head

<bound method NDFrame.head of      Subject  Hit_3R  Miss_3R  False_Alarm_3R  Correct_Rejection_3R  P_Hit_3R  \
0          1      19     11.0             3.0                    27  0.633333   
1          2      26      4.0             2.0                    28  0.866667   
2          3      22      8.0             1.0                    29  0.733333   
3          4      18      8.0             0.0                    28  0.600000   
4          5      22      8.0             0.0                    30  0.733333   
..       ...     ...      ...             ...                   ...       ...   
175      635      24      6.0             4.0                    26  0.800000   
176      636      26      4.0             2.0                    27  0.866667   
177      637      20     10.0             6.0                    24  0.666667   
178      638      24      6.0             4.0                    26  0.800000   
179      639      21      9.0             2.0                    28  0.700000  

In [24]:
# Write to Excel
all_metrics.to_excel("VWM_Perf_Cap.xlsx", index = False)

## **Response Time, Accuracy, Assymetry Indices, and Subject's Data rejection** 

In [30]:
# Filter out correct answers (RT < 150 ms is already filtered out)
correct_answers = data_raw[data_raw['MemoryTest.ACC'] == 1]

### Calculate Mean Values: RT, Accuracy, Performance, Memory Capacity

In [31]:
# Mean RT for each subject 
VWM_RT_ACC = correct_answers.groupby('Subject')['MemoryTest.RT'].mean().reset_index()
VWM_RT_ACC.rename(columns = {'MemoryTest.RT': 'Mean_RT'}, inplace = True)

# Mean accuracy
VWM_RT_ACC['Mean_ACC'] = correct_answers.groupby('Subject').size().div(240).reset_index(name = 'Mean_ACC')['Mean_ACC']

# Mean performance
VWM_RT_ACC = VWM_RT_ACC.merge(all_metrics[['Subject', 'Performance_L', 'Performance_R']], on = 'Subject', how = 'left')
VWM_RT_ACC['Mean_Performance'] = (VWM_RT_ACC['Performance_L'] + VWM_RT_ACC['Performance_R']) / 2

# Mean memory capacity
VWM_RT_ACC = VWM_RT_ACC.merge(all_metrics[['Subject', 'Memory_Capacity_L', 'Memory_Capacity_R']], on = 'Subject', how = 'left')
VWM_RT_ACC['Mean_Capacity'] = (VWM_RT_ACC['Memory_Capacity_L'] + VWM_RT_ACC['Memory_Capacity_R']) / 2

# Mean RT for 3 and 4 stimuli
rt_3 = correct_answers[correct_answers['Stimulus Count'] == 3].groupby('Subject')['MemoryTest.RT'].mean().reset_index(name = 'RT_3')
rt_4 = correct_answers[correct_answers['Stimulus Count'] == 4].groupby('Subject')['MemoryTest.RT'].mean().reset_index(name = 'RT_4')

VWM_RT_ACC = VWM_RT_ACC.merge(rt_3, on = 'Subject', how = 'left')
VWM_RT_ACC = VWM_RT_ACC.merge(rt_4, on = 'Subject', how = 'left')

# LVF and RVF: Mean RT, ACC, and Performance
for visual_field in ['L', 'R']:
    rt_vf = correct_answers[correct_answers['Visual Field'] == visual_field].groupby('Subject')['MemoryTest.RT'].mean().reset_index(name = f'{visual_field}VF_RT')
    acc_vf = correct_answers[correct_answers['Visual Field'] == visual_field].groupby('Subject').size().div(120).reset_index(name = f'{visual_field}VF_ACC')
    
    VWM_RT_ACC = VWM_RT_ACC.merge(rt_vf, on = 'Subject', how = 'left')
    VWM_RT_ACC = VWM_RT_ACC.merge(acc_vf, on = 'Subject', how = 'left')
    VWM_RT_ACC[f'{visual_field}VF_Performance'] = all_metrics[f'Performance_{visual_field}']

# LVF and RVF: Mean RT, ACC, and Performance for 3 and 4 stimuli
for visual_field in ['L', 'R']:
    for stim_count in [3, 4]:
        condition_rt = correct_answers[(correct_answers['Visual Field'] == visual_field) & (correct_answers['Stimulus Count'] == stim_count)].groupby('Subject')['MemoryTest.RT'].mean().reset_index(name = f'{visual_field}VF_RT_{stim_count}')
        condition_acc = correct_answers[(correct_answers['Visual Field'] == visual_field) & (correct_answers['Stimulus Count'] == stim_count)].groupby('Subject').size().div(60).reset_index(name = f'{visual_field}VF_ACC_{stim_count}')
        
        VWM_RT_ACC = VWM_RT_ACC.merge(condition_rt, on = 'Subject', how = 'left')
        VWM_RT_ACC = VWM_RT_ACC.merge(condition_acc, on = 'Subject', how = 'left')
        VWM_RT_ACC[f'{visual_field}VF_Performance_{stim_count}'] = all_metrics[f'Performance_{stim_count}{visual_field}']

In [32]:
VWM_RT_ACC

Unnamed: 0,Subject,Mean_RT,Mean_ACC,Performance_L,Performance_R,Mean_Performance,Memory_Capacity_L,Memory_Capacity_R,Mean_Capacity,RT_3,...,LVF_Performance_3,LVF_RT_4,LVF_ACC_4,LVF_Performance_4,RVF_RT_3,RVF_ACC_3,RVF_Performance_3,RVF_RT_4,RVF_ACC_4,RVF_Performance_4
0,1,920.752747,0.758333,75.833333,57.500000,66.666667,2.142857,1.722222,1.932540,874.666667,...,83.333333,973.600000,0.750000,68.333333,886.021739,0.766667,66.666667,970.658537,0.683333,48.333333
1,2,819.883178,0.891667,77.500000,85.833333,81.666667,3.116667,3.078818,3.097742,749.305556,...,90.000000,902.705882,0.850000,65.000000,783.648148,0.900000,83.333333,881.672727,0.916667,88.333333
2,3,929.744681,0.783333,75.000000,66.666667,70.833333,2.189655,2.206207,2.197931,833.644231,...,85.000000,1027.511628,0.716667,65.000000,839.490196,0.850000,81.666667,1070.975610,0.683333,51.666667
3,4,840.988439,0.720833,58.333333,60.833333,59.583333,1.904762,2.117391,2.011077,746.810526,...,75.000000,981.135135,0.616667,41.666667,737.065217,0.766667,76.666667,932.731707,0.683333,45.000000
4,5,742.099448,0.754167,46.666667,67.500000,57.083333,2.096154,2.266667,2.181410,709.940594,...,68.333333,846.948718,0.650000,25.000000,697.307692,0.866667,86.666667,721.585366,0.683333,48.333333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,635,959.151961,0.850000,73.333333,75.000000,74.166667,2.761494,2.868132,2.814813,854.679245,...,90.000000,1132.978261,0.766667,56.666667,869.740000,0.833333,70.000000,1018.346154,0.866667,80.000000
176,636,908.708995,0.787500,65.833333,73.333333,69.583333,2.017931,2.470899,2.244415,835.447619,...,83.333333,990.307692,0.650000,48.333333,881.301887,0.883333,81.666667,1008.933333,0.750000,65.000000
177,637,605.339286,0.700000,56.666667,40.000000,48.333333,2.090909,1.351190,1.721050,609.141304,...,70.000000,564.571429,0.700000,43.333333,622.340909,0.733333,53.333333,645.411765,0.566667,26.666667
178,638,951.221106,0.829167,83.333333,57.500000,70.416667,2.992308,2.197324,2.594816,825.537736,...,93.333333,1061.865385,0.866667,73.333333,861.940000,0.833333,70.000000,1135.829268,0.683333,45.000000


### Participants' Data Rejection

In [12]:
VWM_RT_ACC.head()

Unnamed: 0,Subject,Mean_RT,Mean_ACC,Performance_L,Performance_R,Mean_Performance,Memory_Capacity_L,Memory_Capacity_R,Mean_Capacity,RT_3,...,LVF_Performance_3,LVF_RT_4,LVF_ACC_4,LVF_Performance_4,RVF_RT_3,RVF_ACC_3,RVF_Performance_3,RVF_RT_4,RVF_ACC_4,RVF_Performance_4
0,1,920.752747,0.758333,75.833333,57.5,66.666667,2.142857,1.722222,1.93254,874.666667,...,83.333333,973.6,0.75,68.333333,886.021739,0.766667,66.666667,970.658537,0.683333,48.333333
1,2,819.883178,0.891667,77.5,85.833333,81.666667,3.116667,3.078818,3.097742,749.305556,...,90.0,902.705882,0.85,65.0,783.648148,0.9,83.333333,881.672727,0.916667,88.333333
2,3,929.744681,0.783333,75.0,66.666667,70.833333,2.189655,2.206207,2.197931,833.644231,...,85.0,1027.511628,0.716667,65.0,839.490196,0.85,81.666667,1070.97561,0.683333,51.666667
3,4,840.988439,0.720833,58.333333,60.833333,59.583333,1.904762,2.117391,2.011077,746.810526,...,75.0,981.135135,0.616667,41.666667,737.065217,0.766667,76.666667,932.731707,0.683333,45.0
4,5,742.099448,0.754167,46.666667,67.5,57.083333,2.096154,2.266667,2.18141,709.940594,...,68.333333,846.948718,0.65,25.0,697.307692,0.866667,86.666667,721.585366,0.683333,48.333333


#### **For mean accuracy (no of correct responses)**

**Binomial test to determine the threshold**

In [33]:
n = 240  # No of trials per participant
p = 0.5 


# min number of correct responses needed to consider performance above chance
threshold = min(x for x in range(n+1) 
                            if binomtest(x, n, p, alternative='greater').pvalue < 0.05)

print(f"Thresold (number of correct responses): {threshold}")

Thresold (number of correct responses): 134


**Determine the rejection**

Exclude participants who are not performing above the determined threshold

In [34]:
# No of correct responses
VWM_RT_ACC['Correct_Answers'] = correct_answers.groupby('Subject')['MemoryTest.ACC'].sum().reset_index(name = 'Correct_Answers')['Correct_Answers']

# Rejection
VWM_RT_ACC['Rejection'] = VWM_RT_ACC['Correct_Answers'].apply(lambda acc: 'keep' if acc >= threshold else 'reject')

VWM_RT_ACC.head()

Unnamed: 0,Subject,Mean_RT,Mean_ACC,Performance_L,Performance_R,Mean_Performance,Memory_Capacity_L,Memory_Capacity_R,Mean_Capacity,RT_3,...,LVF_ACC_4,LVF_Performance_4,RVF_RT_3,RVF_ACC_3,RVF_Performance_3,RVF_RT_4,RVF_ACC_4,RVF_Performance_4,Correct_Answers,Rejection
0,1,920.752747,0.758333,75.833333,57.5,66.666667,2.142857,1.722222,1.93254,874.666667,...,0.75,68.333333,886.021739,0.766667,66.666667,970.658537,0.683333,48.333333,182,keep
1,2,819.883178,0.891667,77.5,85.833333,81.666667,3.116667,3.078818,3.097742,749.305556,...,0.85,65.0,783.648148,0.9,83.333333,881.672727,0.916667,88.333333,214,keep
2,3,929.744681,0.783333,75.0,66.666667,70.833333,2.189655,2.206207,2.197931,833.644231,...,0.716667,65.0,839.490196,0.85,81.666667,1070.97561,0.683333,51.666667,188,keep
3,4,840.988439,0.720833,58.333333,60.833333,59.583333,1.904762,2.117391,2.011077,746.810526,...,0.616667,41.666667,737.065217,0.766667,76.666667,932.731707,0.683333,45.0,173,keep
4,5,742.099448,0.754167,46.666667,67.5,57.083333,2.096154,2.266667,2.18141,709.940594,...,0.65,25.0,697.307692,0.866667,86.666667,721.585366,0.683333,48.333333,181,keep


In [15]:
# Participants' data to reject:
VWM_RT_ACC[(VWM_RT_ACC['Rejection'] == 'reject')]

Unnamed: 0,Subject,Mean_RT,Mean_ACC,Performance_L,Performance_R,Mean_Performance,Memory_Capacity_L,Memory_Capacity_R,Mean_Capacity,RT_3,...,LVF_ACC_4,LVF_Performance_4,RVF_RT_3,RVF_ACC_3,RVF_Performance_3,RVF_RT_4,RVF_ACC_4,RVF_Performance_4,Correct_Answers,Rejection
44,108,880.992248,0.5375,22.5,36.666667,29.583333,0.009412,0.579431,0.294422,881.478261,...,0.416667,-1.666667,806.096774,0.516667,38.333333,875.057143,0.583333,35.0,129,reject
59,123,852.976378,0.529167,21.666667,24.166667,22.916667,0.371981,0.324675,0.348328,836.969697,...,0.5,10.0,801.96875,0.533333,23.333333,896.0,0.516667,25.0,127,reject
73,205,903.314815,0.470833,-39.166667,-35.0,-37.083333,-inf,1.5,-inf,863.444444,...,0.466667,-23.333333,880.166667,0.516667,-38.333333,993.703704,0.45,-31.666667,113,reject
120,316,747.776923,0.541667,32.5,19.166667,25.833333,1.048701,-0.130435,0.459133,724.307692,...,0.6,30.0,770.285714,0.466667,23.333333,825.586207,0.483333,15.0,130,reject


In [35]:
# Rename Subject Column
VWM_RT_ACC = VWM_RT_ACC.rename(columns = {'Subject': 'id'})

In [36]:
VWM_RT_ACC.to_excel("VWM_RT_ACC_Perf_Cap.xlsx", index = False)

# Prepare the Data for Stats

In [37]:
beh_data = pd.read_excel("VWM_RT_ACC_Perf_Cap.xlsx")
all_metrics = pd.read_excel("VWM_Perf_Cap.xlsx")

In [38]:
# Rename Subject Column
all_metrics = all_metrics.rename(columns = {'Subject': 'id'})

## Select Relevant Parameters

In [39]:
all_metrics.columns

Index(['id', 'Hit_3R', 'Miss_3R', 'False_Alarm_3R', 'Correct_Rejection_3R',
       'P_Hit_3R', 'P_Miss_3R', 'P_False_Alarm_3R', 'P_Correct_Rejection_3R',
       'Performance_3R', 'Memory_Capacity_3R', 'Hit_3L', 'Miss_3L',
       'False_Alarm_3L', 'Correct_Rejection_3L', 'P_Hit_3L', 'P_Miss_3L',
       'P_False_Alarm_3L', 'P_Correct_Rejection_3L', 'Performance_3L',
       'Memory_Capacity_3L', 'Hit_4R', 'Miss_4R', 'False_Alarm_4R',
       'Correct_Rejection_4R', 'P_Hit_4R', 'P_Miss_4R', 'P_False_Alarm_4R',
       'P_Correct_Rejection_4R', 'Performance_4R', 'Memory_Capacity_4R',
       'Hit_4L', 'Miss_4L', 'False_Alarm_4L', 'Correct_Rejection_4L',
       'P_Hit_4L', 'P_Miss_4L', 'P_False_Alarm_4L', 'P_Correct_Rejection_4L',
       'Performance_4L', 'Memory_Capacity_4L', 'Hit_L', 'Miss_L',
       'False_Alarm_L', 'Correct_Rejection_L', 'P_Hit_L', 'P_Miss_L',
       'P_False_Alarm_L', 'P_Correct_Rejection_L', 'Performance_L', 'Hit_R',
       'Miss_R', 'False_Alarm_R', 'Correct_Rejection_R'

In [40]:
beh_data_mean = beh_data[['id', 'Mean_RT', 'Mean_ACC','Mean_Performance','Mean_Capacity']]

## Reshape the Data

In [41]:
# Select parameters and reshape the data to a long format
cap = all_metrics[['id', 'Memory_Capacity_3', 'Memory_Capacity_4']].melt(id_vars = ["id"], 
                  value_vars = ['Memory_Capacity_3', 'Memory_Capacity_4'],
                  var_name = "stimuli_count", 
                  value_name = "memory_capacity")

perf = all_metrics[['id', 'Performance_3', 'Performance_4']].melt(id_vars = ["id"], 
                  value_vars = ['Performance_3', 'Performance_4'],
                  var_name = "stimuli_count", 
                  value_name = "performance")

rt = beh_data[['id', 'RT_3', 'RT_4']].melt(id_vars = ["id"], 
                  value_vars = ['RT_3', 'RT_4'],
                  var_name = "stimuli_count", 
                  value_name = "rt")


In [42]:
# Map conditions
conditions = {
    'Memory_Capacity_3': '3_stim',
    'Memory_Capacity_4': '4_stim',
    'Performance_3': '3_stim',
    'Performance_4': '4_stim',
    'RT_3': '3_stim',
    'RT_4': '4_stim'
}

In [43]:
# Apply mapping
cap['stimuli_count'] = cap['stimuli_count'].map(conditions)
perf['stimuli_count'] = perf['stimuli_count'].map(conditions)
rt['stimuli_count'] = rt['stimuli_count'].map(conditions)

In [44]:
# merge DFs into one
cap_perf_rt = cap.merge(perf, on = ['id','stimuli_count'], how = 'left')
cap_perf_rt = cap_perf_rt.merge(rt, on = ['id','stimuli_count'], how = 'left')
cap_perf_rt.head()

Unnamed: 0,id,stimuli_count,memory_capacity,performance,rt
0,1,3_stim,1.888889,75.0,874.666667
1,2,3_stim,2.485714,86.666667,749.305556
2,3,3_stim,2.275862,83.333333,833.644231
3,4,3_stim,1.971429,75.833333,746.810526
4,5,3_stim,2.196154,77.5,709.940594


### Add Group Labels

In [45]:
def assign_group_label(df):
    """
    Based on the participant ID, adds a group label to the participant, as a value in new "group" column.
    
    """
    def determine_group(id_value):
        if id_value < 100:
            return 'OC'
        elif id_value < 200:
            return 'IUD'
        elif id_value < 300:
            return 'NCF'
        elif id_value < 400:
            return 'NCL'
        elif id_value > 400:
            return 'M'
        else:
            return 'Unknown'
    df = df.copy()  
    df['group'] = df['id'].apply(determine_group)
    
    return df

In [46]:
cap_perf_rt = assign_group_label(cap_perf_rt)
cap_perf_rt.head(2)

Unnamed: 0,id,stimuli_count,memory_capacity,performance,rt,group
0,1,3_stim,1.888889,75.0,874.666667,OC
1,2,3_stim,2.485714,86.666667,749.305556,OC


In [47]:
cap_perf_rt.to_excel("cap_perf_rt_for_stats.xlsx", index = False)