In [1]:
import pandas as pd
import numpy as np
import os
import glob
from sharedcontrolpaper.force_sensitive_stopping_task_utils import get_subject_label, string_to_numbers, process_trial_data, find_sum_of_intervals, convert_dict_to_df

In [2]:
parent_directory = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_directory, 'data', 'experiment')
task = "force_sensitive_stopping_task"
exp_stage = "final"
pattern = os.path.join(data_path, exp_stage, '*', task, '*.csv')
data_files = glob.glob(pattern)

In [3]:
shared_control_metrics = {}

for file in data_files:
    subject_label = get_subject_label(file)
    
    df = pd.read_csv(file)
    
    # some post processing
    df['block'] = df['block'].str.strip("'")
    # create a dataFrame for each block
    df_test = df.query("block != 'practice'")
    block_1 = df_test.query("block == 'block 1'")
    block_2 = df_test.query("block == 'block 2'")

    block_1 = block_1.reset_index(drop=True)
    block_2 = block_2.reset_index(drop=True)

    task_dfs = [block_1, block_2]
    
    for df in task_dfs:
        if 'AI-engaged' in df['condition'].values:
            ai_data = df.copy()
            ai_data['distances'] = ai_data['distances'].apply(string_to_numbers)
            ai_data['pressures'] = ai_data['pressures'].apply(string_to_numbers)
            ai_data['time_stamps'] = ai_data['time_stamps'].apply(string_to_numbers)
        else:
            non_ai_data = df.copy()
            non_ai_data['distances'] = non_ai_data['distances'].apply(string_to_numbers)
            non_ai_data['pressures'] = non_ai_data['pressures'].apply(string_to_numbers)
            non_ai_data['time_stamps'] = non_ai_data['time_stamps'].apply(string_to_numbers)
    
    shared_control_metrics[subject_label] = {'AI': {'data': ai_data}, 'Non-AI': {'data': non_ai_data}}
    
    for block in shared_control_metrics[subject_label].keys():
        trial_results, ssrt_list = process_trial_data(shared_control_metrics[subject_label][block]['data'], block=block)
        shared_control_metrics[subject_label][block]['trial_results'] = trial_results
        shared_control_metrics[subject_label][block]['ssrt_list'] = ssrt_list 


In [4]:
%store shared_control_metrics

Stored 'shared_control_metrics' (dict)


## Excluded Observations

In [5]:
exclusions = {"s027": ["AI", 80, 96]}

## Grabbing SSRT and other metrics across conditions

In [6]:
def grab_mean_metric(measure):

    """
    Function to find the mean of a specified metric across different trial conditions 
    for each subject. The provided measure should be a string representing the metric 
    to analyze (e.g., 'ssrt' or 'duration_of_inhibition'). The results are saved to a 
    CSV file with each row corresponding to a subject, with columns for the means 
    of the measure in each of the three trial conditions.

    Parameters:
    - measure (str): The name of the measure to calculate the mean for.

    Outputs:
    - Saves a CSV file with means for each subject across non_ai trials, ai_condition_stop_trials, 
      and ai_condition_ai_trials, as well as handling specific conditions based on the flag.
    """

    condition_measure = {}

    for subject in shared_control_metrics.keys():
        non_ai_trials = []
        ai_disengaged_trials = []
        ai_engaged_trials = []
        
        for block in shared_control_metrics[subject].keys():
            
            for trial in shared_control_metrics[subject][block]['trial_results'].keys():

                if subject in exclusions.keys() and trial in exclusions[subject] and block in exclusions[subject]:
                    continue
        

                if block == 'Non-AI':
                    non_ai_trials.append(shared_control_metrics[subject][block]['trial_results'][trial][measure])
                    
                elif (block == 'AI') and (shared_control_metrics[subject][block]['trial_results'][trial]['condition'] == 'AI-disengaged'):
                    ai_disengaged_trials.append(shared_control_metrics[subject][block]['trial_results'][trial][measure])
                    
                elif (block == 'AI') and (shared_control_metrics[subject][block]['trial_results'][trial]['condition'] == 'AI-engaged'):
                    ai_engaged_trials.append(shared_control_metrics[subject][block]['trial_results'][trial][measure])
                    
        avg_ai_engaged = np.nanmean(ai_engaged_trials)
        avg_ai_disengaged = np.nanmean(ai_disengaged_trials)
        avg_non_ai = np.nanmean(non_ai_trials)  
        condition_measure[subject] = {'non_ai': avg_non_ai, 'ai_disengaged': avg_ai_disengaged, 'ai_engaged': avg_ai_engaged}

    df = pd.DataFrame(condition_measure).T
    df = df.sort_index()
    return df

In [7]:
force_sensitive_stopping_task_ssrt = grab_mean_metric('ssrt')
%store force_sensitive_stopping_task_ssrt

Stored 'force_sensitive_stopping_task_ssrt' (DataFrame)


In [8]:
duration_of_inhibition = grab_mean_metric('duration_of_inhibition')
%store duration_of_inhibition

Stored 'duration_of_inhibition' (DataFrame)


In [9]:
go_task_accuracy_before_stop_onset = grab_mean_metric('go_task_accuracy_before_stop_onset')
%store go_task_accuracy_before_stop_onset

Stored 'go_task_accuracy_before_stop_onset' (DataFrame)


In [10]:
go_task_accuracy_after_stop_onset = grab_mean_metric('go_task_accuracy_after_stop_onset')
%store go_task_accuracy_after_stop_onset

Stored 'go_task_accuracy_after_stop_onset' (DataFrame)


In [24]:
ball_before_ring_proportion_before_stop_onset = grab_mean_metric('ball_before_ring_proportion_before_stop_onset')
print(ball_before_ring_proportion_before_stop_onset.mean())

non_ai           0.133025
ai_disengaged    0.099186
ai_engaged       0.096252
dtype: float64


In [23]:
ball_after_ring_proportion_before_stop_onset = grab_mean_metric('ball_after_ring_proportion_before_stop_onset')
print(ball_after_ring_proportion_before_stop_onset.mean())

non_ai           0.000177
ai_disengaged    0.000093
ai_engaged       0.000235
dtype: float64


In [21]:
first_non_zero_pressure_timestamp = grab_mean_metric('first_non_zero_pressure_timestamp')
print(first_non_zero_pressure_timestamp.mean())

non_ai           0.050664
ai_disengaged    0.047763
ai_engaged       0.049668
dtype: float64


In [22]:
first_full_pressure_timestamp = grab_mean_metric('first_full_pressure_timestamp')
print(first_full_pressure_timestamp.mean())

non_ai           0.177985
ai_disengaged    0.144065
ai_engaged       0.150170
dtype: float64


## Finding the proportion of full pressure points (pressure = 1) at each time interval

In [15]:
# Initialize dictionaries to store pressures for each condition
non_ai = {}
ai_disengaged = {}
ai_engaged = {}

for subject in shared_control_metrics.keys():
    non_ai_trials = []
    ai_disengaged_trials = []
    ai_engaged_trials = []

    # Loop through the blocks and extract pressures at intervals
    for block in shared_control_metrics[subject].keys():
        for trial in shared_control_metrics[subject][block]['trial_results'].keys():
            
            if subject in exclusions.keys() and trial in exclusions[subject] and block in exclusions[subject]:
                continue
            
            if block == 'Non-AI':
                non_ai_trials.append(shared_control_metrics[subject][block]['trial_results'][trial]['pressures_at_intervals_until_stop_onset'])
                
            elif (block == 'AI') and (shared_control_metrics[subject][block]['trial_results'][trial]['condition'] == 'AI-disengaged'):
                ai_disengaged_trials.append(shared_control_metrics[subject][block]['trial_results'][trial]['pressures_at_intervals_until_stop_onset'])
                
            elif (block == 'AI') and (shared_control_metrics[subject][block]['trial_results'][trial]['condition'] == 'AI-engaged'):
                ai_engaged_trials.append(shared_control_metrics[subject][block]['trial_results'][trial]['pressures_at_intervals_until_stop_onset'])

    # Find the maximum interval length to pad the lists to the same length
    max_length = max(
        max([len(lst) for lst in non_ai_trials], default=0),
        max([len(lst) for lst in ai_disengaged_trials], default=0),
        max([len(lst) for lst in ai_engaged_trials], default=0)
    )
            
    non_ai = find_sum_of_intervals(non_ai_trials, non_ai, max_length, subject)
    ai_disengaged = find_sum_of_intervals(ai_disengaged_trials, ai_disengaged, max_length, subject)
    ai_engaged = find_sum_of_intervals(ai_engaged_trials, ai_engaged, max_length, subject)


# Convert dictionaries into DataFrames for each condition
time_intervals = [f"{i * 100}-{(i + 1) * 100}ms" for i in range(max_length)]

non_ai_proportion_ones, ai_disengaged_proportion_ones, ai_engaged_proportion_ones = (convert_dict_to_df(non_ai, time_intervals), 
                                                           convert_dict_to_df(ai_disengaged, time_intervals), 
                                                           convert_dict_to_df(ai_engaged, time_intervals))

  measures_dict[subject] = np.nansum(np.vstack(trials) == 1, axis=0) / counts # Count number of pressures=1


In [16]:
%store non_ai_proportion_ones
%store ai_disengaged_proportion_ones
%store ai_engaged_proportion_ones

Stored 'non_ai_proportion_ones' (DataFrame)
Stored 'ai_disengaged_proportion_ones' (DataFrame)
Stored 'ai_engaged_proportion_ones' (DataFrame)


## Finding the proportion of trials where subjects inhibited

In [17]:
proportion = {}

for subject in shared_control_metrics.keys():
    count_non_ai = 0
    count_ai_disengaged = 0
    count_ai_engaged = 0
    for block in shared_control_metrics[subject].keys():
        
        for trial in shared_control_metrics[subject][block]['trial_results'].keys():

            if subject in exclusions.keys() and trial in exclusions[subject] and block in exclusions[subject]:
                continue

            if (block == 'Non-AI'):
                if not np.isnan(shared_control_metrics[subject][block]['trial_results'][trial]['ssrt']):
                    count_non_ai += 1
            
            elif (block == 'AI') and (shared_control_metrics[subject][block]['trial_results'][trial]['condition'] == 'AI-disengaged'):
                if not np.isnan(shared_control_metrics[subject][block]['trial_results'][trial]['ssrt']):
                    count_ai_disengaged += 1
                    
            elif (block == 'AI') and (shared_control_metrics[subject][block]['trial_results'][trial]['condition'] == 'AI-engaged'):
                if not np.isnan(shared_control_metrics[subject][block]['trial_results'][trial]['ssrt']):
                    count_ai_engaged += 1

    proportions_non_ai = count_non_ai / 100
    proportions_ai_disengaged = count_ai_disengaged / 20
    proportions_ai_engaged = count_ai_engaged / 80
    
    proportion[subject] = {
        'proportion_non_ai': proportions_non_ai,
        'proportion_ai_disengaged': proportions_ai_disengaged,
        'proportion_ai_engaged': proportions_ai_engaged
    }

df = pd.DataFrame(proportion).T
df = df.sort_index()
df.loc['mean'] = df.mean()
print(df.loc['mean'])



proportion_non_ai           0.999750
proportion_ai_disengaged    0.996250
proportion_ai_engaged       0.989062
Name: mean, dtype: float64


## Create CSVs of SSRT by each half of trials in a block 

In [18]:
def grab_mean_metric_by_halves(measure):
    """
    Function to find the mean of a specified metric across different trial conditions 
    for each subject, split by halves of trials.

    Parameters:
    - measure (str): The name of the measure to calculate the mean for.

    Outputs:
    - Saves two CSV files with means for each subject across non_ai trials and ai condition trials
      split by halves.
    """
    # Initialize structures for data collection
    condition_measure_first_half = {}
    condition_measure_second_half = {}

    for subject in shared_control_metrics.keys():
        non_ai_first_half = []
        non_ai_second_half = []
        ai_disengaged_first_half = []
        ai_disengaged_second_half = []
        ai_engaged_first_half = []
        ai_engaged_second_half = []
        
        for block in shared_control_metrics[subject].keys():
            trial_results = shared_control_metrics[subject][block]['trial_results']
            num_trials = len(trial_results)

            for index, trial in enumerate(trial_results.keys()):
                if subject in exclusions.keys() and trial in exclusions[subject] and block in exclusions[subject]:
                    continue
                
                ssrt_value = shared_control_metrics[subject][block]['trial_results'][trial][measure]
                
                if pd.isna(ssrt_value):
                    continue
                
                if block == 'Non-AI':
                    if index < num_trials / 2:  # First half
                        non_ai_first_half.append(ssrt_value)
                    else:  # Second half
                        non_ai_second_half.append(ssrt_value)
                    
                elif block == 'AI':
                    condition = shared_control_metrics[subject][block]['trial_results'][trial]['condition']
                    if condition == 'AI-disengaged':
                        if index < num_trials / 2:  # First half
                            ai_disengaged_first_half.append(ssrt_value)
                        else:  # Second half
                            ai_disengaged_second_half.append(ssrt_value)
                    elif condition == 'AI-engaged':
                        if index < num_trials / 2:  # First half
                            ai_engaged_first_half.append(ssrt_value)
                        else:  # Second half
                            ai_engaged_second_half.append(ssrt_value)
                            
        condition_measure_first_half[subject] = {
            'non_ai': np.nanmean(non_ai_first_half),
            'ai_disengaged': np.nanmean(ai_disengaged_first_half),
            'ai_engaged': np.nanmean(ai_engaged_first_half)
        }
        
        condition_measure_second_half[subject] = {
            'non_ai': np.nanmean(non_ai_second_half),
            'ai_disengaged': np.nanmean(ai_disengaged_second_half),
            'ai_engaged': np.nanmean(ai_engaged_second_half)
        }
    
    df_first_half = pd.DataFrame(condition_measure_first_half).T
    df_second_half = pd.DataFrame(condition_measure_second_half).T

    df_first_half = df_first_half.sort_index()

    df_second_half = df_second_half.sort_index()
    
    return df_first_half, df_second_half

In [19]:
ssrt_first_half, ssrt_second_half = grab_mean_metric_by_halves('ssrt')
%store ssrt_first_half
%store ssrt_second_half

Stored 'ssrt_first_half' (DataFrame)
Stored 'ssrt_second_half' (DataFrame)
