# Force Sensitive Data Processing

This notebook processes data from the force-sensitive stopping task experiment.

Running this notebook will:

1. Load and process raw CSV data files from the experiment
2. Calculate key metrics like Stop Signal Reaction Time (SSRT)
3. Aggregate data across AI-assisted and Non-AI conditions
4. Prepare the data for statistical analysis and visualization
5. Save the processed data to a JSON file

In [1]:
import pandas as pd
import os
from glob import glob
import json
from sharedcontrolpaper.force_sensitive_stopping_task_utils import get_subject_label, aggregate_trial_data, process_trial_data, grab_mean_metric, find_sum_of_intervals, convert_dict_to_df, calculate_proportions_non_nan, collect_trial_metric, grab_mean_metric_by_halves, convert_formats, rename_index_column, convert_to_milliseconds

## Set up paths for data 

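Relative to the root of this project, the force-sensitive stopping data lives in `data/experiment/final/sub-s*/force_sensitive_stopping_task/`. The project root is taken to be the parent of the current working directory, so run this notebook from a folder that sits directly under the root.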

In [2]:
# The project root is taken to be the parent of the current working directory,
# so the notebook has to be run from a folder directly under the repository root.
parent_directory = os.path.dirname(os.getcwd())
data_path = os.path.join(parent_directory, 'data', 'experiment')
task = "force_sensitive_stopping_task"
exp_stage = "final"

# Raw files are matched at <data_path>/<exp_stage>/sub-s*/<task>/*.csv
pattern = os.path.join(data_path, exp_stage, 'sub-s*', task, '*.csv')

# glob() yields matches in arbitrary, filesystem-dependent order. Sorting makes
# the order in which files are processed (and therefore the insertion order of
# any per-subject dict built from this list) reproducible across machines.
data_files = sorted(glob(pattern))
print(f"Found {len(data_files)} datafiles in {pattern}")

Found 40 datafiles in /Users/kritiaxh/Documents/paperRepos/SharedControlPaper/data/experiment/final/sub-s*/force_sensitive_stopping_task/*.csv


## Derive task metrics from performance 

Loop through each subject, read in the dataframe, and get trials separated by block number. Then, aggregate these data to get measures for AI and Non-AI trial conditions. 

In [3]:
# subject label -> {'AI': {...}, 'Non-AI': {...}}, each holding the aggregated
# block data plus the per-trial results and SSRT list derived from it.
shared_control_metrics = {}

for file in data_files:
    # Extract subject info and load data
    subject_label = get_subject_label(file)
    df = pd.read_csv(file)
    # Strip any single quotes wrapped around the block labels
    df['block'] = df['block'].str.strip("'")

    # Filter out practice trials and split into blocks
    df_test = df.query("block != 'practice'")
    blocks = {
        'block 1': df_test.query("block == 'block 1'").reset_index(drop=True),
        'block 2': df_test.query("block == 'block 2'").reset_index(drop=True)
    }

    # Reset per subject: without this, a subject whose file lacks one condition
    # would silently reuse the aggregate left over from the previous subject.
    ai_data_agg = None
    non_ai_data_agg = None

    # Process AI and non-AI data separately
    for block_df in blocks.values():
        if block_df.empty:
            # A block with no rows carries no condition information; skipping it
            # keeps it from being mislabelled as the Non-AI block.
            continue
        if 'AI-assisted' in block_df['condition'].values:
            ai_data_agg = aggregate_trial_data(block_df.copy())
        else:
            non_ai_data_agg = aggregate_trial_data(block_df.copy())

    # Fail loudly rather than store incomplete or borrowed data for this subject
    missing = [
        condition
        for condition, agg in (('AI', ai_data_agg), ('Non-AI', non_ai_data_agg))
        if agg is None
    ]
    if missing:
        raise ValueError(
            f"{subject_label}: no test block found for condition(s) {missing} in {file}"
        )

    # Store aggregated data
    shared_control_metrics[subject_label] = {
        'AI': {'data': ai_data_agg},
        'Non-AI': {'data': non_ai_data_agg}
    }

    # Process trial results and SSRT for each condition
    for condition in ['AI', 'Non-AI']:
        condition_data = shared_control_metrics[subject_label][condition]['data']
        trial_results, ssrt_list = process_trial_data(condition_data, block=condition)

        shared_control_metrics[subject_label][condition].update({
            'trial_results': trial_results,
            'ssrt_list': ssrt_list
        })

s042
s029
s016
s011
s018
s027
s020
s043
s021
s019
s026
s010
s028
s017
s035
s032
s004
s005
s033
s034
s012
s015
s023
s024
s041
s025
s022
s014
s013
s040
s031
s036
s009
s007
s038
s006
s039
s037
s008
s030


In [4]:
# Per-subject mean of each metric. Every target name below is paired
# positionally with the metric key at the same position in the tuple of keys,
# and the calls run in that order.
(
    # Timing metrics
    force_sensitive_stopping_task_ssrt,
    duration_of_inhibition,
    # Accuracy metrics before and after stop signal
    go_task_accuracy_before_stop_onset,
    go_task_accuracy_after_stop_onset,
    # Ball and ring interaction metrics
    ball_after_ring_proportion_before_stop_onset,
    proportion_stops_before_stop_onset,
) = (
    grab_mean_metric(shared_control_metrics, metric_key)
    for metric_key in (
        'ssrt',
        'duration_of_inhibition',
        'go_task_accuracy_before_stop_onset',
        'go_task_accuracy_after_stop_onset',
        'ball_after_ring_proportion_before_stop_onset',
        'proportion_stops_before_stop_onset',
    )
)

# Pressure timing metrics (aggregated across AI conditions)
first_non_zero_pressure_timestamp, first_full_pressure_timestamp = (
    grab_mean_metric(shared_control_metrics, metric_key, aggregate_ai=True)
    for metric_key in (
        'first_non_zero_pressure_timestamp',
        'first_full_pressure_timestamp',
    )
)

## Find proportion of full pressure points (pressure = 1) at each time interval

In [5]:
non_ai, ai_failed, ai_assisted = {}, {}, {}

# First pass: collect every subject's per-trial interval pressures, split by
# condition ('non_ai', 'ai_failed', 'ai_assisted').
results_by_subject = {}
for subject, subject_data in shared_control_metrics.items():
    results_by_subject[subject] = collect_trial_metric(
        subject, subject_data, 'pressures_at_intervals_until_stop_onset'
    )

# Longest trial across ALL subjects and conditions. Using one global length
# (rather than the per-subject maximum that was previously recomputed inside the
# loop) gives every subject a vector of the same length and stops the column
# labels below from depending on whichever subject was processed last.
# default=0 also covers the case of no subjects / no trials.
max_length = max(
    (
        len(lst)
        for results in results_by_subject.values()
        for condition_key in ('non_ai', 'ai_failed', 'ai_assisted')
        for lst in results[condition_key]
    ),
    default=0,
)

# Second pass: accumulate per-subject values using the shared length
for subject, results in results_by_subject.items():
    non_ai = find_sum_of_intervals(results['non_ai'], non_ai, max_length, subject)
    ai_failed = find_sum_of_intervals(results['ai_failed'], ai_failed, max_length, subject)
    ai_assisted = find_sum_of_intervals(results['ai_assisted'], ai_assisted, max_length, subject)


# Convert dictionaries into DataFrames for each condition
# One label per 100 ms interval: "0-100ms", "100-200ms", ...
time_intervals = [f"{i * 100}-{(i + 1) * 100}ms" for i in range(max_length)]

non_ai_proportion_ones, ai_failed_proportion_ones, ai_assisted_proportion_ones = (
    convert_dict_to_df(non_ai, time_intervals),
    convert_dict_to_df(ai_failed, time_intervals),
    convert_dict_to_df(ai_assisted, time_intervals)
)

  measures_dict[subject] = np.nansum(np.vstack(trials) == 1, axis=0) / counts # Count number of pressures=1


## Finding the proportion of trials where subjects inhibited

In [6]:
condition_measure = {}
proportion = {}

for subject, subject_data in shared_control_metrics.items():
    ssrt_by_condition = collect_trial_metric(subject, subject_data, 'ssrt')
    # presumably `counts` holds the number of non-NaN SSRT entries and
    # `total_counts` the number of entries per condition (going by the helper's
    # name) - TODO confirm against its implementation
    counts, total_counts = calculate_proportions_non_nan(ssrt_by_condition)

    # A condition with no entries is reported as 0 instead of dividing by zero
    proportion[subject] = {
        condition_key: (
            counts[condition_key] / total_counts[condition_key]
            if total_counts[condition_key] > 0
            else 0
        )
        for condition_key in ('non_ai', 'ai_failed', 'ai_assisted')
    }


# One row per subject (sorted by subject label), one column per condition
df = pd.DataFrame(proportion).T.sort_index()

## Compute SSRT for each half of trials in a block

In [7]:
# The helper returns a pair of SSRT tables: first half, then second half
(ssrt_first_half, ssrt_second_half) = grab_mean_metric_by_halves(
    shared_control_metrics,
    'ssrt',
)

## Store all data that will be written to JSON file

In [8]:
# Convert data formats and standardize column names
converted_shared_control_metrics = convert_formats(shared_control_metrics)

# NOTE(review): the return values of rename_index_column and
# convert_to_milliseconds are discarded, so they presumably modify the tables
# in place. If so, re-running this cell without re-running the cells above
# would apply the millisecond conversion a second time - confirm.

# Rename index column to subject_id
for summary_table in (force_sensitive_stopping_task_ssrt, duration_of_inhibition):
    rename_index_column(summary_table)

# Every table that holds a time measurement
time_measures = [
    force_sensitive_stopping_task_ssrt,
    duration_of_inhibition,
    ssrt_first_half,
    ssrt_second_half,
    first_non_zero_pressure_timestamp,
    first_full_pressure_timestamp,
]

# Convert all time measurements to milliseconds
for timing_table in time_measures:
    convert_to_milliseconds(timing_table)

# Group DataFrames that need to be converted to dictionaries
# (keyword names become the top-level keys of the JSON file, in this order)
dataframes_to_convert = dict(
    non_ai_proportion_ones=non_ai_proportion_ones,
    ai_failed_proportion_ones=ai_failed_proportion_ones,
    ai_assisted_proportion_ones=ai_assisted_proportion_ones,
    ssrt_first_half=ssrt_first_half,
    ssrt_second_half=ssrt_second_half,
    go_task_accuracy_after_stop_onset=go_task_accuracy_after_stop_onset,
    go_task_accuracy_before_stop_onset=go_task_accuracy_before_stop_onset,
    duration_of_inhibition=duration_of_inhibition,
    force_sensitive_stopping_task_ssrt=force_sensitive_stopping_task_ssrt,
    first_non_zero_pressure_timestamp=first_non_zero_pressure_timestamp,
    first_full_pressure_timestamp=first_full_pressure_timestamp,
)

# Convert all DataFrames to dictionaries
data_to_save = {}
for output_key, table in dataframes_to_convert.items():
    data_to_save[output_key] = table.to_dict()

# Add shared control metrics which is already a dictionary
data_to_save['shared_control_metrics'] = converted_shared_control_metrics

# Save data to JSON file so it can be read in by other notebooks
# (the path is relative, so the file lands in the current working directory)
with open('force_sensitive_data.json', 'w') as json_file:
    json.dump(data_to_save, json_file, indent=4)