# Extending fMRI vision decoding methods to mental imagery
## Anonymized code to reproduce behavioral experiment and analysis

Subjects were recruited using the Prolific platform, and the experiment was hosted on the Meadows platform.
This notebook serves to collect the stimuli and reconstructed images for each of the tested reconstruction methods, assign them a unique stimulus ID, package all of the stimuli into a folder to be uploaded to a Meadows stimulus set, and handle parsing responses from the Meadows annotation format. Instructions will be provided throughout for off-notebook tasks that need to be done for the full process.

In [None]:
# Package imports
import os, sys, shutil
from tqdm import tqdm
import numpy as np
import pandas as pd
# Set the display options to show all columns
pd.set_option('display.max_columns', None)
import matplotlib as plt
from PIL import Image
from matplotlib.lines import Line2D
import math
import random
from datetime import datetime
from scipy.stats import binomtest
from collections import defaultdict

# Configure experiment and response version, in case multiple experiments or sets of responses are produced in sequence.
experiment_version = 1
response_version = 1

# Every input/output folder is suffixed with the experiment version so that
# successive experiment rounds never overwrite each other.
stimuli_path = f"stimuli_v{experiment_version}/"
response_path = f"responses_v{experiment_version}/"
dataframe_path = f"dataframes_v{experiment_version}/"

# Create the working folders up front; exist_ok makes the cell safe to re-run.
for _folder in (stimuli_path, response_path, dataframe_path):
    os.makedirs(_folder, exist_ok=True)

# CREATE EXPERIMENT DATAFRAME AND TRIAL FILES FOR MEADOWS

In [None]:
#Experiment column key:
# 1: Experiment 1, two way identification for all reconstruction methods
# 2: Experiment 2, vision vs imagery similarity comparison
# 3: Experiment 3, BOI vs Base model similarity comparison
EXPERIMENT_COLUMNS = ["experiment", "stim1", "stim2", "stim3", "sample", "subject", "target_on_left", "method", "catch_trial", "rep", "mode", "stimtype"]
RECON_METHODS = ["mindeye", "boi-me1", "braindiffuser", "boi-bd", "mindeye2", "tagaki"]
# (BOI variant, base model) pairs compared against each other in Experiment 3
BOI_BASE_PAIRS = [("boi-bd", "braindiffuser"), ("boi-me1", "mindeye")]
NUM_SAMPLES = 12
NUM_REPS = 10

# Root folder of the source images. It is concatenated directly with the
# relative paths below, so it must end with a path separator.
stimuli_root = "ENTER PATH FOR STIMULI HERE"


def _recon_name(sample, rep, mode, subj, method):
    """Return the unique stimulus ID of one reconstruction."""
    return f"{sample}_{rep}_{mode}_subject{subj}_{method}"


def _recon_path(mode, method, subj, sample, rep):
    """Return the source path of one reconstruction image."""
    return f"{stimuli_root}{mode}/{method}/subject{subj}/{sample}/{rep}.png"


def _ground_truth_path(method, subj, sample):
    """Return the source path of the ground truth image stored alongside a method's vision reconstructions."""
    return f"{stimuli_root}vision/{method}/subject{subj}/{sample}/ground_truth.png"


def _copy_stimulus(source_path, stimulus_name):
    """Copy a source image into the versioned stimuli folder as <stimulus_name>.png."""
    shutil.copy(source_path, f"{stimuli_path}{stimulus_name}.png")


def _make_trial(experiment, gt_sample, target, other, sample, subj, method, rep, mode):
    """Build one trial row, randomly choosing which candidate is shown on the left.

    `target` is the stimulus that "target_on_left" refers to; `other` is the
    stimulus it is compared against.
    """
    order = random.randrange(2)
    left_sample, right_sample = (target, other) if order == 0 else (other, target)
    return {
        "experiment": experiment,
        "stim1": gt_sample,
        "stim2": left_sample,
        "stim3": right_sample,
        "sample": sample,
        "subject": subj,
        "target_on_left": order == 0,
        "method": method,
        "catch_trial": None,
        "rep": rep,
        "mode": mode,
        # The first six samples are labelled "simple", the rest "complex".
        "stimtype": "simple" if sample < 6 else "complex",
    }


# Rows are collected in a list and turned into a dataframe once at the end;
# growing a dataframe one row at a time re-allocates it on every insertion.
# NOTE: the previous row dicts carried a "trial_rep" key for experiments 1 and 3
# only; it is not one of the declared columns, so it is no longer emitted.
trial_rows = []
for subj in [1,2,5,7]: #1,2,5,7
    #Experiment 1, mental imagery two way identification
    for mode in ["vision", "imagery"]:
        for sample in tqdm(range(NUM_SAMPLES)):
            gt_sample = f"{sample}_ground_truth"
            for method in RECON_METHODS:
                # The ground truth does not depend on the repetition, so copy it once per method
                _copy_stimulus(_ground_truth_path(method, subj, sample), gt_sample)
                for rep in range(NUM_REPS):
                    # Pick a reconstruction of a *different* sample as the distractor
                    random_sample = random.choice([x for x in range(NUM_SAMPLES) if x != sample])
                    random_rep = random.randrange(NUM_REPS)

                    sample_recon = _recon_name(sample, rep, mode, subj, method)
                    random_recon = _recon_name(random_sample, random_rep, mode, subj, method)
                    _copy_stimulus(_recon_path(mode, method, subj, sample, rep), sample_recon)
                    _copy_stimulus(_recon_path(mode, method, subj, random_sample, random_rep), random_recon)

                    trial_rows.append(_make_trial(1, gt_sample, sample_recon, random_recon, sample, subj, method, rep, mode))

    # Experiment 2: Vision vs Imagery similarity comparison w/ the Drag-Rate task
    for sample in tqdm(range(NUM_SAMPLES)):
        gt_sample = f"{sample}_ground_truth"
        for method in RECON_METHODS:
            _copy_stimulus(_ground_truth_path(method, subj, sample), gt_sample)
            for rep in range(NUM_REPS):
                vision_recon = _recon_name(sample, rep, "vision", subj, method)
                imagery_recon = _recon_name(sample, rep, "imagery", subj, method)
                _copy_stimulus(_recon_path("vision", method, subj, sample, rep), vision_recon)
                _copy_stimulus(_recon_path("imagery", method, subj, sample, rep), imagery_recon)

                # The vision reconstruction plays the role of the "target"
                trial_rows.append(_make_trial(2, gt_sample, vision_recon, imagery_recon, sample, subj, method, rep, "both"))

    #Experiment 3: BOI vs Base model similarity comparison w/ the Drag-Rate task
    for sample in tqdm(range(NUM_SAMPLES)):
        gt_sample = f"{sample}_ground_truth"
        for mode in ["vision", "imagery"]:
            for (boi, base) in BOI_BASE_PAIRS:
                _copy_stimulus(_ground_truth_path(base, subj, sample), gt_sample)
                for rep in range(NUM_REPS):
                    boi_recon = _recon_name(sample, rep, mode, subj, boi)
                    base_recon = _recon_name(sample, rep, mode, subj, base)
                    _copy_stimulus(_recon_path(mode, boi, subj, sample, rep), boi_recon)
                    _copy_stimulus(_recon_path(mode, base, subj, sample, rep), base_recon)

                    # The BOI reconstruction plays the role of the "target";
                    # the trial is filed under the base model's name.
                    trial_rows.append(_make_trial(3, gt_sample, boi_recon, base_recon, sample, subj, base, rep, mode))

df_exp = pd.DataFrame(trial_rows, columns=EXPERIMENT_COLUMNS)
# Shuffle the trial order
df_exp = df_exp.sample(frac=1)
print(len(df_exp))


In [None]:
# Verify that every stimulus referenced by a trial exists in the final stimuli folder.
# Each missing file name is printed, followed by the total number of misses.
count_not_found = 0
stim_path = f"stimuli_v{experiment_version}/"
for _, trial in df_exp.iterrows():
    for stim_column in ("stim1", "stim2", "stim3"):
        file_name = f"{trial[stim_column]}.png"
        if os.path.exists(f"{stim_path}{file_name}"):
            continue
        print(file_name)
        count_not_found += 1
print(count_not_found)

### At this point all of the stimuli should be collected in a stimulus folder that can be uploaded to Meadows as a stimulus set for the experiment.
### To prepare the trial-wise data for the experiment, we will create a pID column to assign trials to different participants

In [None]:
# Experiment 1 is shuffled on its own; experiments 2 and 3 are pooled and shuffled together
is_experiment_1 = df_exp['experiment'] == 1
is_experiment_23 = df_exp['experiment'].isin([2, 3])
df_exp1 = df_exp[is_experiment_1].sample(frac=1, random_state=42)
df_exp23 = df_exp[is_experiment_23].sample(frac=1, random_state=42)

# Trials per participant, chosen to approximate a 10 minute session;
# raise or lower them for longer or shorter experiments.
exp1_trials_per_participant = 22
exp23_trials_per_participant = 18
# Use whichever experiment pool needs more participants to be covered
participants_for_exp1 = len(df_exp1) // exp1_trials_per_participant
participants_for_exp23 = len(df_exp23) // exp23_trials_per_participant
num_participants = max(participants_for_exp1, participants_for_exp23)

# Deal the experiment 1 trials out to participants round-robin
df_exp1 = df_exp1.assign(pID=[position % num_participants for position in range(len(df_exp1))])

# Shuffle the pooled experiment 2/3 trials once more, then deal them out the same way
df_exp23 = df_exp23.sample(frac=1, random_state=42)
df_exp23 = df_exp23.assign(pID=[position % num_participants for position in range(len(df_exp23))])

# Merge both pools, mix the trials across experiments, then group them by participant
df_exp_pid = pd.concat([df_exp1, df_exp23]).sample(frac=1, random_state=42)
df_exp_pid = df_exp_pid.sort_values(by='pID')

# Put pID first, keeping the remaining columns in their existing order
cols = ['pID'] + [name for name in df_exp_pid.columns if name != 'pID']
df_exp_pid = df_exp_pid[cols]

### We will now add "catch trials", which are not real trials of the experiment but are instead foolproof trials designed to "catch" participants who are not paying attention to the instructions, so that they may be filtered out later.

In [None]:
#Add catch trials within each pID section
# A ground-truth catch trial reuses a real trial's ground truth image (stim1)
# but shows that same image again as one of the two options, next to the
# trial's original stim2.
CATCH_TRIALS_PER_BLOCK = 3

catch_trial_frames = []
for pID in np.unique(df_exp_pid['pID']):
    for experiment_list in [[1], [2, 3]]:
        df_pid = df_exp_pid[(df_exp_pid['experiment'].isin(experiment_list)) & (df_exp_pid['pID'] == pID)]

        # Sampling more rows than exist raises; previously that error was swallowed
        # and the catch trials of the preceding block were silently added again.
        n_catch = min(CATCH_TRIALS_PER_BLOCK, len(df_pid))
        if n_catch < CATCH_TRIALS_PER_BLOCK:
            print(f"pID {pID}, experiments {experiment_list}: only {len(df_pid)} trials available, adding {n_catch} catch trials")
        if n_catch == 0:
            continue

        # Ground truth catch trials
        gt_catch_trials = df_pid.sample(n=n_catch).copy()
        gt_catch_trials['catch_trial'] = "ground_truth"
        for index, row in gt_catch_trials.iterrows():
            order = random.randrange(2)
            ground_truth = row['stim1']
            left_stim, right_stim = (ground_truth, row['stim2']) if order == 0 else (row['stim2'], ground_truth)

            gt_catch_trials.at[index, 'stim2'] = left_stim
            gt_catch_trials.at[index, 'stim3'] = right_stim
            # Target on left here means the ground truth repeat is on the left
            gt_catch_trials.at[index, 'target_on_left'] = (order == 0)
        catch_trial_frames.append(gt_catch_trials)

# Append all catch trials in one go instead of re-concatenating inside the loop
df_exp_catch = pd.concat([df_exp_pid, *catch_trial_frames])
# shuffle catch trials into the sessions; the stable sort keeps the shuffled order within each pID
df_exp_catch = df_exp_catch.sample(frac=1).sort_values(by='pID', kind='mergesort')
print(len(df_exp_catch))
print(len(df_exp_catch[(df_exp_catch['pID'] == 0)]))
print(len(np.unique(df_exp_catch['pID'])))
print(len(df_exp_catch[df_exp_catch['experiment'] == 1]))
print(len(df_exp_catch[df_exp_catch['experiment'].isin([2, 3])]))

### We can now save the experiment dataframe (used for keeping track of all experiment information) and the Meadows .tsv trial files, which configure Meadows by telling it which stimuli to show to specific participants in specific trials. These files can be uploaded to Meadows at the experiment deployment stage.

In [None]:

# Full experiment design (all columns), used later to interpret the responses
df_exp_catch.to_csv(f'dataframes_v{experiment_version}/experiment_v{experiment_version}.csv', index=False)

# Split the design into the experiment 1 trials and the pooled experiment 2/3 trials
is_exp1_trial = df_exp_catch['experiment'] == 1
is_exp23_trial = df_exp_catch['experiment'].isin([2, 3])
df_catch_exp1 = df_exp_catch[is_exp1_trial]
df_catch_exp23 = df_exp_catch[is_exp23_trial]

# The Meadows trial files are header-less TSVs holding only the participant ID and the three stimuli
meadows_columns = ['pID', 'stim1', 'stim2', 'stim3']
tsv_options = dict(sep="\t", index=False, header=False)
df_exp_tsv1 = df_catch_exp1.loc[:, meadows_columns].copy()
df_exp_tsv23 = df_catch_exp23.loc[:, meadows_columns].copy()
df_exp_tsv1.to_csv(f"dataframes_v{experiment_version}/meadow_trials_v{experiment_version}_exp1.tsv", **tsv_options)
df_exp_tsv23.to_csv(f"dataframes_v{experiment_version}/meadow_trials_v{experiment_version}_exp23.tsv", **tsv_options)

# THE FOLLOWING CELLS ARE FOR PROCESSING RESPONSES
### At this point, the experiment has been completed, and we have both the experiment dataframe from earlier and a Meadows response dataframe. We are going to merge these into a master spreadsheet for analysis.

In [None]:
# Load the saved experiment design and the raw Meadows annotation export
experiment_csv = f"{dataframe_path}experiment_v{experiment_version}.csv"
annotations_csv = f"{response_path}annotations_v{response_version}.csv"
df_experiment = pd.read_csv(experiment_csv)
df_responses = pd.read_csv(annotations_csv)

### Parse the responses and associate them with the trials in the experiment dataframe, to have all information available in one dataframe

In [None]:

def _parse_meadows_timestamp(timestamp):
    """Parse an ISO-8601 timestamp string, accepting a trailing 'Z' as UTC."""
    return datetime.fromisoformat(timestamp.replace("Z", "+00:00"))


def _elapsed_seconds(start_timestamp, *end_timestamps):
    """Return the seconds between the start timestamp and the latest end timestamp."""
    start = _parse_meadows_timestamp(start_timestamp)
    end = max(_parse_meadows_timestamp(stamp) for stamp in end_timestamps)
    return (end - start).total_seconds()


# Initialize a list to hold row dictionaries before creating the final dataframe
rows_list = []
df_2afc = df_responses[df_responses["task"] == "Match-To-Sample"]
df_similarity = df_responses[df_responses["task"] == "Drag-And-Rate"]

# Experiment 1 is the two-way identification task and experiments 2 and 3 are the
# drag-and-rate tasks, so each response type is only matched against its own
# trials. Without this restriction a ground-truth catch trial from one task can
# be picked up for the other, since both may reuse the same stimulus pair.
df_design_2afc = df_experiment[df_experiment['experiment'] == 1]
df_design_similarity = df_experiment[df_experiment['experiment'].isin([2, 3])]

# Parse trials in 2AFC experiment
skipped_2afc = 0
for index, row in tqdm(df_2afc.iterrows(), total=len(df_2afc)):
    if row['label'] == row['stim2_id']:
        picked_left = True
    elif row['label'] == row['stim3_id']:
        picked_left = False
    else:
        # Skip only the malformed response instead of discarding everything after it
        print(f"Skipping response row {index}: label matches neither stim2_id nor stim3_id")
        skipped_2afc += 1
        continue

    design_matches = df_design_2afc[(df_design_2afc['stim1'] == row['stim1_name']) & (df_design_2afc['stim2'] == row['stim2_name']) & (df_design_2afc['stim3'] == row['stim3_name'])]
    if design_matches.empty:
        print(f"Skipping response row {index}: no matching trial in the experiment dataframe")
        skipped_2afc += 1
        continue
    experiment_row = design_matches.iloc[0]

    row_data = {
        **experiment_row.to_dict(),
        "picked_left": picked_left,
        "picked_target": picked_left == experiment_row['target_on_left'],
        "participant": row['participation'],
        "response_time": _elapsed_seconds(row['time_trial_start'], row['time_trial_response']),
    }
    rows_list.append(row_data)
if skipped_2afc:
    print(f"Skipped {skipped_2afc} Match-To-Sample responses")

# Parse trials in similarity range experiment by iterating through df_responses two rows at a time, since each stimuli has its own row
n_similarity_rows = len(df_similarity)
if n_similarity_rows % 2:
    print(f"Warning: odd number of Drag-And-Rate rows ({n_similarity_rows}); ignoring the unpaired last row")
unmatched_similarity = 0
for index in tqdm(range(0, n_similarity_rows - 1, 2)):
    row1 = df_similarity.iloc[index]
    row2 = df_similarity.iloc[index + 1]
    # Ensure the two rows belong to the same trial
    if row1["trial"] != row2["trial"]:
        raise ValueError(f"Drag-And-Rate rows {index} and {index + 1} do not belong to the same trial")

    # The two rows may arrive in either order, so try both left/right assignments
    match1 = df_design_similarity[(df_design_similarity['stim2'] == row1['stim1_name']) & (df_design_similarity['stim3'] == row2['stim1_name'])]
    match2 = df_design_similarity[(df_design_similarity['stim2'] == row2['stim1_name']) & (df_design_similarity['stim3'] == row1['stim1_name'])]

    # Determine which match is correct
    if not match1.empty:
        correct_match = match1
        stim2_row, stim3_row = row1, row2
    elif not match2.empty:
        correct_match = match2
        stim2_row, stim3_row = row2, row1
    else:
        unmatched_similarity += 1
        continue  # Skip if no correct match is found

    # Extract the correct match's data
    experiment_row = correct_match.iloc[0]

    # The trial ends when the later of the two stimuli has been placed
    response_time = _elapsed_seconds(row1['time_trial_start'], row1['time_trial_response'], row2['time_trial_response'])

    # Determine if the left stimulus was picked based on the 'y' value
    picked_left = stim2_row['y'] > stim3_row['y']

    # Determine target and distractor based on target_on_left flag
    if experiment_row['target_on_left']:
        target_row, distractor_row = stim2_row, stim3_row
    else:
        target_row, distractor_row = stim3_row, stim2_row
    target_similarity, target_confidence = target_row['y'], target_row['x']
    distractor_similarity, distractor_confidence = distractor_row['y'], distractor_row['x']

    # Determine if the target was picked (a tie counts as not picked)
    picked_target = target_similarity > distractor_similarity

    # Compile the row data
    row_data = {
        **experiment_row.to_dict(),
        "picked_left": picked_left,
        "participant": row1['participation'],
        "response_time": response_time,
        "stim2_similarity": stim2_row['y'],
        "stim2_confidence": stim2_row['x'],
        "stim3_similarity": stim3_row['y'],
        "stim3_confidence": stim3_row['x'],
        "target_similarity": target_similarity,
        "target_confidence": target_confidence,
        "distractor_similarity": distractor_similarity,
        "distractor_confidence": distractor_confidence,
        "picked_target": picked_target
    }
    rows_list.append(row_data)
if unmatched_similarity:
    print(f"Skipped {unmatched_similarity} Drag-And-Rate trials with no matching trial in the experiment dataframe")

# Create the final dataframe from the list of row dictionaries
df_trial_combined = pd.DataFrame(rows_list)

# Dropping the extra index columns added from the experiment_row.to_dict() conversion
df_trial = df_trial_combined.drop(columns=[col for col in df_trial_combined.columns if 'Unnamed' in col])

print(df_trial.head())

### At this point, if the first run of the experiment did not collect enough data or too many subjects had to be filtered out, we can "fill in" the experiment dataframe by finding trials that didn't get completed in the previous round of the experiment, and save new trial files for another round. This part is optional, and requires incrementing the version number at the top of the notebook.

In [None]:
# Tally how many parsed trials came back for each pID
processed_trials_count = defaultdict(int)
for completed_pid in df_trial['pID']:
    processed_trials_count[completed_pid] += 1

# Number of trials each pID was supposed to receive according to the experiment design
expected_trials_count = df_experiment['pID'].value_counts().to_dict()

# A pID counts as incomplete when it has no responses at all, or when it is
# short by more than two trials (a shortfall of up to two is tolerated)
missing_trials_pids = {}
for design_pid, expected_count in expected_trials_count.items():
    never_seen = design_pid not in processed_trials_count
    if never_seen or processed_trials_count[design_pid] < expected_count - 2:
        missing_trials_pids[design_pid] = expected_count - processed_trials_count[design_pid]

# Report the incomplete pIDs
print(f"missing {len(missing_trials_pids)} pIDs")
for pID, missing_count in missing_trials_pids.items():
    print(f"pID: {pID} has {missing_count} missing trials.")

if missing_trials_pids:
    # Re-export the complete trial lists of every incomplete pID for another round
    df_exp_missing = df_experiment[df_experiment['pID'].isin(missing_trials_pids.keys())]

    df_catch_exp1_missing = df_exp_missing[df_exp_missing['experiment'] == 1]
    df_catch_exp23_missing = df_exp_missing[df_exp_missing['experiment'].isin([2, 3])]

    meadows_columns_missing = ['pID', 'stim1', 'stim2', 'stim3']
    df_exp_tsv1_missing = df_catch_exp1_missing.loc[:, meadows_columns_missing].copy()
    df_exp_tsv23_missing = df_catch_exp23_missing.loc[:, meadows_columns_missing].copy()
    df_exp_tsv1_missing.to_csv(f"dataframes_v{experiment_version}/meadow_trials_v{experiment_version}_exp1_missing.tsv", sep="\t", index=False, header=False)
    df_exp_tsv23_missing.to_csv(f"dataframes_v{experiment_version}/meadow_trials_v{experiment_version}_exp23_missing.tsv", sep="\t", index=False, header=False)

### Once all the data has been collected, we need to identify and remove participants who failed more than 2 catch trials before doing analysis.

In [None]:
# How many participants responded in total
print("Total participants:", df_trial["participant"].nunique(dropna=False))

# Count, per participant, the ground truth catch trials on which the target was not picked
is_gt_catch = df_trial['catch_trial'] == 'ground_truth'
missed_target = df_trial['picked_target'].eq(False)
gt_failures = df_trial[is_gt_catch & missed_target].groupby('participant').size()
# Participants with more than 2 failed ground truth catch trials are excluded
participants_to_remove = gt_failures[gt_failures > 2].index.tolist()
print("Participants to remove:", participants_to_remove)

participants_to_remove = set(participants_to_remove)
keep_mask = ~df_trial['participant'].isin(participants_to_remove)
filtered_df = df_trial[keep_mask]
print("Clean participants:", filtered_df["participant"].nunique(dropna=False))
print(len(df_trial), len(filtered_df))
print(participants_to_remove)
# Only real trials (no catch_trial label) are kept for the analysis
filtered_df = filtered_df[filtered_df['catch_trial'].isnull()]
filtered_df.to_csv(f'{dataframe_path}filtered_responses_v{response_version}.csv', index=False)

# THE FOLLOWING CELLS ARE FOR ANALYZING RESPONSES
### At this point, you should have processed all of the data from the experiment to remove catch trials and bad subjects. Now we can begin analysis.

In [None]:
# Load filtered responses
df_trial_exp = pd.read_csv(f'{dataframe_path}filtered_responses_v{response_version}.csv')
df_experiment = pd.read_csv(dataframe_path + f"experiment_v{experiment_version}.csv")

df_responses = pd.read_csv(f"{response_path}annotations_v{response_version}.csv")

# Experiment 1: for every method / mode / stimulus type, test whether participants
# picked the correct reconstruction more often than chance.
experiment = 1

# Iterate over each method
for method in df_trial_exp['method'].unique():
    print(f"Method: {method}")
    print("--------------------")

    for mode in ["vision", "imagery"]:

        print(f"Mode: {mode}")
        for stimtype in ["simple", "complex"]:
            print(f"Stimtype: {stimtype}")
            # Filter the data for the current method, mode, and experiment
            df_trial_exp1 = df_trial_exp[(df_trial_exp['method'] == method) & (df_trial_exp['experiment'] == experiment) & (df_trial_exp['mode'] == mode) & (df_trial_exp['stimtype'] == stimtype)]
            n_trials = len(df_trial_exp1)
            print("Number of experiment trials:", n_trials)

            if n_trials == 0:
                # binomtest rejects n=0 and the success rate would divide by zero
                print("No trials for this condition; skipping binomial test")
            else:
                n_success = int(df_trial_exp1['picked_target'].sum())
                # Perform a binomial test
                # The null hypothesis is that the probability of success is 0.5 (chance level)
                p_value = binomtest(n_success, n=n_trials, p=0.5, alternative='two-sided').pvalue
                print("Success rate: ", n_success / n_trials)
                print(f'P-value: {p_value}')

            print("--------------------")
            print()


In [None]:
import matplotlib.pyplot as plt

# Experiment 2 compares the vision and imagery reconstructions of the same sample.
# When the trials were built the vision reconstruction was the "target" and the
# imagery reconstruction the "distractor", hence the column mapping below.
df_trial_exp2 = df_trial_exp[df_trial_exp['experiment'] == 2]

# Display names for the BOI variants; the older "boi-v2.x" identifiers are kept
# so that response files using them are still labelled.
method_labels = {
    "boi-me1": "mindeye\n + BOI",
    "boi-bd": "braindiffuser\n + BOI",
    "boi-v2.1": "mindeye\n + BOI",
    "boi-v2.3": "braindiffuser\n + BOI",
}

# Calculate the average similarity and confidence for each method, pooled over stimulus types
averages = []
for method in df_trial_exp2['method'].unique():
    category_df = df_trial_exp2[df_trial_exp2["method"] == method]
    avg_vision_similarity = category_df['target_similarity'].mean()
    avg_imagery_similarity = category_df['distractor_similarity'].mean()
    avg_vision_confidence = category_df['target_confidence'].mean()
    avg_imagery_confidence = category_df['distractor_confidence'].mean()
    print(f"Method: {method}, Average Vision Similarity: {avg_vision_similarity:.3f}, Average Vision Confidence: {avg_vision_confidence:.3f}, Average Imagery Similarity: {avg_imagery_similarity:.3f}, Average Imagery Confidence: {avg_imagery_confidence:.3f}")
    averages.append((method, avg_vision_similarity, avg_imagery_similarity))

# Create a list of methods and their corresponding average similarity values for vision and imagery
methods = [method_labels.get(method, method) for method, _, _ in averages]
vision_similarities = [vision_value for _, vision_value, _ in averages]
imagery_similarities = [imagery_value for _, _, imagery_value in averages]

# Set the width of the bars
bar_width = 0.35

# Set the positions of the bars on the X-axis
r1 = np.arange(len(methods))
r2 = [x + bar_width for x in r1]

# Plot the bars
plt.bar(r1, vision_similarities, color='blue', width=bar_width, label='Vision')
plt.bar(r2, imagery_similarities, color='orange', width=bar_width, label='Imagery')

# Add labels and title
plt.xlabel('Methods')
plt.ylabel('Average Similarity')
plt.title('Experiment 2: Average Similarity for Vision and Imagery')
# Centre each tick between its pair of bars
plt.xticks([r + bar_width/2 for r in range(len(methods))], methods)

# Add legend
plt.legend()

# Show the plot
plt.show()



In [None]:
# Experiment 3 is brain-optimized inference vs base model, BOI trials are marked as "target", base trials are marked as "distractor"
import matplotlib.pyplot as plt

df_trial_exp3 = df_trial_exp[df_trial_exp['experiment'] == 3]
methods = df_trial_exp3['method'].unique()
modes = df_trial_exp3['mode'].unique()
print(methods)

# One entry per (method, mode) combination, in method-major order
avg_boi_similarity = []
avg_base_similarity = []
avg_boi_confidence = []
avg_base_confidence = []

# Report and collect the averages in a single pass
for method in methods:
    for mode in modes:
        category_df = df_trial_exp3[(df_trial_exp3["mode"] == mode) & (df_trial_exp3["method"] == method)]
        boi_similarity = category_df['target_similarity'].mean()
        base_similarity = category_df['distractor_similarity'].mean()
        boi_confidence = category_df['target_confidence'].mean()
        base_confidence = category_df['distractor_confidence'].mean()
        print(f"Method: {method}, Mode: {mode}, Average BOI Similarity: {boi_similarity:.3f}, Average BOI Confidence: {boi_confidence:.3f}, Average Base Model Similarity: {base_similarity:.3f}, Average Base Model Confidence: {base_confidence:.3f}")
        avg_boi_similarity.append(boi_similarity)
        avg_base_similarity.append(base_similarity)
        avg_boi_confidence.append(boi_confidence)
        avg_base_confidence.append(base_confidence)

# Grouped bar chart: BOI bar on the left, base model bar directly to its right
x = range(len(methods) * len(modes))
width = 0.35
base_positions = [position + width for position in x]
tick_positions = [position + width/2 for position in x]
tick_labels = [f'{method}\n{mode}' for method in methods for mode in modes]

fig, ax = plt.subplots()
rects1 = ax.bar(x, avg_boi_similarity, width, label='BOI Similarity')
rects2 = ax.bar(base_positions, avg_base_similarity, width, label='Base Model Similarity')

# Axis label, title, ticks and legend
ax.set_ylabel('Average Similarity')
ax.set_title('Experiment 3: BOI vs Base Model Similarity')
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels)
ax.legend()

# Show the plot
plt.show()
