In [1]:
import os, sys, shutil
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib as plt
from PIL import Image
from matplotlib.lines import Line2D
import matplotlib as mpl
import math
import matplotlib.image as mpimg
import random
from datetime import datetime
# os.chdir("..")
experiment_version = 1
os.makedirs(f"stimuli_v{experiment_version}", exist_ok=True)

In [110]:
data_path = "/export/raid1/home/kneel027/Second-Sight/output/mental_imagery_paper/"
#Experiment column key:
# 1: Experiment 1, mindeye vs second sight
# 2: Experiment 2, second sight two way identification
# 3: Experiment 3, mental imagery two way identification
df_exp = pd.DataFrame(columns=["experiment", "stim1", "stim2", "stim3", "sample", "subject", "target_on_left", "iter_count", "is_low", "method", "catch_trial", "rep", "mode", "trial_rep"])
i=0
random_count = 0
for subj in [1,2,5,7]: #1,2,5,7
    #Experiment 3, mental imagery two way identification
    random_count = {"mindeye" : 0, "braindiffuser" : 0, "tagaki" : 0, "secondsight" : 0}
    random_low_count = {"mindeye" : 0, "braindiffuser" : 0, "tagaki" : 0, "secondsight" : 0}
    for mode in ["vision", "imagery"]:
        for sample in range(12):
            gt_sample = f"mi_{sample}_ground_truth"
            for method in ["mindeye", "braindiffuser", "tagaki", "secondsight"]: 
                for rep in range(5):
                    for trial_rep in range(4):
                        order = random.randrange(2)
                        if method in ["mindeye", "secondsight"]:
                            shuffled_idx = [i for i in range(982)]
                            random.shuffle(shuffled_idx)
                            random_sample = shuffled_idx[sample]
                            sample_names = [f"{random_sample}_subject{subj}_{method}", f"mi_{sample}_subject{subj}_{method}_{mode}_{rep}"]
                        else:
                            sample_names = [f"{random_count[method]}_subject{subj}_{method}_random", f"mi_{sample}_subject{subj}_{method}_{mode}_{rep}"]
                            random_count[method] +=1
                        
                        left_sample = sample_names.pop(order)
                        right_sample = sample_names.pop()
                        #count how many iterations the sample searched for
                        if method == "secondsight":
                            iter_files = os.listdir(f"../output/ss_mi_{mode}/subject{subj}/{sample}/")
                            iter_count = sum(1 for file in iter_files if "iter_" in file and file.endswith(".png"))
                        else:
                            iter_count=None
                        
                        df_exp.loc[i] = {"experiment" : 3, "stim1" : gt_sample, "stim2" : left_sample, "stim3" : right_sample, "sample" : sample, "subject" : subj, 
                                        "target_on_left" : order == 1, "iter_count" : iter_count, "is_low" : False, "method" : method, "catch_trial" : None, "rep" : rep, "mode" : mode, "trial_rep" : trial_rep}
                        i+=1
                if method != "secondsight":
                    order = random.randrange(2)
                    sample_names = [f"{random_low_count[method]}_subject{subj}_{method}_low_random", f"mi_{sample}_subject{subj}_{method}_{mode}_low"]
                    random_low_count[method] +=1
                    left_sample = sample_names.pop(order)
                    right_sample = sample_names.pop()
                    df_exp.loc[i] = {"experiment" : 3, "stim1" : gt_sample, "stim2" : left_sample, "stim3" : right_sample, "sample" : sample, "subject" : subj, 
                                    "target_on_left" : order == 1, "iter_count" : None, "is_low" : True, "method" : method, "catch_trial" : None, "rep" : rep, "mode" : mode, "trial_rep" : 0}
                    i+=1                 
    print(i)
print(df_exp)
df_exp = df_exp.sample(frac=1)
print(df_exp)

  0%|          | 0/982 [00:00<?, ?it/s]

100%|██████████| 982/982 [00:09<00:00, 107.75it/s]


1964
3956


100%|██████████| 982/982 [00:15<00:00, 63.70it/s]


5920
7912


 12%|█▏        | 119/982 [00:02<00:18, 47.92it/s]


KeyboardInterrupt: 

In [None]:
# Check if all images are present in final stimuli folder
count_not_found = 0
stim_path = "/home/naxos2-raid25/kneel027/home/kneel027/Second-Sight/experiments/final_stimuli/"
stim_path2 = "/home/naxos2-raid25/kneel027/home/kneel027/Second-Sight/experiments/new_random_stimuli/"
for index, row in df_exp.iterrows():
    if not (os.path.exists(f"{stim_path}{row['stim1']}.png") or os.path.exists(f"{stim_path2}{row['stim1']}.png")):
        print(f"{row['stim1']}.png")
        count_not_found += 1
    if not (os.path.exists(f"{stim_path}{row['stim2']}.png") or os.path.exists(f"{stim_path2}{row['stim2']}.png")):
        print(f"{row['stim2']}.png")
        count_not_found += 1
    if not (os.path.exists(f"{stim_path}{row['stim3']}.png") or os.path.exists(f"{stim_path2}{row['stim3']}.png")):
        print(f"{row['stim3']}.png")
        count_not_found += 1
print(count_not_found)

0


In [None]:
#Add participant ID column
pIDs = []
for i in range(len(df_exp)):
    pIDs.append(i // 105)
df_exp.insert(0, "pID", pIDs)
print(len(df_exp[(df_exp['pID'] == 0)]))
#Add catch trials within each pID section
for pID in range(max(pIDs)):
    df_pid = df_exp[(df_exp['experiment'] == 2) & (df_exp['pID'] == pID)]
    
    # Ground truth catch trials
    gt_catch_trials = df_pid.sample(n=5)
    gt_catch_trials['catch_trial'] = "ground_truth"
    for index, row in gt_catch_trials.iterrows():
        
        order = random.randrange(2)
        ground_truth = row['stim1']
        stims = [row['stim2'], ground_truth]
        
        gt_catch_trials.at[index, 'stim2'] = stims.pop(order)
        gt_catch_trials.at[index, 'stim3'] = stims.pop()
        # Target on left here means the ground truth repeat is on the left
        gt_catch_trials.at[index, 'target_on_left'] = (order == 1)
        
    # repeated trial catch trials
    repeat_catch_trials_rep1 = df_pid.sample(n=5)
    repeat_catch_trials_rep1['catch_trial'] = "repeat"
    repeat_catch_trials_rep2 = repeat_catch_trials_rep1.copy()
    repeat_catch_trials_rep1['rep'] = 1
    repeat_catch_trials_rep2['rep'] = 2
    df_exp = pd.concat([df_exp, gt_catch_trials, repeat_catch_trials_rep1, repeat_catch_trials_rep2])
    
df_exp = df_exp.sample(frac=1).sort_values(by='pID', kind='mergesort')
print(df_exp)
print(len(df_exp[(df_exp['pID'] == 0)]))

105
       pID  experiment               stim1  \
6351     0           3   mi_5_ground_truth   
9723     0           2    905_ground_truth   
15720    0           3  mi_10_ground_truth   
14037    0           3   mi_2_ground_truth   
12451    0           2    291_ground_truth   
...    ...         ...                 ...   
1009   150           2    504_ground_truth   
7256   150           3   mi_4_ground_truth   
5268   150           1    656_ground_truth   
2946   150           3  mi_11_ground_truth   
5110   150           1    577_ground_truth   

                                     stim2  \
6351                  838_subject2_mindeye   
9723              246_subject5_secondsight   
15720        22_subject7_tagaki_low_random   
14037     58_subject7_braindiffuser_random   
12451              24_subject7_secondsight   
...                                    ...   
1009              171_subject1_secondsight   
7256                  466_subject2_mindeye   
5268                  656_sub

In [None]:
version = 4
df_exp.to_csv(f'../experiments/dataframes/experiment_v{version}.csv', index=False)
df_exp_tsv = df_exp[['pID', 'stim1', 'stim2', 'stim3']].copy()
df_exp_tsv.to_csv(f"../experiments/dataframes/experiment_v{version}_meadow_trials.tsv", sep="\t", index=False, header=False) 

# THE FOLLOWING CELLS ARE FOR PROCESSING RESPONSES

In [3]:
response_path = "human_preference_11_23/responses/"
dataframe_path = "human_preference_11_23/dataframes/"
df_experiment = pd.read_csv(dataframe_path + "experiment_v4.csv")
response_version = "8"
df_responses = pd.read_csv(f"{response_path}deployment_v{response_version}.csv")

In [6]:
df_responses.head()
df_trial = pd.DataFrame(columns=["experiment", "stim1", "stim2", "stim3", "sample", "subject", "target_on_left", "iter_count", "is_low", "method", "catch_trial", "rep", "trial_rep", "mode", "picked_left", "participant"])
df_experiment['picked_left'] = None
for index, row in tqdm(df_responses.iterrows()):
    if row['label'] == row['stim2_id']:
        picked_left = True
    elif row['label'] == row['stim3_id']:
        picked_left = False
    else:
        print("Error")
        break
    start_timestamp = row['start']
    end_timestamp = row['resp']
    start = datetime.fromisoformat(start_timestamp.replace("Z", "+00:00"))
    end = datetime.fromisoformat(end_timestamp.replace("Z", "+00:00"))
    # Calculate the difference in seconds
    time_difference_seconds = (end - start).total_seconds()
    
    df_trial.loc[index] = df_experiment[(df_experiment['stim1'] == row['stim1_name']) & (df_experiment['stim2'] == row['stim2_name']) & (df_experiment['stim3'] == row['stim3_name'])].iloc[0]
    if df_trial.loc[index, 'sample'] <= 5 and df_trial.loc[index, 'experiment'] == 3:
        df_trial.loc[index, "stimtype"] = "simple"
    else:
        df_trial.loc[index, "stimtype"] = "complex"
    df_trial.loc[index, 'picked_left'] = picked_left
    df_trial.loc[index, 'participant'] = row['participation']
    df_trial.loc[index, 'response_time'] = time_difference_seconds
    
df_trial["picked_target"] = df_trial["picked_left"] == df_trial["target_on_left"]
print(df_trial)

  df_trial.loc[index] = df_experiment[(df_experiment['stim1'] == row['stim1_name']) & (df_experiment['stim2'] == row['stim2_name']) & (df_experiment['stim3'] == row['stim3_name'])].iloc[0]
  df_trial.loc[index] = df_experiment[(df_experiment['stim1'] == row['stim1_name']) & (df_experiment['stim2'] == row['stim2_name']) & (df_experiment['stim3'] == row['stim3_name'])].iloc[0]
  df_trial.loc[index] = df_experiment[(df_experiment['stim1'] == row['stim1_name']) & (df_experiment['stim2'] == row['stim2_name']) & (df_experiment['stim3'] == row['stim3_name'])].iloc[0]
  df_trial.loc[index] = df_experiment[(df_experiment['stim1'] == row['stim1_name']) & (df_experiment['stim2'] == row['stim2_name']) & (df_experiment['stim3'] == row['stim3_name'])].iloc[0]
  df_trial.loc[index] = df_experiment[(df_experiment['stim1'] == row['stim1_name']) & (df_experiment['stim2'] == row['stim2_name']) & (df_experiment['stim3'] == row['stim3_name'])].iloc[0]
  df_trial.loc[index] = df_experiment[(df_experiment['s

      experiment               stim1                                 stim2  \
0              2    170_ground_truth              170_subject2_secondsight   
1              2    470_ground_truth              393_subject2_secondsight   
2              3   mi_6_ground_truth                  788_subject2_mindeye   
3              2    935_ground_truth              922_subject1_secondsight   
4              1    367_ground_truth                  367_subject1_mindeye   
...          ...                 ...                                   ...   
19029          2    190_ground_truth              190_subject2_secondsight   
19030          3  mi_10_ground_truth  mi_10_subject5_secondsight_imagery_4   
19031          2     14_ground_truth              474_subject7_secondsight   
19032          2    909_ground_truth              909_subject5_secondsight   
19033          3  mi_11_ground_truth         23_subject7_tagaki_low_random   

                                   stim3 sample subject target_




In [9]:
# number of participants
print("Total participants:", len(df_trial["participant"].unique()))

# Remove participants who failed the ground truth catch trial, no tolerance
participants_to_remove_rule1 = df_trial[(df_trial['catch_trial'] == 'ground_truth') & (df_trial['picked_target'] == False)]['participant'].unique()

# Remove participants who failed the repeat catch trial, with a 3 strike allowance
repeat_offenses = df_trial[(df_trial['catch_trial'] == 'repeat') & (df_trial['picked_target'] == False)].groupby('participant').size()
participants_to_remove_rule2 = repeat_offenses[repeat_offenses >= 4].index.tolist()

participants_to_remove = set(participants_to_remove_rule1).union(set(participants_to_remove_rule2))
filtered_df = df_trial[~df_trial['participant'].isin(participants_to_remove)]
print("Clean participants:", len(filtered_df["participant"].unique()))
print(len(df_trial), len(filtered_df))
print(participants_to_remove)
filtered_df.to_csv(f'{dataframe_path}filtered_responses_v{response_version}.csv', index=False)

Total participants: 159
Clean participants: 152
19034 18194
{'natural-panda', 'frank-grubworm', 'modern-mustang', 'wondrous-caiman', 'exact-quetzal', 'famous-monster', 'composed-penguin'}


In [7]:
filtered_df = pd.read_csv(f'{dataframe_path}filtered_responses_v{response_version}.csv')
unique_rows_df = df_experiment.merge(
    filtered_df,
    on=["stim1", "stim2", "stim3"],
    how="left",
    indicator=True
).loc[lambda x: x['_merge'] == 'left_only'].drop(columns='_merge')

# Number of rows in the resulting dataframe
num_rows_unique = unique_rows_df.shape[0]
print("Number of unique rows:", num_rows_unique)

Number of unique rows: 0


In [8]:

df_trial_exp = filtered_df[filtered_df['catch_trial'].isnull()]

print(len(filtered_df), len(df_trial_exp))
# df_trial_exp1 = df_trial_exp[(df_trial_exp['experiment'] == 2)]
# df_trial_exp1 = df_trial[(df_trial['experiment'] == 3) & (df_trial['mode'] == "imagery")]
df_trial_exp1 = df_trial[(df_trial['experiment'] == 3) & (df_trial['method'] == "tagaki") & (df_trial['mode'] == "vision") & (df_trial['is_low'] == False)]
print("Number of experiment trials", len(df_trial_exp1))
print(len(df_trial_exp1[df_trial_exp1["picked_target"]]) / len(df_trial_exp1))

18194 15903
Number of experiment trials 1014
0.571992110453649


In [160]:
df_trial_exp.head(100)

Unnamed: 0,experiment,stim1,stim2,stim3,sample,subject,target_on_left,iter_count,is_low,method,catch_trial,rep,mode,picked_left,participant,picked_target,stimtype
0,2,170_ground_truth,170_subject2_secondsight,263_subject2_secondsight,170,2,True,4.0,False,,,0,nsd_vision,True,artistic-joey,True,
2,3,mi_6_ground_truth,788_subject2_mindeye,mi_6_subject2_mindeye_vision_2,6,2,False,,False,mindeye,,2,vision,False,artistic-joey,True,complex
3,2,935_ground_truth,922_subject1_secondsight,935_subject1_secondsight,935,1,False,3.0,False,,,0,nsd_vision,False,artistic-joey,True,
4,1,367_ground_truth,367_subject1_mindeye,367_subject1_secondsight,367,1,False,3.0,False,,,0,nsd_vision,True,artistic-joey,False,
5,1,419_ground_truth,419_subject7_secondsight,419_subject7_mindeye,419,7,True,2.0,False,,,0,nsd_vision,False,artistic-joey,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,1,682_ground_truth,682_subject5_mindeye,682_subject5_secondsight,682,5,False,4.0,False,,,0,nsd_vision,False,artistic-joey,True,
106,1,169_ground_truth,169_subject7_secondsight,169_subject7_mindeye,169,7,True,1.0,False,,,0,nsd_vision,False,artistic-joey,False,
107,2,628_ground_truth,240_subject1_secondsight,628_subject1_secondsight,628,1,False,3.0,False,,,0,nsd_vision,False,artistic-joey,True,
108,2,551_ground_truth,551_subject1_secondsight,410_subject1_secondsight,551,1,True,5.0,False,,,0,nsd_vision,True,artistic-joey,True,


In [9]:
df_trial_exp.head(100)
for index, row in df_trial_exp[(df_trial_exp['experiment'] == 3)].iterrows():
    print(row['stim1'], row['stim2'], row['stim3'])
    for stim in [row['stim2'], row['stim3']]:
        elements = stim.split("_")
        if "vision" in elements:
            df_trial_exp.at[index, "mode"] = "vision"
        elif "imagery" in elements:
            df_trial_exp.at[index, "mode"] = "imagery"
    sample_id = int(row['stim1'].split("_")[1])
    if sample_id <= 5:
        df_trial_exp.at[index, "stimtype"] = "simple"
    else:
        df_trial_exp.at[index, "stimtype"] = "complex"

print(len(df_trial_exp[(df_trial_exp['experiment'] == 3)]))
print(len(df_trial_exp[(df_trial_exp['experiment'] == 3) & (df_trial_exp['mode'] == "vision")]))
print(len(df_trial_exp[(df_trial_exp['experiment'] == 3) & (df_trial_exp['mode'] == "imagery")]))
print(len(df_trial_exp[(df_trial_exp['experiment'] == 3) & (df_trial_exp['stimtype'] == "simple")]))
print(len(df_trial_exp[(df_trial_exp['experiment'] == 3) & (df_trial_exp['stimtype'] == "complex")]))
df_trial_exp.to_csv(f'{dataframe_path}responses_v{response_version}_clean.csv', index=False)

mi_6_ground_truth 788_subject2_mindeye mi_6_subject2_mindeye_vision_2
mi_11_ground_truth mi_11_subject2_secondsight_vision_3 433_subject2_secondsight
mi_4_ground_truth mi_4_subject2_secondsight_imagery_1 226_subject2_secondsight
mi_1_ground_truth 265_subject5_tagaki_random mi_1_subject5_tagaki_imagery_1
mi_3_ground_truth mi_3_subject1_tagaki_imagery_4 318_subject1_tagaki_random
mi_11_ground_truth 224_subject7_tagaki_random mi_11_subject7_tagaki_vision_1
mi_4_ground_truth 693_subject1_secondsight mi_4_subject1_secondsight_imagery_3
mi_3_ground_truth mi_3_subject1_tagaki_vision_0 61_subject1_tagaki_random
mi_6_ground_truth 136_subject7_tagaki_random mi_6_subject7_tagaki_vision_4
mi_8_ground_truth 599_subject5_mindeye mi_8_subject5_mindeye_imagery_0
mi_2_ground_truth mi_2_subject7_braindiffuser_imagery_0 280_subject7_braindiffuser_random
mi_5_ground_truth 111_subject7_tagaki_random mi_5_subject7_tagaki_vision_2
mi_0_ground_truth mi_0_subject1_mindeye_imagery_4 897_subject1_mindeye
mi_1_gr

In [48]:
import pandas as pd
import json
from collections import defaultdict

# Load the JSON data again
file_path = f'{response_path}deployment_v{response_version}_tree.json'

with open(file_path, 'r') as file:
    data = json.load(file)

# Function to determine Email Consent value
def get_email_consent(value):
    if isinstance(value, str):
        if "yes" in value.lower():
            return True
        elif "no" in value.lower():
            return False
    return False  # Default to False if not explicitly "yes"

# Dictionary to track unique participants and number of sessions
participant_sessions = defaultdict(int)
participant_email_consent = defaultdict(bool)

# List to store data for CSV
csv_data = []

# Iterate over the high-level session entries
for participant_id, session_data in data.items():
    tasks = session_data.get('tasks', [])
    # Ensure all tasks have "finished" status
    if all(task.get('status') == 'finished' for task in tasks):
        # Select the 3rd task data
        third_task_data = tasks[2] if len(tasks) > 2 else None
        if third_task_data:
            # Construct a unique key from first and last name
            first_name = third_task_data.get('First Name', '').strip()
            last_name = third_task_data.get('Last Name', '').strip()
            participant_key = f"{first_name} {last_name}"
            # Check if participant is a duplicate by name
            if participant_sessions[participant_key] == 0:
                # If not a duplicate, add to CSV data
                csv_data.append({
                    "participant": participant_id,
                    "Age": third_task_data.get('age', ''),
                    "First Name": third_task_data.get('First Name', ''),
                    "Last Name": third_task_data.get('Last Name', ''),
                    "Phone Number": third_task_data.get('Phone Number', ''),
                    "Email Address": third_task_data.get('Email Address', ''),
                    "Full Mailing Address": third_task_data.get('Full Mailing Address', ''),
                    "Email Consent": get_email_consent(third_task_data.get('Would you like a copy of the consent form emailed to you? (the form on the previous page)', '')),
                    "num_sessions": 1
                })
            # Increment session count for this participant
            participant_sessions[participant_key] += 1
            participant_email_consent[participant_key] = participant_email_consent[participant_key] or get_email_consent(third_task_data.get('Would you like a copy of the consent form emailed to you? (the form on the previous page)', ''))

# Update the num_sessions for duplicates
for row in csv_data:
    name_key = f"{row['First Name']} {row['Last Name']}"
    row["num_sessions"] = participant_sessions[name_key]

# Create a DataFrame and write the CSV data
csv_df = pd.DataFrame(csv_data)
csv_df["Consented"] = True
csv_file_path = f'{response_path}participant_info_v{response_version}.csv'
csv_df.to_csv(csv_file_path, index=False)

In [49]:
print(csv_df['num_sessions'].sum())
print(csv_df['num_sessions'].value_counts())

159
1     22
6      9
2      8
3      7
4      3
7      2
5      2
10     1
Name: num_sessions, dtype: int64
