In [4]:

import os
import pandas as pd
import numpy as np
import ast
import csv

topdir = '/Users/sm6511/Desktop/Prediction-Accomodation-Exp'
study = 'Study2.0'
studyn = 220
dates = [
    '2026-02-07',
    '2026-02-09'
]
datadir = os.path.join(topdir, f'data/{study}/Predict')
datadir2 = os.path.join(topdir, f'data/{study}/Accommodate')


#Determine whether attention check was correct
def extract_basic_info(csv_path, condition):
    df = pd.read_csv(csv_path)
    df.columns = df.columns.str.strip()

    if condition == 'accommodate':
        att_col = 'button_3_correct.numClicks'
    if condition == 'predict':
        att_col = 'answer_3_right.numClicks'
    if att_col in df.columns:
        #print(f"Found attention check column in {os.path.basename(csv_path)}")
        att_rows = df[df[att_col].notna()]
        if not att_rows.empty:
            att_val = att_rows.iloc[0][att_col]
        else:
            att_val = np.nan
    else:
        att_val = np.nan  # column never existed for this participant
    if att_col not in df.columns:
        print(f"Missing attention check column in {os.path.basename(csv_path)}") #If the column doesn't exist, warn me as the data must be incomplete


    participant_id = df['participant_id'].dropna().iloc[0]

    return {
        'condition': condition,
        'condition_order': participant_id,
        'attention_check': att_val
    }


all_participants = []
#Repeat for predict/accommodate
for fname in os.listdir(datadir):
    if fname.endswith('.csv') and fname:
        participant_id = fname[:3]
        if not any(d in fname for d in dates):
            continue
        csv_path = os.path.join(datadir, fname)
        #print(csv_path)
        info = extract_basic_info(csv_path, condition='predict')
        all_participants.append(info)

for fname in os.listdir(datadir2):
    if fname.endswith('.csv') and fname:
        participant_id = fname[:3]
        if not any(d in fname for d in dates):
            continue
        csv_path = os.path.join(datadir2, fname)
        #print(csv_path)
        info = extract_basic_info(csv_path, condition='accommodate')
        all_participants.append(info)


#Combine and look at who failed the attention check
df_all = pd.DataFrame(all_participants)
print(df_all)
df_all['attention_check'] = pd.to_numeric(
    df_all['attention_check'],
    errors='coerce'
)
failed = df_all[df_all['attention_check'] == 0]

for cond in ['predict', 'accommodate']:
    failed_ids = failed.loc[
        failed['condition'] == cond,
        'condition_order'
    ].tolist()

    print(f"\n{cond.upper()} – failed attention check:")
    print(len(failed_ids))
    print(failed_ids if failed_ids else "None")



       condition  condition_order  attention_check
0        predict            215.0              1.0
1        predict             68.0              1.0
2        predict             12.0              1.0
3        predict            214.0              1.0
4        predict            197.0              1.0
..           ...              ...              ...
413  accommodate            148.0              1.0
414  accommodate            188.0              1.0
415  accommodate             22.0              1.0
416  accommodate            178.0              1.0
417  accommodate            125.0              1.0

[418 rows x 3 columns]

PREDICT – failed attention check:
0
None

ACCOMMODATE – failed attention check:
0
None


In [17]:
def delete_failed_files(failed_ids, datadir, dates):
    deleted = []

    for fname in os.listdir(datadir):
        if not fname.endswith('.csv'):
            continue
        if not any(d in fname for d in dates):
            continue

        participant_id = fname[:3].strip()

        if participant_id in failed_ids:
            path = os.path.join(datadir, fname)
            print("DELETING:", path)
            os.remove(path)
            deleted.append(fname)

    return deleted


for cond, directory in [
    ('predict', datadir),
    ('accommodate', datadir2)
]:
    failed_ids = (
        failed.loc[failed['condition'] == cond, 'condition_order']
        .astype(int)
        .astype(str)
        .str.zfill(3)
        .tolist()
    )

    print(f"\n{cond.upper()} failed IDs:", failed_ids)

    deleted_files = delete_failed_files(failed_ids, directory, dates)

    print(f"{cond.upper()} – deleted files:")
    print(len(deleted_files))
    print(deleted_files if deleted_files else "None")



PREDICT failed IDs: ['091', '149', '033', '208', '191', '016', '017', '080', '156', '152', '023', '025', '056', '113', '104', '061', '186', '049', '171', '207', '188', '118', '147', '094', '182', '214', '192', '149', '161', '087', '053', '065', '148', '162', '066', '096', '019', '082', '146', '030']
DELETING: /Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study2.0/Predict/091_test_2026-02-07_10h54.32.507.csv
DELETING: /Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study2.0/Predict/149_test_2026-02-07_12h18.15.146.csv
DELETING: /Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study2.0/Predict/033_test_2026-02-07_10h47.10.696.csv
DELETING: /Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study2.0/Predict/208_test_2026-02-07_10h07.42.991.csv
DELETING: /Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study2.0/Predict/191_test_2026-02-07_11h06.43.750.csv
DELETING: /Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study2.0/Predict/016_test_2026-02-07_0

In [5]:
'''Check for duplicate or missing entries'''

observed_ids = (
    df_all['condition_order']
    .dropna()
    .astype(float)
    .astype(int)
)

expected_ids = set(range(1, studyn + 1))
observed_ids_set = set(observed_ids)
missing_ids = sorted(expected_ids - observed_ids_set)

# Print as 3-digit IDs
missing_ids_str = [f"{i:03d}" for i in missing_ids]

print("\nMISSING CONDITION ORDERS (001–220):")
print(missing_ids_str if missing_ids_str else "None ")

expected_ids = set(range(1, studyn + 1))

print("\nMISSING CONDITION ORDERS (within each condition):")
for cond in df_all['condition'].unique():
    cond_orders = (
        df_all[df_all['condition'] == cond]['condition_order']
        .dropna()
        .astype(int)
    )
    
    missing = sorted(expected_ids - set(cond_orders))
    missing_str = [f"{i:03d}" for i in missing]
    
    if missing:
        print(f"{cond.upper()}: {missing_str}")
    else:
        print(f"{cond.upper()}: None")

#crete dataframe of missing rows

missing_rows = []

expected_ids = set(range(1, studyn + 1))

for cond in df_all['condition'].unique():
    cond_orders = (
        df_all[df_all['condition'] == cond]['condition_order']
        .dropna()
        .astype(int)
    )

    missing = expected_ids - set(cond_orders)

    for mid in missing:
        missing_rows.append({
            'condition': cond,
            'condition_order': mid,
            'attention_check': np.nan  # not applicable
        })

missing_df = pd.DataFrame(missing_rows)
# Check duplicates **within each condition**
print("\nDUPLICATE CONDITION ORDERS (within each condition):")
for cond in df_all['condition'].unique():
    cond_duplicates = (
        df_all[df_all['condition'] == cond]
        .duplicated(subset='condition_order', keep=False)
    )
    dup_df = df_all[(df_all['condition'] == cond) & cond_duplicates]
    
    if dup_df.empty:
        print(f"{cond.upper()}: None")
    else:
        print(f"{cond.upper()}:")
        print(dup_df[['condition_order', 'condition']].sort_values('condition_order'))



MISSING CONDITION ORDERS (001–220):
['002', '010', '013', '023', '049', '087', '094', '113', '182', '186', '210']

MISSING CONDITION ORDERS (within each condition):
PREDICT: ['002', '010', '013', '023', '049', '087', '094', '113', '182', '186', '210']
ACCOMMODATE: ['002', '010', '013', '023', '049', '087', '094', '113', '182', '186', '210']

DUPLICATE CONDITION ORDERS (within each condition):
PREDICT: None
ACCOMMODATE: None


In [3]:
import csv

def generate_failed_participants_csv_by_condition(
    failed_df,
    output_file="failed_participants_by_condition.csv",
    url_map=None,
    pad=3
):
    """
    Generate a CSV with Pavlovia links for failed attention check participants,
    one link per participant per condition.

    Parameters
    ----------
    failed_df : pd.DataFrame
        Must have columns: ['condition_order', 'condition']
    output_file : str
        Name of the CSV file to save
    url_map : dict
        Mapping from condition name to URL prefix
        {'predict': 'https://run.pavlovia.org/montesinos7/test?condition=',
               'accommodate': 'https://run.pavlovia.org/montesinos7/explain2?condition='}
    pad : int
        Number of digits to pad participant IDs (e.g., 3 -> '001')
    """
    if url_map is None:
        url_map = {
            'predict': 'https://run.pavlovia.org/montesinos7/predict2?condition=',
            'accommodate': 'https://run.pavlovia.org/montesinos7/accommodate2?condition='
        }

    with open(output_file, "w", newline="") as f:
        writer = csv.writer(f)
        for _, row in failed_df.iterrows():
            pid_padded = str(int(float(row['condition_order']))).zfill(pad)
            url = url_map.get(row['condition'], None)
            if url:
                writer.writerow([url + pid_padded])

    print(f"Saved {len(failed_df)} URLs to {output_file}")



#Combine missing runs and failed runs
failed_plus_missing = pd.concat(
    [failed[['condition', 'condition_order']], 
     missing_df[['condition', 'condition_order']]],
    ignore_index=True
)


# Failed attention check DataFrame:
failed_plus_missing['condition_order'] = failed_plus_missing['condition_order'].astype(float)  # just in case
generate_failed_participants_csv_by_condition(
    failed_plus_missing,
    output_file="/Users/sm6511/Desktop/Prediction-Accomodation-Exp/ConditionFiles-Prolific/failed_participants_combined.csv"
)



Saved 82 URLs to /Users/sm6511/Desktop/Prediction-Accomodation-Exp/ConditionFiles-Prolific/failed_participants_combined.csv
