In [None]:

import os
import pandas as pd
import ast

# Root folder of the experiment.
# NOTE(review): this is an absolute, machine-specific path; it has to be edited
# before the notebook can run from any other location.
topdir = '/Users/sm6511/Desktop/Prediction-Accomodation-Exp'
# Study folder name under data/, and the date string used further down to
# select which CSV files to load (files whose name lacks it are skipped).
study = 'PrePilot4.0'
date = '2026-01-06'
# The two data directories that are scanned for participant CSV files.
datadir = os.path.join(topdir, f'data/{study}/Predict')
datadir2 = os.path.join(topdir, f'data/{study}/Accommodate')

def extract_basic_info(csv_path):
    """Read one participant CSV and pull out its ratings, categories and ID.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file with 'sliderRatings', 'testing_categories' and
        'participant_id' columns.

    Returns
    -------
    dict
        'slider_ratings'     : the single non-null 'sliderRatings' cell,
                               parsed with ``ast.literal_eval``.
        'testing_categories' : list of str obtained by splitting the first
                               non-null 'testing_categories' cell on commas
                               and stripping whitespace.
        'condition_order'    : the first non-null 'participant_id' value.

    Raises
    ------
    ValueError
        If the file does not contain exactly one non-null 'sliderRatings'
        cell, or has no non-null 'testing_categories' / 'participant_id'
        value.
    KeyError
        If one of the expected columns is missing (raised by pandas).
    """
    df = pd.read_csv(csv_path)

    # Participant ratings: exactly one cell of the column may be filled in.
    ratings_series = df['sliderRatings'].dropna()
    if len(ratings_series) != 1:
        raise ValueError(
            f"Expected exactly one slider rating in {csv_path}, "
            f"found {len(ratings_series)}"
        )

    # The cell holds the string representation of a list; literal_eval turns
    # it into a Python object without executing arbitrary code.
    slider_ratings = ast.literal_eval(ratings_series.iloc[0])

    cats_raw = df['testing_categories'].dropna().astype(str)
    if cats_raw.empty:
        # Without this guard .iloc[0] fails with an IndexError that does not
        # say which file is at fault.
        raise ValueError(f"No testing_categories entry in {csv_path}")

    # Only the first entry is used; it is a comma-separated list of labels.
    testing_categories = [c.strip() for c in cats_raw.iloc[0].split(',')]

    ids = df['participant_id'].dropna()
    if ids.empty:
        raise ValueError(f"No participant_id entry in {csv_path}")
    participant_id = ids.iloc[0]

    return {
        'slider_ratings': slider_ratings,
        'testing_categories': testing_categories,
        # NOTE(review): the key says 'condition_order' but the value is the
        # participant_id column; the key is kept because later cells read it.
        'condition_order': participant_id,
    }



all_participants = []

# The Predict and Accommodate directories are filtered and parsed in exactly
# the same way, so one loop handles both (Predict first, then Accommodate).
for data_dir in (datadir, datadir2):
    # os.listdir returns names in arbitrary order; sorting makes the load
    # order (and therefore all_participants) reproducible between runs.
    for fname in sorted(os.listdir(data_dir)):
        # Keep only CSV files whose name contains the selected date.
        if not fname.endswith('.csv') or date not in fname:
            continue
        csv_path = os.path.join(data_dir, fname)
        print(csv_path)
        all_participants.append(extract_basic_info(csv_path))



In [33]:
# Inspect the list of per-file dicts collected by the loading loop.
print(all_participants)

[{'slider_ratings': [7, 8, 8, 3, 5, 4, 5, 5], 'testing_categories': ['medium', 'high', 'high', 'medium', 'medium', 'low', 'medium', 'low'], 'condition_order': np.float64(15.0)}, {'slider_ratings': [7, 5, 4, 3, 3, 4, 4, 3], 'testing_categories': ['medium', 'high', 'high', 'medium', 'low', 'medium', 'medium', 'low'], 'condition_order': np.float64(3.0)}, {'slider_ratings': [9, 6, 8, 7, 6, 7, 7, 9], 'testing_categories': ['medium', 'medium', 'low', 'high', 'low', 'medium', 'medium', 'high'], 'condition_order': np.float64(19.0)}, {'slider_ratings': [4, 7, 4, 5, 3, 4, 5, 3], 'testing_categories': ['medium', 'low', 'high', 'high', 'low', 'medium', 'medium', 'medium'], 'condition_order': np.float64(1.0)}, {'slider_ratings': [4, 8, 9, 7, 3, 5, 8, 4], 'testing_categories': ['low', 'high', 'high', 'medium', 'low', 'medium', 'medium', 'medium'], 'condition_order': np.float64(7.0)}, {'slider_ratings': [4, 4, 4, 6, 4, 6, 6, 6], 'testing_categories': ['high', 'medium', 'low', 'low', 'medium', 'high',

In [34]:
from scipy.stats import spearmanr
import numpy as np

# Ordinal coding of the category labels, used for the rank correlation.
cat_map = {'low': 1, 'medium': 2, 'high': 3}

# Fixed column order; also lets an empty input produce an empty but
# well-formed summary instead of failing at sort_values.
summary_columns = [
    'participant', 'spearman_rho', 'p_value',
    'mean_low', 'mean_medium', 'mean_high',
]

rows = []

for p in all_participants:
    ratings = p['slider_ratings']
    cats = p['testing_categories']
    # The record stores the ID under 'condition_order'; it may be a float
    # (e.g. 15.0), so cast it to a plain int for display.
    condition_order = int(p['condition_order'])

    # Sanity check: one category label per rating.
    assert len(ratings) == len(cats), (
        f"participant {condition_order}: {len(ratings)} ratings "
        f"but {len(cats)} categories"
    )

    # Spearman rank correlation between category level and rating.
    # An unknown label raises KeyError here.
    cat_nums = [cat_map[c] for c in cats]
    rho, pval = spearmanr(cat_nums, ratings)

    # Mean rating per category
    df_tmp = pd.DataFrame({
        'rating': ratings,
        'category': cats
    })
    means = df_tmp.groupby('category')['rating'].mean()

    rows.append({
        'participant': condition_order,
        'spearman_rho': rho,
        'p_value': pval,
        # .get() yields NaN when a category never occurred for this record.
        'mean_low': means.get('low', np.nan),
        'mean_medium': means.get('medium', np.nan),
        'mean_high': means.get('high', np.nan)
    })

df_summary = pd.DataFrame(rows, columns=summary_columns)
# NOTE(review): records are pooled from both data directories, so the same
# 'participant' value can occur in more than one row and no column tells the
# rows apart. A stable sort at least keeps tied rows in load order.
df_summary = (
    df_summary
    .sort_values(by='participant', kind='mergesort')
    .reset_index(drop=True)
)

print(df_summary)



    participant  spearman_rho   p_value  mean_low  mean_medium  mean_high
0             1      0.080064  0.850520       5.0         4.00        4.5
1             1      0.920737  0.001172       2.5         3.75        7.0
2             2      0.780869  0.022172       3.0         4.50        8.0
3             2      0.624695  0.097750       4.5         5.25        7.5
4             3      0.648886  0.081719       3.0         4.50        4.5
5             3      0.931381  0.000767       2.5         5.75        8.5
6             4      0.510688  0.195920       4.0         4.75        6.0
7             4      0.948683  0.000325       4.0         5.50        8.0
8             5      0.755779  0.030071       3.5         5.25        7.5
9             5      0.507565  0.199133       4.5         5.25        8.0
10            6      0.954669  0.000225       3.5         5.25        7.0
11            6      0.039778  0.925495       5.5         7.00        6.5
12            7     -0.244949  0.55876

In [35]:
# Column-wise mean and standard error of the mean (pandas 'sem') of the
# per-row category means, taken over every row of df_summary.
group_means = df_summary.loc[:, ['mean_low', 'mean_medium', 'mean_high']].aggregate(['mean', 'sem'])

print(group_means)


      mean_low  mean_medium  mean_high
mean    3.8125      5.15625   6.487500
sem     0.1875      0.13358   0.215198
