In [1]:

import os
import pandas as pd
import ast

topdir = '/Users/sm6511/Desktop/Prediction-Accomodation-Exp'
study = 'Study1.0'
date = '2026-01-12'
datadir = os.path.join(topdir, f'data/{study}/Predict')
datadir2 = os.path.join(topdir, f'data/{study}/Accommodate')

def extract_basic_info(csv_path):
    df = pd.read_csv(csv_path)

    # Participant ratings
    ratings_series = df['sliderRatings'].dropna()
    if len(ratings_series) != 1:
        raise ValueError(f"Should only be one slider rating in {csv_path}")

    # Parse the string representation of the list into an actual list
    slider_ratings = ast.literal_eval(ratings_series.iloc[0])

    cats_raw = (
        df['testing_categories']
        .dropna()
        .astype(str)
    )

    # Take the first entry
    testing_categories = cats_raw.iloc[0].split(',')

    #strip whitespace
    testing_categories = [c.strip() for c in testing_categories]

    participant_id = df['participant_id'].dropna().iloc[0]

    return {
        'slider_ratings': slider_ratings,
        'testing_categories': testing_categories,
        'condition_order': participant_id      
    }



all_participants = []

#Repeat for predict/accommodate
for fname in os.listdir(datadir):
    if fname.endswith('.csv') and fname:
        participant_id = fname[:3]
        if date not in fname:  
            continue
        csv_path = os.path.join(datadir, fname)
        print(csv_path)
        info = extract_basic_info(csv_path)
        all_participants.append(info)

for fname in os.listdir(datadir2):
    if fname.endswith('.csv') and fname:
        participant_id = fname[:3]
        if date not in fname:  
            continue
        csv_path = os.path.join(datadir2, fname)
        print(csv_path)
        info = extract_basic_info(csv_path)
        all_participants.append(info)



/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/100_test_2026-01-12_14h37.18.674.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/060_test_2026-01-12_14h34.41.813.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/033_test_2026-01-12_14h32.25.700.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/119_test_2026-01-12_14h39.08.922.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/020_test_2026-01-12_13h32.25.702.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/067_test_2026-01-12_11h48.14.374.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/069_test_2026-01-12_14h36.06.918.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/090_test_2026-01-12_14h36.31.556.csv
/Users/sm6511/Desktop/Prediction-Accomodation-Exp/data/Study1.0/Predict/022_test_2026-01-12_13h31.58.345.csv
/Users/sm6511/Deskt

In [2]:
print(all_participants)

[{'slider_ratings': [5, 3, 3, 5, 3, 5, 5, 4], 'testing_categories': ['high', 'low', 'low', 'medium', 'medium', 'medium', 'high', 'medium'], 'condition_order': np.float64(100.0)}, {'slider_ratings': [5, 4, 6, 4, 4, 3, 7, 7], 'testing_categories': ['medium', 'low', 'medium', 'medium', 'low', 'medium', 'high', 'high'], 'condition_order': np.float64(60.0)}, {'slider_ratings': [2, 6, 7, 5, 6, 4, 3, 6], 'testing_categories': ['low', 'high', 'high', 'medium', 'medium', 'low', 'medium', 'medium'], 'condition_order': np.float64(33.0)}, {'slider_ratings': [7, 7, 6, 10, 3, 5, 3, 6], 'testing_categories': ['high', 'medium', 'low', 'high', 'medium', 'medium', 'low', 'medium'], 'condition_order': np.float64(119.0)}, {'slider_ratings': [6, 7, 4, 6, 3, 4, 5, 6], 'testing_categories': ['medium', 'high', 'low', 'medium', 'low', 'medium', 'medium', 'high'], 'condition_order': np.float64(20.0)}, {'slider_ratings': [8, 8, 4, 7, 7, 6, 4, 4], 'testing_categories': ['high', 'high', 'medium', 'medium', 'medium

In [4]:
from scipy.stats import spearmanr
import numpy as np

cat_map = {'low': 1, 'medium': 2, 'high': 3}

rows = []

for pid, p in enumerate(all_participants):
    ratings = p['slider_ratings']
    cats = p['testing_categories']
    condition_order = int(p['condition_order'])
    # sanity check that lengths match
    assert len(ratings) == len(cats)

    # Correlate 
    cat_nums = [cat_map[c] for c in cats]
    rho, pval = spearmanr(cat_nums, ratings)

    # Mean rating per category
    df_tmp = pd.DataFrame({
        'rating': ratings,
        'category': cats
    })

    means = df_tmp.groupby('category')['rating'].mean()

    rows.append({
        'participant': condition_order,
        'spearman_rho': rho,
        'p_value': pval,
        'mean_low': means.get('low', np.nan),
        'mean_medium': means.get('medium', np.nan),
        'mean_high': means.get('high', np.nan)
    })

df_summary = pd.DataFrame(rows)
df_summary = df_summary.sort_values(by='participant').reset_index(drop=True)

print(df_summary.head(20))



    participant  spearman_rho   p_value  mean_low  mean_medium  mean_high
0             1      0.853766  0.006986       3.0         5.75        8.0
1             1      0.480384  0.228258       4.5         5.00        6.5
2             2      0.596668  0.118415       4.5         6.25        7.0
3             2      0.931381  0.000767       2.0         5.25        8.0
4             3      0.517112  0.189396       4.0         5.00        6.0
5             3      0.632456  0.092426       5.0         6.25        7.5
6             4      0.755779  0.030071       4.5         5.75        7.5
7             4      0.746390  0.033416       3.0         5.25        7.5
8             5      0.546608  0.160956       3.5         4.75        6.0
9             5      0.755779  0.030071       3.0         5.00        6.5
10            6      0.785674  0.020826       4.5         5.25        8.0
11            6      0.780869  0.022172       2.0         6.75        7.5
12            7      0.560449  0.14847

  rho, pval = spearmanr(cat_nums, ratings)
  rho, pval = spearmanr(cat_nums, ratings)


In [5]:
group_means = (
    df_summary[['mean_low', 'mean_medium', 'mean_high']]
    .agg(['mean', 'sem'])
)

print(group_means)


      mean_low  mean_medium  mean_high
mean  4.046823     5.232441   6.357860
sem   0.075310     0.044770   0.076994
