In [2]:
import pandas as pd
from scipy.stats import pearsonr
from utils import agg_remove_nans
import statsmodels.api as sm 
from statsmodels.formula.api import ols

In [3]:
# Readable names for the raw question-ID columns of the survey export.
cols_change = {
    'QID21': 'prolific_id',
    'QID25': 'h_consent',
    'QID22': 'l_consent',
}

# Load the responses, apply the readable names, and drop the rows labelled 0 and 1
# (presumably extra header/metadata rows of the export -- TODO confirm) before
# renumbering the remaining rows from 0.
df = (
    pd.read_csv('results/human_exp/motivation-values.csv')
    .rename(columns=cols_change)
    .drop(index=[0, 1])
    .reset_index(drop=True)
)

In [4]:
# Build `df_all`: for every subset, the per-item ratings of both conditions
# (Human / LLM) side by side, joined with the task metadata and the model score.
# The subsets are then stacked on top of each other.

# Column of the merged results that holds the model score; it is exposed as 'all' below.
score_col = 'motivation_score--all--none'

# The model results do not depend on the subset or the condition, so they are loaded
# and aggregated once instead of once per inner-loop iteration.
df_model = pd.read_csv('results/merged/all.csv')
df_model = agg_remove_nans(df_model, [score_col])
model_scores = df_model[['sub_task_id', score_col]]

df_all = []
for subset in ['1', '2', '3', '4']:
    # The task file is shared by both conditions of a subset: read it once per subset.
    df_tasks = pd.read_csv(f'data/human_exp/tasks_{subset}.csv')

    df_subset = []
    for cond in ['Human', 'LLM']:
        # get the subset of the df for the given condition and subset, and keep only the question columns
        df_cond_subset = df[(df['set'] == subset) & (df['target'] == cond)]
        df_cond_subset = df_cond_subset.drop(columns=['set', 'target']).dropna(axis=1)
        question_cols = [f'motivation tasks-{cond.lower()[0]}{subset}_{i}' for i in range(1, 16)]
        df_cond_subset = df_cond_subset[question_cols]
        # after the transpose: one row per question item, one column per matching response row
        df_cond_subset = df_cond_subset.astype(float).T

        df_cond_subset.columns = [f'HP_{i}_{cond}' for i in range(1, df_cond_subset.shape[1] + 1)]
        df_cond_subset[f'HP_mean_{cond}'] = df_cond_subset.mean(axis=1)  # row-wise average over the response columns

        # Align the task rows positionally with the question rows (raises if the row
        # counts differ), then put the task columns next to the ratings.
        df_tasks.index = df_cond_subset.index
        df_cond_subset = pd.concat([df_cond_subset, df_tasks], axis=1)

        # Attach the model score by sub_task_id; the left join keeps every task row.
        df_cond_subset = df_cond_subset.merge(
            model_scores, on=['sub_task_id'], how='left'
        ).rename(columns={score_col: 'all'})
        df_subset.append(df_cond_subset)

    # have all the columns (and values) from both conditions in one df df_subset. drop columns that appear in both dfs
    df_subset = pd.concat(df_subset, axis=1)
    df_subset = df_subset.loc[:, ~df_subset.columns.duplicated()]
    df_all.append(df_subset)

df_all = pd.concat(df_all).reset_index(drop=True)  # this concats all subsets one on top of the other

In [5]:
# Pearson correlation for every ordered pair of distinct metrics: the two HP_mean_*
# columns and the merged model score ('all'). Both orderings of each pair are
# evaluated, so every (symmetric) result is printed twice.
metrics = ['HP_mean_Human', 'HP_mean_LLM', 'all']
ordered_pairs = [(first, second) for first in metrics for second in metrics if first != second]
for first, second in ordered_pairs:
    # keep only the rows where both metrics are present
    complete_rows = df_all[[first, second]].dropna()
    r, p = pearsonr(complete_rows[first], complete_rows[second])
    is_significant = p < 0.005
    print(f'Correlation between {first} and {second}: r={r:.2f}, p={p:.5f}, significant: {is_significant}')

Correlation between HP_mean_Human and HP_mean_LLM: r=-0.13, p=0.30958, significant: False
Correlation between HP_mean_Human and all: r=0.47, p=0.00013, significant: True
Correlation between HP_mean_LLM and HP_mean_Human: r=-0.13, p=0.30958, significant: False
Correlation between HP_mean_LLM and all: r=0.39, p=0.00203, significant: True
Correlation between all and HP_mean_Human: r=0.47, p=0.00013, significant: True
Correlation between all and HP_mean_LLM: r=0.39, p=0.00203, significant: True


In [6]:
# Ordinary least squares with motivation_score as the dependent variable and the two
# HP_mean_* columns (HP_mean_Human, HP_mean_LLM) as additive explanatory variables.
# NOTE(review): the dependent variable is the `motivation_score` column of df_all, not
# the merged `all` column used in the correlation cell -- confirm this is intended.
formula_additive = 'motivation_score ~ HP_mean_Human + HP_mean_LLM'
reg_model = ols(formula_additive, data=df_all).fit()

# Type-2 ANOVA table of the fitted model; it is stored but not displayed in this cell.
anova_table = sm.stats.anova_lm(reg_model, typ=2)

print(reg_model.summary())

                            OLS Regression Results                            
Dep. Variable:       motivation_score   R-squared:                       0.434
Model:                            OLS   Adj. R-squared:                  0.414
Method:                 Least Squares   F-statistic:                     21.85
Date:                Mon, 23 Feb 2026   Prob (F-statistic):           9.05e-08
Time:                        20:49:54   Log-Likelihood:                -266.36
No. Observations:                  60   AIC:                             538.7
Df Residuals:                      57   BIC:                             545.0
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
Intercept       -40.5125     13.660     -2.966

In [7]:
# Same dependent variable and predictors as the additive regression, but the `*` in the
# formula also includes the HP_mean_Human x HP_mean_LLM interaction term.
formula_interaction = 'motivation_score ~ HP_mean_Human * HP_mean_LLM'
model_interaction = ols(formula_interaction, data=df_all).fit()

# Type-2 ANOVA table of the interaction model; it is stored but not displayed in this cell.
anova_table_interaction = sm.stats.anova_lm(model_interaction, typ=2)

print(model_interaction.summary())

                            OLS Regression Results                            
Dep. Variable:       motivation_score   R-squared:                       0.446
Model:                            OLS   Adj. R-squared:                  0.416
Method:                 Least Squares   F-statistic:                     15.00
Date:                Mon, 23 Feb 2026   Prob (F-statistic):           2.77e-07
Time:                        20:49:54   Log-Likelihood:                -265.74
No. Observations:                  60   AIC:                             539.5
Df Residuals:                      56   BIC:                             547.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
Intercept             