In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import numpy as np

# Load the dataset from the given directory
file_path = "/work/student_folders/markus_folder/data/merged_logfile_145_A.csv"
df_145_A = pd.read_csv(file_path)

# Calculate mean reaction time per participant
df_145_A['mean_rt_participant'] = df_145_A['rt_select'].mean()

df_145_A

# Step 1: Fit the linear regression model with log_rt_select as the outcome,
# and move_count, move_duration, and mean_rt_participant as predictors
model = smf.mixedlm("rt_select ~ n_moves", data=df_145_A, groups=df_145_A["mean_rt_participant"], re_formula="~1")
result = model.fit()
# Print the summary of the regression model
print(result.summary())

# Get the residuals from the model
df_145_A['residuals'] = result.resid

# Check residuals
print(df_145_A[['session', 'residuals']].head())

# Step 2: Adjust residuals so that the lowest residual becomes 0
min_residual = df_145_A['residuals'].min()  # Find the minimum residual
df_145_A['adjusted_residuals'] = df_145_A['residuals'] - min_residual  # Subtract the minimum from all residuals

# Optionally, save results
#df_145.to_csv("/work/student_folders/markus_folder/df_1_with_residuals.csv", index=False)


         Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: rt_select
No. Observations: 160     Method:             REML     
No. Groups:       1       Scale:              0.0780   
Min. group size:  160     Log-Likelihood:     -28.0358 
Max. group size:  160     Converged:          Yes      
Mean group size:  160.0                                
-------------------------------------------------------
          Coef.   Std.Err.     z    P>|z| [0.025 0.975]
-------------------------------------------------------
Intercept 0.599        0.282  2.122 0.034  0.046  1.153
n_moves   0.216        0.017 12.753 0.000  0.183  0.249
Group Var 0.078 10020548.694                           

   session  residuals
0        1   0.926398
1        1   0.401146
2        1   0.294835
3        1  -0.016266
4        1   0.108533


In [3]:
df_145_A[0:160]

Unnamed: 0,rt_select,condition,dur_decks,total,frames_before_decks,no,t_select,loss,onset_win,frames_after_decks,...,t_moves,n_moves,ID,dur_measured_decks,session,exp_type,section,mean_rt_participant,residuals,adjusted_residuals
0,2.173932,iowa_exp,600,100,72,1,3.172599,0,3.572519,48,...,"[1.7137115000077756, 2.07213810000394, 2.65573...",3,,,1,"['fMRI', 'EEG', 'MEG']",A,1.055818,0.926398,1.375367
1,1.216645,iowa_exp,600,-50,72,2,7.408175,-250,7.807490,48,...,[6.6822704000078375],1,,,1,"['fMRI', 'EEG', 'MEG']",A,1.055818,0.401146,0.850115
2,1.758387,iowa_exp,600,0,72,3,12.243241,0,12.642702,48,...,"[11.017321799998172, 11.33414890000131, 11.575...",4,,,1,"['fMRI', 'EEG', 'MEG']",A,1.055818,0.294835,0.743805
3,0.799233,iowa_exp,600,100,72,4,16.052653,0,16.452517,48,...,[15.560789099996327],1,,,1,"['fMRI', 'EEG', 'MEG']",A,1.055818,-0.016266,0.432704
4,0.708015,iowa_exp,600,200,72,5,19.804502,0,20.203986,48,...,[],0,,,1,"['fMRI', 'EEG', 'MEG']",A,1.055818,0.108533,0.557503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,0.858822,iowa_exp,600,500,48,36,151.348949,0,151.947741,72,...,[151.08096799999475],1,,,3,"['fMRI', 'EEG', 'MEG']",A,1.055818,0.043323,0.492292
156,1.241822,iowa_exp,600,550,48,37,155.625175,0,156.232732,72,...,"[154.8157945999992, 155.0574581999972, 155.308...",3,,,3,"['fMRI', 'EEG', 'MEG']",A,1.055818,-0.005712,0.443258
157,1.166950,iowa_exp,600,550,48,38,159.826984,-50,160.434348,72,...,"[158.90316449999227, 159.4759003999934]",2,,,3,"['fMRI', 'EEG', 'MEG']",A,1.055818,0.135433,0.584403
158,1.149321,iowa_exp,600,600,48,39,164.060989,0,164.661018,72,...,"[163.14397350000218, 163.41079290000198, 163.7...",3,,,3,"['fMRI', 'EEG', 'MEG']",A,1.055818,-0.098213,0.350756


In [38]:
# Load the dataset from the given directory
file_path = "/work/student_folders/markus_folder/data/switch_df_A.csv"
switch_df_A = pd.read_csv(file_path)

# Clean 'session' column in switch_df_A
switch_df_A['session'] = switch_df_A['session'].apply(
    lambda x: int(x.strip("(),")) if isinstance(x, str) else x
)

# Ensure all values are integers
switch_df_A['session'] = switch_df_A['session'].astype(int)

# Convert 'session' column in both DataFrames to integers
df_145_A['session'] = df_145_A['session'].astype(int)
switch_df_A['session'] = switch_df_A['session'].apply(lambda x: int(x.strip("(),")) if isinstance(x, str) else x)

# Perform the merge
merged_df_A = pd.merge(df_145_A, switch_df_A, on=['session'], how='outer')

# Verify the result
print(merged_df_A.head())

# Calculate confidence value by multiplying adjusted_residuals with switch_frequency
# Handle NaN values (ensure no missing data for the calculation)
merged_df_A['confidence_value'] = merged_df_A['adjusted_residuals'] * merged_df_A['switch_frequency']

# Calculate overall median confidence value for the entire dataset
overall_median_confidence_A = merged_df_A['confidence_value'].median()

# Create a new column for confidence (1 for below median, 0 for above)
merged_df_A['confidence'] = merged_df_A['confidence_value'].apply(
    lambda x: 1 if x < overall_median_confidence_A else 0
)


   rt_select condition  dur_decks  total  frames_before_decks  no   t_select  \
0   2.173932  iowa_exp        600    100                   72   1   3.172599   
1   1.216645  iowa_exp        600    -50                   72   2   7.408175   
2   1.758387  iowa_exp        600      0                   72   3  12.243241   
3   0.799233  iowa_exp        600    100                   72   4  16.052653   
4   0.708015  iowa_exp        600    200                   72   5  19.804502   

   loss  onset_win  frames_after_decks  ...  dur_measured_decks  session  \
0     0   3.572519                  48  ...                 NaN        1   
1  -250   7.807490                  48  ...                 NaN        1   
2     0  12.642702                  48  ...                 NaN        1   
3     0  16.452517                  48  ...                 NaN        1   
4     0  20.203986                  48  ...                 NaN        1   

                 exp_type  section  mean_rt_participant  resid

In [39]:
# Normalize the 'session' column to ensure consistent formatting
merged_df_A['session'] = merged_df_A['session'].apply(lambda x: x[0] if isinstance(x, tuple) else x)

# Group by 'session' and check the distribution of 'confidence'
distribution_A = merged_df_A.groupby('session')['confidence'].value_counts().unstack(fill_value=0)

# Display the corrected distribution
print(distribution_A)


confidence   0   1
session           
1           13  27
2           12  28
3           31   9
4           24  16
