In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import numpy as np

# Load the dataset from the given directory
file_path = "/work/student_folders/markus_folder/data/merged_logfile_145_B.csv"
df_145_B = pd.read_csv(file_path)

# Calculate mean reaction time per participant
df_145_B['mean_rt_participant'] = df_145_B['rt_select'].mean()

df_145_B

# Step 1: Fit the linear regression model with log_rt_select as the outcome,
# and move_count, move_duration, and mean_rt_participant as predictors
model = smf.mixedlm("rt_select ~ n_moves", data=df_145_B, groups=df_145_B["mean_rt_participant"], re_formula="~1")
result = model.fit()
# Print the summary of the regression model
print(result.summary())

# Get the residuals from the model
df_145_B['residuals'] = result.resid

# Check residuals
print(df_145_B[['session', 'residuals']].head())

# Step 2: Adjust residuals so that the lowest residual becomes 0
min_residual = df_145_B['residuals'].min()  # Find the minimum residual
df_145_B['adjusted_residuals'] = df_145_B['residuals'] - min_residual  # Subtract the minimum from all residuals

# Optionally, save results
#df_145.to_csv("/work/student_folders/markus_folder/df_1_with_residuals.csv", index=False)


         Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: rt_select
No. Observations: 160     Method:             REML     
No. Groups:       1       Scale:              0.0530   
Min. group size:  160     Log-Likelihood:     2.5337   
Max. group size:  160     Converged:          Yes      
Mean group size:  160.0                                
-------------------------------------------------------
           Coef.   Std.Err.    z    P>|z| [0.025 0.975]
-------------------------------------------------------
Intercept  0.594       0.232  2.555 0.011  0.138  1.049
n_moves    0.201       0.014 13.947 0.000  0.172  0.229
Group Var  0.053 8259355.250                           

   session  residuals
0        1   0.720288
1        1   0.213693
2        1   0.113419
3        1   0.387777
4        1  -0.362912


In [3]:
# Load the dataset from the given directory
file_path = "/work/student_folders/markus_folder/data/switch_df_B.csv"
switch_df_B = pd.read_csv(file_path)

# Clean 'session' column in switch_df_B
switch_df_B['session'] = switch_df_B['session'].apply(
    lambda x: int(x.strip("(),")) if isinstance(x, str) else x
)

# Ensure all values are integers
switch_df_B['session'] = switch_df_B['session'].astype(int)

# Convert 'session' column in both DataFrames to integers
df_145_B['session'] = df_145_B['session'].astype(int)
switch_df_B['session'] = switch_df_B['session'].apply(lambda x: int(x.strip("(),")) if isinstance(x, str) else x)

# Perform the merge
merged_df_B = pd.merge(df_145_B, switch_df_B, on=['session'], how='outer')

# Verify the result
print(merged_df_B.head())

# Calculate confidence value by multiplying adjusted_residuals with switch_frequency
# Handle NaN values (ensure no missing data for the calculation)
merged_df_B['confidence_value'] = merged_df_B['adjusted_residuals'] * merged_df_B['switch_frequency']

# Calculate overall median confidence value for the entire dataset
overall_median_confidence_B = merged_df_B['confidence_value'].median()

# Create a new column for confidence (1 for below median, 0 for above)
merged_df_B['confidence'] = merged_df_B['confidence_value'].apply(
    lambda x: 1 if x < overall_median_confidence_B else 0
)


   frames_before_decks  init_mark  dur_win  deck_select  rt_select  \
0                   72          0      240            4   1.915792   
1                   72          2      240            3   0.807522   
2                   72          0      240            4   1.308924   
3                   72          0      240            3   1.382723   
4                   72          3      240            3   0.832592   

   frames_after_decks  n_moves  no  ID  win  ...  session  condition  \
0                  48        3   1 NaN   50  ...        1   iowa_exp   
1                  48        0   2 NaN   50  ...        1   iowa_exp   
2                  48        3   3 NaN   50  ...        1   iowa_exp   
3                  48        2   4 NaN   50  ...        1   iowa_exp   
4                  48        3   5 NaN   50  ...        1   iowa_exp   

   onset_win  section  mean_rt_participant  residuals adjusted_residuals  \
0   3.317445        B             0.957342   0.720288           1.0994

In [4]:
# Normalize the 'session' column to ensure consistent formatting
merged_df_B['session'] = merged_df_B['session'].apply(lambda x: x[0] if isinstance(x, tuple) else x)

# Group by 'session' and check the distribution of 'confidence'
distribution_B = merged_df_B.groupby('session')['confidence'].value_counts().unstack(fill_value=0)

# Display the corrected distribution
print(distribution_B)


confidence   0   1
session           
1           22  18
2           26  14
3            7  33
4           25  15
