In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [4]:
all_data = pd.read_csv('../data/analyzed/combined_data.csv', dtype={'stimulus': str})

In [14]:
all_data['mintaps_standard'] = all_data['corrected_tapstotype_standard'] / all_data['num_words']
all_data["taps_per_char"] = all_data.eval('num_taps / num_chars')


In [15]:
list(all_data.columns)

['experiment',
 'participant',
 'age',
 'english_proficiency',
 'gender',
 'helpfulRank-accurate-least-condition',
 'helpfulRank-accurate-least-idx',
 'helpfulRank-accurate-most-condition',
 'helpfulRank-accurate-most-idx',
 'helpfulRank-quick-least-condition',
 'helpfulRank-quick-least-idx',
 'helpfulRank-quick-most-condition',
 'helpfulRank-quick-most-idx',
 'helpfulRank-specific-least-condition',
 'helpfulRank-specific-least-idx',
 'helpfulRank-specific-most-condition',
 'helpfulRank-specific-most-idx',
 'differences',
 'other_exp',
 'techDiff_exp',
 'total_time',
 'use_predictive',
 'verbalized_during',
 'condition_order',
 'stimulus_order',
 'num_trials_where_recs_used',
 'NFC',
 'NFC_boxcox',
 'Extraversion',
 'Extraversion_boxcox',
 'Openness',
 'Openness_boxcox',
 'Trust',
 'Trust_boxcox',
 'block',
 'condition',
 'mental',
 'physical',
 'temporal',
 'performance',
 'effort',
 'frustration',
 'TLX_sum',
 'sys-accurate',
 'sys-fast',
 'sys-specific',
 'techDiff_block',
 'other_b

In [16]:
aggregations = {}
for column in 'age num_chars num_words relevant_use_frac mintaps_standard chars_per_sec_norecs_mean taps_per_char'.split():
    aggregations[column] = 'mean'
block_data = (
    all_data
    .groupby('experiment participant block condition'.split(), as_index=False)
    .aggregate(aggregations))
block_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 495 entries, 0 to 494
Data columns (total 11 columns):
experiment                   495 non-null object
participant                  495 non-null object
block                        495 non-null int64
condition                    495 non-null object
age                          495 non-null float64
num_chars                    495 non-null float64
num_words                    495 non-null float64
relevant_use_frac            330 non-null float64
mintaps_standard             495 non-null float64
chars_per_sec_norecs_mean    495 non-null float64
taps_per_char                495 non-null float64
dtypes: float64(7), int64(1), object(3)
memory usage: 46.4+ KB


In [17]:
model = smf.mixedlm(
    "mintaps_standard ~ C(experiment) + C(condition) + block",
    data=block_data,
    groups=block_data.participant)
model.fit().summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,mintaps_standard
No. Observations:,495,Method:,REML
No. Groups:,165,Scale:,0.0479
Min. group size:,3,Likelihood:,-22.3827
Max. group size:,3,Converged:,Yes
Mean group size:,3.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,1.686,0.043,39.045,0.000,1.602,1.771
C(experiment)[T.spec1],0.058,0.043,1.359,0.174,-0.026,0.141
C(experiment)[T.spec2],-0.052,0.039,-1.329,0.184,-0.130,0.025
C(condition)[T.gated],0.093,0.044,2.092,0.036,0.006,0.180
C(condition)[T.norecs],0.152,0.038,3.959,0.000,0.077,0.227
C(condition)[T.standard],-0.010,0.038,-0.258,0.796,-0.085,0.065
block,-0.038,0.012,-3.184,0.001,-0.062,-0.015
Group Var,0.016,0.020,,,,


In [22]:
model = smf.mixedlm("mintaps_standard ~ C(experiment) + C(condition) + idx", data=data, groups='participant')
model.fit().summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,mintaps_standard
No. Observations:,1908,Method:,REML
No. Groups:,165,Scale:,0.1339
Min. group size:,9,Likelihood:,-890.2097
Max. group size:,12,Converged:,Yes
Mean group size:,11.6,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,1.694,0.040,42.543,0.000,1.616,1.772
C(experiment)[T.spec1],0.045,0.044,1.028,0.304,-0.041,0.131
C(experiment)[T.spec2],-0.054,0.039,-1.386,0.166,-0.129,0.022
C(condition)[T.gated],0.090,0.039,2.314,0.021,0.014,0.165
C(condition)[T.norecs],0.148,0.034,4.360,0.000,0.081,0.214
C(condition)[T.standard],-0.013,0.034,-0.373,0.709,-0.079,0.054
idx,-0.008,0.002,-3.126,0.002,-0.013,-0.003
participant Var,0.020,0.010,,,,


In [25]:
model = smf.mixedlm("mintaps_standard ~ C(experiment) + C(condition) + idx", data=data, 
                    groups=np.zeros(len(data)), vc_formula={
                        'participant': '1 + C(participant)',
                        'stimulus': '1 + C(stimulus)',
#                        'cross': '1 + C(stimulus)*C(participant)'
                    })
model.fit().summary()

0,1,2,3
Model:,MixedLM,Dependent Variable:,mintaps_standard
No. Observations:,1908,Method:,REML
No. Groups:,1,Scale:,0.1119
Min. group size:,1908,Likelihood:,-753.8494
Max. group size:,1908,Converged:,Yes
Mean group size:,1908.0,,

0,1,2,3,4,5,6
,Coef.,Std.Err.,z,P>|z|,[0.025,0.975]
Intercept,1.557,0.245,6.365,0.000,1.078,2.037
C(experiment)[T.spec1],0.042,0.043,0.979,0.328,-0.043,0.128
C(experiment)[T.spec2],-0.039,0.037,-1.053,0.292,-0.113,0.034
C(condition)[T.gated],0.081,0.035,2.294,0.022,0.012,0.151
C(condition)[T.norecs],0.139,0.031,4.474,0.000,0.078,0.200
C(condition)[T.standard],-0.020,0.031,-0.656,0.512,-0.081,0.040
idx,-0.001,0.002,-0.595,0.552,-0.006,0.003
participant Var,0.022,0.011,,,,
stimulus Var,0.036,0.051,,,,
