## Analysis based on the extension study data

In [2]:
import numpy as np
import pandas as pd
import utility as util

import warnings
warnings.filterwarnings("ignore")

In [3]:
# load the extension-study experiment data (path is relative to the notebook's working directory)
ext_df = pd.read_csv('../output/SD_experiment_df.csv')

In [4]:
# renumber the rows from 0 and attach a 1-based identifier column "id"
ext_df = ext_df.reset_index(drop=True).assign(
    id=lambda frame: range(1, len(frame) + 1)
)

In [5]:
# keep only the identifier, the treatment arm, the four outcome items and the demographics
ext_df = ext_df.loc[:, [
    'id', 'condition',
    'gone_to_friend', 'had_visitors', 'had_close_contact', 'gone_outside',
    'gender', 'marital', 'age_group', 'education',
]]

In [6]:
# drop every respondent that has a missing value on any demographic variable
democols = ['gender', 'marital', 'age_group', 'education']

ext_df = ext_df[ext_df[democols].notna().all(axis=1)]

In [7]:
# remove those that reported unsure in the marital variable (code 8).
# An explicit copy is taken because new columns are assigned to this frame in later cells;
# without it pandas flags those assignments as writes to a filtered slice
# (SettingWithCopyWarning, currently hidden by the global warnings filter).
ext_df = ext_df.loc[ext_df['marital'].ne(8)].copy()

In [8]:
# (rows, columns) remaining after dropping incomplete demographics and marital code 8
ext_df.shape

(627, 10)

### 1) checking for randomization of treatment conditions

In [9]:
# frequency table (including missing values) for each raw demographic variable, ordered by code
democols = ['gender', 'marital', 'age_group', 'education']

for col in democols:
    print(ext_df[col].value_counts(dropna=False).sort_index(), "................", sep="\n")

1.0    369
2.0    258
Name: gender, dtype: int64
................
1.0    465
7.0    162
Name: marital, dtype: int64
................
2.0     17
3.0     42
4.0     63
5.0    106
6.0    212
7.0    187
Name: age_group, dtype: int64
................
4.0      6
5.0     88
6.0    247
7.0    162
8.0    124
Name: education, dtype: int64
................


In [10]:
# recode demographic variables for consistency
# (each mapping lists raw code -> recoded value; any raw code not listed becomes NaN)
ext_df['gender_r'] = ext_df['gender'].map({
    1: 2.0,  # woman
    2: 1.0,  # man
})

ext_df['marital_r'] = ext_df['marital'].map({
    1: 1.0,  # married
    7: 2.0,  # not married
})

ext_df['age_group_r'] = ext_df['age_group'].map({
    2: 1.0, 3: 1.0, 4: 1.0, 5: 1.0,  # 19-55
    6: 2.0,                          # 56-65
    7: 3.0,                          # 66+
})

ext_df['education_r'] = ext_df['education'].map({
    4: 1.0, 5: 1.0, 6: 1.0,  # less than college
    7: 2.0, 8: 2.0,          # college or more
})

In [11]:
# sanity check on the recodes: counts per recoded level, missing values included
democols_r = [f'{name}_r' for name in ('gender', 'marital', 'age_group', 'education')]

for col in democols_r:
    level_counts = ext_df[col].value_counts(dropna=False)
    print(level_counts.sort_index())
    print("................")

1.0    258
2.0    369
Name: gender_r, dtype: int64
................
1.0    465
2.0    162
Name: marital_r, dtype: int64
................
1.0    228
2.0    212
3.0    187
Name: age_group_r, dtype: int64
................
1.0    341
2.0    286
Name: education_r, dtype: int64
................


In [12]:
# randomization check: cross-tabulate every recoded demographic against the four conditions.
# NOTE(review): util.crosstab_chisq is a project helper not shown here; judging by its output it
# displays column percentages with group sizes and a chi-squared test - confirm in utility.py.
for col in democols_r:
    util.crosstab_chisq(col, 'condition', ext_df, chisqtest=True)

condition,A,B,C,Control
gender_r,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,44.8,42.7,41.0,36.2
2.0,55.2,57.3,59.0,63.7
Total n,154.0,157.0,156.0,160.0


*Chi-squared statistic = 2.6, degree of freedom = 3, p = 0.46*

-----

condition,A,B,C,Control
marital_r,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,72.7,77.7,78.8,67.5
2.0,27.3,22.3,21.2,32.5
Total n,154.0,157.0,156.0,160.0


*Chi-squared statistic = 6.7, degree of freedom = 3, p = 0.083*

-----

condition,A,B,C,Control
age_group_r,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,34.4,42.0,37.2,31.9
2.0,32.5,31.2,31.4,40.0
3.0,33.1,26.8,31.4,28.1
Total n,154.0,157.0,156.0,160.0


*Chi-squared statistic = 6.3, degree of freedom = 6, p = 0.392*

-----

condition,A,B,C,Control
education_r,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,54.5,49.0,57.7,56.2
2.0,45.5,51.0,42.3,43.8
Total n,154.0,157.0,156.0,160.0


*Chi-squared statistic = 2.7, degree of freedom = 3, p = 0.437*

-----

### 1.5) Control vs. ABC combined

In [15]:
# number of respondents in each condition
ext_df['condition'].value_counts()

Control    160
B          157
C          156
A          154
Name: condition, dtype: int64

In [16]:
# Collapse the three treatment arms (A, B, C) into one "Experiment" group and keep "Control" as is;
# any other value of `condition` ends up missing (NaN), as before.
# Built with a single map() rather than writing strings into a float NaN column, because
# newer pandas versions deprecate that implicit dtype change.
ext_df['condition_r'] = ext_df['condition'].map({
    'Control': 'Control',
    'A': 'Experiment',
    'B': 'Experiment',
    'C': 'Experiment',
})

In [17]:
# group sizes after collapsing A/B/C into "Experiment"
ext_df['condition_r'].value_counts()

Experiment    467
Control       160
Name: condition_r, dtype: int64

In [18]:
# randomization check again, this time Control vs. the combined Experiment group (condition_r)
# NOTE(review): util.crosstab_chisq is a project helper not shown here - confirm its behavior in utility.py.
for col in democols_r:
    util.crosstab_chisq(col, 'condition_r', ext_df, chisqtest=True)

condition_r,Control,Experiment
gender_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,36.2,42.8
2.0,63.7,57.2
Total n,160.0,467.0


*Chi-squared statistic = 1.9, degree of freedom = 1, p = 0.172*

-----

condition_r,Control,Experiment
marital_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,67.5,76.4
2.0,32.5,23.6
Total n,160.0,467.0


*Chi-squared statistic = 4.5, degree of freedom = 1, p = 0.033*

-----

condition_r,Control,Experiment
age_group_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,31.9,37.9
2.0,40.0,31.7
3.0,28.1,30.4
Total n,160.0,467.0


*Chi-squared statistic = 3.8, degree of freedom = 2, p = 0.147*

-----

condition_r,Control,Experiment
education_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1.0,56.2,53.7
2.0,43.8,46.3
Total n,160.0,467.0


*Chi-squared statistic = 0.2, degree of freedom = 1, p = 0.648*

-----

### 2) experimental data analysis

In [12]:
# look for missing values in the four outcome items: counts per response code, NaN included
expcols = ['gone_to_friend', 'had_visitors', 'had_close_contact', 'gone_outside']

for col in expcols:
    print(ext_df[col].value_counts(dropna=False).sort_index(), "................", sep="\n")

1    327
2     59
3    238
4      3
Name: gone_to_friend, dtype: int64
................
1    309
2     56
3    258
4      4
Name: had_visitors, dtype: int64
................
1    384
2     99
3    132
4     12
Name: had_close_contact, dtype: int64
................
1    498
2     27
3     95
4      7
Name: gone_outside, dtype: int64
................


In [13]:
# swap the numeric response codes 1-4 for their display labels
for col in expcols:
    ext_df[col] = ext_df[col].replace({
        1: "1a. Yes/Yes for any reason",
        2: "1b. Yes but only when neccessary",
        3: "2. No",
        4: "3. Unsure",
    })

In [14]:
# raw distributions for cross-tabs: each labelled outcome item by the four conditions,
# with the chi-squared test switched off (chisqtest=False)
# NOTE(review): util.crosstab_chisq is a project helper not shown here - confirm its behavior in utility.py.
for col in expcols:
    util.crosstab_chisq(col, 'condition', ext_df, chisqtest=False)

condition,A,B,C,Control
gone_to_friend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1a. Yes/Yes for any reason,57.8,49.0,40.4,61.3
1b. Yes but only when neccessary,0.0,15.9,21.8,0.0
2. No,40.9,35.0,37.2,38.8
3. Unsure,1.3,0.0,0.6,0.0
Total n,154.0,157.0,156.0,160.0


-----

condition,A,B,C,Control
had_visitors,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1a. Yes/Yes for any reason,56.5,45.2,44.2,51.2
1b. Yes but only when neccessary,0.0,15.9,19.9,0.0
2. No,42.9,38.2,34.6,48.8
3. Unsure,0.6,0.6,1.3,0.0
Total n,154.0,157.0,156.0,160.0


-----

condition,A,B,C,Control
had_close_contact,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1a. Yes/Yes for any reason,79.9,49.0,44.2,71.9
1b. Yes but only when neccessary,0.0,29.9,33.3,0.0
2. No,19.5,20.4,19.9,24.4
3. Unsure,0.6,0.6,2.6,3.8
Total n,154.0,157.0,156.0,160.0


-----

condition,A,B,C,Control
gone_outside,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1a. Yes/Yes for any reason,85.7,77.7,73.7,80.6
1b. Yes but only when neccessary,0.0,8.3,9.0,0.0
2. No,13.6,14.0,16.0,16.9
3. Unsure,0.6,0.0,1.3,2.5
Total n,154.0,157.0,156.0,160.0


-----

In [15]:
# collapse the four response labels into two: "1a", "1b" and "3. Unsure" all count as "1. Yes",
# while "2. No" stays "2. No"
combine_three = dict.fromkeys(
    ["1a. Yes/Yes for any reason", "1b. Yes but only when neccessary", "3. Unsure"],
    "1. Yes",
)
combine_three["2. No"] = "2. No"

for col in expcols:
    ext_df[col + '_r'] = ext_df[col].map(combine_three)

# use recoded_cols for further analyses
recoded_cols = [name + '_r' for name in expcols]

In [16]:
# respondents per condition (group sizes for the pairwise comparisons that follow)
ext_df['condition'].value_counts()

Control    160
B          157
C          156
A          154
Name: condition, dtype: int64

In [17]:
# one subset per treatment arm, each holding that arm plus the Control group
ext_A, ext_B, ext_C = (
    ext_df[ext_df['condition'].isin(['Control', arm])] for arm in ('A', 'B', 'C')
)

In [18]:
# cross-tabs based on recodes: Control vs. condition A, with chi-squared test
# NOTE(review): util.crosstab_chisq is a project helper not shown here - confirm its behavior in utility.py.
for col in recoded_cols:
    util.crosstab_chisq(col, 'condition', ext_A, chisqtest=True)

condition,A,Control
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,59.1,61.3
2. No,40.9,38.8
Total n,154.0,160.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.783*

-----

condition,A,Control
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,57.1,51.2
2. No,42.9,48.8
Total n,154.0,160.0


*Chi-squared statistic = 0.9, degree of freedom = 1, p = 0.35*

-----

condition,A,Control
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,80.5,75.6
2. No,19.5,24.4
Total n,154.0,160.0


*Chi-squared statistic = 0.8, degree of freedom = 1, p = 0.362*

-----

condition,A,Control
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,86.4,83.1
2. No,13.6,16.9
Total n,154.0,160.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.522*

-----

In [19]:
# cross-tabs based on recodes: Control vs. condition B, with chi-squared test
# NOTE(review): util.crosstab_chisq is a project helper not shown here - confirm its behavior in utility.py.
for col in recoded_cols:
    util.crosstab_chisq(col, 'condition', ext_B, chisqtest=True)

condition,B,Control
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,65.0,61.3
2. No,35.0,38.8
Total n,157.0,160.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.569*

-----

condition,B,Control
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,61.8,51.2
2. No,38.2,48.8
Total n,157.0,160.0


*Chi-squared statistic = 3.2, degree of freedom = 1, p = 0.075*

-----

condition,B,Control
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,79.6,75.6
2. No,20.4,24.4
Total n,157.0,160.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.473*

-----

condition,B,Control
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,86.0,83.1
2. No,14.0,16.9
Total n,157.0,160.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.583*

-----

In [20]:
# cross-tabs based on recodes: Control vs. condition C, with chi-squared test
# NOTE(review): util.crosstab_chisq is a project helper not shown here - confirm its behavior in utility.py.
for col in recoded_cols:
    util.crosstab_chisq(col, 'condition', ext_C, chisqtest=True)

condition,C,Control
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,62.8,61.3
2. No,37.2,38.8
Total n,156.0,160.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.864*

-----

condition,C,Control
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,65.4,51.2
2. No,34.6,48.8
Total n,156.0,160.0


*Chi-squared statistic = 5.9, degree of freedom = 1, p = 0.015*

-----

condition,C,Control
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,80.1,75.6
2. No,19.9,24.4
Total n,156.0,160.0


*Chi-squared statistic = 0.7, degree of freedom = 1, p = 0.408*

-----

condition,C,Control
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,84.0,83.1
2. No,16.0,16.9
Total n,156.0,160.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.959*

-----

### 3) testing interactions

In [21]:
# overwrite condition_r with a numeric code per arm (values not listed become NaN)
ext_df['condition_r'] = ext_df['condition'].map({
    'Control': 1.0,  # Control
    'A': 2.0,        # condition A
    'B': 3.0,        # condition B
    'C': 4.0,        # condition C
})

In [22]:
# recode y as numerics:
#   1 = yes, has done the mentioned behavior
#   0 = no, has not done the mentioned behavior
for col in recoded_cols:
    ext_df[col] = ext_df[col].replace({'1. Yes': 1, '2. No': 0})

In [23]:
# check the numeric recode: counts per level (1 = Control, 2 = A, 3 = B, 4 = C)
ext_df['condition_r'].value_counts()

1.0    160
3.0    157
4.0    156
2.0    154
Name: condition_r, dtype: int64

In [24]:
# the 0/1 outcome columns used as responses in the regressions below
recoded_cols

['gone_to_friend_r', 'had_visitors_r', 'had_close_contact_r', 'gone_outside_r']

In [25]:
# use R for logistic regression tables
# (loads the rpy2 IPython extension; the following cells use its %%R cell magic)
%load_ext rpy2.ipython

In [26]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_to_friend_r on condition_r x gender_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), gender_r = 1 (man).
print('GONE TO FRIEND - SEX')
summary(glm(gone_to_friend_r ~ factor(condition_r) * factor(gender_r), family=binomial, data=ext_df))

[1] "GONE TO FRIEND - SEX"

Call:
glm(formula = gone_to_friend_r ~ factor(condition_r) * factor(gender_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5585  -1.3683   0.8672   0.9767   1.0923  

Coefficients:
                                       Estimate Std. Error z value Pr(>|z|)  
(Intercept)                             0.49248    0.27061   1.820   0.0688 .
factor(condition_r)2                   -0.28888    0.36305  -0.796   0.4262  
factor(condition_r)3                    0.29164    0.37761   0.772   0.4399  
factor(condition_r)4                    0.36975    0.38482   0.961   0.3366  
factor(gender_r)2                      -0.05422    0.33817  -0.160   0.8726  
factor(condition_r)2:factor(gender_r)2  0.35518    0.47229   0.752   0.4520  
factor(condition_r)3:factor(gender_r)2 -0.23091    0.48061  -0.480   0.6309  
factor(condition_r)4:factor(gender_r)2 -0.50127    0.48345  -1.037   0.2998  
---
Signif. codes:  0 '

In [27]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_to_friend_r on condition_r x marital_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), marital_r = 1 (married).
print('GONE TO FRIEND - MARITAL')
summary(glm(gone_to_friend_r ~ factor(condition_r) * factor(marital_r), family=binomial, data=ext_df))

[1] "GONE TO FRIEND - MARITAL"

Call:
glm(formula = gone_to_friend_r ~ factor(condition_r) * factor(marital_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5103  -1.3572   0.8777   0.9813   1.1054  

Coefficients:
                                        Estimate Std. Error z value Pr(>|z|)  
(Intercept)                              0.41319    0.19657   2.102   0.0356 *
factor(condition_r)2                    -0.12551    0.27404  -0.458   0.6470  
factor(condition_r)3                     0.34209    0.27628   1.238   0.2156  
factor(condition_r)4                     0.06740    0.27032   0.249   0.8031  
factor(marital_r)2                       0.13888    0.34868   0.398   0.6904  
factor(condition_r)2:factor(marital_r)2  0.16122    0.51160   0.315   0.7527  
factor(condition_r)3:factor(marital_r)2 -0.72231    0.52383  -1.379   0.1679  
factor(condition_r)4:factor(marital_r)2  0.07368    0.54072   0.136   0.8916  
---
Signi

In [28]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_to_friend_r on condition_r x age_group_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), age_group_r = 1 (19-55).
print('GONE TO FRIEND - AGE_GROUP')
summary(glm(gone_to_friend_r ~ factor(condition_r) * factor(age_group_r), family=binomial, data=ext_df))

[1] "GONE TO FRIEND - AGE_GROUP"

Call:
glm(formula = gone_to_friend_r ~ factor(condition_r) * factor(age_group_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5829  -1.2814   0.8620   0.9205   1.2299  

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                6.931e-01  2.970e-01   2.333
factor(condition_r)2                       1.452e-01  4.216e-01   0.344
factor(condition_r)3                      -8.816e-16  3.955e-01   0.000
factor(condition_r)4                       1.054e-01  4.108e-01   0.256
factor(age_group_r)2                      -4.652e-02  3.969e-01  -0.117
factor(age_group_r)3                      -7.376e-01  4.209e-01  -1.752
factor(condition_r)2:factor(age_group_r)2 -5.506e-01  5.729e-01  -0.961
factor(condition_r)3:factor(age_group_r)2 -1.410e-02  5.619e-01  -0.025
factor(condition_r)4:factor(age_group_r)2  1.643e-01  5.814e-01   0.

In [29]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_to_friend_r on condition_r x education_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), education_r = 1 (less than college).
print('GONE TO FRIEND - EDUCATION')
summary(glm(gone_to_friend_r ~ factor(condition_r) * factor(education_r), family=binomial, data=ext_df))

[1] "GONE TO FRIEND - EDUCATION"

Call:
glm(formula = gone_to_friend_r ~ factor(condition_r) * factor(education_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5546  -1.3537   0.9558   0.9758   1.1054  

Coefficients:
                                            Estimate Std. Error z value
(Intercept)                                5.465e-01  2.187e-01   2.499
factor(condition_r)2                      -1.024e-02  3.146e-01  -0.033
factor(condition_r)3                       3.069e-01  3.314e-01   0.926
factor(condition_r)4                      -5.851e-15  3.093e-01   0.000
factor(education_r)2                      -2.003e-01  3.267e-01  -0.613
factor(condition_r)2:factor(education_r)2 -1.642e-01  4.641e-01  -0.354
factor(condition_r)3:factor(education_r)2 -2.478e-01  4.699e-01  -0.527
factor(condition_r)4:factor(education_r)2  1.484e-01  4.679e-01   0.317
                                          Pr(>|z|)  
(Intercept)    

In [30]:
%%R -i ext_df
# Binomial (logistic) GLM of had_visitors_r on condition_r x gender_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), gender_r = 1 (man).
print('HAD VISITORS - SEX')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(gender_r), family=binomial, data=ext_df))

[1] "HAD VISITORS - SEX"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(gender_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.6651  -1.2540   0.8167   1.0620   1.2109  

Coefficients:
                                       Estimate Std. Error z value Pr(>|z|)  
(Intercept)                             0.27763    0.26515   1.047   0.2951  
factor(condition_r)2                    0.22547    0.36335   0.621   0.5349  
factor(condition_r)3                    0.64913    0.37917   1.712   0.0869 .
factor(condition_r)4                    0.82098    0.39196   2.095   0.0362 *
factor(gender_r)2                      -0.35610    0.33103  -1.076   0.2820  
factor(condition_r)2:factor(gender_r)2 -0.02922    0.46746  -0.063   0.9502  
factor(condition_r)3:factor(gender_r)2 -0.39241    0.47733  -0.822   0.4110  
factor(condition_r)4:factor(gender_r)2 -0.39111    0.48759  -0.802   0.4225  
---
Signif. codes:  0 '***'

In [31]:
%%R -i ext_df
# Binomial (logistic) GLM of had_visitors_r on condition_r x marital_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), marital_r = 1 (married).
print('HAD VISITORS - MARITAL')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(marital_r), family=binomial, data=ext_df))

[1] "HAD VISITORS - MARITAL"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(marital_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4934  -1.3232   0.8914   1.0284   1.4190  

Coefficients:
                                        Estimate Std. Error z value Pr(>|z|)  
(Intercept)                              0.33647    0.19518   1.724   0.0847 .
factor(condition_r)2                     0.02454    0.27384   0.090   0.9286  
factor(condition_r)3                     0.38137    0.27439   1.390   0.1646  
factor(condition_r)4                     0.28435    0.27176   1.046   0.2954  
factor(marital_r)2                      -0.88854    0.34789  -2.554   0.0106 *
factor(condition_r)2:factor(marital_r)2  0.62284    0.50336   1.237   0.2160  
factor(condition_r)3:factor(marital_r)2 -0.11698    0.52430  -0.223   0.8234  
factor(condition_r)4:factor(marital_r)2  0.96086    0.54143   1.775   0.0760 .
---
Signif. c

In [32]:
%%R -i ext_df
# Binomial (logistic) GLM of had_visitors_r on condition_r x age_group_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), age_group_r = 1 (19-55).
print('HAD VISITORS - AGE_GROUP')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(age_group_r), family=binomial, data=ext_df))

[1] "HAD VISITORS - AGE_GROUP"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(age_group_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.6446  -1.2655   0.9005   1.0579   1.2735  

Coefficients:
                                           Estimate Std. Error z value Pr(>|z|)
(Intercept)                                0.438255   0.286807   1.528    0.127
factor(condition_r)2                       0.062520   0.403189   0.155    0.877
factor(condition_r)3                       0.121361   0.384362   0.316    0.752
factor(condition_r)4                       0.614895   0.414946   1.482    0.138
factor(age_group_r)2                      -0.500775   0.380551  -1.316    0.188
factor(age_group_r)3                      -0.661398   0.415040  -1.594    0.111
factor(condition_r)2:factor(age_group_r)2  0.080043   0.552495   0.145    0.885
factor(condition_r)3:factor(age_group_r)2  0.145954   0.541098   0.270    0.787


In [33]:
%%R -i ext_df
# Binomial (logistic) GLM of had_visitors_r on condition_r x education_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), education_r = 1 (less than college).
print('HAD VISITORS - EDUCATION')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(education_r), family=binomial, data=ext_df))

[1] "HAD VISITORS - EDUCATION"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(education_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5281  -1.2346   0.8633   1.0520   1.2018  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                                 0.1335     0.2113   0.632   0.5274
factor(condition_r)2                        0.3018     0.3075   0.981   0.3264
factor(condition_r)3                        0.5402     0.3205   1.686   0.0919
factor(condition_r)4                        0.6614     0.3106   2.129   0.0332
factor(education_r)2                       -0.1907     0.3191  -0.598   0.5501
factor(condition_r)2:factor(education_r)2  -0.1302     0.4572  -0.285   0.7758
factor(condition_r)3:factor(education_r)2  -0.1808     0.4594  -0.393   0.6940
factor(condition_r)4:factor(education_r)2  -0.1735     0.4660  -0.372   0.7097
         

In [34]:
%%R -i ext_df
# Binomial (logistic) GLM of had_close_contact_r on condition_r x gender_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), gender_r = 1 (man).
print('HAD CLOSE CONTACT - SEX')
summary(glm(had_close_contact_r ~ factor(condition_r) * factor(gender_r), family=binomial, data=ext_df))

[1] "HAD CLOSE CONTACT - SEX"

Call:
glm(formula = had_close_contact_r ~ factor(condition_r) * factor(gender_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.9807   0.5505   0.6485   0.7325   0.8011  

Coefficients:
                                       Estimate Std. Error z value Pr(>|z|)    
(Intercept)                             1.45225    0.33494   4.336 1.45e-05 ***
factor(condition_r)2                    0.32270    0.47868   0.674    0.500    
factor(condition_r)3                    0.17520    0.47006   0.373    0.709    
factor(condition_r)4                    0.35786    0.49140   0.728    0.466    
factor(gender_r)2                      -0.48039    0.40176  -1.196    0.232    
factor(condition_r)2:factor(gender_r)2 -0.11591    0.58630  -0.198    0.843    
factor(condition_r)3:factor(gender_r)2  0.04252    0.57645   0.074    0.941    
factor(condition_r)4:factor(gender_r)2 -0.17226    0.59199  -0.291    0.771    

In [35]:
%%R -i ext_df
# Binomial (logistic) GLM of had_close_contact_r on condition_r x marital_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), marital_r = 1 (married).
print('HAD CLOSE CONTACT - MARITAL')
summary(glm(had_close_contact_r ~ factor(condition_r) * factor(marital_r), family=binomial, data=ext_df))

[1] "HAD CLOSE CONTACT - MARITAL"

Call:
glm(formula = had_close_contact_r ~ factor(condition_r) * factor(marital_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.9060   0.5957   0.6444   0.7090   0.9005  

Coefficients:
                                        Estimate Std. Error z value Pr(>|z|)
(Intercept)                              1.25276    0.23145   5.413 6.21e-08
factor(condition_r)2                     0.21357    0.33493   0.638    0.524
factor(condition_r)3                     0.20686    0.32734   0.632    0.527
factor(condition_r)4                     0.38623    0.33657   1.148    0.251
factor(marital_r)2                      -0.34990    0.38375  -0.912    0.362
factor(condition_r)2:factor(marital_r)2  0.18284    0.58931   0.310    0.756
factor(condition_r)3:factor(marital_r)2 -0.04886    0.59196  -0.083    0.934
factor(condition_r)4:factor(marital_r)2 -0.59595    0.58595  -1.017    0.309
                     

In [36]:
%%R -i ext_df
# Binomial (logistic) GLM of had_close_contact_r on condition_r x age_group_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), age_group_r = 1 (19-55).
print('HAD CLOSE CONTACT - AGE_GROUP')
summary(glm(had_close_contact_r ~ factor(condition_r) * factor(age_group_r), family=binomial, data=ext_df))

[1] "HAD CLOSE CONTACT - AGE_GROUP"

Call:
glm(formula = had_close_contact_r ~ factor(condition_r) * factor(age_group_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.0874   0.4902   0.6231   0.7495   0.8633  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                                1.54045    0.36732   4.194 2.74e-05
factor(condition_r)2                       0.51794    0.56820   0.912    0.362
factor(condition_r)3                       0.18232    0.50277   0.363    0.717
factor(condition_r)4                       0.44547    0.54533   0.817    0.414
factor(age_group_r)2                      -0.44183    0.46718  -0.946    0.344
factor(age_group_r)3                      -0.74552    0.48847  -1.526    0.127
factor(condition_r)2:factor(age_group_r)2 -0.57059    0.71424  -0.799    0.424
factor(condition_r)3:factor(age_group_r)2 -0.15492    0.66818  -0.232    0.817

In [37]:
%%R -i ext_df
# Binomial (logistic) GLM of had_close_contact_r on condition_r x education_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), education_r = 1 (less than college).
print('HAD CLOSE CONTACT - EDUCATION')
summary(glm(had_close_contact_r ~ factor(condition_r) * factor(education_r), family=binomial, data=ext_df))

[1] "HAD CLOSE CONTACT - EDUCATION"

Call:
glm(formula = had_close_contact_r ~ factor(condition_r) * factor(education_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.8930   0.6039   0.6444   0.7290   0.7710  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                                1.18958    0.24922   4.773 1.81e-06
factor(condition_r)2                       0.18190    0.36859   0.493    0.622
factor(condition_r)3                       0.07155    0.37095   0.193    0.847
factor(condition_r)4                       0.41985    0.37698   1.114    0.265
factor(education_r)2                      -0.12871    0.37000  -0.348    0.728
factor(condition_r)2:factor(education_r)2  0.23533    0.55237   0.426    0.670
factor(condition_r)3:factor(education_r)2  0.33392    0.54262   0.615    0.538
factor(condition_r)4:factor(education_r)2 -0.34129    0.54717  -0.624    0.533

In [38]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_outside_r on condition_r x gender_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), gender_r = 1 (man).
print('GONE_OUTSIDE - SEX')
summary(glm(gone_outside_r ~ factor(condition_r) * factor(gender_r), family=binomial, data=ext_df))

[1] "GONE_OUTSIDE - SEX"

Call:
glm(formula = gone_outside_r ~ factor(condition_r) * factor(gender_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.4924   0.3027   0.5287   0.6607   0.6887  

Coefficients:
                                       Estimate Std. Error z value Pr(>|z|)    
(Intercept)                              1.9859     0.4031   4.927 8.35e-07 ***
factor(condition_r)2                    -0.0888     0.5387  -0.165    0.869    
factor(condition_r)3                     1.0744     0.7151   1.502    0.133    
factor(condition_r)4                     0.2828     0.5885   0.480    0.631    
factor(gender_r)2                       -0.5749     0.4740  -1.213    0.225    
factor(condition_r)2:factor(gender_r)2   0.4834     0.6704   0.721    0.471    
factor(condition_r)3:factor(gender_r)2  -1.1671     0.8002  -1.459    0.145    
factor(condition_r)4:factor(gender_r)2  -0.3477     0.6891  -0.505    0.614    
---
Signi

In [39]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_outside_r on condition_r x marital_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), marital_r = 1 (married).
print('GONE_OUTSIDE - MARITAL')
summary(glm(gone_outside_r ~ factor(condition_r) * factor(marital_r), family=binomial, data=ext_df))

[1] "GONE_OUTSIDE - MARITAL"

Call:
glm(formula = gone_outside_r ~ factor(condition_r) * factor(marital_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.0754   0.4968   0.5279   0.5853   0.7981  

Coefficients:
                                        Estimate Std. Error z value Pr(>|z|)
(Intercept)                               1.6776     0.2642   6.349 2.16e-10
factor(condition_r)2                      0.3525     0.3960   0.890    0.373
factor(condition_r)3                      0.2871     0.3819   0.752    0.452
factor(condition_r)4                      0.2226     0.3764   0.591    0.554
factor(marital_r)2                       -0.2426     0.4400  -0.551    0.581
factor(condition_r)2:factor(marital_r)2  -0.3407     0.6596  -0.517    0.605
factor(condition_r)3:factor(marital_r)2  -0.3359     0.6695  -0.502    0.616
factor(condition_r)4:factor(marital_r)2  -0.6768     0.6467  -1.047    0.295
                               

In [40]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_outside_r on condition_r x age_group_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), age_group_r = 1 (19-55).
print('GONE_OUTSIDE - AGE_GROUP')
summary(glm(gone_outside_r ~ factor(condition_r) * factor(age_group_r), family=binomial, data=ext_df))

[1] "GONE_OUTSIDE - AGE_GROUP"

Call:
glm(formula = gone_outside_r ~ factor(condition_r) * factor(age_group_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.3678   0.3891   0.5553   0.6300   0.7793  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                                1.68176    0.38504   4.368 1.26e-05
factor(condition_r)2                       0.58000    0.60753   0.955   0.3397
factor(condition_r)3                       1.05908    0.64372   1.645   0.0999
factor(condition_r)4                       0.67910    0.60591   1.121   0.2624
factor(age_group_r)2                       0.00464    0.51650   0.009   0.9928
factor(age_group_r)3                      -0.29546    0.53586  -0.551   0.5814
factor(condition_r)2:factor(age_group_r)2 -0.75006    0.78937  -0.950   0.3420
factor(condition_r)3:factor(age_group_r)2 -0.95372    0.83640  -1.140   0.2542
factor(co

In [41]:
%%R -i ext_df
# Binomial (logistic) GLM of gone_outside_r on condition_r x education_r: both entered as factors,
# main effects plus interaction. Baselines: condition_r = 1 (Control), education_r = 1 (less than college).
print('GONE_OUTSIDE - EDUCATION')
summary(glm(gone_outside_r ~ factor(condition_r) * factor(education_r), family=binomial, data=ext_df))

[1] "GONE_OUTSIDE - EDUCATION"

Call:
glm(formula = gone_outside_r ~ factor(condition_r) * factor(education_r), 
    family = binomial, data = ext_df)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-2.0904   0.4886   0.5586   0.6081   0.6335  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                                1.53148    0.27571   5.555 2.78e-08
factor(condition_r)2                       0.47000    0.43534   1.080    0.280
factor(condition_r)3                       0.06246    0.41056   0.152    0.879
factor(condition_r)4                       0.24738    0.40734   0.607    0.544
factor(education_r)2                       0.14817    0.42880   0.346    0.730
factor(condition_r)2:factor(education_r)2 -0.47000    0.63659  -0.738    0.460
factor(condition_r)3:factor(education_r)2  0.32336    0.63373   0.510    0.610
factor(condition_r)4:factor(education_r)2 -0.42294    0.61289  -0.690    0.490
         

In [42]:
# checking interaction for the one significant main effect: control vs. C, had_visitors
# (the subset is rebuilt from the current ext_df so that it carries the numeric condition_r
# and the 0/1 outcome columns created in the cells above)
ext_C = ext_df.loc[ext_df['condition'].isin(['Control', 'C'])]

In [43]:
%%R -i ext_C
# Binomial (logistic) GLM of had_visitors_r on condition_r x gender_r, fitted on ext_C
# (Control and C arms only), so the only condition contrast is level 4 (C) vs. level 1 (Control).
print('HAD_VISITORS - SEX')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(gender_r), family=binomial, data=ext_C))

[1] "HAD_VISITORS - SEX"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(gender_r), 
    family = binomial, data = ext_C)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.6651  -1.1825   0.7585   1.0620   1.2109  

Coefficients:
                                       Estimate Std. Error z value Pr(>|z|)  
(Intercept)                              0.2776     0.2651   1.047   0.2951  
factor(condition_r)4                     0.8210     0.3920   2.095   0.0362 *
factor(gender_r)2                       -0.3561     0.3310  -1.076   0.2820  
factor(condition_r)4:factor(gender_r)2  -0.3911     0.4876  -0.802   0.4225  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 429.47  on 315  degrees of freedom
Residual deviance: 417.26  on 312  degrees of freedom
AIC: 425.26

Number of Fisher Scoring iterations: 4



In [44]:
%%R -i ext_C
# Binomial (logistic) GLM of had_visitors_r on condition_r x marital_r, fitted on ext_C
# (Control and C arms only), so the only condition contrast is level 4 (C) vs. level 1 (Control).
print('HAD_VISITORS - MARITAL')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(marital_r), family=binomial, data=ext_C))

[1] "HAD_VISITORS - MARITAL"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(marital_r), 
    family = binomial, data = ext_C)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.4823  -1.3232   0.9275   1.0383   1.4190  

Coefficients:
                                        Estimate Std. Error z value Pr(>|z|)  
(Intercept)                               0.3365     0.1952   1.724   0.0847 .
factor(condition_r)4                      0.2844     0.2718   1.046   0.2954  
factor(marital_r)2                       -0.8885     0.3479  -2.554   0.0106 *
factor(condition_r)4:factor(marital_r)2   0.9609     0.5414   1.775   0.0760 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 429.47  on 315  degrees of freedom
Residual deviance: 416.20  on 312  degrees of freedom
AIC: 424.2

Number of Fisher Scoring iterations: 4



In [45]:
%%R -i ext_C
# Binomial (logistic) GLM of had_visitors_r on condition_r x age_group_r, fitted on ext_C
# (Control and C arms only), so the only condition contrast is level 4 (C) vs. level 1 (Control).
print('HAD_VISITORS - AGE_GROUP')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(age_group_r), family=binomial, data=ext_C))

[1] "HAD_VISITORS - AGE_GROUP"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(age_group_r), 
    family = binomial, data = ext_C)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.6446  -1.1510   0.7736   1.0129   1.2735  

Coefficients:
                                           Estimate Std. Error z value Pr(>|z|)
(Intercept)                                0.438255   0.286807   1.528    0.127
factor(condition_r)4                       0.614895   0.414946   1.482    0.138
factor(age_group_r)2                      -0.500775   0.380551  -1.316    0.188
factor(age_group_r)3                      -0.661398   0.415040  -1.594    0.111
factor(condition_r)4:factor(age_group_r)2 -0.008759   0.567939  -0.015    0.988
factor(condition_r)4:factor(age_group_r)3 -0.104069   0.587804  -0.177    0.859

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 429.47  on 315  degrees of freedom
Residual deviance: 416.47  on 310  degrees of fr

In [46]:
%%R -i ext_C
# Binomial (logistic) GLM of had_visitors_r on condition_r x education_r, fitted on ext_C
# (Control and C arms only), so the only condition contrast is level 4 (C) vs. level 1 (Control).
print('HAD_VISITORS - EDUCATION')
summary(glm(had_visitors_r ~ factor(condition_r) * factor(education_r), family=binomial, data=ext_C))

[1] "HAD_VISITORS - EDUCATION"

Call:
glm(formula = had_visitors_r ~ factor(condition_r) * factor(education_r), 
    family = binomial, data = ext_C)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.5281  -1.2346   0.8633   1.1213   1.2018  

Coefficients:
                                          Estimate Std. Error z value Pr(>|z|)
(Intercept)                                 0.1335     0.2113   0.632   0.5274
factor(condition_r)4                        0.6614     0.3106   2.129   0.0332
factor(education_r)2                       -0.1907     0.3191  -0.598   0.5501
factor(condition_r)4:factor(education_r)2  -0.1735     0.4660  -0.372   0.7097
                                           
(Intercept)                                
factor(condition_r)4                      *
factor(education_r)2                       
factor(condition_r)4:factor(education_r)2  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial fam