## Social desirability experiment data analysis: subgroup analysis

Note: this is a long notebook whose output consists mainly of tables.

In [2]:
import numpy as np
import pandas as pd

import utility as util

In [3]:
# Load the prepared survey data. The file name suggests records with missing
# responses were already removed upstream -- TODO confirm.
data_path = "../output/df_no_miss.csv"
df_no_miss = pd.read_csv(data_path)

In [42]:
# Dimensions of the loaded frame as (rows, columns).
df_no_miss.shape

(618, 20)

In [76]:
# List the column names; the `_r` suffixed columns are the recoded variables
# used by the analyses further down.
df_no_miss.columns

Index(['ID', 'vaccine', 'mandate', 'gender', 'marital', 'age_group',
       'education', 'gone_to_friend', 'had_visitors', 'had_close_contact',
       'gone_outside', 'condition', 'gone_to_friend_r', 'had_visitors_r',
       'had_close_contact_r', 'gone_outside_r', 'excuse_statement_condition',
       'response_set_condition', 'age_group_r', 'education_r'],
      dtype='object')

### Recode age_group and education, and exclude records with any missing demographic information

In [21]:
# Collapse the six survey age bands into three analysis groups
# (band -> merged group).
age_g_map = {
    band: group
    for group, bands in (
        ('19-35', ('19-25', '26-35')),
        ('36-55', ('36-45', '46-55')),
        ('56+', ('56-65', '66+')),
    )
    for band in bands
}

# Collapse the five education levels into a two-way split
# (level -> merged group).
education_map = {
    level: group
    for group, levels in (
        ('1. Less than college', ('1. Less than high school diploma',
                                  '2. High school diploma',
                                  '3. Some college')),
        ('2. College+', ('4. Bachelor degree',
                         '5. Graduate degree')),
    )
    for level in levels
}

In [22]:
# Apply the category mappings. Series.map with a dict yields NaN for any value
# that is not a key of the dict (including values that were already missing).
age_recoded = df_no_miss['age_group'].map(age_g_map)
education_recoded = df_no_miss['education'].map(education_map)

df_no_miss['age_group_r'] = age_recoded
df_no_miss['education_r'] = education_recoded

In [40]:
# Exclude records with missing demographic info.
# The subgroup cells split on the recoded age_group_r / education_r columns, so
# missingness is checked on those rather than on the raw columns: Series.map
# leaves NaN both for originally-missing values and for any category absent
# from the mapping dict. Checking only the raw columns would keep such a row in
# df_sub even though it belongs to no age/education subgroup.
df_sub = df_no_miss.dropna(subset=['gender', 'marital', 'age_group_r', 'education_r'])

In [38]:
# Dimensions after dropping records with missing demographics
# (saved output: 608 rows remain of the 618 loaded).
df_sub.shape

(608, 20)

In [41]:
# Demographic columns that define the subgroups analysed below.
# NOTE(review): not referenced by any other cell visible in this notebook.
demo_cols = [
    'gender',
    'marital',
    'age_group_r',
    'education_r',
]

### Subset data

In [103]:
# One frame per demographic subgroup, each selected from df_sub with an
# equality mask on the relevant (recoded) demographic column.

# Gender
woman_df = df_sub[df_sub['gender'].eq('1. Woman')]
man_df = df_sub[df_sub['gender'].eq('2. Man')]

# Marital status
married_df = df_sub[df_sub['marital'].eq('1. Married')]
not_married_df = df_sub[df_sub['marital'].eq('2. Not married')]

# Recoded age group
young_df = df_sub[df_sub['age_group_r'].eq('19-35')]
middle_df = df_sub[df_sub['age_group_r'].eq('36-55')]
old_df = df_sub[df_sub['age_group_r'].eq('56+')]

# Recoded education
lscollege_df = df_sub[df_sub['education_r'].eq('1. Less than college')]
college_df = df_sub[df_sub['education_r'].eq('2. College+')]

### Testing the effect of question wording

In [78]:
# Recoded ("1. Yes" / "2. No") versions of the four behaviour items; these are
# the outcome columns in every cross-tabulation below.
recoded_cols = [
    'gone_to_friend_r',
    'had_visitors_r',
    'had_close_contact_r',
    'gone_outside_r',
]

In [74]:
# Levels of excuse_statement_condition, passed to util.crosstab_chisq; the
# saved tables show their columns in this order.
excuse_statement_order = [
    'no_excuse',
    'with_excuse',
]

#### gender differences

In [79]:
# Women: cross-tabulate each recoded behaviour item against the
# excuse-statement condition and report the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        woman_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,60.8,60.2
2. No,39.2,39.8
Total n,186.0,171.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.993*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,51.6,56.7
2. No,48.4,43.3
Total n,186.0,171.0


*Chi-squared statistic = 0.7, degree of freedom = 1, p = 0.389*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,73.7,76.0
2. No,26.3,24.0
Total n,186.0,171.0


*Chi-squared statistic = 0.2, degree of freedom = 1, p = 0.695*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,79.0,81.9
2. No,21.0,18.1
Total n,186.0,171.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.588*

-----

In [82]:
# Men: cross-tabulate each recoded behaviour item against the
# excuse-statement condition and report the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        man_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,66.7,62.6
2. No,33.3,37.4
Total n,120.0,131.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.588*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,66.7,68.7
2. No,33.3,31.3
Total n,120.0,131.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.834*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,81.7,85.5
2. No,18.3,14.5
Total n,120.0,131.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.516*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,91.7,88.5
2. No,8.3,11.5
Total n,120.0,131.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.54*

-----

#### marital status differences

In [83]:
# Married respondents: each recoded behaviour item by excuse-statement
# condition, with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        married_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,64.8,60.1
2. No,35.2,39.9
Total n,227.0,228.0


*Chi-squared statistic = 0.9, degree of freedom = 1, p = 0.352*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,63.4,63.2
2. No,36.6,36.8
Total n,227.0,228.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.972*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,79.3,82.5
2. No,20.7,17.5
Total n,227.0,228.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.46*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,85.9,87.3
2. No,14.1,12.7
Total n,227.0,228.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.769*

-----

In [84]:
# Not-married respondents: each recoded behaviour item by excuse-statement
# condition, with the chi-squared test.
# NOTE(review): the saved output shows p = 0.044 for had_visitors_r. This is
# one of many subgroup tests in this notebook and no multiple-comparison
# adjustment is visible here -- interpret with care.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        not_married_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,58.2,64.9
2. No,41.8,35.1
Total n,79.0,74.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.499*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,40.5,58.1
2. No,59.5,41.9
Total n,79.0,74.0


*Chi-squared statistic = 4.1, degree of freedom = 1, p = 0.044*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,69.6,73.0
2. No,30.4,27.0
Total n,79.0,74.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.78*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,78.5,77.0
2. No,21.5,23.0
Total n,79.0,74.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.983*

-----

#### age group differences

In [85]:
# Age 19-35: each recoded behaviour item by excuse-statement condition, with
# the chi-squared test.
# NOTE(review): this subgroup is small (saved output: n = 31 and 25, with "No"
# shares under 10% for had_close_contact_r), so some expected cell counts are
# likely below 5 and the chi-squared approximation may be unreliable --
# consider an exact test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        young_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,71.0,68.0
2. No,29.0,32.0
Total n,31.0,25.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.958*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,61.3,64.0
2. No,38.7,36.0
Total n,31.0,25.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.945*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,90.3,96.0
2. No,9.7,4.0
Total n,31.0,25.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.766*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,80.6,84.0
2. No,19.4,16.0
Total n,31.0,25.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.98*

-----

In [86]:
# Age 36-55: each recoded behaviour item by excuse-statement condition, with
# the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        middle_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,65.9,70.7
2. No,34.1,29.3
Total n,82.0,82.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.615*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,63.4,70.7
2. No,36.6,29.3
Total n,82.0,82.0


*Chi-squared statistic = 0.7, degree of freedom = 1, p = 0.406*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,80.5,85.4
2. No,19.5,14.6
Total n,82.0,82.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.534*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,92.7,92.7
2. No,7.3,7.3
Total n,82.0,82.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 1.0*

-----

In [87]:
# Age 56+: each recoded behaviour item by excuse-statement condition, with
# the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        old_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,60.6,56.4
2. No,39.4,43.6
Total n,193.0,195.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.46*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,54.4,57.9
2. No,45.6,42.1
Total n,193.0,195.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.548*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,73.1,75.9
2. No,26.9,24.1
Total n,193.0,195.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.599*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,80.8,81.5
2. No,19.2,18.5
Total n,193.0,195.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.961*

-----

#### education differences

In [88]:
# Less than college: each recoded behaviour item by excuse-statement
# condition, with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        lscollege_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,66.9,63.5
2. No,33.1,36.5
Total n,160.0,170.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.602*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,60.0,65.9
2. No,40.0,34.1
Total n,160.0,170.0


*Chi-squared statistic = 1.0, degree of freedom = 1, p = 0.321*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,76.2,81.8
2. No,23.8,18.2
Total n,160.0,170.0


*Chi-squared statistic = 1.2, degree of freedom = 1, p = 0.273*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,81.9,86.5
2. No,18.1,13.5
Total n,160.0,170.0


*Chi-squared statistic = 1.0, degree of freedom = 1, p = 0.32*

-----

In [89]:
# College or more: each recoded behaviour item by excuse-statement
# condition, with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'excuse_statement_condition',
        college_df,
        excuse_statement_order,
    )

excuse_statement_condition,no_excuse,with_excuse
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,58.9,58.3
2. No,41.1,41.7
Total n,146.0,132.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.98*

-----

excuse_statement_condition,no_excuse,with_excuse
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,54.8,56.8
2. No,45.2,43.2
Total n,146.0,132.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.827*

-----

excuse_statement_condition,no_excuse,with_excuse
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,77.4,78.0
2. No,22.6,22.0
Total n,146.0,132.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.986*

-----

excuse_statement_condition,no_excuse,with_excuse
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,86.3,82.6
2. No,13.7,17.4
Total n,146.0,132.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.489*

-----

### Testing the effect of response set

In [90]:
# Levels of response_set_condition, passed to util.crosstab_chisq; the saved
# tables show their columns in this order.
response_set_order = [
    'two_response',
    'three_response',
]

#### gender differences

In [91]:
# Women: cross-tabulate each recoded behaviour item against the response-set
# condition and report the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        woman_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,60.8,60.2
2. No,39.2,39.8
Total n,181.0,176.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.998*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,51.4,56.8
2. No,48.6,43.2
Total n,181.0,176.0


*Chi-squared statistic = 0.9, degree of freedom = 1, p = 0.355*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,74.0,75.6
2. No,26.0,24.4
Total n,181.0,176.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.832*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,82.3,78.4
2. No,17.7,21.6
Total n,181.0,176.0


*Chi-squared statistic = 0.6, degree of freedom = 1, p = 0.425*

-----

In [92]:
# Men: cross-tabulate each recoded behaviour item against the response-set
# condition and report the chi-squared test.
# NOTE(review): the saved output shows p = 0.044 for had_visitors_r. This is
# one of many subgroup tests in this notebook and no multiple-comparison
# adjustment is visible here -- interpret with care.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        man_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,58.7,70.0
2. No,41.3,30.0
Total n,121.0,130.0


*Chi-squared statistic = 3.0, degree of freedom = 1, p = 0.082*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,61.2,73.8
2. No,38.8,26.2
Total n,121.0,130.0


*Chi-squared statistic = 4.1, degree of freedom = 1, p = 0.044*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,82.6,84.6
2. No,17.4,15.4
Total n,121.0,130.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.802*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,86.8,93.1
2. No,13.2,6.9
Total n,121.0,130.0


*Chi-squared statistic = 2.1, degree of freedom = 1, p = 0.146*

-----

#### marital status differences

In [93]:
# Married respondents: each recoded behaviour item by response-set condition,
# with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        married_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,58.6,65.8
2. No,41.4,34.2
Total n,215.0,240.0


*Chi-squared statistic = 2.2, degree of freedom = 1, p = 0.136*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,59.1,67.1
2. No,40.9,32.9
Total n,215.0,240.0


*Chi-squared statistic = 2.8, degree of freedom = 1, p = 0.094*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,79.5,82.1
2. No,20.5,17.9
Total n,215.0,240.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.568*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,86.0,87.1
2. No,14.0,12.9
Total n,215.0,240.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.852*

-----

In [94]:
# Not-married respondents: each recoded behaviour item by response-set
# condition, with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        not_married_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,63.2,59.1
2. No,36.8,40.9
Total n,87.0,66.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.725*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,46.0,53.0
2. No,54.0,47.0
Total n,87.0,66.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.483*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,72.4,69.7
2. No,27.6,30.3
Total n,87.0,66.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.851*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,79.3,75.8
2. No,20.7,24.2
Total n,87.0,66.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.744*

-----

#### age group differences

In [95]:
# Age 19-35: each recoded behaviour item by response-set condition, with the
# chi-squared test.
# NOTE(review): this subgroup is small (saved output: n = 29 and 27, including
# a 0.0% cell for had_close_contact_r), so some expected cell counts are likely
# below 5 and the chi-squared approximation may be unreliable -- consider an
# exact test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        young_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,75.9,63.0
2. No,24.1,37.0
Total n,29.0,27.0


*Chi-squared statistic = 0.6, degree of freedom = 1, p = 0.448*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,58.6,66.7
2. No,41.4,33.3
Total n,29.0,27.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.73*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,86.2,100.0
2. No,13.8,0.0
Total n,29.0,27.0


*Chi-squared statistic = 2.2, degree of freedom = 1, p = 0.138*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,75.9,88.9
2. No,24.1,11.1
Total n,29.0,27.0


*Chi-squared statistic = 0.9, degree of freedom = 1, p = 0.356*

-----

In [96]:
# Age 36-55: each recoded behaviour item by response-set condition, with the
# chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        middle_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,66.2,69.9
2. No,33.8,30.1
Total n,71.0,93.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.738*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,63.4,69.9
2. No,36.6,30.1
Total n,71.0,93.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.477*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,84.5,81.7
2. No,15.5,18.3
Total n,71.0,93.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.794*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,91.5,93.5
2. No,8.5,6.5
Total n,71.0,93.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.854*

-----

In [97]:
# Age 56+: each recoded behaviour item by response-set condition, with the
# chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        old_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,55.4,61.8
2. No,44.6,38.2
Total n,202.0,186.0


*Chi-squared statistic = 1.4, degree of freedom = 1, p = 0.241*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,52.0,60.8
2. No,48.0,39.2
Total n,202.0,186.0


*Chi-squared statistic = 2.7, degree of freedom = 1, p = 0.102*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,73.8,75.3
2. No,26.2,24.7
Total n,202.0,186.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.823*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,82.7,79.6
2. No,17.3,20.4
Total n,202.0,186.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.515*

-----

#### education differences

In [98]:
# Less than college: each recoded behaviour item by response-set condition,
# with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        lscollege_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,63.3,67.1
2. No,36.7,32.9
Total n,166.0,164.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.54*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,57.8,68.3
2. No,42.2,31.7
Total n,166.0,164.0


*Chi-squared statistic = 3.4, degree of freedom = 1, p = 0.064*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,77.7,80.5
2. No,22.3,19.5
Total n,166.0,164.0


*Chi-squared statistic = 0.2, degree of freedom = 1, p = 0.628*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,84.3,84.1
2. No,15.7,15.9
Total n,166.0,164.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.918*

-----

In [99]:
# College or more: each recoded behaviour item by response-set condition,
# with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'response_set_condition',
        college_df,
        response_set_order,
    )

response_set_condition,two_response,three_response
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,55.9,61.3
2. No,44.1,38.7
Total n,136.0,142.0


*Chi-squared statistic = 0.6, degree of freedom = 1, p = 0.43*

-----

response_set_condition,two_response,three_response
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,52.2,59.2
2. No,47.8,40.8
Total n,136.0,142.0


*Chi-squared statistic = 1.1, degree of freedom = 1, p = 0.296*

-----

response_set_condition,two_response,three_response
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,77.2,78.2
2. No,22.8,21.8
Total n,136.0,142.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.961*

-----

response_set_condition,two_response,three_response
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,83.8,85.2
2. No,16.2,14.8
Total n,136.0,142.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.878*

-----

### Comparing the Control and C conditions

In [102]:
# Keep only the respondents whose condition is "Control" or "C".
in_control_or_c = df_sub["condition"].isin(["Control", "C"])
df_sub_controlC = df_sub.loc[in_control_or_c]

In [104]:
# Demographic subgroup frames restricted to the Control / C subset. Each one
# is selected from df_sub_controlC with an equality test on one column.

# Gender
woman_df_cc = df_sub_controlC.loc[lambda d: d['gender'] == '1. Woman']
man_df_cc = df_sub_controlC.loc[lambda d: d['gender'] == '2. Man']

# Marital status
married_df_cc = df_sub_controlC.loc[lambda d: d['marital'] == '1. Married']
not_married_df_cc = df_sub_controlC.loc[lambda d: d['marital'] == '2. Not married']

# Recoded age group
young_df_cc = df_sub_controlC.loc[lambda d: d['age_group_r'] == '19-35']
middle_df_cc = df_sub_controlC.loc[lambda d: d['age_group_r'] == '36-55']
old_df_cc = df_sub_controlC.loc[lambda d: d['age_group_r'] == '56+']

# Recoded education
lscollege_df_cc = df_sub_controlC.loc[lambda d: d['education_r'] == '1. Less than college']
college_df_cc = df_sub_controlC.loc[lambda d: d['education_r'] == '2. College+']

In [105]:
# Levels of `condition` compared in this section, passed to
# util.crosstab_chisq; the saved tables show their columns in this order.
conditions_control_C = [
    'Control',
    'C',
]

#### gender differences

In [106]:
# Women (Control / C subset): each recoded behaviour item by condition, with
# the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        woman_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,59.8,58.6
2. No,40.2,41.4
Total n,97.0,87.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.991*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,49.5,59.8
2. No,50.5,40.2
Total n,97.0,87.0


*Chi-squared statistic = 1.6, degree of freedom = 1, p = 0.211*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,71.1,74.7
2. No,28.9,25.3
Total n,97.0,87.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.705*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,79.4,78.2
2. No,20.6,21.8
Total n,97.0,87.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.983*

-----

In [107]:
# Men (Control / C subset): each recoded behaviour item by condition, with
# the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        man_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,64.2,71.4
2. No,35.8,28.6
Total n,53.0,63.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.524*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,60.4,76.2
2. No,39.6,23.8
Total n,53.0,63.0


*Chi-squared statistic = 2.7, degree of freedom = 1, p = 0.103*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,79.2,85.7
2. No,20.8,14.3
Total n,53.0,63.0


*Chi-squared statistic = 0.5, degree of freedom = 1, p = 0.502*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,86.8,90.5
2. No,13.2,9.5
Total n,53.0,63.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.741*

-----

#### marital status differences

In [108]:
# Married respondents (Control / C subset): each recoded behaviour item by
# condition, with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        married_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,61.0,63.6
2. No,39.0,36.4
Total n,105.0,118.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.793*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,59.0,66.9
2. No,41.0,33.1
Total n,105.0,118.0


*Chi-squared statistic = 1.2, degree of freedom = 1, p = 0.279*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,77.1,83.1
2. No,22.9,16.9
Total n,105.0,118.0


*Chi-squared statistic = 0.9, degree of freedom = 1, p = 0.348*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,83.8,86.4
2. No,16.2,13.6
Total n,105.0,118.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.716*

-----

In [109]:
# Not-married respondents (Control / C subset): each recoded behaviour item
# by condition, with the chi-squared test.
# NOTE(review): the saved output shows p = 0.047 for had_visitors_r with
# n = 45 and 32. This is one of many subgroup tests in this notebook and no
# multiple-comparison adjustment is visible here -- interpret with care.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        not_married_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,62.2,65.6
2. No,37.8,34.4
Total n,45.0,32.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.948*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,40.0,65.6
2. No,60.0,34.4
Total n,45.0,32.0


*Chi-squared statistic = 3.9, degree of freedom = 1, p = 0.047*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,66.7,65.6
2. No,33.3,34.4
Total n,45.0,32.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.881*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,77.8,71.9
2. No,22.2,28.1
Total n,45.0,32.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.746*

-----

#### age group differences

In [110]:
# Age 19-35 (Control / C subset): each recoded behaviour item by condition,
# with the chi-squared test.
# NOTE(review): this subgroup is very small (saved output: n = 16 and 12,
# including a 0.0% cell for had_close_contact_r), so expected cell counts are
# likely below 5 and the chi-squared approximation may be unreliable --
# consider an exact test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        young_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,75.0,58.3
2. No,25.0,41.7
Total n,16.0,12.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.599*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,56.2,66.7
2. No,43.8,33.3
Total n,16.0,12.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.867*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,81.2,100.0
2. No,18.8,0.0
Total n,16.0,12.0


*Chi-squared statistic = 0.9, degree of freedom = 1, p = 0.332*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,75.0,91.7
2. No,25.0,8.3
Total n,16.0,12.0


*Chi-squared statistic = 0.4, degree of freedom = 1, p = 0.522*

-----

In [111]:
# Age 36-55 (Control / C subset): each recoded behaviour item by condition,
# with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        middle_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,65.6,74.4
2. No,34.4,25.6
Total n,32.0,43.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.568*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,65.6,79.1
2. No,34.4,20.9
Total n,32.0,43.0


*Chi-squared statistic = 1.1, degree of freedom = 1, p = 0.299*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,81.2,83.7
2. No,18.8,16.3
Total n,32.0,43.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.977*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,87.5,90.7
2. No,12.5,9.3
Total n,32.0,43.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.948*

-----

In [112]:
# Age 56+ (Control / C subset): each recoded behaviour item by condition,
# with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        old_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,57.8,60.0
2. No,42.2,40.0
Total n,102.0,95.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.871*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,49.0,61.1
2. No,51.0,38.9
Total n,102.0,95.0


*Chi-squared statistic = 2.4, degree of freedom = 1, p = 0.121*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,70.6,74.7
2. No,29.4,25.3
Total n,102.0,95.0


*Chi-squared statistic = 0.2, degree of freedom = 1, p = 0.622*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,81.4,78.9
2. No,18.6,21.1
Total n,102.0,95.0


*Chi-squared statistic = 0.1, degree of freedom = 1, p = 0.804*

-----

#### education differences

In [113]:
# Less than college (Control / C subset): each recoded behaviour item by
# condition, with the chi-squared test.
# NOTE(review): the saved output shows p = 0.049 for had_visitors_r. This is
# one of many subgroup tests in this notebook and no multiple-comparison
# adjustment is visible here -- interpret with care.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        lscollege_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,64.3,64.8
2. No,35.7,35.2
Total n,84.0,88.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.926*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,54.8,70.5
2. No,45.2,29.5
Total n,84.0,88.0


*Chi-squared statistic = 3.9, degree of freedom = 1, p = 0.049*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,75.0,83.0
2. No,25.0,17.0
Total n,84.0,88.0


*Chi-squared statistic = 1.2, degree of freedom = 1, p = 0.274*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,81.0,85.2
2. No,19.0,14.8
Total n,84.0,88.0


*Chi-squared statistic = 0.3, degree of freedom = 1, p = 0.586*

-----

In [114]:
# College or more (Control / C subset): each recoded behaviour item by
# condition, with the chi-squared test.
for outcome in recoded_cols:
    util.crosstab_chisq(
        outcome,
        'condition',
        college_df_cc,
        conditions_control_C,
    )

condition,Control,C
gone_to_friend_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,57.6,62.9
2. No,42.4,37.1
Total n,66.0,62.0


*Chi-squared statistic = 0.2, degree of freedom = 1, p = 0.664*

-----

condition,Control,C
had_visitors_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,51.5,61.3
2. No,48.5,38.7
Total n,66.0,62.0


*Chi-squared statistic = 0.9, degree of freedom = 1, p = 0.349*

-----

condition,Control,C
had_close_contact_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,72.7,74.2
2. No,27.3,25.8
Total n,66.0,62.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.99*

-----

condition,Control,C
gone_outside_r,Unnamed: 1_level_1,Unnamed: 2_level_1
1. Yes,83.3,80.6
2. No,16.7,19.4
Total n,66.0,62.0


*Chi-squared statistic = 0.0, degree of freedom = 1, p = 0.869*

-----