In [2]:
# Core analysis packages
import numpy as np
import os, sys
import pandas as pd
from scipy import stats
from scipy.special import comb
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats import anova
# from patsy import dmatrices
import bff
import pingouin as pg
#import researchpy

import networkx as nx

# Plotting packages
import matplotlib.pyplot as plt
plt.rcdefaults()
import seaborn as sns 
sns.set(style="ticks", color_codes=True)
sns.set_style("white")
sns.set_style({'xtick.bottom': True, 'ytick.left': True})
colorref = ["gray", "royalblue", "crimson", "goldenrod", "mediumorchid", "seagreen"]

# iPython magic commands
%matplotlib notebook
%load_ext autoreload
%autoreload 2
%autosave 30

SMALL_SIZE = 12
MEDIUM_SIZE = 12
BIG_SIZE = 14

plt.rc('font', size=SMALL_SIZE)          # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE)     # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE)    # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE)    # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE)    # legend fontsize
plt.rc('figure', titlesize=BIG_SIZE)  # fontsize of the figure title
cust_palette = sns.color_palette("Paired")[6:10]
cust_palette = [cust_palette[i] for i in [1,0,3,2]]

def median_split(S):
    return S > S.median()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Autosaving every 30 seconds


In [3]:
def ttest_ind(x1, x2, equivar=False, alpha=0.05, printres=False):
    n1 = len(x1)
    M1 = np.mean(x1)
    s1 = np.std(x1, ddof=1)
    n2 = len(x2)
    M2 = np.mean(x2)
    s2 = np.std(x2, ddof=1)
    
    # t-test
    [t, p] = stats.ttest_ind(x1, x2, equal_var=equivar)
    # cohen's d
    dof = n1 + n2 - 2
    sp = np.sqrt(((n1-1)*s1**2 + (n2-1)*s2**2) / dof)
    d = np.abs(M1 - M2) / sp
    # degrees of freedom
    df = (s1**2/n1 + s2**2/n2)**2 / ((s1**2/n1)**2/(n1-1) + (s2**2/n2)**2/(n2-1))
    # confidence intervals (M1 - M2) ± ts(M1 - M2)
    se = np.sqrt(sp**2/n1 + sp**2/n2)
    CI = (M1 - M2) + np.array([-1,1])*stats.t.ppf(1-alpha/2, df, loc=0, scale=1)*se

    res = (t, df, p, d, CI[0], CI[1])
    if printres:
        print("t = %.5f, df = %.5f, p = %.5f, d = %.5f, CI = (%.5f, %.5f)" % res)
    else:
        return res

In [4]:
# Load file (from same directory as the notebook)
DATA = pd.read_excel(os.path.expanduser("natrepCOVID.xlsx"))
DATA.head()

Unnamed: 0,Partnum,cond,condfigs,political,CHANGE_Fbaseline,CHANGE_Cbaseline,CHANGE_F,CHANGE_C,change_k,CHANGE_Kbaseline,Age,Gender,votereg,Resist,Trump,Edu,Partystr
0,1,1,1,1.0,-25.0,-24.0,-25.0,-24.0,-1.0,-1.0,54,1,1,3.333333,1,4,3
1,2,1,1,1.0,0.5,-6.75,0.5,-6.75,7.25,7.25,57,0,0,2.333333,2,3,3
2,3,1,1,1.0,-1.25,-5.0,-1.25,-5.0,3.75,3.75,31,1,1,3.333333,3,3,3
3,4,1,1,2.0,0.0,-6.25,0.0,-6.25,6.25,6.25,24,1,1,5.0,6,2,5
4,5,1,1,2.0,7.5,-14.75,7.5,-14.75,22.25,22.25,40,1,1,4.0,6,2,3


In [5]:
DATA_1 = DATA.loc[DATA["cond"]==1]
DATA_2 = DATA.loc[DATA["cond"]==2]
DATA_3 = DATA.loc[DATA["cond"]==3]
DATA_4 = DATA.loc[DATA["cond"]==4]
DATA_5 = DATA.loc[DATA["cond"]==5]
DATA_6 = DATA.loc[DATA["cond"]==6]
DATA_7 = DATA.loc[DATA["cond"]==7]
DATA_8 = DATA.loc[DATA["cond"]==8]
DATA_9 = DATA.loc[DATA["cond"]==9]
DATA_10 = DATA.loc[DATA["cond"]==10]

In [7]:
pal = ["#241717"]

fig, ax = plt.subplots(1,1, figsize=(5,5))

sns.pointplot(y=DATA.condfigs, x=DATA.change_k, data=DATA, orient="h", \
            palette=pal, join=False, edgecolor=".2", errcolor=".2", \
            ax=ax)
sns.despine()
ax.set_yticklabels(["Control", "Generic-Normative","Dem-Normative", "Rep-Normative", "Fauci", "CDC", "Dem-Anecdote", "Rep-Anecdote","Biden", "Trump"])
ax.set_ylabel("")
ax.set_xlabel("Change in Knowledge")
plt.axvline(x=0, c='k')
plt.xlim(-20,20)

plt.tight_layout()
#plt.savefig('fig.tif', dpi=900, format="tiff")

<IPython.core.display.Javascript object>

In [21]:
pal = ["#2874A6", "#DC143C"]

fig, ax = plt.subplots(1,1, figsize=(5,5))

sns.pointplot(y="condfigs", x="change_k", hue="political", data=DATA, orient="h", \
            palette=pal, dodge=0.15, join=False, edgecolor=".2", errcolor=".2", \
            ax=ax)
sns.despine()
ax.set_yticklabels(["Control", "Generic-Normative","Dem-Normative", "Rep-Normative", "Fauci", "CDC", "Dem-Anecdote", "Rep-Anecdote","Biden", "Trump"])
ax.set_ylabel("")
ax.set_xlabel("Change in Knowledge")
plt.axvline(x=0, c='k')
plt.xlim(-20,25)
ax.get_legend().remove()

plt.tight_layout()
plt.savefig('fig.tif', dpi=900, format="tiff")

<IPython.core.display.Javascript object>

In [40]:
pal = ["#241717", "#9292A0"]

fig, ax = plt.subplots(1,1, figsize=(5,5))

sns.pointplot(y="condfigs", x="change_k", hue="baseline", data=DAT, orient="h", \
            palette=pal, dodge=0.15, join=False, edgecolor=".2", errcolor=".2", \
            ax=ax)
sns.despine()
ax.set_yticklabels(["Control", "Generic-Normative","Dem-Normative", "Rep-Normative", "Fauci", "CDC", "Dem-Anecdote", "Rep-Anecdote","Biden", "Trump"])
ax.set_ylabel("")
ax.set_xlabel("Change in Knowledge")
plt.axvline(x=0, c='k')
plt.xlim(-15,20)
ax.get_legend().remove()

plt.tight_layout()
plt.savefig('fig.tif', dpi=900, format="tiff")

<IPython.core.display.Javascript object>

In [104]:
fig, ax = plt.subplots(1,1, figsize=(4,4))

sns.regplot(x=DATA.Resist, y=DATA.change_k, scatter_kws={"color": "#006C67"}, \
            line_kws={"color":"#006C67","alpha":1,"lw":3}, x_estimator=np.mean, ax=ax)
plt.tight_layout()
ax.set_ylim([-15,40])
ax.set_ylabel('Change in knowledge')
ax.set(xlabel='Resistance to change')

plt.savefig('fig.tif', dpi=900, format="tiff")

<IPython.core.display.Javascript object>

In [13]:
import warnings
warnings.filterwarnings('ignore')

%load_ext rpy2.ipython
# %R library(lme4)

%R library(lmerTest)

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


array(['lmerTest', 'lme4', 'Matrix', 'tools', 'stats', 'graphics',
       'grDevices', 'utils', 'datasets', 'methods', 'base'], dtype='<U9')

In [87]:
%Rpush DATA

In [16]:
%%R

M <- lm(change_k ~ as.factor(political) + Age + as.factor(Gender) + Resist + Trump + Edu + Partystr, data = DATA)
print(summary(M))


Call:
lm(formula = change_k ~ as.factor(political) + Age + as.factor(Gender) + 
    Resist + Trump + Edu + Partystr, data = DATA)

Residuals:
     Min       1Q   Median       3Q      Max 
-105.600  -10.140   -2.487    9.044  103.517 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)   
(Intercept)              5.75122    5.15894   1.115  0.26519   
as.factor(political)2   -0.43654    2.31314  -0.189  0.85035   
as.factor(political)NaN 21.62951   17.19340   1.258  0.20867   
Age                      0.02268    0.04491   0.505  0.61368   
as.factor(Gender)1       2.06142    1.52564   1.351  0.17693   
Resist                  -2.67389    0.87821  -3.045  0.00239 **
Trump                    0.28076    0.50583   0.555  0.57898   
Edu                      1.19859    0.75665   1.584  0.11348   
Partystr                 0.33137    0.74565   0.444  0.65684   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 24.21 on 1051 deg

In [36]:
# Load file (from same directory as the notebook)
DAT = pd.read_excel(os.path.expanduser("Book3.xlsx"))
reorder = {k: v for k,v in enumerate([0, 1, 9, 8, 4, 5, 3, 2, 7, 6])}
DAT.insert(2, "condfigs", DAT["cond"].apply(lambda x: reorder[x-1]+1))
DAT.head()

Unnamed: 0,Partnum,cond,condfigs,political,Age,Gender,votereg,Resist,Trump,Edu,Partystr,CHANGE_F,CHANGE_C,change_k,baseline
0,1,1,1,1.0,54,1,1,3.333333,1,4,3,-25.0,-24.0,-1.0,0
1,2,1,1,1.0,57,0,0,2.333333,2,3,3,0.5,-6.75,7.25,0
2,3,1,1,1.0,31,1,1,3.333333,3,3,3,-1.25,-5.0,3.75,0
3,4,1,1,2.0,24,1,1,5.0,6,2,5,0.0,-6.25,6.25,0
4,5,1,1,2.0,40,1,1,4.0,6,2,3,7.5,-14.75,22.25,0


In [50]:
DAT_1 = DAT.loc[DAT["cond"]==1]
DAT_2 = DAT.loc[DAT["cond"]==2]
DAT_3 = DAT.loc[DAT["cond"]==3]
DAT_4 = DAT.loc[DAT["cond"]==4]
DAT_5 = DAT.loc[DAT["cond"]==5]
DAT_6 = DAT.loc[DAT["cond"]==6]
DAT_7 = DAT.loc[DAT["cond"]==7]
DAT_8 = DAT.loc[DAT["cond"]==8]
DAT_9 = DAT.loc[DAT["cond"]==9]
DAT_10 = DAT.loc[DAT["cond"]==10]

In [70]:
pall = ["#241717", "#FFFEFE"]
palc = ["#FFFEFE"]

fig, ax = plt.subplots(1,10, figsize=(10,4), sharey=True, gridspec_kw={"width_ratios": [0.6,1,1,1,1,1,1,1,1,1]})

sns.barplot(x=DAT_1.baseline, y=DAT_1.change_k, data=DAT_1, \
            palette=palc, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[0])
sns.despine()
ax[0].set_xlabel("Control", rotation=30)
ax[0].set_xticklabels([""])
ax[0].set_ylabel("Change in knowledge")


sns.barplot(x=DAT_2.baseline, y=DAT_2.change_k, data=DAT_2, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[1])
sns.despine()
ax[1].set_xlabel("Generic-Norm", rotation=30)
ax[1].set_xticklabels(["T", "B"])
ax[1].set_ylabel("")



sns.barplot(x=DAT_8.baseline, y=DAT_8.change_k, data=DAT_8, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[2])
sns.despine()
ax[2].set_xlabel("Dem-Norm", rotation=30)
ax[2].set_xticklabels(["T", "B"])
ax[2].set_ylabel("")


sns.barplot(x=DAT_7.baseline, y=DAT_7.change_k, data=DAT_7, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[3])
sns.despine()
ax[3].set_xlabel("Rep-Norm", rotation=30)
ax[3].set_xticklabels(["T", "B"])
ax[3].set_ylabel("")


sns.barplot(x=DAT_5.baseline, y=DAT_5.change_k, data=DAT_5, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[4])
sns.despine()
ax[4].set_xlabel("Fauci", rotation=30)
ax[4].set_xticklabels(["T", "B"])
ax[4].set_ylabel("")


sns.barplot(x=DAT_6.baseline, y=DAT_6.change_k, data=DAT_6, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[5])
sns.despine()
ax[5].set_xlabel("CDC", rotation=30)
ax[5].set_xticklabels(["T", "B"])
ax[5].set_ylabel("")

sns.barplot(x=DAT_10.baseline, y=DAT_10.change_k, data=DAT_10, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[6])
sns.despine()
ax[6].set_xlabel("Dem-Anec", rotation=30)
ax[6].set_xticklabels(["T", "B"])
ax[6].set_ylabel("")

sns.barplot(x=DAT_9.baseline, y=DAT_9.change_k, data=DAT_9, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[7])
sns.despine()
ax[7].set_xlabel("Rep-Anec", rotation=30)
ax[7].set_xticklabels(["T", "B"])
ax[7].set_ylabel("")


sns.barplot(x=DAT_4.baseline, y=DAT_4.change_k, data=DAT_4, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[8])
sns.despine()
ax[8].set_xlabel("Biden", rotation=30)
ax[8].set_xticklabels(["T", "B"])
ax[8].set_ylabel("")

sns.barplot(x=DAT_3.baseline, y=DAT_3.change_k, data=DAT_3, \
            palette=pall, linewidth = [0,1], edgecolor=".2", errcolor=".2", \
            ax=ax[9])
sns.despine()
ax[9].set_xlabel("Trump", rotation=30)
ax[9].set_xticklabels(["T", "B"])
ax[9].set_ylabel("")

plt.tight_layout()
plt.savefig('fig.tif', dpi=900, format="tiff")

<IPython.core.display.Javascript object>