In [53]:
library(dplyr)
library(ggplot2)
library(toffee)
library(lme4)
library(tidyr)
library(xtable)
library(broom)
library(broom.mixed)

In [54]:
# Build a four-column report table (term, O.R., C.I., p.value) from a model.
#
#   model: a fitted model object that tidy() accepts.
#
# Returns the tidy() coefficient table reduced to:
#   term    - coefficient name
#   O.R.    - exp(estimate)
#   C.I.    - CI_fmt(estimate, std.error); CI_fmt is not defined in this file
#             (presumably provided by toffee - TODO confirm). Note it is given
#             the raw estimate, not the exponentiated one.
#   p.value - character: "<0.001" or the p-value rounded to 3 d.p., with "*"
#             appended when the raw p-value is below 0.05
make_model_tbl <- function(model){
  coefs <- tidy(model)
  formatted <- mutate(
    coefs,
    # computed first, while p.value is still numeric
    star = ifelse(p.value < 0.05, "*", ""),
    O.R. = exp(estimate),
    p.value = paste0(
      ifelse(p.value < 0.001, "<0.001", as.character(round(p.value, 3))),
      star
    ),
    C.I. = CI_fmt(estimate, std.error)
  )
  dplyr::select(formatted, term, O.R., C.I., p.value)
}

In [59]:
# Read the ad-count table and the demographics table (both keyed by `pid`).
ad_counts <- read.csv("../data/harmful_counts_df.csv")
dems <- read.csv("../data/ALLDEMS.csv")


# Elissa's example model: prop_harmful_grouped ~ !men + !white + high_ed
# Join the two tables on `pid`, then express each category count as a
# fraction of that row's `total`.
ad_counts.dems <- merge(ad_counts, dems, by = "pid") %>%
  mutate(
    prop_harmful_grouped = harmful_grouped / total,
    prop_prohibited = prohibited / total,
    prop_deceptive = deceptive / total,
    prop_clickbait = clickbait / total,
    prop_financial = financial / total,
    prop_sensitive = sensitive / total,
    # ===== don't care about anything else below =====
    prop_other = other / total,
    prop_opportunity = opportunity / total,
    prop_healthcare = healthcare / total,
    prop_neutral = neutral / total
  )

In [25]:
# ad_counts.dems <- merge(ad_counts, dems, by="pid") %>%
#     mutate(prop_harmful_grouped = harmful_grouped / total) %>%
#     mutate(prop_neutral = neutral / total)
    # mutate(notman = !man) %>%
    # mutate(notwhite = !white)

# TODO: have a version of this DF with financial excluded, prop_nonfin_harmful e.g.

#### What demographic factors impact your fraction of all grouped harmful ads?

In [66]:
# Fit one linear model per harmful-ad category. Every model uses the same
# demographic predictors; only the response (a prop_* column) changes.

# all grouped harmful ads
mod.frac_harmful_dems <- lm(
  prop_harmful_grouped ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

# prohibited
mod.frac_prohibited_dems <- lm(
  prop_prohibited ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

# deceptive
mod.frac_deceptive_dems <- lm(
  prop_deceptive ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

# clickbait
mod.frac_clickbait_dems <- lm(
  prop_clickbait ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

# financial
mod.frac_financial_dems <- lm(
  prop_financial ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

# sensitive
mod.frac_sensitive_dems <- lm(
  prop_sensitive ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

In [80]:
# pull out formatted parameters
cols <- c('term', 'estimate', 'p.value')

# Build the block of columns one model contributes to the combined table.
#
#   model:  a fitted model accepted by both tidy() and toffee_tbl()
#   suffix: label appended to the output column names, e.g. "omnibus" yields
#           `estimate.omnibus` and `p.omnibus`
#
# Returns one row per model term with three columns:
#   term              - coefficient name
#   estimate.<suffix> - character: estimate rounded to 3 d.p., immediately
#                       followed by a significance marker, a space, and the CI
#                       string taken from toffee_tbl()
#   p.<suffix>        - the raw numeric p-value
#
# Significance markers: "**" p < 0.001, "*" p < 0.05, "+" p <= 0.1, else "".
#
# Reads the column names to keep from `cols`, defined just above.
format_model_params <- function(model, suffix) {
  estimate.col <- paste0("estimate.", suffix)
  p.col <- paste0("p.", suffix)

  # The CI strings are attached by position, so they must line up one-to-one
  # with the rows of tidy(model); fail loudly if the lengths disagree.
  ci <- toffee_tbl(model)$CI
  params <- tidy(model)
  stopifnot(length(ci) == nrow(params))

  params %>%
    # all_of() marks `cols` as an external character vector; passing it bare
    # to select() is deprecated in tidyselect
    select(all_of(cols)) %>%
    mutate(sig = case_when(
      p.value < 0.001 ~ "**",
      p.value < 0.05 ~ "*",
      p.value <= 0.1 ~ "+",
      TRUE ~ "")) %>%
    # add confidence interval column
    mutate(CI = ci) %>%
    # fold the significance marker and CI into the estimate column
    # (these are linear-model coefficients, not odds ratios)
    mutate(estimate = paste0(as.character(round(estimate, 3)), sig, ' ', CI)) %>%
    select(-sig, -CI) %>%
    # suffix the columns so the per-model tables can be joined on `term`
    rename(!!estimate.col := estimate, !!p.col := p.value)
}

# === OMNIBUS ===
omnibus.model.params <- format_model_params(mod.frac_harmful_dems, "omnibus")

# === PROHIBITED ===
prohibited.model.params <- format_model_params(mod.frac_prohibited_dems, "prohibited")

# === DECEPTIVE ===
deceptive.model.params <- format_model_params(mod.frac_deceptive_dems, "deceptive")

# === CLICKBAIT ===
clickbait.model.params <- format_model_params(mod.frac_clickbait_dems, "clickbait")

# === FINANCIAL ===
financial.model.params <- format_model_params(mod.frac_financial_dems, "financial")

# === SENSITIVE ===
sensitive.model.params <- format_model_params(mod.frac_sensitive_dems, "sensitive")

In [81]:
# Combine the six per-model parameter tables into one wide table, matching
# rows on `term`.
params.list <- list(
  omnibus.model.params,
  clickbait.model.params,
  deceptive.model.params,
  prohibited.model.params,
  financial.model.params,
  sensitive.model.params
)
all.params <- Reduce(
  function(joined, nxt) merge(joined, nxt, by = "term"),
  params.list
)

# Print LaTeX for the formatted estimate columns only; the raw p.* columns
# are left out of the printed table.
all.params %>%
  select(
    term,
    estimate.omnibus,
    estimate.prohibited,
    estimate.deceptive,
    estimate.clickbait,
    estimate.financial,
    estimate.sensitive
  ) %>%
  xtable() %>%
  print()

% latex table generated in R 4.1.2 by xtable 1.8-4 package
% Wed Jan 11 09:16:39 2023
\begin{table}[ht]
\centering
\begin{tabular}{rlllllll}
  \hline
 & term & estimate.omnibus & estimate.prohibited & estimate.deceptive & estimate.clickbait & estimate.financial & estimate.sensitive \\ 
  \hline
1 & (Intercept) & 0.032 [-0.02, 0.08] & 0.006+ [0, 0.01] & -0.011 [-0.03, 0.01] & -0.003 [-0.02, 0.02] & 0.009 [-0.03, 0.05] & 0.019+ [0, 0.04] \\ 
  2 & asian & 0.02 [-0.04, 0.08] & 0.001 [-0.01, 0.01] & 0.011 [-0.01, 0.03] & 0.011 [-0.01, 0.03] & 0.011 [-0.04, 0.06] & -0.003 [-0.03, 0.02] \\ 
  3 & black & 0.043+ [0, 0.09] & 0 [-0.01, 0.01] & 0.018* [0, 0.03] & 0.021* [0, 0.04] & 0.021 [-0.02, 0.06] & -0.002 [-0.02, 0.02] \\ 
  4 & high\_ed & 0.001 [-0.02, 0.03] & -0.001 [0, 0] & 0.003 [0, 0.01] & 0.007 [0, 0.02] & -0.008 [-0.03, 0.01] & 0.001 [-0.01, 0.01] \\ 
  5 & hispanic & 0.017 [-0.03, 0.06] & -0.003 [-0.01, 0] & 0.008 [-0.01, 0.02] & 0.002 [-0.01, 0.02] & 0.02 [-0.01, 0.05] & -0.005 [-0

In [63]:
# Omnibus model: fraction of grouped harmful ads regressed on demographics.
mod.frac_harmful_dems <- lm(
  prop_harmful_grouped ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

mod.frac_harmful_dems %>%
  toffee_tbl() %>%
  xtable()

# Observation: modelling with `woman` instead gives an estimate of -0.05
# (p < 0.01), i.e. being a woman actually means you see fewer harmful ads.

Variable,Estimate,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.03,"[-0.02, 0.08]",0.23
man,0.06,"[0.03, 0.08]",< 0.01***
white,0.02,"[-0.02, 0.07]",0.31
black,0.04,"[0, 0.09]",0.07
hispanic,0.02,"[-0.03, 0.06]",0.44
asian,0.02,"[-0.04, 0.08]",0.51
high_ed,0.0,"[-0.02, 0.03]",0.91
older,0.04,"[0.01, 0.06]",< 0.01**


In [64]:
# Prohibited-ads model: same demographic predictors as the omnibus model.
# NOTE(review): the output recorded under this cell has tidy()-style columns
# (term/estimate/std.error/...) rather than the Variable/Estimate/CI/p_value
# layout toffee_tbl() shows elsewhere in this notebook - it looks stale;
# re-run to confirm.
mod.frac_prohibited_dems <- lm(
  prop_prohibited ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

mod.frac_prohibited_dems %>%
  toffee_tbl()

term,estimate,std.error,statistic,p.value
<chr>,<dbl>,<dbl>,<dbl>,<dbl>
(Intercept),1.0062944,0.00325871,1.92552183,0.05647235
man,1.0019288,0.001467926,1.31270117,0.19172771
white,1.0010215,0.002928333,0.34865064,0.72794768
black,0.9999411,0.002944345,-0.02001786,0.98406156
hispanic,0.9968429,0.002667769,-1.18528696,0.23818894
asian,1.0012954,0.00368065,0.35172325,0.72564765
high_ed,0.9994288,0.001494887,-0.382206,0.70296815
older,0.9959816,0.001601226,-2.51465428,0.01320532


In [40]:
# Full lm summary (residuals, coefficient table, R-squared, F-statistic) for
# the omnibus harmful-ads model.
summary(mod.frac_harmful_dems)


Call:
lm(formula = prop_harmful_grouped ~ man + white + black + hispanic + 
    asian + high_ed + older, data = ad_counts.dems)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.131236 -0.041362 -0.008607  0.030253  0.245619 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.031820   0.026397   1.205  0.23034    
man         0.057362   0.011891   4.824 4.08e-06 ***
white       0.024331   0.023721   1.026  0.30703    
black       0.043015   0.023850   1.804  0.07375 .  
hispanic    0.016621   0.021610   0.769  0.44330    
asian       0.019839   0.029815   0.665  0.50703    
high_ed     0.001333   0.012109   0.110  0.91256    
older       0.039116   0.012971   3.016  0.00311 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.06534 on 123 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.2216,	Adjusted R-squared:  0.1773 
F-statistic: 5.003 on 7 and 123 DF,

In [28]:
# Emit the omnibus model's toffee_tbl() table as LaTeX.
# NOTE(review): the LaTeX recorded under this cell has no `asian` row, so it
# appears to come from an earlier fit without that predictor - re-run before
# copying it anywhere.
print(xtable(toffee_tbl(mod.frac_harmful_dems)))

% latex table generated in R 4.1.2 by xtable 1.8-4 package
% Thu Dec  1 12:02:31 2022
\begin{table}[ht]
\centering
\begin{tabular}{rlrll}
  \hline
 & Variable & Estimate & CI & p\_value \\ 
  \hline
1 & (Intercept) & 0.05 & [0.01, 0.08] & 0.01* \\ 
  2 & man & 0.06 & [0.04, 0.08] & $<$ 0.01*** \\ 
  3 & white & 0.01 & [-0.02, 0.04] & 0.42 \\ 
  4 & black & 0.03 & [0, 0.06] & 0.06 \\ 
  5 & hispanic & 0.01 & [-0.03, 0.05] & 0.61 \\ 
  6 & high\_ed & 0.00 & [-0.02, 0.03] & 0.89 \\ 
  7 & older & 0.04 & [0.01, 0.06] & $<$ 0.01** \\ 
   \hline
\end{tabular}
\end{table}


### Healthcare and Opportunity included

In [41]:
# Fraction of `other` ads regressed on the same demographic predictors.
mod.frac_other_dems <- lm(
  prop_other ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

mod.frac_other_dems %>%
  toffee_tbl() %>%
  xtable()

Variable,Estimate,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.22,"[0.11, 0.33]",< 0.01***
man,0.01,"[-0.04, 0.06]",0.65
white,-0.03,"[-0.13, 0.07]",0.52
black,-0.02,"[-0.12, 0.08]",0.74
hispanic,-0.04,"[-0.13, 0.05]",0.35
asian,0.0,"[-0.13, 0.12]",0.96
high_ed,0.03,"[-0.02, 0.08]",0.27
older,0.02,"[-0.03, 0.08]",0.4


In [47]:
# Fraction of `opportunity` ads regressed on the same demographic predictors.
mod.frac_opportunity_dems <- lm(
  prop_opportunity ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

mod.frac_opportunity_dems %>%
  toffee_tbl() %>%
  xtable()

Variable,Estimate,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.05,"[0, 0.1]",0.07
man,-0.01,"[-0.04, 0.01]",0.26
white,0.01,"[-0.03, 0.06]",0.54
black,0.05,"[0.01, 0.1]",0.02*
hispanic,0.0,"[-0.04, 0.04]",0.94
asian,0.03,"[-0.02, 0.09]",0.26
high_ed,0.02,"[0, 0.05]",0.05*
older,-0.02,"[-0.04, 0.01]",0.13


In [48]:
# Fraction of `healthcare` ads regressed on the same demographic predictors.
mod.frac_healthcare_dems <- lm(
  prop_healthcare ~ man + white + black + hispanic + asian + high_ed + older,
  data = ad_counts.dems
)

mod.frac_healthcare_dems %>%
  toffee_tbl() %>%
  xtable()

Variable,Estimate,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.19,"[0.09, 0.28]",< 0.01***
man,0.02,"[-0.02, 0.06]",0.36
white,-0.05,"[-0.14, 0.04]",0.27
black,-0.07,"[-0.16, 0.02]",0.11
hispanic,-0.04,"[-0.12, 0.04]",0.31
asian,-0.04,"[-0.15, 0.07]",0.48
high_ed,0.01,"[-0.04, 0.05]",0.81
older,0.04,"[-0.01, 0.09]",0.08


#### What impacts your fraction of non-financial harmful ads?

In [13]:
# Non-financial harmful ads regressed on demographics (no `asian` term here).
# NOTE(review): `prop_nonfin_harmful` is not one of the prop_* columns created
# where ad_counts.dems is built in this notebook (there is only a TODO about
# it) - confirm the column exists before re-running this cell.
mod.frac_nonfin_harmful_dems <- lm(
  prop_nonfin_harmful ~ man + white + black + hispanic + high_ed + older,
  data = ad_counts.dems
)

# compact coefficient table, followed by the full lm summary
toffee_tbl(mod.frac_nonfin_harmful_dems)

mod.frac_nonfin_harmful_dems %>%
  summary() %>%
  print()

Variable,Estimate,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.03,"[0.01, 0.05]",0.01**
man,0.02,"[0, 0.03]",0.01*
white,0.0,"[-0.02, 0.02]",0.95
black,0.02,"[0, 0.04]",0.11
hispanic,-0.01,"[-0.03, 0.02]",0.6
high_ed,0.01,"[-0.01, 0.02]",0.21
older,0.03,"[0.01, 0.04]",< 0.01**



Call:
lm(formula = prop_nonfin_harmful ~ man + white + black + hispanic + 
    high_ed + older, data = ad_counts.dems)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.081926 -0.024744 -0.005115  0.013837  0.130685 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)   
(Intercept)  0.0285230  0.0105906   2.693  0.00803 **
man          0.0180261  0.0070345   2.563  0.01156 * 
white        0.0005739  0.0090703   0.063  0.94965   
black        0.0161570  0.0100006   1.616  0.10866   
hispanic    -0.0061076  0.0115574  -0.528  0.59811   
high_ed      0.0092070  0.0072960   1.262  0.20929   
older        0.0255957  0.0078183   3.274  0.00137 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.03995 on 127 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.1435,	Adjusted R-squared:  0.1031 
F-statistic: 3.548 on 6 and 127 DF,  p-value: 0.002789



#### What impacts your fraction of `Sensitive` ads?

In [57]:
# Sensitive-ads model using `woman` in place of `man` and without `asian`.
# This rebinds mod.frac_sensitive_dems, which the "fit all models" cell also
# defines with the man/asian formula - whichever cell runs last wins.
mod.frac_sensitive_dems <- lm(
  prop_sensitive ~ woman + white + black + hispanic + high_ed + older,
  data = ad_counts.dems
)

mod.frac_sensitive_dems %>%
  toffee_tbl()

Variable,Estimate,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.02,"[0, 0.03]",0.01**
woman,0.0,"[-0.01, 0.01]",0.36
white,0.0,"[-0.02, 0.01]",0.56
black,0.0,"[-0.01, 0.01]",0.98
hispanic,0.0,"[-0.02, 0.01]",0.73
high_ed,0.0,"[-0.01, 0.01]",0.88
older,0.01,"[0, 0.02]",0.15


In [49]:
# Full lm summary for the omnibus harmful-ads model.
# NOTE(review): the Call recorded under this cell is
# `prop_harmful_grouped ~ man + white + high_ed + older`, which does not match
# any fit of mod.frac_harmful_dems visible in this notebook - the output looks
# stale; re-run to confirm.
summary(mod.frac_harmful_dems)


Call:
lm(formula = prop_harmful_grouped ~ man + white + high_ed + older, 
    data = ad_counts.dems)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.132519 -0.040527 -0.008733  0.030101  0.243639 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.060379   0.011890   5.078 1.29e-06 ***
man          0.054847   0.010990   4.991 1.89e-06 ***
white       -0.005363   0.011619  -0.462  0.64520    
high_ed     -0.001230   0.011199  -0.110  0.91271    
older        0.035933   0.011904   3.018  0.00306 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.06284 on 130 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.1917,	Adjusted R-squared:  0.1668 
F-statistic: 7.708 on 4 and 130 DF,  p-value: 1.321e-05
