In [28]:
library(dplyr)
library(ggplot2)
library(toffee)
library(lme4)
library(tidyr)
library(xtable)
library(broom)
library(broom.mixed)

In [64]:
# Load the ad-dislike data and derive composite 0/1 dislike indicators from the
# individual dislike-reason columns.
# "irrelevant", "political" and "dislike_design" are treated as personal reasons;
# the other 7 of the 10 reasons make up nonpersonal_dislike.
#
# Columns are referenced by bare name inside mutate() so they are read from the
# freshly loaded CSV. The previous `data_all$col` form read from whatever
# `data_all` already existed in the session, and failed on a fresh session
# where `data_all` is not defined yet.
#
# NOTE(review): `political` is used here as a dislike reason, while later models
# use `dislike_political` among the reasons and `political` as a predictor next
# to the ad categories -- confirm which column is intended here.

data_all <- read.csv('../data/ad_dislike_data_full.csv') %>%
    mutate(
        mistrust_dislike = as.integer(scam | pushy | unclear | dislike_clickbait),
        scam_dislike = as.integer(
            scam | pushy | unclear | dislike_clickbait | uncomfortable
        ),
        nonpersonal_dislike = as.integer(
            unclear | pushy | dislike_clickbait | scam |
                dislike_product | uncomfortable | dislike_advertiser
        ),
        personal_dislike = as.integer(dislike_design | irrelevant | political),
        # The remaining composites are unions of the ones above; OR is
        # associative, so building on nonpersonal_dislike gives the same result
        # as spelling out all the individual reasons again.
        any_dislike = as.integer(
            nonpersonal_dislike | dislike_design | irrelevant | political
        ),
        all_but_design = as.integer(nonpersonal_dislike | irrelevant | political),
        all_but_design_and_relevance = as.integer(nonpersonal_dislike | political)
    )

# Rows with dont_dislike equal to 0 (per the original note: some dislike reason
# was picked for these). Rows where the comparison is NA are excluded.
disliked <- data_all[which(data_all$dont_dislike == 0), , drop = FALSE]

# TODO: filter out ads where participants specifically said they "dont-dislike" the ad

In [60]:
# Frequency of each value of the any_dislike indicator.
table(data_all[["any_dislike"]])


   0    1 
5310 2527 

In [17]:
# Frequency of each value of the nonpersonal_dislike indicator.
table(data_all[["nonpersonal_dislike"]])


   0    1 
4713 3124 

In [51]:
# Binomial mixed model: any_dislike regressed on the ad-category indicators,
# with a random intercept per pid (presumably the participant id -- confirm).
any_dislike_reasons <- lme4::glmer(
    formula = any_dislike ~
        opportunity + healthcare + sensitive + financial +
        prohibited + harmful + clickbait +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(any_dislike_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.4,"[0.33, 0.49]",< 0.01***
opportunity,0.97,"[0.82, 1.15]",0.73
healthcare,1.01,"[0.88, 1.16]",0.85
sensitive,1.74,"[1.25, 2.44]",< 0.01**
financial,1.39,"[1.15, 1.68]",< 0.01***
prohibited,1.51,"[1.03, 2.2]",0.03*
harmful,1.26,"[0.89, 1.78]",0.19
clickbait,2.36,"[1.89, 2.95]",< 0.01***
sd__(Intercept),3.15,"[NA, NA]",


In [62]:
# Binomial mixed model: mistrust_dislike regressed on the ad-category
# indicators, with a random intercept per pid.
mistrust_dislike_reasons <- lme4::glmer(
    formula = mistrust_dislike ~
        opportunity + healthcare + sensitive + financial +
        prohibited + harmful + clickbait +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(mistrust_dislike_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.2,"[0.16, 0.25]",< 0.01***
opportunity,1.25,"[1.04, 1.5]",0.02*
healthcare,1.13,"[0.97, 1.32]",0.12
sensitive,1.64,"[1.15, 2.34]",0.01**
financial,1.28,"[1.03, 1.58]",0.03*
prohibited,1.12,"[0.74, 1.69]",0.59
harmful,1.49,"[1.03, 2.16]",0.04*
clickbait,2.39,"[1.88, 3.02]",< 0.01***
sd__(Intercept),3.07,"[NA, NA]",


In [65]:
# Scam dislike reasons: binomial mixed model of scam_dislike on the ad-category
# indicators, with a random intercept per pid.
scam_dislike_reasons <- lme4::glmer(
    formula = scam_dislike ~
        opportunity + healthcare + sensitive + financial +
        prohibited + harmful + clickbait +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(scam_dislike_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.21,"[0.17, 0.26]",< 0.01***
opportunity,1.23,"[1.03, 1.48]",0.03*
healthcare,1.16,"[0.99, 1.35]",0.06
sensitive,1.59,"[1.11, 2.28]",0.01*
financial,1.26,"[1.02, 1.56]",0.03*
prohibited,1.18,"[0.79, 1.78]",0.42
harmful,1.57,"[1.09, 2.27]",0.02*
clickbait,2.31,"[1.83, 2.92]",< 0.01***
sd__(Intercept),3.12,"[NA, NA]",


In [44]:
# Binomial mixed model: nonpersonal_dislike regressed on the ad-category
# indicators plus `political`, with a random intercept per pid.
# NOTE(review): this is the only model in the notebook that includes `political`
# as a predictor -- confirm that the difference from the other models is intended.
nonpersonal_dislike_reasons <- lme4::glmer(
    formula = nonpersonal_dislike ~
        opportunity + healthcare + sensitive + financial +
        prohibited + harmful + clickbait + political +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(nonpersonal_dislike_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.28,"[0.23, 0.35]",< 0.01***
opportunity,1.14,"[0.96, 1.36]",0.15
healthcare,1.12,"[0.97, 1.29]",0.14
sensitive,1.77,"[1.25, 2.49]",< 0.01**
financial,1.28,"[1.05, 1.57]",0.02*
prohibited,1.2,"[0.81, 1.78]",0.37
harmful,1.6,"[1.12, 2.27]",0.01**
clickbait,2.65,"[2.11, 3.32]",< 0.01***
political,0.76,"[0.17, 3.43]",0.73
sd__(Intercept),3.12,"[NA, NA]",


In [53]:
# Binomial mixed model: all_but_design (any dislike reason except
# dislike_design) regressed on the ad-category indicators, with a random
# intercept per pid.
nondesign_reasons <- lme4::glmer(
    formula = all_but_design ~
        opportunity + healthcare + sensitive + financial +
        prohibited + harmful + clickbait +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(nondesign_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.33,"[0.27, 0.41]",< 0.01***
opportunity,1.04,"[0.88, 1.24]",0.62
healthcare,1.03,"[0.89, 1.18]",0.72
sensitive,1.77,"[1.26, 2.49]",< 0.01***
financial,1.3,"[1.06, 1.58]",0.01**
prohibited,1.31,"[0.89, 1.92]",0.17
harmful,1.5,"[1.06, 2.13]",0.02*
clickbait,2.36,"[1.88, 2.95]",< 0.01***
sd__(Intercept),3.07,"[NA, NA]",


In [55]:
# Binomial mixed model: all_but_design_and_relevance (any dislike reason except
# dislike_design and irrelevant) regressed on the ad-category indicators, with
# a random intercept per pid.
nondesign_and_relevance_reasons <- lme4::glmer(
    formula = all_but_design_and_relevance ~
        opportunity + healthcare + sensitive + financial +
        prohibited + harmful + clickbait +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(nondesign_and_relevance_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.28,"[0.23, 0.35]",< 0.01***
opportunity,1.13,"[0.95, 1.35]",0.17
healthcare,1.1,"[0.95, 1.28]",0.18
sensitive,1.76,"[1.25, 2.49]",< 0.01**
financial,1.27,"[1.03, 1.55]",0.02*
prohibited,1.28,"[0.87, 1.89]",0.21
harmful,1.58,"[1.11, 2.25]",0.01*
clickbait,2.61,"[2.09, 3.27]",< 0.01***
sd__(Intercept),3.1,"[NA, NA]",


In [56]:
# Which ads are disliked for design, irrelevance or political reasons?
# Binomial mixed model of personal_dislike on the ad-category indicators, with
# a random intercept per pid.
personal_dislike_reasons <- lme4::glmer(
    formula = personal_dislike ~
        opportunity + healthcare + sensitive + financial +
        prohibited + harmful + clickbait +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(personal_dislike_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.12,"[0.1, 0.15]",< 0.01***
opportunity,0.76,"[0.6, 0.95]",0.01*
healthcare,0.86,"[0.72, 1.02]",0.09
sensitive,1.87,"[1.28, 2.72]",< 0.01**
financial,1.32,"[1.05, 1.67]",0.02*
prohibited,1.81,"[1.19, 2.74]",0.01**
harmful,0.82,"[0.52, 1.27]",0.37
clickbait,1.5,"[1.15, 1.95]",< 0.01**
sd__(Intercept),3.05,"[NA, NA]",


In [None]:
# TODO: also run a model for ads that were disliked for non-mistrust-related reasons

In [31]:
# NOTE: there is one problem with this regression:
# when none of the nonpersonal_dislike reasons is chosen (outcome = 0), we want the ad to have been
# disliked anyway, just not for these reasons. What the baseline means precisely should be made clear.

### Simplifying harmful vs. non-harmful ads

If prohibited + deceptive + clickbait + financial + sensitive are the "bad" ads, as identified by `dont-like` responses, _why_ do participants find them bad compared with the other (opportunity + healthcare + benign) ads? These differing reasons may reveal users' differing perceptions and can inform our definition of harm. Perhaps we could also call them safe vs. unsafe instead of harmful vs. non-harmful.

In [71]:
# List the column names currently present in data_all.
names(data_all)

In [79]:
# Collapse the ad-category indicators into two 0/1 flags:
#   bad_ads    - clickbait, prohibited, sensitive, harmful or financial
#   nonbad_ads - opportunity or healthcare
# Columns are referenced by bare name so mutate() reads them from the piped
# data rather than from the global `data_all` object.
data_all <- data_all %>%
    mutate(
        bad_ads = as.integer(clickbait | prohibited | sensitive | harmful | financial),
        nonbad_ads = as.integer(opportunity | healthcare)
    )

# TODO: it might be important to add benign to data_all

In [80]:
# Why do people dislike purportedly bad ads?
# Binomial mixed model with bad_ads as the outcome and the individual dislike
# reasons as predictors, with a random intercept per pid.
# NOTE(review): dislike_political is in the formula but has no row in the
# reported table -- possibly dropped during fitting; confirm.
bad_dislike_reasons <- lme4::glmer(
    formula = bad_ads ~
        unclear + irrelevant + pushy + dislike_clickbait + scam +
        dislike_product + dislike_design + uncomfortable +
        dislike_advertiser + dislike_political +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(bad_dislike_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.14,"[0.12, 0.16]",< 0.01***
unclear,1.65,"[1.37, 1.99]",< 0.01***
irrelevant,1.25,"[0.98, 1.59]",0.08
pushy,0.98,"[0.79, 1.23]",0.89
dislike_clickbait,1.2,"[0.89, 1.6]",0.23
scam,0.93,"[0.62, 1.41]",0.74
dislike_product,1.16,"[0.88, 1.54]",0.3
dislike_design,1.42,"[1.16, 1.75]",< 0.01***
uncomfortable,0.69,"[0.4, 1.2]",0.19
dislike_advertiser,1.86,"[1.47, 2.36]",< 0.01***


In [75]:
# Counterpart of the bad_ads model: binomial mixed model with nonbad_ads as the
# outcome and the individual dislike reasons as predictors, with a random
# intercept per pid.
# NOTE(review): dislike_political is in the formula but has no row in the
# reported table -- possibly dropped during fitting; confirm.
nonbad_dislike_reasons <- lme4::glmer(
    formula = nonbad_ads ~
        unclear + irrelevant + pushy + dislike_clickbait + scam +
        dislike_product + dislike_design + uncomfortable +
        dislike_advertiser + dislike_political +
        (1 | pid),
    family = binomial(link = "logit"),
    data = data_all
)

# Summarise the fit as a table (odds ratios, CIs and p-values in the output).
toffee_tbl(nonbad_dislike_reasons)

Variable,Odds_Ratio,CI,p_value
<chr>,<dbl>,<chr>,<chr>
(Intercept),0.45,"[0.4, 0.51]",< 0.01***
unclear,1.13,"[0.96, 1.34]",0.15
irrelevant,0.79,"[0.63, 0.99]",0.04*
pushy,1.07,"[0.88, 1.29]",0.5
dislike_clickbait,0.95,"[0.73, 1.24]",0.71
scam,1.25,"[0.88, 1.77]",0.21
dislike_product,0.96,"[0.75, 1.22]",0.73
dislike_design,0.77,"[0.64, 0.93]",0.01**
uncomfortable,1.26,"[0.83, 1.92]",0.27
dislike_advertiser,0.75,"[0.6, 0.94]",0.01*
