Replicate_Appendix.Rmd

---
title: "Replication Output for: Can the Government Deter Discrimination? Evidence from a Randomized Intervention in New York City"
author: "Albert H. Fang, Andrew M. Guess, and Macartan Humphreys"
date: "December 2, 2017"
output:
  html_document:
    toc: true
    number_sections: yes
    theme: cosmo
---


# Appendix Tables and Figures

## Table A1 (p. A-2): Distribution of Experimental Subjects by Randomization Block

```{r tablea1, echo=T, warning=F}
blockLabs <- rep(c("Brooklyn","Bronx","Manhattan","Queens","Staten Island","Likely Discrimination Frame"), 3)
blockLabs <- blockLabs[-length(blockLabs)]
pcts <- round(prop.table(table(dat$block, dat$TA, useNA="ifany")), 3)
block_dist <- cbind(blockLabs, 
                    table(dat$block, dat$TA, useNA="ifany")[,1], pcts[,1],
                    table(dat$block, dat$TA, useNA="ifany")[,2], pcts[,2],
                    table(dat$block, dat$TA, useNA="ifany")[,3], pcts[,3])

block_dist <- rbind(c("Regime 1: 13 Apr 2012 - 9 Sep 2012", rep(NA,6)),
                    block_dist[1:6,],
                    c("Regime 2: 10 Sep 2012 - 7 May 7, 2013", rep(NA,6)),
                    block_dist[7:12,],
                    c("Regime 3: 8 May 2013 to 20 Dec 2013", rep(NA,6)),
                    block_dist[13:17,],
                    c("Total", table(dat$TA)[1], round(table(dat$TA)[1]/sum(table(dat$TA)),3), 
                      table(dat$TA)[2], round(table(dat$TA)[2]/sum(table(dat$TA)),3), 
                      table(dat$TA)[3], round(table(dat$TA)[3]/sum(table(dat$TA)),3)
                    ))

colnames(block_dist) <- c("Block","Control (N)","Control (Proportion)",
                  "Monitoring (N)","Monitoring (Proportion)",
                  "Punitive (N)","Punitive (Proportion)")

block_distcaption = "Distribution of Experimental Subjects by Randomization Block. Cells contain counts of the number and proportion of experimental subjects randomly assigned to each arm by randomization block. The 17 randomization blocks are defined by the ad's sampling stratum (New York City borough or the likely discrimination, or LD, oversample) and by treatment regime (defined as a distinct design and randomization procedure). There are three treatment regimes. Regime 1 was a 5-arm design (2 of which are not analyzed in this paper) with equal treatment assignment probabilities. Regime 2 was a 3-arm design where the probability of assignment to control was 0.5 and the probability of assignment to the monitoring or punitive conditions was 0.25. Regime 3 was a 3-arm design with equal treatment assignment probabilities. Proportions may not sum to 1 due to rounding."

# show table in Rmd output file
rownames(block_dist) <- NULL
kable(block_dist, caption="**Table A1**")
```

```{r, include=FALSE}
# export table to tex file
print(xtable(block_dist, caption = block_distcaption, label= "blockNs"), file="out_table_a1_dist_by_block.tex", hline.after=c(0,7,14,20,21), include.rownames=FALSE,  only.contents=TRUE)
```

## Table A2 (p. A-3): Incidence of Early Stage Discrimination


```{r prep_tablea2, include=FALSE, warning=F}

# create table shells

wb <- matrix(NA, nrow=length(es.a), ncol=16)
wh <- matrix(NA, nrow=length(es.a), ncol=16)
bh <- matrix(NA, nrow=length(es.a), ncol=16)

# populate column headings
colnames(wb) <- c("Measure", paste( c(rep("AuditSamp-",5),rep("ExpSamp-",5),rep("Control-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(wh) <- c("Measure", paste( c(rep("AuditSamp-",5),rep("ExpSamp-",5),rep("Control-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(bh) <- c("Measure", paste( c(rep("AuditSamp-",5),rep("ExpSamp-",5),rep("Control-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))

# populate first column - variable names
wb[,1] <- c(es.wb.labs)
wh[,1] <- c(es.wh.labs)
bh[,1] <- c(es.bh.labs)

# check variables
# for(i in 1:length(es.a)){
#   cat("\n *********** variable : ", es.a.labs[i], "****************\n", sep="")
#   print(table(aud[[es.a[i]]], useNA="ifany"))
#   cat("\n *********** variable : ", es.b.labs[i], "****************\n", sep="")
#   print(table(aud[[es.b[i]]], useNA="ifany"))
#   cat("\n *********** variable : ", es.c.labs[i], "****************\n", sep="")
#   print(table(aud[[es.c[i]]], useNA="ifany"))
# }

# convert all vars into numeric

cols <- c(es.a, es.b, es.c, es.wb, es.wh, es.bh)
aud[,cols] <- lapply(aud[,cols], as.numeric)

# populate table shells
```

```{r tablea2, include= TRUE, warning=F}

# A: BLACK , B: HISPANIC , C: WHITE

es <- c("contact", "sched",   "numattr", "numskep", "numpos", "numneu", 
        "numneg",  "pctskep", "pctpos",  "pctneu",  "pctneg",  "anyskep", "anyneg")

summm <- function(x, group1 = "C", group2 = "A") { 
  es1 <- paste0(x, "_", group1); es2 <- paste0(x, "_", group2) 
  
  # audit sample
  aud_maj_mean <- mean(aud[[es1]]) 
  aud_min_mean <- mean(aud[[es2]])
  aud_diff     <- as.numeric(aud_maj_mean) - as.numeric(aud_min_mean)
  aud_P        <- t.test(x=aud[[es1]], y=aud[[es2]], alternative="two.sided",
                         paired=TRUE, conf.level=0.95)$p.value
  aud_N        <- length(aud[[es1]])
  
  # experimental sample
  exp_maj_mean <- mean(aud[[es1]][aud$insamp=="1" & !is.na(aud$insamp) ])
  exp_min_mean <- mean(aud[[es2]][aud$insamp=="1" & !is.na(aud$insamp) ])
  exp_diff <-  exp_maj_mean - exp_min_mean
  exp_P <-  t.test(x=aud[[es1 ]][aud$insamp=="1" & !is.na(aud$insamp) ],
                   y=aud[[es2 ]][aud$insamp=="1" & !is.na(aud$insamp) ], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  exp_N <- length(aud[[es1]][aud$insamp=="1" & !is.na(aud$insamp) ]) 
  
  # control group
  ctrl_maj_mean <- mean(aud[[es1]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  ctrl_min_mean <- mean(aud[[es2]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  ctrl_diff <- ctrl_maj_mean -  ctrl_min_mean
  ctrl_P <- t.test(x=aud[[es1 ]][aud$insamp=="1" & !is.na(aud$insamp) & aud$TA==0], 
                   y=aud[[es2 ]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  ctrl_N <- length(aud[[es1]][aud$insamp=="1" & !is.na(aud$insamp)  & aud$TA==0]) 
  
  # return
  c(aud_maj_mean = aud_maj_mean, aud_min_mean = aud_min_mean,
    aud_diff = aud_diff, aud_P = aud_P, aud_N = aud_N,
    exp_maj_mean = exp_maj_mean, exp_min_mean = exp_min_mean, 
    exp_diff = exp_diff, exp_P = exp_P, exp_N = exp_N,
    ctrl_maj_mean = ctrl_maj_mean, ctrl_min_mean = ctrl_min_mean, 
    ctrl_diff = ctrl_diff, ctrl_P = ctrl_P, ctrl_N = ctrl_N)
}

wb <- cbind(es.wb.labs, t(sapply(es, summm, group1 = "C", group2 = "A")))
wh <- cbind(es.wh.labs, t(sapply(es, summm, group1 = "C", group2 = "B")))
bh <- cbind(es.bh.labs, t(sapply(es, summm, group1 = "A", group2 = "B")))

  
```

```{r tablea2_format, include= FALSE, warning=F}

for(i in 2:ncol(wb)){
  wb[,i] <- round(as.numeric(wb[,i]), 3)
  wh[,i] <- round(as.numeric(wh[,i]), 3)
  bh[,i] <- round(as.numeric(bh[,i]), 3)
  
  if(i %in% c(5,10,15)) {
    wb[,i] <- paste("(",wb[,i],")",sep="")
    wh[,i] <- paste("(",wh[,i],")",sep="")
    bh[,i] <- paste("(",bh[,i],")",sep="")
  }
  if(i %in% c(6,11,16)) {
    wb[,i] <- paste("[",wb[,i],"]",sep="")
    wh[,i] <- paste("[",wh[,i],"]",sep="")
    bh[,i] <- paste("[",bh[,i],"]",sep="")
  }
}


```


```{r tablea2_print, include= TRUE, warning=F}
es_discrim_full_table <- rbind(wb, wh, bh)
table_a2 <- es_discrim_full_table[c(1,2,14,15,27,28),1:6]
kable(table_a2, caption="**Table A2**",
      col.names = c("Measure", "Majority Group Mean", "Minority Group Mean",
                    "Difference (Maj-Min)", "p-value", "[N]"),
      row.names = FALSE)

```

```{r tablea2_TEX, include= FALSE, warning=F}
print(xtable(table_a2, caption = c("Incidence of Early Stage Discrimination",
                                 "Incidence of Early Stage Discrimination")),
                       file = "out_tablea2_early_stage_discrim.tex",
                       include.rownames = FALSE)
```

```{r tablea2_FTEST, include= TRUE, warning=F}
#### F-test for the null hypothesis that race does not matter for making any contact or for scheduling appointment

# A: BLACK , B: HISPANIC , C: WHITE

aud.rs <- melt(aud[,c("cid", "contact_A", "contact_B", "contact_C", "sched_A", "sched_B", "sched_C")], id.vars="cid")

aud.rs$ttype <- sapply(strsplit(as.character(aud.rs$variable), "_", fixed=TRUE), function(x) x[2])
aud.rs$variable <- sapply(strsplit(as.character(aud.rs$variable), "_", fixed=TRUE), function(x) x[1])
aud.rs <- dcast(aud.rs, cid + ttype ~ variable, value.var="value")

fit.contact <- lm(contact ~ ttype, data = aud.rs)
fit.sched   <- lm(sched   ~ ttype, data = aud.rs)

print("F-test of the null that tester race predicts whether the tester makes any contact with the landlord")
print(summary(fit.contact))

print("F-test of the null that tester race predicts whether the tester successfully schedules an appointment")
print(summary(fit.sched))

```

## Figure A2 (p. A-5): Cumulative Number of Cases Over Implementation Period

```{r figurea2png, echo=T, warning=F}
dat$casedate <- as.Date(dat$casedate, format="%Y-%m-%d")
casedate <- dat$casedate[order(dat$casedate)]

# generate .png
png("out_figurea2_cases_over_time.png", height=500, width=1200)
par(oma=c(2,0,3,0), xpd=TRUE)

plot(casedate, 1:length(casedate), xaxt="n", yaxt="n", type="l", ylim=c(0,700), xlab="", ylab="Cumulative Number of Cases", xaxs="i", yaxs="i")
axis.Date(side=1, at=seq(as.Date("2012-04-01"), as.Date("2014-01-01"), "month"), format="%m/%Y", las=2, cex.axis=0.9)
axis(side=2, at=seq(0,700,100), labels=seq(0,700,100), cex.axis=0.9, las=1)
polygon(x=c(rep(as.Date("2012-10-29"),2), rep(as.Date("2012-11-11"),2)),
        y=c(700,0,0,700),
        col=rgb(.8,.8,.8,.25),
        border=rgb(.8,.8,.8,.25))
text(x=as.Date("2012-11-11"), y=600, "Implementation Halted \nDue to Hurricane Sandy", cex=.9, pos=1)
mtext(side=1, "Date", outer=TRUE)

segments(x0=as.Date("2012-09-09"), y0=700, x1=as.Date("2012-09-09"), y1=0, lty=5, col="grey80")
segments(x0=as.Date("2013-05-07"), y0=700, x1=as.Date("2013-05-07"), y1=0, lty=5, col="grey80")

text(x=as.Date("2012-07-01"), y=840, "Regime 1: 5-Arm Design \nApril 13, 2012 to September 9, 2012 \nEqual Assignment Probabilities", cex=.9, pos=1)
text(x=as.Date("2013-01-15"), y=840, "Regime 2: 3-Arm Design \nSeptember 10, 2012 to May 7, 2013 \nPr(Control)=.5, Pr(Monitoring)=Pr(Punitive)=.25", cex=.9, pos=1)
text(x=as.Date("2013-09-01"), y=840, "Regime 3: 3-Arm Design \nMay 8, 2013 to December 20, 2013 \nEqual Assignment Probabilities", cex=.9, pos=1)

invisble(dev.off())
```

```{r figurea2pdf, echo=F, warning=F, include = FALSE}
pdf("out_figurea2_cases_over_time.pdf", height=5, width=12)
par(oma=c(2,0,0,0), xpd=TRUE)

plot(casedate, 1:length(casedate), xaxt="n", yaxt="n", type="l", ylim=c(0,700), xlab="", ylab="Cumulative Number of Cases", xaxs="i", yaxs="i")
axis.Date(side=1, at=seq(as.Date("2012-04-01"), as.Date("2014-01-01"), "month"), format="%m/%Y", las=2, cex.axis=0.9)
axis(side=2, at=seq(0,700,100), labels=seq(0,700,100), cex.axis=0.9, las=1)
polygon(x=c(rep(as.Date("2012-10-29"),2), rep(as.Date("2012-11-11"),2)),
        y=c(700,0,0,700),
        col=rgb(.8,.8,.8,.25),
        border=rgb(.8,.8,.8,.25))
text(x=as.Date("2012-11-11"), y=600, "Implementation Halted \nDue to Hurricane Sandy", cex=.9, pos=1)
mtext(side=1, "Date", outer=TRUE)

segments(x0=as.Date("2012-09-09"), y0=700, x1=as.Date("2012-09-09"), y1=0, lty=5, col="grey80")
segments(x0=as.Date("2013-05-07"), y0=700, x1=as.Date("2013-05-07"), y1=0, lty=5, col="grey80")

text(x=as.Date("2012-07-01"), y=840, "Regime 1: 5-Arm Design \nApril 13, 2012 to September 9, 2012 \nEqual Assignment Probabilities", cex=.9, pos=1)
text(x=as.Date("2013-01-15"), y=840, "Regime 2: 3-Arm Design \nSeptember 10, 2012 to May 7, 2013 \nPr(Control)=.5, Pr(Monitoring)=Pr(Punitive)=.25", cex=.9, pos=1)
text(x=as.Date("2013-09-01"), y=840, "Regime 3: 3-Arm Design \nMay 8, 2013 to December 20, 2013 \nEqual Assignment Probabilities", cex=.9, pos=1)
invisble(dev.off())
```

![**Figure A2**](out_figurea2_cases_over_time.png)

## Figure A3 (p. A-6): Discrimination Levels and Treatment Effects using the Subjective Discrimination Index

**Note:** Analysis and figure construction code for Figure A3 is shown above in _"1.1 Figure 1: Main Results on Discrimination Levels and Treatment Effects"_

![**Figure A3: Net Discrimination in Subjective Discrimination Index**](out_figurea3_6x6_index.png)

## Figure A4 (p. A-17): Estimates of Tester Random Effects on Outcome Measures

```{r figurea4code, echo=T, eval=T, warning=F}

ctx$weekday <- as.factor(weekdays(as.Date(ctx$sfa_B4_apptdate, format="%m/%d/%y")))

anova_fig <- data.frame(outcome = c("cb","off","qualpraise","sales","posedit","posbg","prof"),
                        ylab = c("Callbacks", "Offers", "Praise", "Perceived Sales Efforts", 
                                 "Positive Editorializing", "Positive Reactions", "Professionalism"),
                        title = c("Tester Random Effects on Callback Incidence", 
                                  "Tester Random Effects on Offer Incidence",
                                  "Tester Random Effects on Praise re: Qualifications to Rent", 
                                  "Tester Random Effects on Sales Efforts",
                                  "Tester Random Effects on Positive Editorializing", 
                                  "Tester Random Effects on Positive Reactions to Background",
                                  "Tester Random Effects on Professionalism"))

# MAKE GRAPHS

for(ii in 1:nrow(anova_fig)){
  model     <- lmer(paste0(anova_fig$outcome[ii], 
                       " ~ (1|tid) + (1|ttype) + callorder + weekday + partnered
                                   + numbr + anyskep + anyneg + (1|team_gender)"), data=ctx)
  model_off <- lmer(paste0(anova_fig$outcome[2], 
                           " ~ (1|tid) + (1|ttype) + callorder + weekday + partnered
                                        + numbr + anyskep + anyneg + (1|team_gender)"), data=ctx)
  
  ifelse(ii>=2, se <- se.ranef(model_off)[[1]][i,1], se <- se.ranef(model)[[1]][i,1])
  
  i <- 1:(length(ranef(model)[[1]][,1]))
  y <- ranef(model)[[1]][,1] # individual tester RE
  types <- c(rep(0,14), rep(1,19), rep(2,15))
  col   <- rainbow(3)[types+1]

  # output PNG file for Rmd output
  png(paste0("out_figurea4",letters[ii],"_anova_",anova_fig$outcome[ii],".png"), width=800)
  par(0,0,0,0)
  plot(i, y, col = col, ylim=c(-1.0,1),xlim=c(1,50),xlab="Testers", ylab=anova_fig$ylab[ii],main=anova_fig$title[ii],pch=20,bty="n")
  legend("bottom", c("Black","Hispanic","White"), pch=20, col=rainbow(3),horiz=T,bty="n")
  segments(i, y + 2*se, i, y - 2*se,  col = col)
  abline(a=0,b=0,lty=2)
  invisble(dev.off())
}
```


```{r figure4_tex, include=FALSE}
  # output PDF file for tex
  pdf(paste0("out_figurea4",letters[ii],"_anova_",anova_fig$outcome[ii],".pdf"), width=8)
  plot(i, y, col = col, ylim=c(-1.0,1),xlim=c(1,50),xlab="Testers", ylab=anova_fig$ylab[ii],main=anova_fig$title[ii],pch=20,bty="n")
  legend("bottom", c("Black","Hispanic","White"), pch=20, col=rainbow(3),horiz=T,bty="n")
  segments(i, y + 2*se, i, y - 2*se,  col = col)
  abline(a=0,b=0,lty=2)
  invisble(dev.off())
```


![**Figure A4, panel a: Post-Visit Callback**](out_figurea4a_anova_cb.png){width=60%}

![**Figure A4, panel b: Post-Visit Offer**](out_figurea4b_anova_off.png){width=60%}

![**Figure A4, panel c: Positive Biographical Feedback**](out_figurea4f_anova_posbg.png){width=60%}

![**Figure A4, panel d: Positive Editorializing**](out_figurea4e_anova_posedit.png){width=60%}

![**Figure A4, panel e: Praised Rental Qualifications**](out_figurea4c_anova_qualpraise.png){width=60%}

![**Figure A4, panel f: Sales Efforts**](out_figurea4d_anova_sales.png){width=60%}

![**Figure A4, panel g: Professionalism**](out_figurea4g_anova_prof.png){width=60%}


```{r randfxmodels, echo=T, eval=T, warning=F}
### The following analysis corresponds to the analyses described in footnote 22 in the article.
ctx$weekday <- as.factor(weekdays(as.Date(ctx$sfa_B4_apptdate, format="%m/%d/%y")))
dat_tid <- dat[,c("cid","TA",names(dat)[grepl("tid",names(dat))])]
dat_tid <- cbind(dat_tid[,c("cid","TA")], t(apply(dat_tid[,3:ncol(dat_tid)], 1, function(J) names(dat_tid[,3:ncol(dat_tid)])[which(J==1)])))

dat_tid_1 <- as.data.frame(dat_tid[,c(1,2,3)], stringsAsFactors=FALSE)
dat_tid_2 <- as.data.frame(dat_tid[,c(1,2,4)], stringsAsFactors=FALSE)
dat_tid_3 <- as.data.frame(dat_tid[,c(1,2,5)], stringsAsFactors=FALSE)

dat_tid_1[,3] <- as.character(dat_tid_1[,3])
dat_tid_2[,3] <- as.character(dat_tid_2[,3])
dat_tid_3[,3] <- as.character(dat_tid_3[,3])
names(dat_tid_1)[3] <- names(dat_tid_2)[3] <- names(dat_tid_3)[3] <- "tid"
dat_tid <- rbind(dat_tid_1, dat_tid_2, dat_tid_3)
dat_tid$tid <- gsub("tid.","",dat_tid$tid,fixed=TRUE)
dat_tid$ttype <- substr(dat_tid$tid, 1, 1)

dat_sub <- dat[,c("cid","TA","team_gender","callorder","partnered","numbr",
       paste0(rep(c("anyskep","anyneg","cb","off","prof","posedit","posbg","qualpraise","sales"), 3),
              rep(c("_A","_B","_C"), each=9))
       )]

dmelt <- melt(dat_sub, id.vars=c("cid", "TA", "team_gender", "callorder", "partnered", "numbr"))
dmelt$ttype <- ifelse(grepl("_A", dmelt$variable, fixed=TRUE), "A",
                      ifelse(grepl("_B", dmelt$variable, fixed=TRUE), "B", "C"))
dmelt$variable <- gsub("_A", "", dmelt$variable, fixed=TRUE)
dmelt$variable <- gsub("_B", "", dmelt$variable, fixed=TRUE)
dmelt$variable <- gsub("_C", "", dmelt$variable, fixed=TRUE)
dat_sub2 <- dcast(dmelt, cid + TA + team_gender + callorder + partnered + numbr + ttype ~ variable, value.var="value")
dat2 <- join(dat_tid, dat_sub2, by=c("cid","TA","ttype"), type="left", match="all")

ctx_sub <- ctx[,c("cid","weekday")]
ctx_sub <- ctx_sub[!duplicated(ctx_sub$cid),]
#ctx[ctx$cid %in% ctx_sub$cid[is.na(ctx_sub$weekday)] & !is.na(ctx$weekday),c("cid","weekday")]
ctx_sub$weekday <- ifelse(ctx_sub$cid == 11108, "Wednesday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 12116,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 12608,    "Monday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 13361,    "Monday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 15734,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 19446,  "Thursday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 26324,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 30165,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(is.na(ctx_sub$weekday), "None", ctx_sub$weekday)

dat3 <- join(dat2, ctx_sub, by="cid", type="left", match="all")

model.callback.0 <- lmer(cb ~ (1|tid) + (1|ttype) + callorder + weekday + 
                         partnered + numbr + anyskep + anyneg + (1|team_gender), 
                       data=dat3, subset=(TA==0))

model.offer.0 <- lmer(off ~ (1|tid) + (1|ttype) + callorder + weekday + 
                      partnered + numbr + anyskep + anyneg + (1|team_gender), 
                    data=dat3, subset=(TA==0))

model.callback.all <- lmer(cb ~ (1|tid) + (1|ttype) + callorder + weekday + 
                           partnered + numbr + anyskep + anyneg + (1|team_gender), 
                         data=dat3)

model.offer.all <- lmer(off ~ (1|tid) + (1|ttype) + callorder + weekday + 
                        partnered + numbr + anyskep + anyneg + (1|team_gender), 
                      data=dat3)

re_out <- list(model.callback.0   , 
               model.offer.0      ,
               model.callback.all ,
               model.offer.all    )

re_table <- lapply(re_out, function(J){
  x <- as.data.frame(VarCorr(J))
  x[,4] <- round(x[,4], 4)
  x <- x[,c(1,4)]
  return(x)
})

re_table <- do.call(cbind, re_table)
re_table <- re_table[,-c(3,5,7)]
re_table[,1] <- c("Estimated Variance of Varying Tester Intercepts",
                  "Estimated Variance of Varying Tester Race Intercepts",
                  "Estimated Variance of Varying Tester Team Gender Intercepts",
                  "Estimated Residual Variance")
colnames(re_table) <- c("", "Control Group, Outcome: Callback", "Control Group, Outcome: Offer",
                        "Experimental Sample, Outcome: Callback", "Experimental Sample, Outcome: Offer")

kable(re_table)
```

## Table A4 (p. A-20): Selected Characteristics of Housing Units in the Audit and Experimental Samples

We summarize selected _pre-treatment_ characteristics of housing units in the audit and experimental samples. To ensure the measures are _pre-treatment_ quantities, we summarize characteristics of housing units that are posted on and scraped from the original Craigslist ads.

```{r tablea4, echo=T, eval=T, warning=F}

# Helper functions and code to calculate descriptive statistics
stratum_labels <- c("Total", "Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island", "Likely Discrimination Frame")


# descriptive_f: A function to compute descriptive statistics e.g. mean, sd, min, max..
calc_descriptives <- function(descriptive_f, df, x, label, digs = 2) {
  x <- c(descriptive_f(df[[x]], na.rm=TRUE),
              descriptive_f(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
              descriptive_f(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
              descriptive_f(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
              descriptive_f(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
              descriptive_f(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
              descriptive_f(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  out <- round(x, digs)
  
 if(identical(descriptive_f, sd))  out <- paste0("(", out, ")")
  out
}


calc_pct_ses <- function(df, x, label, digs=NULL) {
  
  x_prop <- c(mean(df[[x]], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  x_sd <- c(sd(df[[x]], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  x_n <- c(sum(!is.na(df[[x]])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ])),
           sum(!is.na(df[[x]][ df[["frame"]] != "representative" ]))
           )
  
  x_se <- x_sd / sqrt(x_n)
  
  if(is.null(digs)) digs <- 2
  x_pct <- sprintf( paste0("%.",digs,"f") , round(x_prop * 100, digs) )
  x_se <- paste0("(", sprintf( paste0("%.",digs,"f") , round(x_se * 100, digs) ) ,")")
  
  x <- cbind(x_pct, x_se)
  colnames(x) <- paste0(label, c("_p", "_se"))
  rownames(x) <- NULL
  return(x)
}

#----------------------------------------#
# Panel A: Number of units

panel_a_aud_n <- c(nrow(aud),
                   table(aud$boro[aud$frame == "representative"]),
                   nrow(aud[aud$frame != "representative",]))

panel_a_aud_p <- paste0("(", sprintf("%.2f", round(panel_a_aud_n / nrow(aud) * 100, 2))  ,")")

panel_a_exp_n <- c(nrow(dat),
                   table(dat$boro[dat$frame == "representative"]),
                   nrow(dat[dat$frame != "representative",]))
panel_a_exp_p <- paste0("(", sprintf("%.2f", round(panel_a_exp_n / nrow(dat) * 100, 2))  ,")")

panel_a_trt_n <- c(nrow(dat[dat$TA != 0,]),
                   table(dat$boro[dat$frame == "representative" & dat$TA != 0]),
                   nrow(dat[dat$frame != "representative" & dat$TA != 0,]))
panel_a_trt_p <- paste0("(", sprintf("%.2f", round(panel_a_trt_n / nrow(dat[dat$TA != 0,]) * 100, 2)) ,")")

panel_a_con_n <- c(nrow(dat[dat$TA == 0,]),
                   table(dat$boro[dat$frame == "representative" & dat$TA == 0]),
                   nrow(dat[dat$frame != "representative" & dat$TA == 0,]))
panel_a_con_p <- paste0("(", sprintf("%.2f", round(panel_a_con_n / nrow(dat[dat$TA == 0,]) * 100, 2)) ,")")

tab_a4_panel_a <- cbind(stratum_labels,
                        panel_a_aud_n,
                        panel_a_aud_p,
                        panel_a_exp_n,
                        panel_a_exp_p,
                        panel_a_trt_n,
                        panel_a_trt_p,
                        panel_a_con_n,
                        panel_a_con_p)

rownames(tab_a4_panel_a) <- NULL
colnames(tab_a4_panel_a) <- gsub("panel_a_", "", colnames(tab_a4_panel_a), fixed=TRUE)

kable(tab_a4_panel_a, col.names=c("Stratum", "Audit Sample (N)", "Audit Sample (%)",
                                  "Exp Sample (N)", "Exp Sample (%)",
                                  "Any Treatment (N)", "Any Treatment (%)",
                                  "Control (N)", "Control (%)"),
      caption="**Table A4, Panel A. Number of Units (Scraped from Craigslist).**")

#----------------------------------------#
# Panel B: Monthly asking price (mean)
tab_a4_panel_b <- cbind(stratum_labels,
                    calc_descriptives(mean, df = aud,               x = "rent", label = "aud"),
                    calc_descriptives(sd,   df = aud,               x = "rent", label = "aud"),
                    calc_descriptives(mean, df = dat,               x = "rent", label = "exp"),
                    calc_descriptives(sd,   df = dat,               x = "rent", label = "exp"),
                    calc_descriptives(mean, df = dat[dat$TA != 0,], x = "rent", label = "trt"),
                    calc_descriptives(sd,   df = dat[dat$TA != 0,], x = "rent", label = "trt"),
                    calc_descriptives(mean, df = dat[dat$TA == 0,], x = "rent", label = "con"),
                    calc_descriptives(sd,   df = dat[dat$TA == 0,], x = "rent", label = "con"))


kable(tab_a4_panel_b, col.names=c("Stratum", "Audit Sample (Mean)", "Audit Sample (SD)",
                                  "Exp Sample (Mean)", "Exp Sample (SD)",
                                  "Any Treatment (Mean)", "Any Treatment (SD)",
                                  "Control (Mean)", "Control (SD)"),
      caption="**Table A4, Panel B. Monthly Asking Rental Price ($, Scraped from Craigslist).**")

#----------------------------------------#
# Panel C: Monthly asking price (median)

tab_a4_panel_c <- cbind(stratum_labels,
                    calc_descriptives(median, df = aud, x = "rent", label = "aud"),
                    "",
                    calc_descriptives(median, df = dat, x = "rent", label = "exp"),
                    "",
                    calc_descriptives(median, df = dat[dat$TA != 0,], x = "rent", label = "trt"),
                    "",
                    calc_descriptives(median, df = dat[dat$TA == 0,], x = "rent", label = "con"),
                     "")


kable(tab_a4_panel_c, col.names=c("Stratum", "Audit Sample (Median)", "",
                                  "Exp Sample (Median)", "",
                                  "Any Treatment (Median)", "",
                                  "Control (Median)", ""),
      caption="**Table A4, Panel C. Median Monthly Asking Rental Price ($, Scraped from Craigslist).**")

#----------------------------------------#
# Panel D: Number of bedrooms

tab_a4_panel_d <- cbind(stratum_labels,
                    calc_descriptives(mean, df = aud,               x = "numbr", label = "aud"),
                    calc_descriptives(sd,   df = aud,               x = "numbr", label = "aud"),
                    calc_descriptives(mean, df = dat,               x = "numbr", label = "exp"),
                    calc_descriptives(sd,   df = dat,               x = "numbr", label = "exp"),
                    calc_descriptives(mean, df = dat[dat$TA != 0,], x = "numbr", label = "trt"),
                    calc_descriptives(sd,   df = dat[dat$TA != 0,], x = "numbr", label = "trt"),
                    calc_descriptives(mean, df = dat[dat$TA == 0,], x = "numbr", label = "con"),
                    calc_descriptives(sd,   df = dat[dat$TA == 0,], x = "numbr", label = "con"))

kable(tab_a4_panel_d, col.names=c("Stratum", "Audit Sample (Mean)", "Audit Sample (SD)",
                                  "Exp Sample (Mean)", "Exp Sample (SD)",
                                  "Any Treatment (Mean)", "Any Treatment (SD)",
                                  "Control (Mean)", "Control (SD)"),
      caption="**Table A4, Panel D. Number of Bedrooms (Scraped from Craigslist).**")

#----------------------------------------#
# Panel E: Square footage

tab_a4_panel_e <- cbind(stratum_labels,
                    calc_descriptives(mean, df = aud,               x = "sqft", label = "aud"),
                    calc_descriptives(sd,   df = aud,               x = "sqft", label = "aud"),
                    calc_descriptives(mean, df = dat,               x = "sqft", label = "exp"),
                    calc_descriptives(sd,   df = dat,               x = "sqft", label = "exp"),
                    calc_descriptives(mean, df = dat[dat$TA != 0,], x = "sqft", label = "trt"),
                    calc_descriptives(sd,   df = dat[dat$TA != 0,], x = "sqft", label = "trt"),
                    calc_descriptives(mean, df = dat[dat$TA == 0,], x = "sqft", label = "con"),
                    calc_descriptives(sd,   df = dat[dat$TA == 0,], x = "sqft", label = "con"))

kable(tab_a4_panel_e, col.names=c("Stratum", "Audit Sample (Mean)", "Audit Sample (SD)",
                                  "Exp Sample (Mean)", "Exp Sample (SD)",
                                  "Any Treatment (Mean)", "Any Treatment (SD)",
                                  "Control (Mean)", "Control (SD)"),
      caption="**Table A4, Panel E. Square Footage (Scraped from Craigslist).** Standard invisble(dev.off())iation is noted as NA when only one observation in a given stratum has non-missing square footage data. Square footage information is rarely posted on Craigslist rental ads in New York City.")

#----------------------------------------#
# Panel F: Listed by broker

tab_a4_panel_f <- cbind(stratum_labels,
                        calc_pct_ses(df = aud,                x = "broker", label = "aud"),
                        calc_pct_ses(df = dat,                x = "broker", label = "exp"),
                        calc_pct_ses(df = dat[dat$TA != 0,],  x = "broker", label = "trt"),
                        calc_pct_ses(df = dat[dat$TA == 0,],  x = "broker", label = "con"))

kable(tab_a4_panel_f, col.names=c("Stratum", "Audit Sample (%)", "Audit Sample (SE)",
                                  "Exp Sample (%)", "Exp Sample (SE)",
                                  "Any Treatment (%)", "Any Treatment (SE)",
                                  "Control (%)", "Control (SE)"),
      caption="**Table A4, Panel E. Listed by Broker (Scraped from Craigslist).**")

#----------------------------------------#
# Min/Max Advertised Monthly Rental Price 
# (Reported in online SI text, page A-19)

min_max_monthly_rent <- cbind(stratum_labels,
                          calc_descriptives(min, df = aud,                 x = "rent", label = "aud"),
                          calc_descriptives(max, df = aud,                 x = "rent", label = "aud"),
                          calc_descriptives(min, df = dat,                 x = "rent", label = "exp"),
                          calc_descriptives(max, df = dat,                 x = "rent", label = "exp"),
                          calc_descriptives(min, df = dat[dat$TA != 0,],   x = "rent", label = "trt"),
                          calc_descriptives(max, df = dat[dat$TA != 0,],   x = "rent", label = "trt"),
                          calc_descriptives(min, df = dat[dat$TA == 0,],   x = "rent", label = "con"),
                          calc_descriptives(max, df = dat[dat$TA == 0,],   x = "rent", label = "con"))

kable(min_max_monthly_rent, col.names=c("Stratum", "Audit Sample (Min)", "Audit Sample (Max)",
                                        "Exp Sample (Min)", "Exp Sample (Max)",
                                        "Any Treatment (Min)", "Any Treatment (Max)",
                                        "Control (Min)", "Control (Max)"),
      caption="**Mininum and Maximum Advertised Monthly Rental Price (Reported in online SI, page A-19)**")
```

```{r, echo=F, eval=T, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

#----------------------------------------#
# Stitch together Table A4, export as tex

tab_a4_panel_a <- rbind(c("Panel A. Number of Units", "N", "Percent", "N", "Percent", "N", "Percent", "N", "Percent"),
                        tab_a4_panel_a)
tab_a4_panel_b <- rbind(c("Panel B. Monthly Asking Price ($)", rep(c("Mean", "SD"), 4)),
                        tab_a4_panel_b)
tab_a4_panel_c <- rbind(c("Panel C. Median Monthly Asking Price ($)", rep(c("Median", ""), 4)),
                        tab_a4_panel_c)
tab_a4_panel_d <- rbind(c("Panel D. Number of Bedrooms", rep(c("Mean", "SD"), 4)),
                        tab_a4_panel_d)
tab_a4_panel_e <- rbind(c("Panel E. Square Footage", rep(c("Mean", "SD"), 4)),
                        tab_a4_panel_e)
tab_a4_panel_f <- rbind(c("Panel F. Listed by Broker", rep(c("Percent", "SE"), 4)),
                        tab_a4_panel_f)

colnames(tab_a4_panel_a) <- colnames(tab_a4_panel_b) <- colnames(tab_a4_panel_c) <- 
  colnames(tab_a4_panel_d) <- colnames(tab_a4_panel_e) <- colnames(tab_a4_panel_f) <- NULL

table_a4 <- rbind(tab_a4_panel_a,
                  tab_a4_panel_b,
                  tab_a4_panel_c,
                  tab_a4_panel_d,
                  tab_a4_panel_e,
                  tab_a4_panel_f)

colnames(table_a4)[1:ncol(table_a4)] <- c("",
                                          rep("Audit Sample",2),
                                          rep("Experimental Sample",2),
                                          rep("Any Treatment",2),
                                          rep("Control Group",2))

table_a4 <- print(xtable(table_a4), include.rownames=FALSE)
table_a4 <- unlist(strsplit(table_a4, "\n"))

swaps <- rbind(c("\\begin{tabular}{lllllllll}", "\\begin{tabular}{lrr|rr|rr|rr}"),
               c("V1 & Audit Sample & Audit Sample & Experimental Sample & Experimental Sample & Any Treatment & Any Treatment & Control Group & Control Group \\\\ ",
                 " & \\multicolumn{2}{c|}{I. Audit Sample} & \\multicolumn{2}{c|}{II. Experimental Sample} & \\multicolumn{2}{c|}{III. Any Treatment} & \\multicolumn{2}{c}{IV. Control Group} \\\\ "))


for(i in 1:nrow(swaps)){
  table_a4 <- gsub(swaps[i,1] , swaps[i,2] , table_a4, fixed=TRUE)
}
rm(i)

table_a4 <- c(table_a4[3:4],
              "\\resizebox{1\\textwidth}{!}{",
              table_a4[5:9], "\\cline{2-9}",
              table_a4[10:16], "\\hline",
              table_a4[17], "\\cline{2-9}",
              table_a4[18:24],"\\hline",
              table_a4[25], "\\cline{2-9}",
              table_a4[26:32],"\\hline",
              table_a4[33], "\\cline{2-9}",
              table_a4[34:40],"\\hline",
              table_a4[41], "\\cline{2-9}",
              table_a4[42:48],"\\hline",
              table_a4[49], "\\cline{2-9}",
              table_a4[50:58],
              "}",
              "\\caption{Selected Pre-Treatment Characteristics of Housing Units in the Audit and Experimental Samples. Data on housing characteristics are scraped from Craigslist ads. In cells where the standard invisble(dev.off())iation is NA (i.e., for square footage), this means that only one observation had non-missing data; these data were largely missing because square footage information is rarely included in Craigslist rental listings in New York City.} \\label{samplechar}",
              table_a4[59])

cat(table_a4, file="out_tablea4_sample_characteristics.tex", sep="\n")
```

## Table A5 (p. A-21): Distribution of Rental Units Across Boroughs, by Sample

```{r tablea5, echo=T, eval=T, warning=F}
# Note: Citywide 2011 numbers are from Table 5 in 
#  http://www.nyc.gov/html/hpd/downloads/pdf/HPD-2011-HVS-Selected-Findings-Tables.pdf
# For an archived version of this document available via the Internet Archive Wayback Machine, see
#  http://web.archive.org/web/20131102065647/http://www.nyc.gov/html/hpd/downloads/pdf/HPD-2011-HVS-Selected-Findings-Tables.pdf 
# A PDF copy of this document is also included in the replication archive

tab.city.n <- c(691178, 388022, 587313, 449108, 57013)
tab.city.pct <- c(31.81, 17.86, 27.03, 20.67, 2.62)

db.aud <- ddply(aud[aud$frame != "likely disc",], .(boro), summarise,
                num = length(cid),
                pct = round(length(cid)/nrow(aud[aud$frame != "likely disc",])*100, 2))

db.exp <- ddply(dat[dat$frame != "likely disc",], .(boro), summarise,
                num = length(cid),
                pct = round(length(cid)/nrow(dat[dat$frame != "likely disc",])*100, 2))

db.con <- ddply(dat[dat$frame != "likely disc" & dat$TA==0,], .(boro), summarise,
                num = length(cid),
                pct = round(length(cid)/nrow(dat[dat$frame != "likely disc" & dat$TA==0,])*100, 2))

colnames(db.aud) <- c("boro", "Audit Sample (N)", "Audit Sample (%)")
colnames(db.exp) <- c("boro", "Exp Sample (N)", "Exp Sample (%)")
colnames(db.con) <- c("boro", "Control Group (N)", "Control Group (%)")

tab.db <- join(db.aud, db.exp, by="boro", type="left", match="all")
tab.db <- join(tab.db, db.con, by="boro", type="left", match="all")

tab.db <- cbind(tab.db[,1],
                tab.city.n,
                tab.city.pct,
                tab.db[,2:ncol(tab.db)])

tab.db[,1] <- c("Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island")
colnames(tab.db)[1:3] <- c("Borough", "Citywide 2011 (N)", "Citywide 2011 (%)")

tab.db <- rbind(tab.db,
                c("Total", round(apply(tab.db[,2:ncol(tab.db)],2,sum), 0)))


kable(tab.db, caption="**Table A5**")
```


```{r, include=FALSE}
print(xtable(tab.db), file="out_tablea5_dist_by_borough.tex", include.rownames=FALSE)
```

## Figure A5 (p. A-22): Map of the Geographic Distribution of Housing Units Corresponding to Advertised Listings

We omit the replication data and code to produce Figure A5, the map of the geographic distribution of rental units in the audit and experimental samples, because these require non-anonymized data that contain personal identifying information about the subjects.

## Table A6 (p. A-24): Baseline Incidence of Discrimination: In-Person and Post-Visit


```{r tablea6, echo=T, eval=T, warning=F}

outvars <- c("meet", "sales", "qualpraise", "posbg", "posedit", "prof", "cb", "off")

summm <- function(outvar, group1 = "C", group2 = "A"){
  outvar1 <- paste0(outvar, "_", group1)
  outvar2 <- paste0(outvar, "_", group2)
  ctrl_maj_mean <- mean(as.numeric(dat[[outvar1]][dat$TA==0]), na.rm=TRUE) # majority mean
  ctrl_min_mean <- mean(as.numeric(dat[[outvar2]][dat$TA==0]), na.rm=TRUE) # minority mean
  ctrl_diff <- ctrl_maj_mean  - ctrl_min_mean  # difference
  if(outvar %in% c("meet", "cb", "off")){
    ctrl_P <- t.test(x=as.numeric(dat[[outvar1]][dat$TA==0]),
                     y=as.numeric(dat[[outvar2]][dat$TA==0]),
                     alternative="two.sided",
                     paired=TRUE,
                     conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    ctrl_P <- t.test(x = as.numeric(dat[[outvar1]][dat$TA==0]),
                     y = as.numeric(dat[[outvar2]][dat$TA==0]),
                     alternative = "two.sided",
                     paired = TRUE,
                     conf.level = 0.95)$p.value # p value from t-test (two-sided)
  }
  
  
  ctrl_N <- ifelse(length(dat[[outvar1]][dat$TA==0 & !is.na(dat[[outvar1]])])==length(as.numeric(dat[[outvar2]][dat$TA==0 & !is.na(dat[[outvar2]])])),
                   length(dat[[outvar1]][dat$TA==0 & !is.na(dat[[outvar1]])]),
                   length(dat[[outvar1]][dat$TA==0 & !( is.na(dat[[outvar1]]) & is.na(dat[[outvar2]]) )])  )# sample size
  
  ## ANY TREATMENT (cols 7-11)
  any_maj_mean <- mean(as.numeric(dat[[outvar1]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # majority mean
  any_min_mean <- mean(as.numeric(dat[[outvar2]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # minority mean
  any_diff <- any_maj_mean - any_min_mean  # difference
  if(outvar %in% c("meet", "cb", "off")){
    any_P <- t.test(x=as.numeric(dat[[outvar1]][dat$TA %in% c(1,2)]),
                    y=as.numeric(dat[[outvar2]][dat$TA %in% c(1,2)]),
                    alternative="two.sided",
                    paired=TRUE,
                    conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    any_P <- t.test(x=as.numeric(dat[[outvar1]][dat$TA %in% c(1,2)]),
                    y=as.numeric(dat[[outvar2]][dat$TA %in% c(1,2)]),
                    alternative="two.sided",
                    paired=FALSE,
                    conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  any_N <- ifelse(length(dat[[outvar1]][dat$TA%in%c(1,2) & !is.na(dat[[outvar1]])])==length(as.numeric(dat[[outvar2]][dat$TA%in%c(1,2) & !is.na(dat[[outvar2]])])),
                  length(dat[[outvar1]][dat$TA%in%c(1,2) & !is.na(dat[[outvar1]])]),
                  length(dat[[outvar1]][dat$TA%in%c(1,2) & !( is.na(dat[[outvar1]]) & is.na(dat[[outvar2]]) )]) )# sample size
  
  ## EXPERIMENT SAMPLE (cols 12-16)
  exp_maj_mean <- mean(as.numeric(dat[[outvar1]][dat$TA %in% c(0,1,2)]), na.rm=TRUE)  # majority mean
  exp_min_mean <- mean(as.numeric(dat[[outvar2]][dat$TA %in% c(0,1,2)]), na.rm=TRUE) # minority mean
  exp_diff <-exp_maj_mean  - exp_min_mean  # difference
  if(outvar %in% c("meet", "cb", "off")){
    exp_P <- t.test(x=as.numeric(dat[[outvar1]][dat$TA %in% c(0,1,2)]),
                    y=as.numeric(dat[[outvar2]][dat$TA %in% c(0,1,2)]),
                    alternative="two.sided",
                    paired=TRUE,
                    conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    exp_P <- t.test(x=as.numeric(dat[[outvar1]][dat$TA %in% c(0,1,2)]),
                    y=as.numeric(dat[[outvar2]][dat$TA %in% c(0,1,2)]),
                    alternative="two.sided",
                    paired=FALSE,
                    conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  exp_N <-  ifelse(length(dat[[outvar1]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvar1]])])==length(as.numeric(dat[[outvar2]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvar2]])])),
                   length(dat[[outvar1]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvar1]])]),
                   length(dat[[outvar1]][dat$TA%in%c(0,1,2) & !( is.na(dat[[outvar1]]) & is.na(dat[[outvar2]]))])) # sample size

     c(ctrl_maj_mean, ctrl_min_mean, ctrl_diff, ctrl_P, ctrl_N, 
       any_maj_mean,  any_min_mean,  any_diff,  any_P,  any_N,
       exp_maj_mean,  exp_min_mean,  exp_diff,  exp_P,  exp_N)
  
  }


wb <- cbind( c("Showed up to appointment (White vs. Black)",                         
               "Perceived sales efforts (White vs. Black)",                          
               "Received praise about rental qualifications (White vs. Black)",      
               "Positive reactions to testers' background (White vs. Black)",        
               "Positive editorializing (White vs. Black)",                          
               "Professionalism (White vs. Black)",                                  
               "Received post-visit callback (White vs. Black)",                     
               "Received post-visit offer for unit (White vs. Black)",               
               "Index measure of favorable in-person interactions (White vs. Black)"), 
            rbind(t(sapply(outvars, summm, group1 = "C", group2 = "A")), "") )

wh <- cbind(c("Showed up to appointment (White vs. Hispanic)",                         
               "Perceived sales efforts (White vs. Hispanic)",                          
               "Received praise about rental qualifications (White vs. Hispanic)",      
               "Positive reactions to testers' background (White vs. Hispanic)",        
               "Positive editorializing (White vs. Hispanic)",                          
               "Professionalism (White vs. Hispanic)",                                  
               "Received post-visit callback (White vs. Hispanic)",                     
               "Received post-visit offer for unit (White vs. Hispanic)",               
               "Index measure of favorable in-person interactions (White vs. Hispanic)"),
             rbind(t(sapply(outvars, summm, group1 = "C", group2 = "B")), ""))

bh <- cbind( c("Showed up to appointment (Black vs. Hispanic)",                         
               "Perceived sales efforts (Black vs. Hispanic)",                          
               "Received praise about rental qualifications (Black vs. Hispanic)",      
               "Positive reactions to testers' background (Black vs. Hispanic)",        
               "Positive editorializing (Black vs. Hispanic)",                          
               "Professionalism (Black vs. Hispanic)",                                  
               "Received post-visit callback (Black vs. Hispanic)",                     
               "Received post-visit offer for unit (Black vs. Hispanic)",               
               "Index measure of favorable in-person interactions (Black vs. Hispanic)"), 
              rbind(t(sapply(outvars, summm, group1 = "A", group2 = "B")), ""))
 

## CONTROL GROUP
wb[nrow(wb),4] <- mean(as.numeric(dat[[net.ind.mat[1]]][dat$TA==0]), na.rm=TRUE) # difference
wb[nrow(wb),6] <- length(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(0) & !is.na(dat[[net.ind.mat[1]]])])) # sample size
## ANY TREATMENT
wb[nrow(wb),9] <- mean(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(1,2)]), na.rm=TRUE) # difference
wb[nrow(wb),11] <- length(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(1,2) & !is.na(dat[[net.ind.mat[1]]])])) # sample size
## EXPERIMENT SAMPLE
wb[nrow(wb),14] <- mean(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(0,1,2)]), na.rm=TRUE) # difference
wb[nrow(wb),16] <- length(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(0,1,2) & !is.na(dat[[net.ind.mat[1]]])]))  # sample size     

## CONTROL GROUP
wh[nrow(wh),4] <- mean(as.numeric(dat[[net.ind.mat[2]]][dat$TA==0]), na.rm=TRUE) # difference
wh[nrow(wh),6] <- length(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(0) & !is.na(dat[[net.ind.mat[2]]])])) # sample size
## ANY TREATMENT
wh[nrow(wh),9] <- mean(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(1,2)]), na.rm=TRUE) # difference
wh[nrow(wh),11] <- length(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(1,2) & !is.na(dat[[net.ind.mat[2]]])])) # sample size
## EXPERIMENT SAMPLE
wh[nrow(wh),14] <- mean(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(0,1,2)]), na.rm=TRUE) # difference
wh[nrow(wh),16] <- length(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(0,1,2) & !is.na(dat[[net.ind.mat[2]]])]))  # sample size     

## CONTROL GROUP
bh[nrow(bh),4] <- mean(as.numeric(dat[[net.ind.mat[3]]][dat$TA==0]), na.rm=TRUE) # difference
bh[nrow(bh),6] <- length(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(0) & !is.na(dat[[net.ind.mat[3]]])])) # sample size
## ANY TREATMENT
bh[nrow(bh),9] <- mean(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(1,2)]), na.rm=TRUE) # difference
bh[nrow(bh),11] <- length(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(1,2) & !is.na(dat[[net.ind.mat[3]]])])) # sample size
## EXPERIMENT SAMPLE
bh[nrow(bh),14] <- mean(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(0,1,2)]), na.rm=TRUE) # difference
bh[nrow(bh),16] <- length(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(0,1,2) & !is.na(dat[[net.ind.mat[3]]])]))  # sample size     
```


```{r tablea6format, include=FALSE}
# populate column headings
colnames(wb) <- c("Measure", paste( c(rep("Control-",5),rep("AnyTreat-",5),rep("ExpSamp-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(wh) <- c("Measure", paste( c(rep("Control-",5),rep("AnyTreat-",5),rep("ExpSamp-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(bh) <- c("Measure", paste( c(rep("Control-",5),rep("AnyTreat-",5),rep("ExpSamp-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))


for(i in 2:ncol(wb)){
  wb[,i] <- round(as.numeric(wb[,i]), 3)
  wh[,i] <- round(as.numeric(wh[,i]), 3)
  bh[,i] <- round(as.numeric(bh[,i]), 3)
  
  if(i %in% c(5,10,15)) {
    wb[,i] <- paste("(",wb[,i],")",sep="")
    wh[,i] <- paste("(",wh[,i],")",sep="")
    bh[,i] <- paste("(",bh[,i],")",sep="")
  }
  if(i %in% c(6,11,16)) {
    wb[,i] <- paste("[",wb[,i],"]",sep="")
    wh[,i] <- paste("[",wh[,i],"]",sep="")
    bh[,i] <- paste("[",bh[,i],"]",sep="")
  }
}


```

```{r print_tablea7}
out <- rbind(wb[c(1,9,2:8),],
             wh[c(1,9,2:8),],
             bh[c(1,9,2:8),])

out <- out[-c(2,11,20),] # get rid of index measure (it makes no sense to include it because it is standardized to the control group mean)


kable(out[,c(1:6)], caption="**Table A6, Panel I: Cases Assigned to Control Group**", col.names=c("Measure", "Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]"))
kable(out[,c(1, 7:11)], caption="**Table A6, Panel II: Cases Assigned to Any Treatment Group**", col.names=c("Measure", "Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]"))
kable(out[,c(1, 12:16)], caption="**Table A6, Panel III: All Cases in Experimental Sample**", col.names=c("Measure", "Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]"))
```

```{r, include=FALSE}
print(xtable(out, caption=c("Baseline Incidence of Discrimination: In-Person and Post-Visit",
                            "Baseline Incidence of Discrimination: In-Person and Post-Visit")), 
      file="out_tablea6_baseline_discrim.tex",
      hline.after=c(0,8,16,24), 
      include.rownames=FALSE)

```

## Table A7 (p. A-25): Estimated Effects of Messaging on Net Discrimination Levels

```{r prep_tablea7, include=FALSE}

itt.wb.mc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pm <- matrix(NA, nrow=length(outvars.wb), ncol=5)

itt.wh.mc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pm <- matrix(NA, nrow=length(outvars.wh), ncol=5)

itt.bh.mc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pm <- matrix(NA, nrow=length(outvars.bh), ncol=5)

fit.wb.mc <- fit.wb.pc <- fit.wb.pm <- fit.wh.mc <- fit.wh.pc <- fit.wh.pm <- fit.bh.mc <- fit.bh.pc <- fit.bh.pm <- list() # store 
```

```{r tablea7, echo=T, eval=T, warning=F}
# ---------------------------------------------------------------------- #
# DEFINE COLNAMES FOR OUTPUT TABLES

col.labels <- c("Outcome", "Estimate","SE","t","P")

# ---------------------------------------------------------------------- #
# MODELS WITH ONLY BLOCK FIXED EFFECTS

# results from ITT est w/ block FE (no other covs)
outvars <- c("nmeet_", "index.", "ncb_","noff_")


reg_and_summ <- function(outvar, compare){
  depvar <- paste0(outvar, compare)
  model.mc <- paste(depvar, " ~ TA1 + as.factor(block)", sep = "")
  model.pc <- paste(depvar, " ~ TA2 + as.factor(block)", sep = "")
  model.pm <- paste(depvar, " ~ TA2 + as.factor(block)", sep = "")
  
  fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)
  
  itt.mc <- summary(fit.mc)$coefficients[2,]
  itt.pc <- summary(fit.pc)$coefficients[2,]
  itt.pm <- summary(fit.pm)$coefficients[2,]
    
  if(compare == "wb" | compare == "wh"  ){
  itt.mc[4] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.pc[4] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
  } 
  
    list(mc     = itt.mc, pc     = itt.pc, pm     = itt.pm,
         fit.mc = fit.mc, fit.pc = fit.pc, fit.pm = fit.pm )
  
}
itt.wb <- lapply(outvars, reg_and_summ,  compare = "wb" )
itt.wb.mc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$mc)
itt.wb.pc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$pc)
itt.wb.pm <- sapply(1:length(outvars), function(i) itt.wb[[i]]$pm)
fit.wb.mc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$fit.mc)
fit.wb.pc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$fit.pc)
fit.wb.pm <- sapply(1:length(outvars), function(i) itt.wb[[i]]$fit.pm)

itt.wh <- lapply(outvars, reg_and_summ,  compare = "wh" )
itt.wh.mc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$mc)
itt.wh.pc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$pc)
itt.wh.pm <- sapply(1:length(outvars), function(i) itt.wh[[i]]$pm)
fit.wh.mc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$fit.mc)
fit.wh.pc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$fit.pc)
fit.wh.pm <- sapply(1:length(outvars), function(i) itt.wh[[i]]$fit.pm)

itt.bh <- lapply(outvars, reg_and_summ,  compare = "bh" )
itt.bh.mc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$mc)
itt.bh.pc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$pc)
itt.bh.pm <- sapply(1:length(outvars), function(i) itt.bh[[i]]$pm)
fit.bh.mc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$fit.mc)
fit.bh.pc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$fit.pc)
fit.bh.pm <- sapply(1:length(outvars), function(i) itt.bh[[i]]$fit.pm)


#----- CIs -----#
df.w_ <- sapply(list(df.wb.mc = fit.wb.mc, 
                     df.wb.pc = fit.wb.pc, 
                     df.wb.pm = fit.wb.pm,
                     df.wh.mc = fit.wh.mc,
                     df.wh.pc = fit.wh.pc, 
                     df.wh.pm = fit.wh.pm,
                     df.bh.mc = fit.bh.mc,
                     df.bh.pc = fit.bh.pc,
                     df.bh.pm = fit.bh.pm),
                function(fit){
                        unlist(lapply(fit, function(x) summary(x)$df[2]))[2:4]})

df.2g.10 <-  c(df.w_[,c("df.wb.mc", "df.wh.mc", "df.bh.mc")])
df.2g.20 <-  c(df.w_[,c("df.wb.pc", "df.wh.pc", "df.bh.pc")])
df.2g.21 <-  c(df.w_[,c("df.wb.pm", "df.wh.pm", "df.bh.pm")])

cv.2g.10 <- qt(p=0.025, df=df.2g.10, lower.tail=TRUE)
cv.2g.20 <- qt(p=0.025, df=df.2g.20, lower.tail=TRUE)
cv.2g.21 <- qt(p=0.025, df=df.2g.21, lower.tail=TRUE)

cv.2g <- c(cv.2g.10, cv.2g.20, cv.2g.21)
itt.2g.ci <- as.numeric(out2[,2]) + abs(cv.2g)*cbind(-as.numeric(out2[,3]), as.numeric(out2[,3]))


```

```{r, include=FALSE}


# Add outcome measure labels
itt.wb.mc <- cbind(outvars.wb.labs, t(itt.wb.mc))
itt.wb.pc <- cbind(outvars.wb.labs, t(itt.wb.pc))
itt.wb.pm <- cbind(outvars.wb.labs, t(itt.wb.pm))

itt.wh.mc <- cbind(outvars.wh.labs, t(itt.wh.mc))
itt.wh.pc <- cbind(outvars.wh.labs, t(itt.wh.pc))
itt.wh.pm <- cbind(outvars.wh.labs, t(itt.wh.pm))

itt.bh.mc <- cbind(outvars.bh.labs, t(itt.bh.mc))
itt.bh.pc <- cbind(outvars.bh.labs, t(itt.bh.pc))
itt.bh.pm <- cbind(outvars.bh.labs, t(itt.bh.pm))

# Add column names
colnames(itt.wb.mc) <- colnames(itt.wb.pc) <- colnames(itt.wb.pm) <- col.labels
colnames(itt.wh.mc) <- colnames(itt.wh.pc) <- colnames(itt.wh.pm) <- col.labels
colnames(itt.bh.mc) <- colnames(itt.bh.pc) <- colnames(itt.bh.pm) <- col.labels

```

```{r, include = TRUE}

# Stack, create output table
out2 <- rbind(itt.wb.mc, itt.wh.mc, itt.bh.mc,
              itt.wb.pc, itt.wh.pc, itt.bh.pc,
              itt.wb.pm, itt.wh.pm, itt.bh.pm)

out2 <- out2[-seq(1,33,4),]


# Round and format results
for(i in 2:ncol(out2)) out2[,i] <- round(as.numeric(out2[,i]),3)
for(i in 5) out2[,i] <- paste("(",out2[,i],")",sep="")

# Assemble results
itt.2g.tab <- cbind(out2, cbind(apply(itt.2g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep=""))))
colnames(itt.2g.tab) <- c("Outcome", "Estimate", "SE", "t", "p-value", "95% CI")


kable(itt.2g.tab[1:9,], caption="**Table A7, Panel I. Monitoring vs. Control**")
kable(itt.2g.tab[10:18,], caption="**Table A7, Panel II. Punitive vs. Control**")
kable(itt.2g.tab[19:27,], caption="**Table A7, Panel III. Punitive vs. Monitoring**")

```

```{r, include=FALSE}
# Save results
write.csv(itt.2g.tab, "out_tablea7_itt_2g_blockfe_nocovs_UNFORMATTED.csv", row.names=FALSE)

```


```{r, echo=F, eval=T, warning=F, include=FALSE}
# Some helper code for formatting (do not display output)
start.rows <- seq(1,27,3)
stop.rows <- start.rows + 2
Ylabs <- c("Index measure of favorable in-person interactions",
           "Received post-visit callback",
           "Received post-visit offer")
Ylabs <- rep(Ylabs, 9)

seclabs <- c("I. Monitoring vs. Control", "II. Punitive vs. Control", "III. Punitive vs. Monitoring")
sublabs <- rep(c("A. White vs. Black", "B. White vs. Hispanic", "C. Black vs. Hispanic"), 3)
```


```{r, echo=F, eval=T, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

# Format table
tab.out2 <- list()
seclabs.index <- 0
for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    tab.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,5)),
                           c(sublabs[i], rep(NA,5)),
                           itt.2g.tab[start.rows[i]:stop.rows[i],])        
  } else {
    tab.out2[[i]] <- rbind(c(sublabs[i], rep(NA,5)), itt.2g.tab[start.rows[i]:stop.rows[i],])    
  }
}

tab.out2 <- do.call(rbind, tab.out2)

out <- print(xtable(tab.out2, align="llrrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.85\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption[Estimated Effects of Messaging on Net Discrimination Levels]{Estimated Effects of Messaging on Net Discrimination Levels. Cells contain ITT estimates from OLS models with inverse probability weights and block fixed effects. For each reference group versus comparison group pairing, outcomes are net discrimination measures against the comparison group relative to the reference group. Estimated effects that are positive (negative) are interpreted as increases (decreases) in net discrimination against the comparison group relative to the reference group. Estimated p-values are reported in parentheses; p-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control comparisons, and to a two-sided test of the null hypothesis of equality of means for the punitive-monitoring comparison and for all analyses involving net discrimination against Hispanic (vs. black) testers.}",
         "\\label{ittnocov}",
         out[50])

cat(out, file="out_tablea7_itt_2g_blockfe_nocovs.tex", sep="\n")
write.csv(tab.out2, file="out_tablea7_itt_2g_blockfe_nocovs.csv", row.names=FALSE)
```

```{r prepittestforplot, eval=T, echo=F, warning=F}
# Prepare ITT estimates for plot
# extract info from main ITT estimates (2-group parametric w/ block FE and ipw, no addl covariates) to populate graph
itt.est <- read.csv("out_tablea7_itt_2g_blockfe_nocovs_UNFORMATTED.csv", header=TRUE, stringsAsFactors=FALSE)

itt.est$lower.ci <- as.numeric(sapply(strsplit(itt.est[,6], ", "), function(x) gsub("[", "", x[1], fixed=TRUE)))
itt.est$upper.ci <- as.numeric(sapply(strsplit(itt.est[,6], ", "), function(x) gsub("]", "", x[2], fixed=TRUE)))

row_labels <- c("W-B", "W-H", "B-H")
col_labels <- c("M-C", "P-C", "P-M")

cb.itt <- itt.est[grepl("callback", itt.est[,1], fixed=TRUE),]
off.itt <- itt.est[grepl("offer", itt.est[,1], fixed=TRUE),]
meindex.itt <- itt.est[grepl("Index", itt.est[,1], fixed=TRUE),]

cb.reg <- list(coef=matrix(cb.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
               se=matrix(cb.itt$SE, nrow=3, ncol=3, byrow=FALSE),
               lower=matrix(cb.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
               upper=matrix(cb.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

off.reg <- list(coef=matrix(off.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
                se=matrix(off.itt$SE, nrow=3, ncol=3, byrow=FALSE),
                lower=matrix(off.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
                upper=matrix(off.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

meindex.reg <- list(coef=matrix(meindex.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
                    se=matrix(meindex.itt$SE, nrow=3, ncol=3, byrow=FALSE),
                    lower=matrix(meindex.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
                    upper=matrix(meindex.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

cv.mat <- cbind(c(rep(c("index", "cb", "off"), times=3)),
      c(rep("wb", 3), rep("wh", 3), rep("bh", 3)),
      abs(cv.2g.10),
      abs(cv.2g.20),
      abs(cv.2g.21))

cv.reg.meindex <- cv.mat[cv.mat[,1]=="index",3:5]
cv.reg.cb <- cv.mat[cv.mat[,1]=="cb",3:5]
cv.reg.off <- cv.mat[cv.mat[,1]=="off",3:5]
```

## Table A8 (p. A-26): Unweighted ITT Estimates of Messaging on Net Discrimination in Receiving a Post-Visit Callback

```{r tablea8, echo=T, warning=F}
# top left
cb1 <- ddply(dat, .(TA), summarise,
              mean.cb.w = mean(cb_C),
              mean.cb.b = mean(cb_A),
              mean.cb.h = mean(cb_B))
cb1 <- t(cb1[,2:4])
colnames(cb1) <- c("Control","Monitoring","Punitive")

# bottom left
cb2 <- ddply(dat, .(TA), summarise,
             mean.ncb.wb = mean(ncb_wb),
             mean.ncb.wh = mean(ncb_wh),
             mean.ncb.bh = mean(ncb_bh))
cb2 <- t(cb2[,2:4])

# top right
cb3 <- cbind(cb1[,2]-cb1[,1], cb1[,3]-cb1[,1], cb1[,3]-cb1[,2])

# bottom right
cb4 <- cbind(cb2[,2]-cb2[,1], cb2[,3]-cb2[,1], cb2[,3]-cb2[,2])

cb1 <- round(cb1,3)
cb2 <- round(cb2,3)
cb3 <- round(cb3,3)
cb4 <- round(cb4,3)

# p values
get_p <- function(var1, var2, T_r, T_c){
  ps <- c(t.test(dat[dat$TA==0, var1],
                 dat[dat$TA==0, var2], 
                 alternative="two.sided",
                 paired=TRUE,
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==1, var1],
                 dat[dat$TA==1, var2],
                 alternative="two.sided",
                 paired=TRUE,
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==2, var1],
                 dat[dat$TA==2, var2], 
                 alternative="two.sided",
                 paired=TRUE,
                 conf.level=0.95)$p.value)
  return(ps)
  }

p2 <- rbind(get_p("cb_C","cb_A"),
            get_p("cb_C","cb_B"),
            get_p("cb_A","cb_B"))

p2 <- matrix(paste("(",round(p2,3),")",sep=""), ncol=3, byrow=FALSE)

#p-value within race across treatments
get_p2 <- function(var){
  ps <- c(t.test(dat[dat$TA==1, var],
                 dat[dat$TA==0, var], 
                 alternative="two.sided",
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==2, var],
                 dat[dat$TA==0, var],
                 alternative="two.sided",
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==2, var],
                 dat[dat$TA==1, var], 
                 alternative="two.sided",
                 conf.level=0.95)$p.value)
  return(ps)
}

p3 <- rbind(get_p2("cb_C"), get_p2("cb_A"), get_p2("cb_B"))
p3 <- matrix(paste("(",round(p3,3),")",sep=""), ncol=3, byrow=FALSE)

f0 <- lm(ncb_wb ~ TA1 + TA2, data=dat)
f1 <- lm(ncb_wb ~ TA0 + TA2, data=dat)
p4.wb <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(ncb_wh ~ TA1 + TA2, data=dat)
f1 <- lm(ncb_wh ~ TA0 + TA2, data=dat)
p4.wh <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(ncb_bh ~ TA1 + TA2, data=dat)
f1 <- lm(ncb_bh ~ TA0 + TA2, data=dat)
p4.bh <- c(summary(f0)$coefficients[2:3,4],
           summary(f1)$coefficients[3,4])

p4 <- rbind(p4.wb, p4.wh, p4.bh)
p4 <- matrix(paste("(",round(p4,3),")",sep=""), ncol=3, byrow=FALSE)

p1 <- matrix(NA, ncol=3, nrow=3)

cb1 <- rbind(cb1[1,], p1[1,],
             cb1[2,], p1[2,],
             cb1[3,], p1[3,])
cb2 <- rbind(cb2[1,], p2[1,],
             cb2[2,], p2[2,],
             cb2[3,], p2[3,])
cb3 <- rbind(cb3[1,], p3[1,],
             cb3[2,], p3[2,],
             cb3[3,], p3[3,])
cb4 <- rbind(cb4[1,], p4[1,],
             cb4[2,], p4[2,],
             cb4[3,], p4[3,])

table_a8 <- cbind(c("White",NA,"Black",NA,"Hispanic",NA,"White vs. Black",NA,"White vs. Hispanic",NA,"Black vs. Hispanic",NA),
             rbind(cbind(cb1, cb3), cbind(cb2, cb4)))
table_a8 <- rbind(table_a8, 
                  c("Sample size", nrow(dat[dat$TA == 0,]), nrow(dat[dat$TA == 1,]), 
                       nrow(dat[dat$TA == 2,]), nrow(dat[dat$TA %in% c(0,1),]), 
                       nrow(dat[dat$TA %in% c(0,2),]), nrow(dat[dat$TA %in% c(1,2),])))

colnames(table_a8) <- c("","Control", "Monitoring", "Punitive", "Monitoring vs Control", "Punitive vs Control", "Punitive vs Monitoring")


kable(table_a8[1:6,], caption="**Table A8, Panel A. Percent Favorable**")
kable(table_a8[7:12,], caption="**Table A8, Panel B. Net Discrimination (% Majority Favorable - % Minority Favorable)**")
kable(rbind(table_a8[13,]), caption="**Table A8. Sample Size (last row)**")
```

```{r, include=FALSE}
print(xtable(table_a8), file="out_tablea8_unweighted_itt_callbacks.tex", include.rownames=FALSE)
```

## Table A9 (p. A-27): Unweighted ITT Estimates of Messaging on Net Discrimination in Receiving a Post-Visit Offer for the Unit

```{r tablea9, echo=T, warning=F}
# top left
off1 <- ddply(dat, .(TA), summarise,
              mean.off.w = mean(off_C),
              mean.off.b = mean(off_A),
              mean.off.h = mean(off_B))
off1 <- t(off1[,2:4])
colnames(off1) <- c("Control","Monitoring","Punitive")

# bottom left
off2 <- ddply(dat, .(TA), summarise,
              mean.noff.wb = mean(noff_wb),
              mean.noff.wh = mean(noff_wh),
              mean.noff.bh = mean(noff_bh))
off2 <- t(off2[,2:4])

# top right
off3 <- cbind(off1[,2]-off1[,1], off1[,3]-off1[,1], off1[,3]-off1[,2])

# bottom right
off4 <- cbind(off2[,2]-off2[,1], off2[,3]-off2[,1], off2[,3]-off2[,2])

off1 <- round(off1,3)
off2 <- round(off2,3)
off3 <- round(off3,3)
off4 <- round(off4,3)

# p values

p2 <- rbind(get_p("off_C","off_A"),
            get_p("off_C","off_B"),
            get_p("off_A","off_B"))

p2 <- matrix(paste("(",round(p2,3),")",sep=""), ncol=3, byrow=FALSE)

p3 <- rbind(get_p2("off_C"),
            get_p2("off_A"),
            get_p2("off_B"))

p3 <- matrix(paste("(",round(p3,3),")",sep=""), ncol=3, byrow=FALSE)

f0 <- lm(noff_wb ~ TA1 + TA2, data=dat)
f1 <- lm(noff_wb ~ TA0 + TA2, data=dat)
p4.wb <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(noff_wh ~ TA1 + TA2, data=dat)
f1 <- lm(noff_wh ~ TA0 + TA2, data=dat)
p4.wh <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(noff_bh ~ TA1 + TA2, data=dat)
f1 <- lm(noff_bh ~ TA0 + TA2, data=dat)
p4.bh <- c(summary(f0)$coefficients[2:3,4],
           summary(f1)$coefficients[3,4])

p4 <- rbind(p4.wb, p4.wh, p4.bh)
p4 <- matrix(paste("(",round(p4,3),")",sep=""), ncol=3, byrow=FALSE)

p1 <- matrix(NA, ncol=3, nrow=3)

off1 <- rbind(off1[1,], p1[1,],
              off1[2,], p1[2,],
              off1[3,], p1[3,])
off2 <- rbind(off2[1,], p2[1,],
              off2[2,], p2[2,],
              off2[3,], p2[3,])
off3 <- rbind(off3[1,], p3[1,],
              off3[2,], p3[2,],
              off3[3,], p3[3,])
off4 <- rbind(off4[1,], p4[1,],
              off4[2,], p4[2,],
              off4[3,], p4[3,])

table_a9 <- cbind(c("White",NA,"Black",NA,"Hispanic",NA,"White vs. Black",NA,"White vs. Hispanic",NA,"Black vs. Hispanic",NA),
             rbind(cbind(off1, off3), cbind(off2, off4)))
table_a9 <- rbind(table_a9, 
                  c("Sample size", nrow(dat[dat$TA == 0,]), nrow(dat[dat$TA == 1,]), 
                       nrow(dat[dat$TA == 2,]), nrow(dat[dat$TA %in% c(0,1),]), 
                       nrow(dat[dat$TA %in% c(0,2),]), nrow(dat[dat$TA %in% c(1,2),])))
colnames(table_a9) <- c("","Control", "Monitoring", "Punitive", "Monitoring vs Control", "Punitive vs Control", "Punitive vs Monitoring")

kable(table_a9[1:6,], caption="**Table A9, Panel A. Percent Favorable**")
kable(table_a9[7:12,], caption="**Table A9, Panel B. Net Discrimination (% Majority Favorable - % Minority Favorable)**")
kable(rbind(table_a9[13,]), caption="**Table A9. Sample Size (last row)**")
```

```{r, include=FALSE}
print(xtable(table_a9), file="out_tablea9_unweighted_itt_offers.tex", include.rownames=FALSE)
```


## Table A10 (p. A-28): Sensitivity Analysis: Estimated Effects of Messaging on Net Discrimination Levels (Three-Group Parametric Estimator)

```{r tablea10, echo=T, eval=T, warning=F}
  #one-sided p function
  get_plower <- function(x) pt(summary(x)$coefficients[,3], summary(x)$df[2], lower = TRUE)
  
  # select all outcome variables
  outpatt <- c("^index","ncb","noff")
  outvar <- unlist(lapply(outpatt, function(y) grep(y, names(dat), value = TRUE)))
  
  datm <- reshape2::melt(dat, id = names(dat)[!names(dat)%in% c(outvar)],
                         variable.name = "outcome", 
                         value.name = "outcome_val")

  # control as comparison

  # monitoring vs. control
  dat_out_c <- datm %>% 
    dplyr::group_by(outcome) %>%
    dplyr::do(fit = lm(outcome_val ~ TA1 + TA2 + as.factor(block), weights=ipw, data=.))


  dat_outcome_coef <- lapply(dat_out_c$fit, tidy)
  dat_outcome_coef <- do.call(rbind, dat_outcome_coef )
  dat_outcome_coef$outcome <- rep(dat_out_c$outcome, 19) # number of terms
  

  #one-sided p-value
  p_low <- lapply(dat_out_c$fit, get_plower) 
  p.lower <-  do.call(c,  p_low )
  p.lower <- cbind(  p.lower , term = names(p.lower))
  p.lower <- as.data.frame(p.lower)
  p.lower$outcome <- rep(dat_out_c$outcome, 19) # number of terms
 
  dat_coef <- left_join(dat_outcome_coef, p.lower, by = c("outcome","term"))

  # replace with one-sided p.value for all but B-H comparison
dat_coef$p.value[grep("_bh", dat_coef$outcome, invert = T)] <- as.numeric(paste0(dat_coef$p.lower[grep("_bh", dat_coef$outcome, invert = T)]))

  itt3g_mc <- dat_coef[dat_coef$term == "TA1",] %>% dplyr::select(-p.lower, -term)
  itt3g_mc$df <- unlist(lapply(dat_out_c$fit, function(x) summary(x)$df[2]))

  # punitive vs. control
  itt3g_pc <- dat_coef[dat_coef$term == "TA2",] %>% dplyr::select(-p.lower, -term)
  itt3g_pc$df <- unlist(lapply(dat_out_c$fit, function(x) summary(x)$df[2]))
  
  # monitoring as comparison

  # punitive vs monitoring
  dat_out_m <- datm %>%
    group_by(outcome) %>%
    do(fit = lm(outcome_val ~ TA0 + TA2 + as.factor(block), weights=ipw, data=.))

 
  dat_outcome_coef <- lapply(dat_out_m$fit, tidy)
  dat_outcome_coef <- do.call(rbind, dat_outcome_coef )
  dat_outcome_coef$outcome <- rep(dat_out_m$outcome, 19) # number of terms
  

  #one-sided p-value
  p_low <- lapply(dat_out_m$fit, get_plower) 
  p.lower <-  do.call(c,  p_low )
  p.lower <- cbind(  p.lower , term = names(p.lower))
  p.lower <- as.data.frame(p.lower)
  p.lower$outcome <- rep(dat_out_m$outcome, 19) # number of terms
 
  dat_coef <- left_join(dat_outcome_coef, p.lower, by = c("outcome","term"))


  itt3g_pm <- dat_coef[dat_coef$term == "TA2",] %>% dplyr::select(-term, -p.lower)
  itt3g_pm$df <- unlist(lapply(dat_out_m$fit, function(x) summary(x)$df[2]))

  r_ord <- c(7,2,5,8,3,6,9,1,4)

  out3 <- rbind(itt3g_mc[r_ord,], itt3g_pc[r_ord,], itt3g_pm[r_ord,])
  out3$outcome <- rep(c(outvars.wb.labs[-1], outvars.wh.labs[-1], outvars.bh.labs[-1]), 3)

itt.3g.ci <- as.numeric(out3$estimate) + abs(qt(p=0.025, df=out3$df, lower.tail=TRUE))*cbind(-as.numeric(out3$std.error), as.numeric(out3$std.error))
itt.3g.cir <- apply(itt.3g.ci, 1, function(x) paste0("[", round(x[1],3) ,", ", round(x[2],3), "]", sep=""))

# Join with CI
itt.3g.tab <- cbind(out3[,"outcome"], 
                    out3[,c("estimate", "std.error",  "statistic", "p.value")],
                    itt.3g.cir)

# Round and format results
 itt.3g.tab[,c("estimate", "std.error",  "statistic", "p.value")] <- 
 apply(itt.3g.tab[,c("estimate", "std.error",  "statistic", "p.value")],2, round,3 )
itt.3g.tab[,5] <- paste0("(",itt.3g.tab[,5],")",sep="")

colnames(itt.3g.tab) <- c("Outcome", "Estimate", "SE", "t", "p-value", "95% CI")

kable(itt.3g.tab[1:9,], caption="**Table A10, Panel I. Monitoring vs. Control**")
kable(itt.3g.tab[10:18,], caption="**Table A10, Panel II. Punitive vs. Control**")
kable(itt.3g.tab[19:27,], caption="**Table A10, Panel III. Punitive vs. Monitoring**")
```

```{r, include=FALSE}
write.csv(itt.3g.tab, file="out_tablea10_itt_blockfe_nocovs.csv", row.names=FALSE)

```


```{r, echo=F, eval=T, warning=F, include=FALSE}

##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

start.rows <- seq(1,27,3)
stop.rows <- start.rows + 2
#start.rows; stop.rows

Ylabs <- c("Index measure of favorable in-person interactions",
           "Received post-visit callback",
           "Received post-visit offer")
Ylabs <- rep(Ylabs, 9)

seclabs <- c("I. Monitoring vs. Control", "II. Punitive vs. Control", "III. Punitive vs. Monitoring")
sublabs <- rep(c("A. White vs. Black", "B. White vs. Hispanic", "C. Black vs. Hispanic"), 3)


tab.out2 <- list()
seclabs.index <- 0
for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    tab.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,5)),
                           c(sublabs[i], rep(NA,5)),
                           itt.3g.tab[start.rows[i]:stop.rows[i],])        
  } else {
    tab.out2[[i]] <- rbind(c(sublabs[i], rep(NA,5)), itt.3g.tab[start.rows[i]:stop.rows[i],])    
  }
}

tab.out2 <- do.call(rbind, tab.out2)

row.names(tab.out2) <- NULL
out <- print(xtable(tab.out2, align="llrrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.85\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption[Estimated Effects of Messaging on Net Discrimination Levels. Cells contain ITT estimates from OLS models with inverse probability weights and block fixed effects. For each reference group versus comparison group pairing, outcomes are net discrimination measures against the comparison group relative to the reference group. Es- timated effects that are positive (negative) are interpreted as increases (decreases) in net discrimination against the comparison group relative to the reference group. Estimated p-values are reported in parentheses; p-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control compar- isons, and to a two-sided test of the null hypothesis of equality of means for the punitive-monitoring comparison and for all analyses involving net discrimination against Hispanic (vs. black) testers.}",
         "\\label{ittnocov_3g}",
         out[50])


```


```{r, include=FALSE}
# write in .tex and .csv
cat(out, file="out_tablea10_itt_blockfe_nocovs.tex", include.rownames=FALSE)

```

## Table A11 (p. A-29): Predicted Means, Differences, and Percent Differences from Two-Group Parametric Estimators

```{r tablea11, echo=T, eval=T, warning=F}
# TA: Treatment Assignment -- {Control = 0, Monitoring = 1, Punitive = 2}

ccdat <- data.frame(TA1=0, TA2=0, block=1:17)
mcdat <- data.frame(TA1=1, TA2=0, block=1:17)
pcdat <- data.frame(TA1=0, TA2=1, block=1:17)

cmdat <- data.frame(TA0=0, TA2=0, block=1:17)
pmdat <- data.frame(TA0=0, TA2=1, block=1:17)
cmdat.i2 <- data.frame(TA0=0, TA2=0, block=c(1:4,6:17))
pmdat.i2 <- data.frame(TA0=0, TA2=1, block=c(1:4,6:17))

#results from ITT est w/ block FE (no other covs)
# Results grabbed from main analysis (see first chunk of code)
pred.vals <- list()

for(i in 1:4){
  
  #--------- m-c ---------#
  # wb
  mc.wb <- c(mean(predict(fit.wb.mc[[i]], mcdat)), mean(predict(fit.wb.mc[[i]], ccdat)))
  # wh
  mc.wh <- c(mean(predict(fit.wh.mc[[i]], mcdat)), mean(predict(fit.wh.mc[[i]], ccdat)))
  # bh
  mc.bh <- c(mean(predict(fit.bh.mc[[i]], mcdat)), mean(predict(fit.bh.mc[[i]], ccdat)))
  
  #--------- p-c ---------#
  # wb
  pc.wb <- c(mean(predict(fit.wb.pc[[i]], pcdat)), mean(predict(fit.wb.pc[[i]], ccdat)))
  # wh
  pc.wh <- c(mean(predict(fit.wh.pc[[i]], pcdat)), mean(predict(fit.wh.pc[[i]], ccdat)))
  # bh
  pc.bh <- c(mean(predict(fit.bh.pc[[i]], pcdat)), mean(predict(fit.bh.pc[[i]], ccdat)))
  
  #--------- p-m ---------#
  
  if(i != 2){
    # wb
    pm.wb <- c(mean(predict(fit.wb.pm[[i]], pmdat)), mean(predict(fit.wb.pm[[i]], cmdat)))
    # w
    pm.wh <- c(mean(predict(fit.wh.pm[[i]], pmdat)), mean(predict(fit.wh.pm[[i]], cmdat)))
    # bh
    pm.bh <- c(mean(predict(fit.bh.pm[[i]], pmdat)), mean(predict(fit.bh.pm[[i]], cmdat)))
  } else {
    # wb
    pm.wb <- c(mean(predict(fit.wb.pm[[i]], pmdat.i2)), mean(predict(fit.wb.pm[[i]], cmdat.i2)))
    # w
    pm.wh <- c(mean(predict(fit.wh.pm[[i]], pmdat.i2)), mean(predict(fit.wh.pm[[i]], cmdat.i2)))
    # bh
    pm.bh <- c(mean(predict(fit.bh.pm[[i]], pmdat.i2)), mean(predict(fit.bh.pm[[i]], cmdat.i2)))
    
  }
  
  pred.vals[[i]] <- rbind(mc.wb, mc.wh, mc.bh,
                          pc.wb, pc.wh, pc.bh,
                          pm.wb, pm.wh, pm.bh)
  
}

names(pred.vals) <- c("meet","index","callback","offer")

pred.mat <- NULL

for(i in 1:nrow(pred.vals[[1]])){
  for(j in 2:length(pred.vals)){
    pred.mat <- rbind(pred.mat, pred.vals[[j]][i,])
  }
}

pred.out <- cbind(pred.mat, pred.mat[,1]-pred.mat[,2], (pred.mat[,1]-pred.mat[,2])/abs(pred.mat[,2])*100)
colnames(pred.out) <- c("Predicted Treatment Mean", "Predicted Control Mean", "Difference", "Percent Difference")

for(i in 1:ncol(pred.out)) pred.out[,i] <- round(pred.out[,i], digits=3)

pred.out <- cbind(Outcome=itt.2g.tab[,1], pred.out)

pred.out2 <- list()
seclabs.index <- 0

for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    pred.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,4)),
                           c(sublabs[i], rep(NA,4)),
                           pred.out[start.rows[i]:stop.rows[i],])        
  } else {
    pred.out2[[i]] <- rbind(c(sublabs[i], rep(NA,4)), pred.out[start.rows[i]:stop.rows[i],])    
  }
}

pred.out2 <- do.call(rbind, pred.out2)

kable(pred.out2, caption="**Table A11**")
```

```{r, echo=F, eval=T, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

out <- print(xtable(pred.out2, align="llrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.85\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption{Predicted Means, Difference, and Percent Difference from Two-Group Parametric Estimators. Predicted values calculated using estimates from OLS models with inverse probability weights and block fixed effects.}",
         "\\label{predmeans_2g}",
         out[50])

```

```{r, include = FALSE}
# write in .tex and .csv
cat(out, file="out_tablea11_pred_means_2g.tex", sep="\n", print = F)
```


## Table A12 (p. A-31): Incidence of Early Stage Discrimination: Subjective Measures

```{r tablea12, echo=T, warning=F}

table_a12 <- es_discrim_full_table[-c(1,2,14,15,27,28),]

table_a12_audit   <- es_discrim_full_table[-c(1,2,14,15,27,28),1:6]
table_a12_expsamp <- es_discrim_full_table[-c(1,2,14,15,27,28),c(1, 7:11)]
table_a12_control <- es_discrim_full_table[-c(1,2,14,15,27,28),c(1, 12:16)]

colnames(table_a12_audit)[2:6] <- colnames(table_a12_expsamp)[2:6] <- colnames(table_a12_control)[2:6] <- c("Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]")

kable(table_a12_audit, caption="**Table A12, Panel I. All Pursued Cases in Audit Sample**")
kable(table_a12_expsamp, caption="**Table A12, Panel II. All Pursued Cases in Experimental Sample**")
kable(table_a12_control, caption="**Table A12, Panel III. All Pursued Cases in Control Group**")
```


```{r, include=FALSE}
print(xtable(table_a12, caption=c("Incidence of Early Stage Discrimination: Subjective Measures",
                            "Incidence of Early Stage Discrimination: Subjective Measure")),
      file="out_tablea12_early_stage_discrim_subjective.tex",
      include.rownames=FALSE)

```

## Table A13 (p. A-32): Treatment Noncompliance Incidence

```{r tablea13, echo=T, warning=F}
nc.counts <- table(dat$TA, dat$TD, useNA="ifany")
nc.props  <- table(dat$TA, dat$TD, useNA="ifany")/rowSums(table(dat$TA, dat$TD, useNA="ifany"))

table_a13 <- cbind(nc.counts[,1], nc.props[,1],
              nc.counts[,2], nc.props[,2],
              nc.counts[,3], nc.props[,3],
              rowSums(table(dat$TA, dat$TD, useNA="ifany")))

table_a13[,c(2,4,6)] <- round(table_a13[,c(2,4,6)], digits=2)
table_a13 <- cbind(c("Control","Monitoring","Punitive"), table_a13)
colnames(table_a13) <- c("Assigned Arm",
                        "Control (N)", "Control (Proportion)", 
                        "Monitoring (N)", "Monitoring (Proportion)",
                        "Punitive (N)", "Punitive (Proportion)", "Row Totals")

rownames(table_a13) <- NULL
kable(table_a13, caption="**Table A13**")
```


```{r, include=FALSE}
print(xtable(table_a13), file="out_tablea13_noncompliance_incidence.tex", include.rownames=FALSE)
```

## Table A14 (p. A-34): Estimated Complier Average Causal Effects of Messages on Net Discrimination Levels

```{r tablea14, echo=T, warning=F}

# DEFINE PAIRS OF TREATMENT-COMPARISON DIFFERENCES OF INTEREST
diffs <- rbind(c(1,0),
               c(2,0),
               c(2,1))
colnames(diffs) <- c("treatment","comparison")

# ---------------------------------------------------------------------- #
# Code treatment receipt two ways for P vs C comparison
# Alt Approach A: treat Z=punitive D=monitoring as fail-to-treat
# Alt Approach B: treat Z=punitive D=monitoring as effectively punitive messaging
dat$TD_a <- dat$TD_b <- dat$TD
dat$TD_a <- with(dat, ifelse(TA==2 & TD==1, 0, TD_a))
dat$TD_b <- with(dat, ifelse(TA==2 & TD==1, 2, TD_b))

# ---------------------------------------------------------------------- #
# CACE helper function
# inputs:
#  -- dat: data frame
#  -- Y: name of outcome variable
#  -- D: name of treatment receipt variable
#  -- trt_t: value of treatment arm (e.g., 2 for punitive)
#  -- trt_c: value of comparison arm (e.g., 0 for control)

cace.est <- function(dat, Y, D, trt_t, trt_c){
  sub <- dat[dat$TA %in% c(trt_t, trt_c), ]

  sub$Z <- ifelse(sub$TA==trt_t, 1, 0)
  sub$D <- ifelse(sub[[D]]==trt_t, 1, 0)

  sub$wtvar <- sub[[paste0("ipw",trt_t,trt_c)]]
  
  itt.model <- paste(Y, "~ Z + as.factor(block)")
  ittd.model <- "D ~ Z + as.factor(block)"
  iv.model <- paste(Y, "~ D + as.factor(block) | Z + as.factor(block)")
  
  itt.fit <- lm(formula=itt.model, data=sub, weights=wtvar)
  ittd.fit <- lm(formula=ittd.model, data=sub, weights=wtvar)
  iv.fit <- ivreg(formula=iv.model, data=sub, weights=wtvar)
  
  itt.est <- summary(itt.fit)$coef[2,1] # itt
  ittd.est <- summary(ittd.fit)$coef[2,1] # itt.d
  cace.est <- summary(iv.fit)$coef[2,1] # cace
  
  # p value: use one sided if M/C or P/C (j %in% 1:2), use two sided if P/M (j==3)
  if( trt_c == 0 ){
    cace.pval <- pt(coef(summary(iv.fit))[,3], summary(iv.fit)$df[2], lower=TRUE)[2]
  } else {
    cace.pval <- summary(iv.fit)$coef[2,4]
  }
  
  out <- c(Y, trt_t, trt_c, round(itt.est,3), round(ittd.est,3), round(cace.est,3), round(cace.pval,3))
  names(out) <- c("Y", "treatment", "comparison", "ITT", "ITT_D", "CACE", "p-value")
  return(out)
}

# M vs C - estimate CACE using IV
mc.cace.wb <- rbind(cace.est(dat=dat, Y=outvars.wb[2], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wb[3], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wb[4], D="TD", trt_t=1, trt_c=0))

mc.cace.wh <- rbind(cace.est(dat=dat, Y=outvars.wh[2], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wh[3], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wh[4], D="TD", trt_t=1, trt_c=0))

mc.cace.bh <- rbind(cace.est(dat=dat, Y=outvars.bh[2], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.bh[3], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.bh[4], D="TD", trt_t=1, trt_c=0))

# P vs C (v1) - treat Z=punitive D=monitoring as fail-to-treat
pc1.cace.wb <- rbind(cace.est(dat=dat, Y=outvars.wb[2], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[3], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[4], D="TD_a", trt_t=2, trt_c=0))

pc1.cace.wh <- rbind(cace.est(dat=dat, Y=outvars.wh[2], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[3], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[4], D="TD_a", trt_t=2, trt_c=0))

pc1.cace.bh <- rbind(cace.est(dat=dat, Y=outvars.bh[2], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[3], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[4], D="TD_a", trt_t=2, trt_c=0))

# P vs C (v2) - treat Z=punitive D=monitoring as effectively punitive messaging
pc2.cace.wb <- rbind(cace.est(dat=dat, Y=outvars.wb[2], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[3], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[4], D="TD_b", trt_t=2, trt_c=0))

pc2.cace.wh <- rbind(cace.est(dat=dat, Y=outvars.wh[2], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[3], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[4], D="TD_b", trt_t=2, trt_c=0))

pc2.cace.bh <- rbind(cace.est(dat=dat, Y=outvars.bh[2], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[3], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[4], D="TD_b", trt_t=2, trt_c=0))

# Stitch together results
stitch.cace <- function(wb, wh, bh, wb.labs, wh.labs, bh.labs){
  blanks <- rep(NA, 4)
  out <- rbind(blanks, wb[,4:7],
               blanks, wh[,4:7],
               blanks, bh[,4:7])
  out <- cbind(c("A. White vs. Black", wb.labs[2:4],
                 "B. White vs. Hispanic", wh.labs[2:4],
                 "C. Black vs. Hispanic", bh.labs[2:4]), out)
  out[,1] <- gsub("(White vs. Black)", "", out[,1], fixed=TRUE)
  out[,1] <- gsub("(White vs. Hispanic)", "", out[,1], fixed=TRUE)
  out[,1] <- gsub("(Black vs. Hispanic)", "", out[,1], fixed=TRUE)

  rownames(out) <- NULL
  colnames(out) <- c("Outcome", "ITT", "ITT_D", "CACE", "p-value")
  return(out)
}

cace.mc <- stitch.cace(wb=mc.cace.wb,
                       wh=mc.cace.wh,
                       bh=mc.cace.bh,
                       wb.labs=outvars.wb.labs,
                       wh.labs=outvars.wh.labs,
                       bh.labs=outvars.bh.labs)
cace.pc1 <- stitch.cace(wb=pc1.cace.wb,
                        wh=pc1.cace.wh,
                        bh=pc1.cace.bh,
                        wb.labs=outvars.wb.labs,
                        wh.labs=outvars.wh.labs,
                        bh.labs=outvars.bh.labs)
cace.pc2 <- stitch.cace(wb=pc2.cace.wb,
                        wh=pc2.cace.wh,
                        bh=pc2.cace.bh,
                        wb.labs=outvars.wb.labs,
                        wh.labs=outvars.wh.labs,
                        bh.labs=outvars.bh.labs)

# Assemble final CACE table (same info; two possible formats)
cace.table1 <- rbind(c("I. Monitoring vs. Control", rep(NA, 4)),
                     cace.mc,
                     c("II. Punitive vs. Control (Upper Bound)", rep(NA,4)),
                     cace.pc1,
                     c("III. Punitive vs. Control (Lower Bound)", rep(NA, 4)),
                     cace.pc2)
cace.pc <- list()
keep.rows <- c(1,5,9)
for(i in 1:nrow(cace.pc1)){
  if(i %in% keep.rows){
    cace.pc[[i]] <- cace.pc1[i,]
  } else {
    cace.pc[[i]] <- rbind(c(cace.pc1[i,1], rep(NA, 4)),
                          c("v1",cace.pc1[i,2:5]),
                          c("v2",cace.pc2[i,2:5]))
  }
}
cace.pc <- do.call(rbind, cace.pc)
cace.pc[,1] <- gsub("v1", "Upper bound", cace.pc[,1], fixed=TRUE)
cace.pc[,1] <- gsub("v2", "Lower bound", cace.pc[,1], fixed=TRUE)
cace.table2 <- rbind(c("I. Monitoring vs. Control", rep(NA, 4)),
                     cace.mc,
                     c("II. Punitive vs. Control (Upper and Lower Bounds)", rep(NA, 4)),
                     cace.pc)
kable(cace.table2, caption="**Table A14**")
```

```{r, echo=F, eval=T, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output

# format and export tables
# we create two versions with different formats but with the same information
# out_tablea14_cace_table_format2.tex is the table used in the appendix

cace1 <- print(xtable(cace.table1, align="llrrrr"), include.rownames=FALSE)
cace1 <- unlist(strsplit(cace1, "\n"))

cace1.caption.short <- "Estimated Complier Average Causal Effects of Messages on Net Discrimination Levels for Monitoring-Control and Punitive-Control Comparisons"
cace1.caption <- "\\footnotesize Estimated Complier Average Causal Effects of Messages on Net Discrimination Levels for Monitoring-Control and Punitive-Control Comparisons. Cells contain estimates of the $ITT$, $ITT_D$ (proportion of Compliers), $CACE$, and $p$-values. $ITT$ and $ITT_D$ estimates are from OLS models with inverse probability weights and block fixed effects. $CACE$ estimates are from IV regression models with inverse probability weights and block fixed effects. Estimated $p$-values are reported in parentheses; $p$-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control comparisons, and to a two-sided test of the null hypothesis of equality of means for all analyses involving net discrimination against Hispanic (vs. black) testers. For the punitive-control comparison, the upper bound on the $CACE$ treats all subjects assigned to the punitive condition but who received the monitoring condition as subjects not complying with their treatment assignment. The lower bound on the CACE interprets the receipt of a monitoring message when assigned to punitive as effectively receiving a punitive message."

cace1 <- c(cace1[3:4],
           "\\resizebox{.85\\textwidth}{!}{",
           cace1[5:49],
           "}",
           paste0("\\caption[",cace1.caption.short,"]{",cace1.caption,"}"),
           "\\label{cace1}",
           cace1[50])

cace1 <- gsub("I. Monitoring vs. Control &  &  &  &",
              "\\multicolumn{5}{c}{I. Monitoring vs. Control}",
              cace1, fixed=TRUE)
cace1 <- gsub("II. Punitive vs. Control (Upper Bound) &  &  &  &",
              "\\multicolumn{5}{c}{II. Punitive vs. Control (Upper Bound)}",
              cace1, fixed=TRUE)
cace1 <- gsub("III. Punitive vs. Control (Lower Bound) &  &  &  &",
              "\\multicolumn{5}{c}{III. Punitive vs. Control (Lower Bound)}",
              cace1, fixed=TRUE)

cace1 <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", cace1, fixed=TRUE)
cace1 <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", cace1, fixed=TRUE)
cace1 <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", cace1, fixed=TRUE)
cat(cace1, file="out_tablea14_cace_table_format1.tex", sep="\n")


cace2 <- print(xtable(cace.table2, align="llrrrr"), include.rownames=FALSE)
cace2 <- unlist(strsplit(cace2, "\n"))

cace2.caption.short <- cace1.caption.short
cace2.caption <- cace1.caption

cace2 <- c(cace2[3:4],
           "\\resizebox{.8\\textwidth}{!}{",
           cace2[5],
           cace2[7:21],
           "\\hline\\hline",
           cace2[22:54],
           "}",
           paste0("\\caption[",cace2.caption.short,"]{",cace2.caption,"}"),
           "\\label{cace2}",
           cace2[55])

cace2 <- gsub("Upper bound &", "\\qquad Upper bound &", cace2, fixed=TRUE)
cace2 <- gsub("Lower bound &", "\\qquad Lower bound &", cace2, fixed=TRUE)
cace2 <- gsub("Outcome & ITT & ITT\\_D & CACE & p-value \\\\ ",
              "Outcome & $ITT$ & $ITT_D$ & $CACE$ & $p$-value \\\\ ",
              cace2, fixed=TRUE)

cace2 <- gsub("I. Monitoring vs. Control &  &  &  &", 
              "\\multicolumn{5}{c}{I. Monitoring vs. Control}",
              cace2, fixed=TRUE)
cace2 <- gsub("II. Punitive vs. Control (Upper and Lower Bounds) &  &  &  &", 
              "\\multicolumn{5}{c}{II. Punitive vs. Control (Upper and Lower Bounds)}",
              cace2, fixed=TRUE)

cace2 <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", cace2, fixed=TRUE)
cace2 <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", cace2, fixed=TRUE)
cace2 <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", cace2, fixed=TRUE)
cat(cace2, file="out_tablea14_cace_table_format2.tex", sep="\n")
```

```{r prep_lasso}
Ys <- c("index.wb", "ncb_wb", "noff_wb",
        "index.wh", "ncb_wh", "noff_wh",
        "index.bh", "ncb_bh", "noff_bh")

llrace <- c("primary_api", "primary_blk", "primary_hsp", "primary_wht")
llage <- c("primary_age_18to34", "primary_age_35to44", "primary_age_45to64", "primary_age_65over", "primary_age_unknown")
testerfe <- c("tid.A01", "tid.A10", "tid.A11", "tid.A13", "tid.A02", "tid.A21", "tid.A22", "tid.A03", "tid.A04", "tid.A05",
              "tid.A06", "tid.A07", "tid.A08", "tid.A09", "tid.B01", "tid.B11", "tid.B12", "tid.B14", "tid.B16", "tid.B02", 
              "tid.B20", "tid.B23", "tid.B24", "tid.B25", "tid.B27", "tid.B03", "tid.B04", "tid.B06", "tid.B07", "tid.B08", 
              "tid.B09", "tid.C01", "tid.C10", "tid.C12", "tid.C13", "tid.C14", "tid.C15", "tid.C02", "tid.C27", "tid.C29", 
              "tid.C03", "tid.C31", "tid.C33", "tid.C04", "tid.C05", "tid.C06", "tid.C07", "tid.C08", "tid.C09")
tafe <- testerfe[grepl(".A",testerfe,fixed=TRUE)]
tbfe <- testerfe[grepl(".B",testerfe,fixed=TRUE)]
tcfe <- testerfe[grepl(".C",testerfe,fixed=TRUE)]

Xs <- c("frame", "partnered", "rent + m.rent", "numbr", "sqft + m.sqft","regime1","regime2","regime3",
        "nnumattr_bh", "nnumattr_wh", "nnumattr_wb", "nnumskep_bh", "nnumskep_wh", "nnumskep_wb", 
        "npctskep_bh", "npctskep_wh", "npctskep_wb", "nnumpos_bh" , "nnumpos_wh" , "nnumpos_wb" , 
        "nnumneu_bh" , "nnumneu_wh" , "nnumneu_wb" , "nnumneg_bh" , "nnumneg_wh" , "nnumneg_wb" , 
        "npctpos_bh" , "npctpos_wh" , "npctpos_wb" , "npctneu_bh" , "npctneu_wh" , "npctneu_wb" , 
        "npctneg_bh" , "npctneg_wh" , "npctneg_wb" , "nanyskep_bh", "nanyskep_wh", "nanyskep_wb", 
        "nanyneg_bh" , "nanyneg_wh" , "nanyneg_wb" , "team_gender", "broker",
        "callorder.wb", "callorder.wh", "callorder.bh",
        "incrank.wb.gt", "incrank.wh.gt", "incrank.bh.gt",
        "incrank.wb.eq", "incrank.wh.eq", "incrank.bh.eq",
        "incrank.wb.lt", "incrank.wh.lt", "incrank.bh.lt",
        "boro.brx", "boro.brk", "boro.mnh", "boro.que", "boro.stn",
        "inc.w.hi","inc.b.hi","inc.h.hi","ll.female + m.ll.female",
        llrace, llage, testerfe)

# Define sets of matched pair vars to kick out, by outcome
wb.vars <- Xs[grepl(".wb",Xs,fixed=TRUE) | grepl("_wb",Xs,fixed=TRUE)]
wh.vars <- Xs[grepl(".wh",Xs,fixed=TRUE) | grepl("_wh",Xs,fixed=TRUE)]
bh.vars <- Xs[grepl(".bh",Xs,fixed=TRUE) | grepl("_bh",Xs,fixed=TRUE)]


# Change factor vars to numeric (needed for cross validation function)
dat$frame <- as.numeric(dat$frame) # 1=likely disc, 2=representative (recoded to 0)
dat$frame <- ifelse(dat$frame==2, 0, dat$frame)

dat$team_gender <- as.numeric(dat$team_gender) # 1=female, 2=male (recoded to 0)
dat$team_gender <- ifelse(dat$team_gender==2, 0, dat$team_gender)

# Subset data by treatment arm
sub0 <- dat[dat$TA==0,]
sub1 <- dat[dat$TA==1,]
sub2 <- dat[dat$TA==2,]

```


```{r, eval = run_lasso,  child = "Replicate_lasso.Rmd"}
```


```{r eval=T, echo=F, warning=F}
# Recode frame and team_gender variables back to string (for subsequent analyses)
dat$frame <- ifelse(dat$frame == 0, "representative", "likely disc")
dat$team_gender <- ifelse(dat$team_gender == 0, "male", "female") # previously 0=male, 1=female
```

## Table A16 (p. A-37): Missingness Analysis: Estimated Correlation between Treatment Assignment and Missingness on Subject Index Measure

```{r tablea16, echo=T, eval=T, warning=F}
# Missingness analysis - Part 1: Does treatment predict missingness on index net discrim measure?
# Regression analysis with block FE and IPW

miss.index <- list(wb.10 = lm(is.na(index.wb) ~ TA1 + as.factor(block), data=dat[dat$TA %in% c(0,1),], weights=ipw10),
                   wb.20 = lm(is.na(index.wb) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(0,2),], weights=ipw20),
                   wb.21 = lm(is.na(index.wb) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(1,2),], weights=ipw21),
                   wh.10 = lm(is.na(index.wh) ~ TA1 + as.factor(block), data=dat[dat$TA %in% c(0,1),], weights=ipw10),
                   wh.20 = lm(is.na(index.wh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(0,2),], weights=ipw20),
                   wh.21 = lm(is.na(index.wh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(1,2),], weights=ipw21),
                   bh.10 = lm(is.na(index.bh) ~ TA1 + as.factor(block), data=dat[dat$TA %in% c(0,1),], weights=ipw10),
                   bh.20 = lm(is.na(index.bh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(0,2),], weights=ipw20),
                   bh.21 = lm(is.na(index.bh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(1,2),], weights=ipw21))

miss.index.out <- cbind(do.call(rbind, lapply(miss.index, function(x) summary(x)$coefficient[2,])), # estimates
                        unlist(lapply(miss.index, function(x) summary(x)$fstatistic[1])), # f test, f statistic
                        unlist(lapply(miss.index, lmp))) # f-test p value

miss.index.out <- apply(miss.index.out, 2, function(x) round(x, 3))

miss.index.out <- cbind(rep(c("Monitoring vs. Control", "Punitive vs. Control", "Punitive vs. Monitoring"), 3),
                        miss.index.out)
colnames(miss.index.out) <- c("Comparison", "Estimate", "SE", "t", "p-value", "F-statistic", "F-test p-value")
rownames(miss.index.out) <- NULL

out.miss <- rbind(c("A. White vs. Black", rep(NA, 6)),
                  miss.index.out[1:3,],
                  c("B. White vs. Hispanic", rep(NA, 6)),
                  miss.index.out[4:6,],
                  c("C. Black vs. Hispanic", rep(NA, 6)),
                  miss.index.out[7:9,])

kable(out.miss, caption="**Table A16**")
```

```{r, eval=T, echo=F, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# format and export tex 
out.miss <- print(xtable(out.miss, align="llrrrrrr"), include.rownames=FALSE)
out.miss <- unlist(strsplit(out.miss, "\n"))

out.miss <- c(out.miss[3:4],
         "\\resizebox{.9\\textwidth}{!}{",
         out.miss[5:22],
         "}",
         "\\caption[Predicting missingness on subjective index as a function of treatment assignment]{The estimated correlation between treatment assignment and missingness on the subjective index measure of net discrimination in interactions during appointments, estimated from OLS models regressing missingness on treatment assignment and block fixed effects with inverse probability weighting. The F-statistic and F-test p-value tests the null hypothesis that all coefficients equal zero.}",
         "\\label{predmissindex}",
         out.miss[23])

```

```{r, include = FALSE}
cat(out.miss, file="out_tablea16_pred_missing_net_index.tex", sep="\n")
```

## Table A17 (p. A-37): Missingness Analysis: Pairwise Correlations between Missing Subjective Index Measures and Objective Net Discrimination Measures

```{r, eval=T, echo=T, warning=F}
# Part 2: Is missingness correlated with offers or callbacks?

miss.cor <- cor(cbind(dat[,c("ncb_wb", "noff_wb", 
                             "ncb_wh", "noff_wh",
                             "ncb_bh", "noff_bh" )],
                      is.na(dat$index.wb), is.na(dat$index.wh), is.na(dat$index.bh)))

miss.cor <- miss.cor[7:9,1:6]

miss.cor <- apply(miss.cor, 2, function(x) round(x,2))
rownames(miss.cor) <- paste("Missing Index Measure", c("W-B", "W-H", "B-H"), sep=", ")
miss.cor <- rbind(rep(c("Callbacks", "Offers"), 3), miss.cor)
colnames(miss.cor) <- c("W-B", "W-B", "W-H", "W-H", "B-H", "B-H") # label for tex output

# output for Rmd file only (omit first row of tex table and relabel column names)
kable(miss.cor[-1,], col.names = c("W-B Callbacks", "W-B Offers", "W-H Callbacks", "W-H Offers", "B-H Callbacks", "B-H Offers"), caption="**Table A17**")
```

```{r, eval=T, echo=F, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# format and export tex 
miss.cor <- print(xtable(miss.cor, align="l|cc|cc|cc"))
miss.cor <- unlist(strsplit(miss.cor, "\n"))

miss.cor <- c(miss.cor[3:4],
              "\\resizebox{1\\textwidth}{!}{",
              miss.cor[5:6],
              " \\cline{2-7}",
              " & \\multicolumn{2}{c|}{White vs. Black} &   \\multicolumn{2}{c|}{White vs. Hispanic} &  \\multicolumn{2}{c}{Black vs. Hispanic}  \\\\",
              miss.cor[9],
              "\\hline",
              miss.cor[10:14],
              "}",
              "\\caption[Pairwise correlations between missing subjective net discrimination index measures and objective net discrimination measures]{Pairwise correlations between missing subjective net discrimination index measures and objective net discrimination measures.}",
              "\\label{pwmisscorr}",
              miss.cor[15])

cat(miss.cor, file="out_tablea17_pwmisscorr.tex", sep="\n")
```


## Table A18 (p. A-37): Missingness Analysis: Predicting Missingness on Subjective Indicators As a Function of Tester Race

```{r tablea18, echo=T, eval=T, warning=F}
# Part 3: Missingness as a function of tester race/ethnicity

miss.Ys <- c("sales", "qualpraise", "posbg", "posedit", "prof")
coefs <- fstat <- fp <- list()
for(i in 1:length(miss.Ys)){
  fmla <- paste0("is.na(",miss.Ys[i],") ~ ttype")
  fit <- lm(formula=fmla, data=ctx)
  coefs[[i]] <- apply(summary(fit)$coefficients, 2, function(x) round(x,3))
  fstat[[i]] <- summary(fit)$fstatistic[1]
  fp[[i]] <- lmp(fit)
}

# for subjective measures used to make index - results same across items (keep element #3)
miss3 <- rbind(cbind(rownames(coefs[[1]]), coefs[[1]]),
               c("F-statistic:", round(fstat[[i]], 3), rep(NA, 3)),
               c("F test p-value:", round(fp[[1]], 3), rep(NA, 3)))
miss3[,1] <- gsub("ttypeB", "Hispanic tester", miss3[,1], fixed=TRUE)
miss3[,1] <- gsub("ttypeC", "White tester", miss3[,1], fixed=TRUE)
rownames(miss3) <- NULL
colnames(miss3) <- c("Variable", "Estimate", "SE", "t", "p-value")

kable(miss3, caption="**Table A18**")
```

```{r, eval=T, echo=F, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# format and export tex 
miss3 <- print(xtable(miss3, align="llcccc"), include.rownames=FALSE)
miss3 <- unlist(strsplit(miss3, "\n"))

miss3 <- c(miss3[3:4],
              miss3[5:11],
              "\\hline",
              miss3[12:15],
              "\\caption[Predicting missingness on subjective indicators of favorable treatment during appointments as a function of tester race]{The table presents OLS estimates from a regression predicting missingness on subjective indicators of favorable treatment during appointments as a function of tester race. The reference racial group is black. The F-test results reported at the bottom of the table are for a test of the null hypothesis that all coefficients equal zero.}",
              "\\label{predmisstesterrace}",
              miss3[16])

cat(miss3, file="out_tablea18_pred_missing_tester_race.tex", sep="\n")
```

## Table A19 (p. A-38): Balance Table for Categorical Covariates

```{r balanceprep, echo=F, eval=T, warning=F, include=FALSE}
## CODE TO PREP DATA TO PRODUCE BALANCE TABLES

#-----------------------------------------------------------------------------#
# Split covariates into numeric vs factor variables
dat$o_rent <- ifelse(dat$rent == -9, NA, dat$rent)
dat$o_sqft <- ifelse(dat$sqft == -9, NA, dat$sqft)
Xs.num <- c("numbr", "o_rent", "o_sqft", "regime1", "regime2", "regime3", 
            "nnumattr_bh", "nnumattr_wh", "nnumattr_wb", "nnumskep_bh", "nnumskep_wh", "nnumskep_wb", 
            "npctskep_bh", "npctskep_wh", "npctskep_wb", "nnumpos_bh" , "nnumpos_wh" , "nnumpos_wb" , 
            "nnumneu_bh" , "nnumneu_wh" , "nnumneu_wb" , "nnumneg_bh" , "nnumneg_wh" , "nnumneg_wb" , 
            "npctpos_bh" , "npctpos_wh" , "npctpos_wb" , "npctneu_bh" , "npctneu_wh" , "npctneu_wb" , 
            "npctneg_bh" , "npctneg_wh" , "npctneg_wb" , "nanyskep_bh", "nanyskep_wh", "nanyskep_wb", 
            "nanyneg_bh" , "nanyneg_wh" , "nanyneg_wb" ,
            "broker", "callorder.wb", "callorder.wh", "callorder.bh", 
            "incrank.wb.gt", "incrank.wh.gt", "incrank.bh.gt",
            "incrank.wb.eq", "incrank.wh.eq", "incrank.bh.eq",
            "incrank.wb.lt", "incrank.wh.lt", "incrank.bh.lt",
            "boro.brx", "boro.brk", "boro.mnh", "boro.que", "boro.stn", 
            "inc.w.hi", "inc.b.hi", "inc.h.hi",
            llrace, llage, testerfe )
Xs.cat <- Xs[!(Xs %in% Xs.num)]
Xs.cat <- Xs.cat[!grepl("+", Xs.cat, fixed=TRUE)]
```

```{r tablea19, echo=T, eval=T, warning=F}
# CREATE BALANCE TABLES (Both categorical and continuous variables)
# unweighted, all arms
bt_unwtd = balanceTables(d=dat, z="TA", xcon=Xs.num, xcat=Xs.cat)
# weighted, all arms
bt_wtd   = balanceTables(d=dat, z="TA", xcon=Xs.num, xcat=Xs.cat, weight="ipw")

# BALANCE TABLES: CATEGORICAL VARIABLES ONLY
bt_xcat = cbind(bt_unwtd$bt_cat[,1:4],
                bt_wtd$bt_cat[,2:4],
                bt_unwtd$bt_cat[,5:7],
                bt_wtd$bt_cat[,5:7],
                bt_unwtd$bt_cat[,8:10],
                bt_wtd$bt_cat[,8:10])
bt_xcat[,1] = c("Frame", "Likely discrimination", "Representative", "Household size", "1", "2", "Tester team gender", "Female", "Male")

kable(bt_xcat[,c(1:7)], 
      caption="**Table A19, Left Panel: Control Group**",
      col.names=c("Variable", "Unweighted Proportion", "Unweighted SE", "Unweighted N",
                  "Weighted Proportion", "Weighted SE", "Weighted N"))

kable(bt_xcat[,c(1, 8:13)], 
      caption="**Table A19, Middle Panel: Monitoring Group**",
      col.names=c("Variable", "Unweighted Proportion", "Unweighted SE", "Unweighted N",
                  "Weighted Proportion", "Weighted SE", "Weighted N"))

kable(bt_xcat[,c(1, 14:19)], 
      caption="**Table A19, Right Panel: Punitive Group**",
      col.names=c("Variable", "Unweighted Proportion", "Unweighted SE", "Unweighted N",
                  "Weighted Proportion", "Weighted SE", "Weighted N"))
```

```{r, eval=T, echo=F, warning=F,include = FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output

# format and output to tex
bt_xcat = print(xtable(bt_xcat, align="llccc|ccc|ccc|ccc|ccc|ccc"), include.rownames=FALSE)
bt_xcat = unlist(strsplit(bt_xcat, "\n"))

bt_xcat = c(bt_xcat[3:4],
            "\\resizebox{1.3\\textwidth}{!}{",
            bt_xcat[5:6],
            "& \\multicolumn{6}{c}{Control} & \\multicolumn{6}{c}{Monitoring} & \\multicolumn{6}{c}{Punitive} \\\\ ",
            "& \\multicolumn{3}{c}{Unweighted} & \\multicolumn{3}{c}{Weighted} & \\multicolumn{3}{c}{Unweighted} & \\multicolumn{3}{c}{Weighted} & \\multicolumn{3}{c}{Unweighted} & \\multicolumn{3}{c}{Weighted} \\\\",
            "& Pct & SE & N & Pct & SE & N & Pct & SE & N & Pct & SE & N & Pct & SE & N & Pct & SE & N \\\\ ",
            bt_xcat[8:19],
            "}",
            "\\caption[Balance Table, Categorical Covariates]{Balance Table for Categorical Covariates. Cells contain proportions, standard errors, and counts.}",
            "\\label{balance_cat}",
            bt_xcat[20])


bt_xcat = gsub("Frame &", "\\textbf{Frame} &", bt_xcat, fixed=TRUE)
bt_xcat = gsub("Household size &", "\\textbf{Household size} &", bt_xcat, fixed=TRUE)
bt_xcat = gsub("Tester team gender &", "\\textbf{Tester team gender} &", bt_xcat, fixed=TRUE)

cat(bt_xcat, file="out_tablea19_balance_table_categoricalvars.tex", sep="\n")
```

## Table A20 (p. A-38 to A-44): Balance Table for Continuous Covariates

```{r tablea20, eval=T, echo=T, warning=F}
bt_xcon = cbind(bt_unwtd$bt_con[,1:2],
                bt_wtd$bt_con[,2],
                bt_unwtd$bt_con[,3],
                bt_wtd$bt_con[,3],
                bt_unwtd$bt_con[,4],
                bt_wtd$bt_con[,4])

# reorder table output by covariate group
xcon_groups = list(Apartment.Characteristics = c(1:3, 53:57),
                   Randomization.Regime = 4:6,
                   Early.Stage.Discrimination = 7:39,
                   Subject.Characteristics = c(40, 61:69),
                   Tester.Call.Order = 41:43,
                   Testers.Assumed.Income = c(44:52, 58:60),
                   Tester.Fixed.Effects = 70:118)

con_tab = list()
for(i in 1:length(xcon_groups)){
  vals = xcon_groups[[i]]
  temp = list()
  for(j in 1:length(vals)){
    temp[[j]] = bt_xcon[c(3*vals[j]-2, 3*vals[j]-1, 3*vals[j]),]
  }
  temp = do.call(rbind, temp)
  temp = rbind(c(gsub(".", " ", names(xcon_groups)[i], fixed=TRUE), rep(NA, ncol(temp)-1)), temp)
  names(temp)[3:7] = c("Z=0 wt", "Z=1", "Z=1 wt", "Z=2", "Z=2 wt")
  con_tab[[i]] = temp
}
con_tab = do.call(rbind, con_tab)
rownames(con_tab) = NULL
colnames(con_tab) <-  c("Variable", "Control-Unweighted", "Control-Weighted", "Monitoring-Unweighted","Monitoring-Weighted", "Punitive-Unweighted", "Punitive-Weighted")

con_tab_labs = Xs.num
con_tab_labs = gsub("tid.", "Tester ID ", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("numbr", "Advertised number of bedrooms", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("o_rent", "Advertised monthly rental price", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("o_sqft", "Advertised square footage", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("regime", "Regime ", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.brx", "Borough: Bronx", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.brk", "Borough: Brooklyn", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.mnh", "Borough: Manhattan", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.que", "Borough: Queens", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.stn", "Borough: Staten Island", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wb.gt", "Assumed Tester Incomes: White > Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wh.gt", "Assumed Tester Incomes: White > Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.bh.gt", "Assumed Tester Incomes: Black > Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wb.eq", "Assumed Tester Incomes: White = Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wh.eq", "Assumed Tester Incomes: White = Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.bh.eq", "Assumed Tester Incomes: Black = Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wb.lt", "Assumed Tester Incomes: White < Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wh.lt", "Assumed Tester Incomes: White < Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.bh.lt", "Assumed Tester Incomes: Black < Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("inc.w.hi", "Assumed Tester Incomes: White Highest", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("inc.b.hi", "Assumed Tester Incomes: Black Highest", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("inc.h.hi", "Assumed Tester Incomes: Hispanic Highest", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_api", "Modal Perception of Landlord Race among Testers: Asian", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_blk", "Modal Perception of Landlord Race among Testers: Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_hsp", "Modal Perception of Landlord Race among Testers: Hispanic/Latino", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_wht", "Modal Perception of Landlord Race among Testers: White", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_18to34", "Modal Perception of Landlord Age among Testers: 18 to 34", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_35to44", "Modal Perception of Landlord Age among Testers: 35 to 44", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_45to64", "Modal Perception of Landlord Age among Testers: 45 to 64", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_65over", "Modal Perception of Landlord Age among Testers: 65 and up", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_unknown", "Modal Perception of Landlord Age among Testers: Unknown/No Consensus", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("callorder.wb", "Randomized Tester Call Order: White before Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("callorder.wh", "Randomized Tester Call Order: White before Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("callorder.bh", "Randomized Tester Call Order: Black before Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("broker", "Subject is a Broker", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("_wb", ": White-Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("_wh", ": White-Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("_bh", ": Black-Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumattr", "Net Diff. in Num. Attributes Inquired About over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumskep", "Net Diff. in Num. Attributes Eliciting Skeptical Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctskep", "Net Diff. in Pct. of Attributes Raised Eliciting Skeptical Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumpos", "Net Diff. in Num. Attributes Eliciting Positive Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumneu", "Net Diff. in Num. Attributes Eliciting Neutral Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumneg", "Net Diff. in Num. Attributes Eliciting Negative Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctpos", "Net Diff. in Pct. of Attributes Raised Eliciting Positive Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctneu", "Net Diff. in Pct. of Attributes Raised Eliciting Neutral Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctneg", "Net Diff. in Pct. of Attributes Raised Eliciting Negative Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nanyskep", "Net Diff. in Receiving Any Skeptical Reaction to Attributes over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nanyneg", "Net Diff. in Receiving Any Negative Reaction to Attributes over Phone", con_tab_labs, fixed=TRUE)

for(i in 1:length(Xs.num)){
  indpos=which(con_tab[,1]==Xs.num[i])
  con_tab[indpos,1] = con_tab_labs[i]
}

# produce version to display w/ kable() -- remove all rows w/ all NAs that are included for tex formatting
con_tab_rmd <- con_tab[apply(con_tab, 1, function(J) sum(is.na(J)) != ncol(con_tab) ),]
rownames(con_tab_rmd) <- NULL
kable(con_tab_rmd, caption="**Table A20**")
```


```{r, eval=T, echo=F, warning=F, include = FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output

# format and export tex
con_tab_head = c("\\begin{center}",
                 "\\begin{longtable}{lcccccc}",
                 "\\caption[Balance Table, Continuous Covariates]{Balance Table for Continuous Covariates. Cells contain means and standard invisble(dev.off())iations in parentheses.} \\label{balance} \\\\",
                 "\\hline ",
                 " & \\multicolumn{2}{c}{Control} & \\multicolumn{2}{c}{Monitoring} & \\multicolumn{2}{c}{Punitive} \\\\",
                 "Variable & Unweighted & Weighted & Unweighted & Weighted & Unweighted & Weighted \\\\",
                 "\\hline ",
                 "\\endfirsthead",
                 "\\hline ",
                 " & \\multicolumn{2}{c}{Control} & \\multicolumn{2}{c}{Monitoring} & \\multicolumn{2}{c}{Punitive} \\\\",
                 "Variable & Unweighted & Weighted & Unweighted & Weighted & Unweighted & Weighted \\\\",
                 "\\hline ",
                 "\\endhead",
                 "\\hline \\multicolumn{7}{r}{(continued)} ",
                 "\\endfoot",
                 "\\hline \\hline",
                 "\\endlastfoot")

con_tab_end = c("\\end{longtable}", "\\end{center}")

con_tab_out = print(xtable(con_tab), include.rownames=FALSE)
con_tab_out = unlist(strsplit(con_tab_out, "\n"))

con_tab_out = gsub("Apartment Characteristics &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{I. Apartment Characteristics}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Randomization Regime &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{II. Randomization Regime}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Early Stage Discrimination &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{III. Early Stage Discrimination}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Subject Characteristics &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{IV. Subject (Landlord/Broker) Characteristics}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Tester Call Order &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{V. Tester Call Order}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Testers Assumed Income &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{VI. Testers' Assumed Income}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Tester Fixed Effects &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{VII. Tester Fixed Effects}",
                   con_tab_out, fixed=TRUE)

con_tab_out = c(con_tab_head,
                con_tab_out[9:369],
                con_tab_end)
cat(con_tab_out, file="out_tablea20_balance_table_continuousvars.tex", sep="\n")
```

## Table A21 (p. A-46): Sensitivity Analysis: Estimated Effects of Messaging on Net Discrimination Levels Among Subsample Excluding Likely Discrimination Cases


```{r tablea21, echo=T, warning=F}
col.labels <- c("Outcome", "Estimate","SE","t","P") 

outvars <- c("nmeet_", "index.", "ncb_","noff_" )
# ---------------------------------------------------------------------- #
# MODELS WITH ONLY BLOCK FIXED EFFECTS

reg_and_summ <- function(outvar, compare) {  
  depvar <- paste0(outvar, compare)
  
  model.mc <- paste(depvar," ~ TA1 + as.factor(block)", sep = "")
  model.pc <- paste(depvar," ~ TA2 + as.factor(block)", sep = "")
  model.pm <- paste(depvar," ~ TA2 + as.factor(block)", sep = "")
  
  fit.mc <- lm(formula = model.mc, data=dat[dat$TA %in% c(0,1) & dat$frame == "representative",], weights=ipw10)
  fit.pc <- lm(formula = model.pc, data=dat[dat$TA %in% c(0,2) & dat$frame == "representative",], weights=ipw20)
  fit.pm <- lm(formula = model.pm, data=dat[dat$TA %in% c(1,2) & dat$frame == "representative",], weights=ipw21)
  
  itt.mc <- summary(fit.mc)$coefficients[2,]
  itt.pc <- summary(fit.pc)$coefficients[2,]
  itt.pm <- summary(fit.pm)$coefficients[2,]
  
  if(compare == "wb" | compare == "wh"){
  itt.mc[4] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.pc[4] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
  }

  list(mc     = itt.mc, pc     = itt.pc, pm     = itt.pm,
         fit.mc = fit.mc, fit.pc = fit.pc, fit.pm = fit.pm )
  
}
itt.wb <- lapply(outvars, reg_and_summ,  compare = "wb" )
itt.wb.mc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$mc)
itt.wb.pc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$pc)
itt.wb.pm <- sapply(1:length(outvars), function(i) itt.wb[[i]]$pm)
fit.wb.mc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$fit.mc)
fit.wb.pc <- sapply(1:length(outvars), function(i) itt.wb[[i]]$fit.pc)
fit.wb.pm <- sapply(1:length(outvars), function(i) itt.wb[[i]]$fit.pm)

itt.wh <- lapply(outvars, reg_and_summ,  compare = "wh" )
itt.wh.mc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$mc)
itt.wh.pc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$pc)
itt.wh.pm <- sapply(1:length(outvars), function(i) itt.wh[[i]]$pm)
fit.wh.mc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$fit.mc)
fit.wh.pc <- sapply(1:length(outvars), function(i) itt.wh[[i]]$fit.pc)
fit.wh.pm <- sapply(1:length(outvars), function(i) itt.wh[[i]]$fit.pm)

itt.bh <- lapply(outvars, reg_and_summ,  compare = "bh" )
itt.bh.mc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$mc)
itt.bh.pc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$pc)
itt.bh.pm <- sapply(1:length(outvars), function(i) itt.bh[[i]]$pm)
fit.bh.mc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$fit.mc)
fit.bh.pc <- sapply(1:length(outvars), function(i) itt.bh[[i]]$fit.pc)
fit.bh.pm <- sapply(1:length(outvars), function(i) itt.bh[[i]]$fit.pm)


#----- CIs -----#
df.w_ <- sapply(list(df.wb.mc = fit.wb.mc, 
                     df.wb.pc = fit.wb.pc, 
                     df.wb.pm = fit.wb.pm,
                     df.wh.mc = fit.wh.mc,
                     df.wh.pc = fit.wh.pc, 
                     df.wh.pm = fit.wh.pm,
                     df.bh.mc = fit.bh.mc,
                     df.bh.pc = fit.bh.pc,
                     df.bh.pm = fit.bh.pm),
                function(fit){
                        unlist(lapply(fit, function(x) summary(x)$df[2]))[2:4]})

df.2g.10 <-  c(df.w_[,c("df.wb.mc", "df.wh.mc", "df.bh.mc")])
df.2g.20 <-  c(df.w_[,c("df.wb.pc", "df.wh.pc", "df.bh.pc")])
df.2g.21 <-  c(df.w_[,c("df.wb.pm", "df.wh.pm", "df.bh.pm")])

cv.2g.10 <- qt(p=0.025, df=df.2g.10, lower.tail=TRUE)
cv.2g.20 <- qt(p=0.025, df=df.2g.20, lower.tail=TRUE)
cv.2g.21 <- qt(p=0.025, df=df.2g.21, lower.tail=TRUE)

cv.2g <- c(cv.2g.10, cv.2g.20, cv.2g.21)
itt.2g.ci <- as.numeric(out2[,2]) + abs(cv.2g)*cbind(-as.numeric(out2[,3]), as.numeric(out2[,3]))


```

```{r, include=FALSE}


# Add outcome measure labels
itt.wb.mc <- cbind(outvars.wb.labs, t(itt.wb.mc))
itt.wb.pc <- cbind(outvars.wb.labs, t(itt.wb.pc))
itt.wb.pm <- cbind(outvars.wb.labs, t(itt.wb.pm))

itt.wh.mc <- cbind(outvars.wh.labs, t(itt.wh.mc))
itt.wh.pc <- cbind(outvars.wh.labs, t(itt.wh.pc))
itt.wh.pm <- cbind(outvars.wh.labs, t(itt.wh.pm))

itt.bh.mc <- cbind(outvars.bh.labs, t(itt.bh.mc))
itt.bh.pc <- cbind(outvars.bh.labs, t(itt.bh.pc))
itt.bh.pm <- cbind(outvars.bh.labs, t(itt.bh.pm))

# Add column names
colnames(itt.wb.mc) <- colnames(itt.wb.pc) <- colnames(itt.wb.pm) <- col.labels
colnames(itt.wh.mc) <- colnames(itt.wh.pc) <- colnames(itt.wh.pm) <- col.labels
colnames(itt.bh.mc) <- colnames(itt.bh.pc) <- colnames(itt.bh.pm) <- col.labels

```

```{r, include = TRUE}

# Stack, create output table
out2 <- rbind(itt.wb.mc, itt.wh.mc, itt.bh.mc,
              itt.wb.pc, itt.wh.pc, itt.bh.pc,
              itt.wb.pm, itt.wh.pm, itt.bh.pm)

out2 <- out2[-seq(1,33,4),]


# Assemble vector of 95% CIs
itt.2g.ci <- as.numeric(out2[,2]) + abs(cv.2g)*cbind(-as.numeric(out2[,3]), as.numeric(out2[,3]))
table_a21_cis <- apply(itt.2g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep=""))

# Round and format results
for(i in 2:ncol(out2)) out2[,i] <- round(as.numeric(out2[,i]),3)
for(i in 5) out2[,i] <- paste("(",out2[,i],")",sep="")

# Attach CIs
table_a21 <- cbind(out2, table_a21_cis)
# Clean up column names
colnames(table_a21) <- c("Outcome", "Estimate", "SE", "t", "p-value", "95% CI")

# display table in Rmd output file
kable(table_a21[1:9,], caption="**Table A21, Panel I. Monitoring vs. Control**")
kable(table_a21[10:18,], caption="**Table A21, Panel II. Punitive vs. Control**")
kable(table_a21[19:27,], caption="**Table A21, Panel III. Punitive vs. Monitoring**")
```
```{r, include=FALSE}
# Save unformatted table
write.csv(table_a21, "out_tablea21_SA_ExcludeLD_itt_2g_blockfe_nocovs_UNFORMATTED.csv", row.names=FALSE)
```

```{r echo=F, eval=T, warning=F, include = FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table

start.rows <- seq(1,27,3)
stop.rows <- start.rows + 2
#start.rows; stop.rows


Ylabs <- c("Index measure of favorable in-person interactions",
           "Received post-visit callback",
           "Received post-visit offer")
Ylabs <- rep(Ylabs, 9)
#Ylabs

seclabs <- c("I. Monitoring vs. Control", "II. Punitive vs. Control", "III. Punitive vs. Monitoring")
sublabs <- rep(c("A. White vs. Black", "B. White vs. Hispanic", "C. Black vs. Hispanic"), 3)


tab.out2 <- list()
seclabs.index <- 0
for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    tab.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,5)),
                           c(sublabs[i], rep(NA,5)),
                           table_a21[start.rows[i]:stop.rows[i],])        
  } else {
    tab.out2[[i]] <- rbind(c(sublabs[i], rep(NA,5)), table_a21[start.rows[i]:stop.rows[i],])    
  }
}

tab.out2 <- do.call(rbind, tab.out2)

out <- print(xtable(tab.out2, align="llrrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.8\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption[Estimated Effects of Messaging on Net Discrimination Levels Among Subsample Excluding Likely Discrimination Cases]{Estimated Effects of Messaging on Net Discrimination Levels Among Subsample Excluding Likely Discrimination Cases. Cells contain ITT estimates from OLS models with inverse probability weights and block fixed effects. For each reference group versus comparison group pairing, outcomes are net discrimination measures against the comparison group relative to the reference group. Estimated effects that are positive (negative) are interpreted as increases (decreases) in net discrimination against the comparison group relative to the reference group. Estimated p-values are reported in parentheses; p-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control comparisons, and to a two-sided test of the null hypothesis of equality of means for the punitive-monitoring comparison and for all analyses involving net discrimination against Hispanic (vs. black) testers.}",
         "\\label{ittnocov}",
         out[50])

cat(out, file="out_tablea21_SA_ExcludeLD_itt_2g_blockfe_nocovs.tex", sep="\n")
write.csv(tab.out2, file="out_tablea21_SA_ExcludeLD_itt_2g_blockfe_nocovs.csv", row.names=FALSE)
```

## Table A22 (p. A-47): Distribution of Subjects by their Perceived Race

```{r tablea22, echo=T, warning=F}

dat$subj_race <- rep(NA, nrow(dat))
dat$subj_race <- ifelse(dat$primary_wht == 1 & dat$primary_blk != 1 & dat$primary_api != 1 & dat$primary_hsp != 1, "White", dat$subj_race)
dat$subj_race <- ifelse(dat$primary_wht != 1 & dat$primary_blk == 1 & dat$primary_api != 1 & dat$primary_hsp != 1, "Black", dat$subj_race)
dat$subj_race <- ifelse(dat$primary_wht != 1 & dat$primary_blk != 1 & dat$primary_api != 1 & dat$primary_hsp == 1, "Hispanic", dat$subj_race)
dat$subj_race <- ifelse(is.na(dat$subj_race), "Other", dat$subj_race)

tab_pcvdrace <- cbind(table(dat$subj_race), round(table(dat$subj_race)/nrow(dat), 2))
colnames(tab_pcvdrace) <- c("N", "Proportion")
pcvrace <- tab_pcvdrace
kable(tab_pcvdrace, caption="**Table A22**", col.names=c("Number of Subjects", "Proportion"))
```

```{r echo=F, eval=T, warning=F, include=FALSE}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# export results to tex
tab_pcvdrace <- print(xtable(tab_pcvdrace), include.rownames=TRUE)
tab_pcvdrace <- unlist(strsplit(tab_pcvdrace, "\n"))

tab_pcvdrace <- c(tab_pcvdrace[1:4],
                  "\\caption{Distribution of Subjects by their Perceived Race. A subject is classified as Black, Hispanic, or White if at least two testers in a matched trio perceive them to belong to that racial group. All other subjects are classified in the Other category.}\\label{pcvrace}",
                  "\\begin{tabular}{lrr}",
                  tab_pcvdrace[6],
                  "Subject's Perceived Race & Number of Subjects & Proportion \\\\",
                  tab_pcvdrace[8:15])

cat(tab_pcvdrace, file="out_tablea22_perceived_race_of_subjects.tex", sep="\n")
```

## Figure A6 (p. A-48): Estimated Effects of Monitoring Messaging on Net Discrimination Levels Relative to Control, by the Perceived Race of the Landlord/Broker

```{r figurea6, echo=T, warning=F, fig.width=11, fig.height=7}
# monitoring-control
fit_mc_llrace <- do.call(rbind, 
                         lapply(names(table(dat$subj_race)), function(J){
                           x<-rbind(summary(lm(ncb_wb ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",],
                                    summary(lm(ncb_wh ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",],
                                    summary(lm(noff_wb ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",],
                                    summary(lm(noff_wh ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",]
                           )
                           x<-data.frame(Pair = c("Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Hispanics (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Hispanics (vs. Whites)"),
                                         Outcome = c("Callback", "Callback", "Offer", "Offer"),
                                         SubjectRace = J,
                                         x,
                                         stringsAsFactors=FALSE)
                           return(x)
                         }))
# table(dat$subj_race[dat$TA %in% c(0,1)])
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "Black", "Black\n(n=48)", fit_mc_llrace$SubjectRace)
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "Hispanic", "Hispanic\n(n=61)", fit_mc_llrace$SubjectRace)
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "Other", "Other\n(n=115)", fit_mc_llrace$SubjectRace)
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "White", "White\n(n=229)", fit_mc_llrace$SubjectRace)

fit_mc_llrace$SubjectRace <- factor(fit_mc_llrace$SubjectRace, levels=c("White\n(n=229)", "Black\n(n=48)", "Hispanic\n(n=61)", "Other\n(n=115)"))

ggplot(fit_mc_llrace, aes(group=SubjectRace, colour=SubjectRace)) + 
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.96*Std..Error, ymax=Estimate+1.96*Std..Error), lwd=1, position=position_dodge(width=1/2)) +
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.64*Std..Error, ymax=Estimate+1.64*Std..Error), lwd=2, position=position_dodge(width=1/2)) +
  geom_point(aes(x=Outcome, y=Estimate), size=3.5, position=position_dodge(width=1/2)) + 
  scale_y_continuous(limits = c(-.5, .5), breaks=seq(-.5, .5, .1)) +
  xlab("\nNet Discrimination Outcome") +
  ylab("\nEstimated Effect of Sending\nMonitoring Message (vs. Control)\n") +
  geom_hline(yintercept=0, lty=2) + facet_wrap( ~ Pair) +
  scale_colour_brewer(palette = "Dark2", guide = guide_legend(title = "Perceived Race of Landlord/Broker:")) +
  theme_bw(base_size=15) + theme(legend.position="bottom", legend.key = element_blank())
ggsave("out_figurea6_appx_hetfxSUBJRACE_mc.pdf", width=11, height=7)
```

## Figure A7 (p. A-49): Estimated Effects of Punitive Messaging on Net Discrimination Levels Relative to Control, by the Perceived Race of the Landlord/Broker

```{r figurea7, echo=T, warning=F, fig.width=11, fig.height=7}
# punitive-control
fit_pc_llrace <- 
  do.call(rbind, 
   lapply(names(table(dat$subj_race)),
     function(J){
        x <- rbind(summary(lm(ncb_wb ~ TA2 + factor(block),
                               data=dat, 
                               subset=(TA %in% c(0,2) & subj_race==J),
                               weights=ipw20))$coefficients["TA2",],
                   
                   summary(lm(ncb_wh ~ TA2 + factor(block), 
                               data=dat, 
                               subset = (TA %in% c(0,2) & subj_race==J),
                               weights=ipw20))$coefficients["TA2",],
                   
                   summary(lm(noff_wb ~ TA2 + factor(block), 
                               data = dat,
                               subset =(TA %in% c(0,2) & subj_race==J),
                               weights = ipw20))$coefficients["TA2",],
                   
                   summary(lm(noff_wh ~ TA2 + factor(block), 
                               data = dat, 
                               subset = (TA %in% c(0,2) & subj_race==J), 
                               weights = ipw20))$coefficients["TA2",])
        
        x <- data.frame(Pair = c("Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                 "Net Discrimination\nAgainst Hispanics (vs. Whites)", 
                                 "Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                 "Net Discrimination\nAgainst Hispanics (vs. Whites)"),
                        Outcome = c("Callback", "Callback", "Offer", "Offer"),
                        SubjectRace = J,
                        x,
                        stringsAsFactors = FALSE)
  return(x)
}))

# table(dat$subj_race[dat$TA %in% c(0,2)])
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "Black", "Black\n(n=59)", fit_pc_llrace$SubjectRace)
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "Hispanic", "Hispanic\n(n=59)", fit_pc_llrace$SubjectRace)
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "Other", "Other\n(n=112)", fit_pc_llrace$SubjectRace)
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "White", "White\n(n=249)", fit_pc_llrace$SubjectRace)

fit_pc_llrace$SubjectRace <- factor(fit_pc_llrace$SubjectRace, levels=c("White\n(n=249)", "Black\n(n=59)", "Hispanic\n(n=59)", "Other\n(n=112)"))

ggplot(fit_pc_llrace, aes(group=SubjectRace, colour=SubjectRace)) + 
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.96*Std..Error, ymax=Estimate+1.96*Std..Error), lwd=1, position=position_dodge(width=1/2)) +
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.64*Std..Error, ymax=Estimate+1.64*Std..Error), lwd=2, position=position_dodge(width=1/2)) +
  geom_point(aes(x=Outcome, y=Estimate), size=3.5, position=position_dodge(width=1/2)) + 
  scale_y_continuous(limits = c(-.6, .6), breaks=seq(-.8, .8, .1)) +
  xlab("\nNet Discrimination Outcome") +
  ylab("\nEstimated Effect of Sending\nPunitive Message (vs. Control)\n") +
  geom_hline(yintercept=0, lty=2) + facet_wrap( ~ Pair) +
  scale_colour_brewer(palette = "Dark2", guide = guide_legend(title = "Perceived Race of Landlord/Broker:")) +
  theme_bw(base_size=15) + theme(legend.position="bottom", legend.key = element_blank())

ggsave("out_figurea7_appx_hetfxSUBJRACE_pc.pdf", width=11, height=7)
```

## Appendix E.15 (p. A-50): Analyses Addressing Spillover Concerns

For transparency, we show the code we use to conduct the analyses reported in Appendix E.15. However, we have omitted the raw data files that are used to conduct these analyses (i.e., every scraped ad from Craigslist) because they contain personal identifying information about human subjects.

```{r appendixe15, eval=F, echo=T}
setwd("") # set to directory containing all scraped Craigslist ads (before sampling)
# dirs1 - subdirectory listing, one per day

phone <- NULL # one row for each ad
phone_nums <- NULL 
for(i in 1:length(dirs1)) {
  
  thisdir <- dirs1[i]
  subdirs <- list.files(thisdir)
  subdirs <- subdirs[which(!grepl(".csv", subdirs))]
  
  if(length(subdirs) > 0) {
    
    subphone <- array(NA, length(subdirs))
    entry <- array(NA, length(subdirs))
    for(j in 1:length(subdirs)) { # one subdir per listing each day
      
      listing <- list.files(paste0(thisdir, "/", subdirs[j]))
      listing <- listing[which(grepl(".html", listing))]
      if(length(listing) > 1) { listing <- listing[1] }
      list_source <- readLines(paste0(thisdir, "/", subdirs[j], "/", listing))
      
      entry[j] <- paste(thisdir, subdirs[j], sep = "_")
      subphone[j] <- ifelse(length(which(str_extract_all(list_source, 
      "\\(?\\d{3}\\)?[.-]? *\\d{3}[[:space:]]*[.-]+ *[.-[:space:]]?\\d{4}",
      simplify = TRUE) != "")) > 0, 1, 0)
      if(subphone[j] == 1) {
        phone_vec <- str_extract_all(list_source, 
        "\\(?\\d{3}\\)?[.-]? *\\d{3}[[:space:]]*[.-]+ *[.-[:space:]]?\\d{4}",
        simplify = TRUE)
      phone_nums <- c(phone_nums, unique(phone_vec[which(phone_vec != "")])) # keep all phone numbers here (not available)
      }
  }
 phone <- rbind(phone, cbind(entry, subphone))
}
}

phone_df <- as.data.frame(phone)
names(phone_df)[2] <- "hasphonenum"
phone_df$hasphonenum <- as.numeric(as.character(phone_df$hasphonenum))

phone_df
# # A tibble: 85,981 x 2
#             entry hasphonenum
#             <chr>       <dbl>
# 1   2012-04-13_1           1
# 2  2012-04-13_10           1
# 3 2012-04-13_100           0
# 4 2012-04-13_101           1
# 5 2012-04-13_102           1
# 6 2012-04-13_103           1
# 7 2012-04-13_104           0
# 8 2012-04-13_105           1
# 9 2012-04-13_106           0
# 10 2012-04-13_107          1
# # ... with 85,971 more rows


# > length(unique(phone_df$entry))
# [1] 85981
# > sum(phone_df$hasphonenum)
# [1] 47978
# > mean(phone_df$hasphonenum) ### estimated proportion of rental listings requiring contact by phone 
# [1] 0.558007

### probability a subject enters the audit sample
2711 / 85981
# [1] 0.03153022
### probability a subject enters the experiment sample
653 / 85981
# [1] 0.007594701

### estimated percentage of duplicate-subject listings by phone number
# > length(which(duplicated(phone_nums)))/length(phone_nums) # 78.96%

0.7896 * 85981
# [1] 67890.6
85981-67890
# [1] 18091
2711 / 18091
# [1] 0.1498535
653 / 18091
# [1] 0.0360953


### Capture-recapture analysis ###

phone_df <- tbl_df(as.data.frame(phone[which(phone[,2] == "1"),])) # keep only listings with phone numbers
phone_df <- phone_df[which(!duplicated(phone_nums)),] # de-dupe using list of phone numbers from above (not available)

# sample 1 - which are marked

set.seed(9382)

phone_s1 <- sample(phone_df$entry, 1000)
#sum(as.numeric(as.character(phone_s1))) # 538

# sample 2

phone_s2 <- sample(phone_df$entry, 1000)

length(which(phone_s2 %in% phone_s1)) # 95

# estimate pop

1000 / (95/1000) # [1] 10526.32

# using 55.8% est. with phone #, this means:

10526.32 / .558 # 18864.37
```

## Appendix E.16 (p. A-51): Joint Distribution of the Number of Testers in Matched Trios Who Receive a Callback and an Offer

```{r appendixe16, eval=T, echo=T, warning=F}
dat$num_cb <- apply(dat[,c("cb_A", "cb_B", "cb_C")], 1, sum)
dat$num_off <- apply(dat[,c("off_A", "off_B", "off_C")], 1, sum)

appxe16 <- with(dat, table(num_cb, num_off))

appxe16a <- cbind(appxe16, rowSums(appxe16))
appxe16b <- round(cbind(appxe16, rowSums(appxe16)) / rowSums(appxe16) * 100, 2)

appxe16_table <- list()
for(i in 1:nrow(appxe16a)) {
  appxe16_table[[i]] <- rbind(as.character(round(appxe16a[i,], 0)),
                               paste0("(", sprintf("%.2f", appxe16b[i,]) ,")"))
}
appxe16_table <- do.call(rbind, appxe16_table)
appxe16_table <- cbind(c("Number of Testers Receiving a Callback: 0", "",
                         "Number of Testers Receiving a Callback: 1", "",
                         "Number of Testers Receiving a Callback: 2", "",
                         "Number of Testers Receiving a Callback: 3", ""),
                       appxe16_table)

colnames(appxe16_table) <- c("", paste(rep("Number of Testers Receiving an Offer:", 4), 0:3), "Row Totals")
  
kable(appxe16_table, caption="**Appendix E.16: Number of Testers in a Matched Trio Receiving an Offer by the Number of Testers in a Matched Trio Receiving a Callback.** Cells report counts with percentages in parentheses.")
```