# 0. Packages

In [None]:
rm(list = ls())
library(dplyr)
library(stringr)

# Capitalise the first letter of every space-separated word in a string.
#
# y: a character string. Only the first element is processed, because just
#    the first result of strsplit() is kept.
# Returns one string: each word with its first character upper-cased (the
# remaining characters are left as they are), joined back with a space.
CapStr <- function(y) {
  words <- strsplit(y, " ")[[1]]
  initials <- toupper(substr(words, 1, 1))
  tails <- substring(words, 2)
  paste(paste0(initials, tails), collapse = " ")
}


# 1. Load teacher treatment status and student rosters

In [None]:
## teacher treatment status (one csv produced by the randomization step)
teacher_txstatus <- read.csv(
    file = "../randomization_status/fallsemschools_summarizeRA.csv"
)


In [None]:
## student rosters
student_rosters <- read.csv(
    file = "../data/analysis_data/student_roster_sent01312020.csv"
)

# 2. Create common school name and match within a school

In [None]:
## school_init: strata_id with everything from the first underscore removed
## school_final: school_init, except any value containing "chec" becomes "chec"
teacher_txstatus <- mutate(
    teacher_txstatus,
    school_init = gsub("\\_.*", "", strata_id),
    school_final = ifelse(grepl("chec", school_init), "chec", school_init)
)

## number of randomization rows per school
table(teacher_txstatus$school_final)

In [None]:
## label every roster row with a short school name based on InstitutionName;
## rows whose InstitutionName matches none of the patterns get NA
student_rosters <- mutate(
    student_rosters,
    school = case_when(
        grepl("Ana", InstitutionName) ~ "anacostia",
        grepl("Columbia", InstitutionName) ~ "chec",
        grepl("Dunbar", InstitutionName) ~ "dunbar"
    )
)

## roster rows per school (a non-unique count of students)
table(student_rosters$school)

## 2.1 match and code anacostia

In [None]:
# Fuzzy-match one school's randomized teachers against the teacher names that
# appear in the student roster.
#
# Arguments:
#   roster_data        data frame with columns `TeacherName1` and `school`.
#   randomization_data data frame with columns `teacher_id`, `school_final`
#                      and `treatment`.
#   school_name        school label, compared against `roster_data$school`
#                      and `randomization_data$school_final`.
#
# Returns a list with two elements, `tx_teacher_matches` and
# `control_teacher_matches`: the roster names that agrep() matched to the
# teachers of each arm (NULL when an arm has no matches).
# Side effect: prints the original and the matched teachers of each arm.
return_teacher_matches <- function(roster_data, randomization_data, school_name){

    ## the control arm is spelled both ways in the randomization data, so
    ## accept either label here instead of relying on the caller to recode
    control_labels <- c("Not pilot", "No pilot")

    ## get list of teachers in a school
    ## (%in% instead of ==, so rows with a missing school do not add NA)
    teachers_searchin <- unique(roster_data$TeacherName1[roster_data$school %in% school_name])

    ## get teachers randomized to each arm; %in% again keeps missing
    ## school/treatment values from producing NA teacher ids
    in_school <- randomization_data$school_final %in% school_name
    tx_teachers <- unique(randomization_data$teacher_id[in_school &
                                        randomization_data$treatment %in% "Pilot"])
    control_teachers <- unique(randomization_data$teacher_id[in_school &
                                        randomization_data$treatment %in% control_labels])

    ## roster names approximately matching any of the given teachers;
    ## max.distance is spelled out rather than partially matched via `max`
    fuzzy_match <- function(teachers) {
        unlist(lapply(teachers, function(x) agrep(x, teachers_searchin, value = TRUE,
                                                  max.distance = 4)))
    }

    ## get matches with tx teachers
    tx_teacher_matches <- fuzzy_match(tx_teachers)
    print(sprintf("%s original treatment teachers were: %s", length(tx_teachers), paste(tx_teachers, collapse = "; ")))
    print(sprintf("%s matched treatment teachers are: %s", length(tx_teacher_matches), paste(tx_teacher_matches, collapse = "; ")))

    ## get matches with control teachers
    control_teacher_matches <- fuzzy_match(control_teachers)
    print(sprintf("%s original control teachers were: %s", length(control_teachers), paste(control_teachers, collapse = "; ")))
    print(sprintf("%s matched control teachers are: %s", length(control_teacher_matches),
                  paste(control_teacher_matches, collapse = "; ")))

    ## store in a list and return
    matched_teachers <- list(tx_teacher_matches = tx_teacher_matches,
                             control_teacher_matches = control_teacher_matches)

    return(matched_teachers)

}

In [None]:
# Collapse one school's roster to a single row per student describing which
# kinds of teachers (treatment / control) the student has.
#
# Arguments:
#   matched_teachers list with elements `tx_teacher_matches` and
#                    `control_teacher_matches` (roster teacher names).
#   roster_data      data frame with columns `school`, `TeacherName1`,
#                    `StudentID` and `StudentGrade`.
#   school_name      value of `school` whose rows are kept.
#
# Returns one row per StudentID with columns:
#   tx_summary / control_summary  the 0/1 flags of every roster row, "; "-joined
#   any_tx / any_control          1 if any roster row has a tx / control teacher
#   student_grade                 first distinct StudentGrade of the student
#   final_status                  "Both types", "Tx only", "Control only" or "Neither"
code_studentlevel_txstatus <- function(matched_teachers, roster_data, school_name){

    studlevel_RA <- roster_data %>%
                filter(school == school_name) %>%
                mutate(T_teacher = ifelse(TeacherName1 %in% matched_teachers$tx_teacher_matches, 1, 0),
                       C_teacher = ifelse(TeacherName1 %in% matched_teachers$control_teacher_matches, 1, 0)) %>%
                group_by(StudentID) %>%
                summarise(tx_summary = paste(T_teacher, collapse = "; "), # all teachers in tx (since
                          # teachers can be repeated across terms)
                          control_summary = paste(C_teacher, collapse = "; "), # all teachers in control
                          any_tx = as.numeric(any(T_teacher == 1)), # at least one tx teacher
                          any_control = as.numeric(any(C_teacher == 1)), # at least one control teacher
                          # unique() alone is not guaranteed to be length 1; taking
                          # the first distinct value keeps exactly one row per student
                          student_grade = unique(StudentGrade)[1],
                          final_status =
                          case_when(any_tx == 1 & any_control == 1 ~ "Both types",
                                    any_tx == 1 & any_control == 0 ~ "Tx only",
                                    any_tx == 0 & any_control == 1 ~ "Control only",
                                    any_tx == 0 & any_control == 0 ~ "Neither"))

    return(studlevel_RA)


}

# Extract the roster rows of students coded "Both types", keep only the rows
# taught by a matched treatment or control teacher, write them to a csv and
# return them.
#
# Arguments:
#   roster_data      data frame with columns `StudentID` and `TeacherName1`.
#   student_summary  data frame with columns `StudentID` and `final_status`.
#   matched_teachers list with elements `tx_teacher_matches` and
#                    `control_teacher_matches`.
#   schoolname       used only to build the output file name.
#
# Returns the written rows, ordered by StudentID; the result is still grouped
# by StudentID.
get_write_multipletx <- function(roster_data, student_summary,
                                 matched_teachers, schoolname){

    ## IDs of the students who have both teacher types
    crossover_ids <- student_summary$StudentID[student_summary$final_status == "Both types"]

    ## destination of the crossover schedules
    out_path <- sprintf("../data_checkwithteachertext/%s_multipleassigned.csv",
                        schoolname)

    ## roster rows of those students, flagged by teacher type
    crossover_rows <- filter(roster_data, StudentID %in% crossover_ids)
    crossover_rows <- mutate(
        crossover_rows,
        T_teacher = ifelse(TeacherName1 %in% matched_teachers$tx_teacher_matches, 1, 0),
        C_teacher = ifelse(TeacherName1 %in% matched_teachers$control_teacher_matches, 1, 0)
    )

    ## keep only the rows with a treatment or a control teacher
    studlevel_crossover <- crossover_rows %>%
        group_by(StudentID) %>%
        filter(T_teacher == 1 | C_teacher == 1) %>%
        arrange(StudentID)

    write.csv(studlevel_crossover, out_path)
    return(studlevel_crossover)


}

In [None]:
## match anacostia's randomized teachers to the roster teacher names
matched_teachers_ana <- return_teacher_matches(
    roster_data = student_rosters,
    randomization_data = teacher_txstatus,
    school_name = "anacostia"
)


In [None]:
## student-level treatment status for anacostia
studlevel_RA_ana <- code_studentlevel_txstatus(
    matched_teachers = matched_teachers_ana,
    roster_data = student_rosters,
    school_name = "anacostia"
)

## get schedules of students with multiple tx status
crossover_ana <- get_write_multipletx(
    roster_data = student_rosters,
    student_summary = studlevel_RA_ana,
    matched_teachers = matched_teachers_ana,
    schoolname = "anacostia"
)

## 2.2 Match and code chec

In [None]:
## first, since chec hs only had emails rather than names
## code to names and replace that col in main data
## chec hs rows: teacher_id holds an email, so rebuild it as "First Last"
## (first name = text before the first ".", last name = text between the
## first "." and the "@", each passed through CapStr)
chec_hs_rows <- filter(teacher_txstatus, school_init == "checHS")
chec_emails <- chec_hs_rows %>%
    mutate(
        first_name = unlist(lapply(gsub("\\..*", "", teacher_id), CapStr)),
        last_name = unlist(lapply(str_match(teacher_id, "\\.(.*)@")[, 2], CapStr))
    ) %>%
    dplyr::select(-teacher_id) %>%
    mutate(teacher_id = sprintf("%s %s", first_name, last_name)) %>%
    dplyr::select(-first_name, -last_name)

## every row that is not chec hs keeps its teacher_id as is
others <- filter(teacher_txstatus, school_init != "checHS")

## stack both parts and recode treatment to two labels:
## "No pilot"/"Not pilot" become "Not pilot", anything else becomes "Pilot"
teacher_txstatus_updated <- mutate(
    rbind.data.frame(chec_emails, others),
    treatment = ifelse(treatment %in% c("No pilot", "Not pilot"),
                       "Not pilot", "Pilot")
)



In [None]:
## match chec's randomized teachers (names rebuilt from emails) to the roster
matched_teachers_chec <- return_teacher_matches(
    roster_data = student_rosters,
    randomization_data = teacher_txstatus_updated,
    school_name = "chec"
)

In [None]:
## resolve discrepancies: remove the false-positive treatment matches and
## append the false-negative ones
## NOTE(review): chec_teach_falsepos and chec_teach_falseneg are not defined
## in the visible part of this file -- confirm where they are set
chec_final_matched <- c(
    setdiff(matched_teachers_chec$tx_teacher_matches, chec_teach_falsepos),
    chec_teach_falseneg
)

matched_teachers_chec$tx_teacher_matches <- chec_final_matched


## student-level status for chec; grades 6-8 are coded as the "ms" campus,
## every other grade as "hs"
studlevel_RA_chec <- mutate(
    code_studentlevel_txstatus(matched_teachers_chec, student_rosters, "chec"),
    campus = ifelse(student_grade %in% c("6", "7", "8"), "ms", "hs")
)

## schedules of chec students with both teacher types
crossover_chec <- get_write_multipletx(
    roster_data = student_rosters,
    student_summary = studlevel_RA_chec,
    matched_teachers = matched_teachers_chec,
    schoolname = "chec"
)

## Note: when examining the matches, some teachers
## (e.g. mcfadden) have "disabled" in their email;
## they may have left the school.


## 2.3 Match and code dunbar

In [None]:
## match dunbar's randomized teachers to the roster teacher names
matched_teachers_dunbar <- return_teacher_matches(
    roster_data = student_rosters,
    randomization_data = teacher_txstatus_updated,
    school_name = "dunbar"
)

## student-level status for dunbar and its distribution
studlevel_RA_dunbar <- code_studentlevel_txstatus(
    matched_teachers = matched_teachers_dunbar,
    roster_data = student_rosters,
    school_name = "dunbar"
)
table(studlevel_RA_dunbar$final_status)

## schedules of dunbar students with both teacher types
crossover_dunbar <- get_write_multipletx(
    roster_data = student_rosters,
    student_summary = studlevel_RA_dunbar,
    matched_teachers = matched_teachers_dunbar,
    schoolname = "dunbar"
)

# 4. Bind into a single file and write for use in messaging data analysis

In [None]:
## add a school column to each school's student-level table; chec is split
## into chec_ms / chec_hs using campus, which is then dropped
ranames <- list(
    mutate(studlevel_RA_ana, school = "anacostia"),
    studlevel_RA_chec %>%
        mutate(school = sprintf("chec_%s", campus)) %>%
        dplyr::select(-campus),
    mutate(studlevel_RA_dunbar, school = "dunbar")
)

## stack the three tables into one
ra_combined <- do.call(rbind.data.frame, ranames)

write.csv(
    ra_combined,
    file = "../randomization_status/allstudent_RAstatus.csv",
    row.names = FALSE
)
