# 0. Packages and functions

In [None]:
library(dplyr)
library(blockrand)
library("experiment")


In [None]:
## Summarise which grade bands each teacher teaches.
##
## data:               data frame with a `Grade` column and the teacher id column
## teacher_identifier: name (string) of the column that identifies a teacher
##
## Returns one row per teacher with `all_grades` (every `Grade` value of that
## teacher pasted together with ";") and the 0/1 flags `ms`, `hs` and
## `both_ms_and_hs`, which are regex matches against that pasted string.
get_grades_eachteacher <- function(data, teacher_identifier) {

  grades_by_teacher <- summarise(
    group_by_at(data, vars(one_of(teacher_identifier))),
    all_grades = paste(Grade, collapse = ";")
  )

  ## middle school = any of 6/7/8, high school = any of 9/10/11/12
  with_band_flags <- mutate(
    grades_by_teacher,
    ms = ifelse(grepl("6|7|8", all_grades), 1, 0),
    hs = ifelse(grepl("9|10|11|12", all_grades), 1, 0)
  )

  mutate(with_band_flags,
         both_ms_and_hs = ifelse(ms == 1 & hs == 1, 1, 0))
}

## Split a "course - teacher - period" column into three new columns.
##
## data:    data frame holding the combined column
## colname: name (string) of the column to split on "-"
##
## Returns `data` with the columns `course_name`, `teacher` and `period`
## appended (pieces are not trimmed, exactly as they appear between hyphens).
##
## Stops with an informative error when any value does not split into exactly
## three pieces. Previously such rows were silently recycled by rbind() (a
## value without hyphens became three copies of itself), producing wrong
## course/teacher/period values without any error.
parse_course_info <- function(data, colname) {

  raw_values <- as.character(data[[colname]])
  pieces <- strsplit(raw_values, "-", fixed = TRUE)

  malformed <- which(lengths(pieces) != 3L)
  if (length(malformed) > 0) {
    stop(sprintf("parse_course_info: %d value(s) in '%s' do not split into exactly 3 parts on '-', e.g.: %s",
                 length(malformed), colname,
                 paste(utils::head(raw_values[malformed], 5), collapse = " | ")),
         call. = FALSE)
  }

  ## nrow is given explicitly so zero-row input yields a 0 x 3 matrix
  course_info_cols <- matrix(as.character(unlist(pieces)),
                             nrow = length(pieces), ncol = 3, byrow = TRUE,
                             dimnames = list(NULL,
                                             c("course_name", "teacher", "period")))

  cbind.data.frame(data, course_info_cols)
}

## Build a one-row-per-teacher summary of courses, grades, periods and
## total number of students.
##
## data:               data frame with the columns `course_name`, `Grade`,
##                     `period`, `Students` and the teacher id column
## teacher_identifier: name (string) of the column that identifies a teacher
##
## Returns a data frame with `teacher_id`, `courses` (unique course names
## joined with "; "), `grades` and `periods` (unique values joined with ";")
## and `total_students` (sum of `Students` over the teacher's rows).
find_course_grade_periods_byteacher <- function(data, teacher_identifier) {

  ## get all identifiers
  all_teachers <- unique(data[[teacher_identifier]])

  ## preallocate one slot per teacher instead of growing vectors in the loop
  n_teachers <- length(all_teachers)
  unique_courses <- character(n_teachers)
  unique_grades <- character(n_teachers)
  unique_periods <- character(n_teachers)

  ## seq_along() does nothing for zero teachers (1:length() would run 1, 0)
  for (i in seq_along(all_teachers)) {

    ## subset to one teacher
    one_teacher <- data[data[[teacher_identifier]] == all_teachers[i], ]
    print(sprintf("Returning values for %s", all_teachers[i]))

    unique_courses[i] <- paste(unique(one_teacher$course_name), collapse = "; ")
    unique_grades[i] <- paste(unique(one_teacher$Grade), collapse = ";")
    unique_periods[i] <- paste(unique(one_teacher$period), collapse = ";")
  }

  ## get number of students per teacher
  n_students_perteacher <- data %>%
    group_by_at(vars(one_of(teacher_identifier))) %>%
    summarise(total_students = sum(Students))

  ## left join n students per teacher onto other teacher attributes
  merge(data.frame(teacher_id = all_teachers,
                   courses = unique_courses,
                   grades = unique_grades,
                   periods = unique_periods),
        n_students_perteacher,
        by.x = "teacher_id",
        by.y = teacher_identifier,
        all.x = TRUE)
}

## Add per-teacher course counts and average grade to a teacher summary.
##
## longform_data:        one row per course section; needs `course_name`,
##                       `Grade` and the teacher id column
## vars_finddummies:     character vector of columns to expand with
##                       model.matrix() (joined with "+" in a no-intercept formula)
## teacher_identifier:   name (string) of the teacher id column in longform_data
## wide_teacher_summary: one row per teacher, keyed by `teacher_id`
##
## Returns wide_teacher_summary merged with (a) the per-teacher sum of every
## model-matrix column whose name contains "course_name" and (b) `avg_grade`,
## the mean of the numeric `Grade` over the teacher's rows.
aggregate_courses_grades <- function(longform_data, vars_finddummies, teacher_identifier,
                                     wide_teacher_summary) {

  ## average grade taught, computed straight from the long-form rows
  grade_means <- longform_data %>%
    mutate(grade_numeric = as.numeric(as.character(Grade))) %>%
    group_by_at(vars(one_of(teacher_identifier))) %>%
    summarise(avg_grade = mean(grade_numeric))

  ## indicator columns from a formula of the form "~ a+b - 1"
  rhs_terms <- paste(vars_finddummies, collapse = "+")
  indicator_matrix <- model.matrix(formula(paste0("~ ", rhs_terms, " - 1")),
                                   data = longform_data)
  longform_with_indicators <- cbind.data.frame(longform_data, indicator_matrix)

  ## drop the raw course_name column so only the indicator columns match
  ## contains("course_name"), then count sections of each course per teacher
  course_counts <- longform_with_indicators %>%
    dplyr::select(-course_name) %>%
    group_by_at(vars(one_of(teacher_identifier))) %>%
    summarise_at(vars(contains("course_name")), sum)

  ## attach both summaries to the wide teacher table
  merge(
    merge(wide_teacher_summary, course_counts,
          by.x = "teacher_id", by.y = teacher_identifier),
    grade_means,
    by.x = "teacher_id", by.y = teacher_identifier
  )
}

## Do block randomization within one stratum.
##
## strata_level:   the stratum value to randomize (one element of the
##                 unique values of the strata column)
## data:           data frame with one row per unit to randomize
## strata_varname: name (string) of the column holding the stratum
##
## Returns the rows of `data` in that stratum with an `id` column (1..n) and
## the columns produced by blockrand() for the same ids ("Pilot"/"Not pilot").
##
## Rows whose stratum is NA are matched only when `strata_level` itself is NA.
## A plain `==` subset turned every NA comparison into an all-NA row, so units
## with a missing stratum were injected as empty rows into every stratum.
## NOTE(review): when the strata column contains NAs this changes the number
## of rows per stratum and therefore the assignments drawn for a given seed.
blockrand_withstrata <- function(strata_level, data, strata_varname) {

  strata_values <- data[[strata_varname]]
  if (isTRUE(is.na(strata_level))) {
    in_strata <- is.na(strata_values)
  } else {
    in_strata <- !is.na(strata_values) & strata_values == strata_level
  }

  df_strata <- data[in_strata, , drop = FALSE]
  if (nrow(df_strata) == 0) {
    stop(sprintf("blockrand_withstrata: no rows with %s == '%s'",
                 strata_varname, strata_level),
         call. = FALSE)
  }

  ## seq_len() instead of 1:nrow() so the id is always exactly n long
  df_strata$id <- seq_len(nrow(df_strata))

  block_output <- blockrand(n = nrow(df_strata),
                            num.levels = 2,
                            levels = c("Pilot", "Not pilot"),
                            block.sizes = 2)

  ## left join: keep exactly the stratum's rows, matched to assignments by id
  merge(df_strata, block_output, by = "id", all.x = TRUE)
}

# 0. Friendship 



In [None]:
## load the Friendship homeroom teacher list
friendship_homerooms = read.csv("../data/friendship_homeroom.csv")

## first_name: drop everything up to and including the last comma in
## `teacher`, then strip all whitespace
friendship_homerooms = friendship_homerooms %>%
                mutate(first_name = gsub("\\s+", "", 
                        gsub(".*\\,", "", teacher)))


## one call per first name; result is a list with one element per teacher
## NOTE(review): `gender` is not defined in this file and is not obviously
## provided by the packages loaded at the top (presumably gender::gender) -- confirm
gender_df = lapply(friendship_homerooms$first_name, gender)




In [None]:
## stack the per-name lookups and keep only the name and the gender columns
gender_df_all = do.call(rbind.data.frame, gender_df) %>% dplyr::select(name, gender)

## left join onto the homeroom list by first name (all.x keeps teachers
## without a match); gender_final applies one hard-coded override
## NOTE(review): if a name appears more than once in gender_df_all this merge
## duplicates the teacher's row -- presumably why duplicates are dropped later
friendship_homerooms_wgender = merge(friendship_homerooms, gender_df_all,
                                    by.x = "first_name",
                                    by.y = "name",
                                    all.x = TRUE) %>%
                            mutate(gender_final = ifelse(first_name == "Jaraina", "female", 
                                                        gender))



In [None]:
## preview the merged homeroom/gender table
head(friendship_homerooms_wgender)

In [None]:
## create strata based on gender, grade, and special education
## the nested ifelse() tests 9th grade first, then 10th grade; every row that
## matches none of the four grade/gender tests is labelled "Special education"
## NOTE(review): ifelse() returns NA when its test is NA, so a teacher with
## grade_9 == 1 and a missing gender_final gets strata_name NA, not a label
friendship_homerooms_wgender = friendship_homerooms_wgender %>%
                    mutate(strata_name = ifelse(grade_9 == 1 & gender_final == "female",
                                               "9th grade female",
                                        ifelse(grade_9 == 1 & gender_final == "male",
                                              "9th grade male",
                                        ifelse(grade_10 == 1 & gender_final == "female",
                                              "10th grade female",
                                        ifelse(grade_10 == 1 & gender_final == "male",
                                               "10th grade male",
                                               "Special education")))))


In [None]:
## randomize within strata
## seed is set once; strata are then processed in order of first appearance,
## so the assignments depend on the row order of the input table
set.seed(91988)
strata_levels = unique(friendship_homerooms_wgender$strata_name)
friendship_randomization  = lapply(strata_levels,  blockrand_withstrata, 
                data = friendship_homerooms_wgender, 
               strata_varname = 'strata_name')

In [None]:
## stack the per-stratum results and keep the first row for each teacher
friendship_randomization_df = do.call(rbind.data.frame,
                                     friendship_randomization) %>%
                        filter(!duplicated(teacher))

## write results
## write to csv
write.csv(friendship_randomization_df, "../randomization_status/friendship_teacherpilotstatus.csv",
          row.names = FALSE)

## display only: names of the Pilot teachers with special_education == 1
friendship_randomization_df %>% filter(treatment == "Pilot") %>%
                dplyr::select(teacher, grade_9, grade_10, special_education) %>%
                arrange(grade_9, grade_10, special_education, teacher) %>%
                filter(special_education == 1) %>%
                dplyr::select(teacher)

# 0. Looking at Paul

In [None]:
## Build a one-row-per-teacher summary of courses, grades and periods
## (same as find_course_grade_periods_byteacher but without student counts).
##
## data:               data frame with the columns `course_name`, `Grade`,
##                     `period` and the teacher id column
## teacher_identifier: name (string) of the column that identifies a teacher
##
## Returns a data frame with `teacher_id`, `courses` (unique course names
## joined with "; "), `grades` and `periods` (unique values joined with ";").
## Zero-row input returns a zero-row summary.
find_course_grade_periods_byteacher_noN <- function(data, teacher_identifier) {

  ## get all identifiers
  all_teachers <- unique(data[[teacher_identifier]])

  ## preallocate one slot per teacher instead of growing vectors in the loop
  n_teachers <- length(all_teachers)
  unique_courses <- character(n_teachers)
  unique_grades <- character(n_teachers)
  unique_periods <- character(n_teachers)

  ## seq_along() does nothing for zero teachers (1:length() would run 1, 0)
  for (i in seq_along(all_teachers)) {

    ## subset to one teacher
    one_teacher <- data[data[[teacher_identifier]] == all_teachers[i], ]
    print(sprintf("Returning values for %s", all_teachers[i]))

    unique_courses[i] <- paste(unique(one_teacher$course_name), collapse = "; ")
    unique_grades[i] <- paste(unique(one_teacher$Grade), collapse = ";")
    unique_periods[i] <- paste(unique(one_teacher$period), collapse = ";")
  }

  ## assemble the teacher attributes (no student counts in this variant)
  data.frame(teacher_id = all_teachers,
             courses = unique_courses,
             grades = unique_grades,
             periods = unique_periods)
}

In [None]:
## load Paul's course sections and the list of teachers to keep
paul_sis = read.csv("../data/paul_electives.csv")
paul_maskteachers = read.csv("../data/paul_maskteachers.csv")
## join the teacher names into one "a|b|c" regex used with grepl() below
paul_teachernames = paste(paul_maskteachers$teacher_name, collapse = "|")

## split into middle school (grades 6-8) and everything else
paul_ms = paul_sis %>% filter(Grade %in% c(6, 7, 8))
paul_hs = paul_sis %>% filter(!Grade %in% c(6, 7, 8))

## high-school rows whose Subject contains 'science', listed teachers only
paul_hs_science =  paul_hs %>% filter(grepl('science', Subject)) %>%
                    filter(grepl(paul_teachernames, Primary.Teacher))
unique(paul_hs_science$Primary.Teacher)

paul_hs_science_courses = parse_course_info(paul_hs_science, "Name") %>%
                                mutate(teacher = trimws(teacher)) 


## NOTE(review): `grades` is the ";"-joined string of a teacher's grades, so
## `%in% c(9, 10)` is TRUE only for teachers with exactly "9" or "10"; a
## teacher with "9;10" falls into "11th and 12th grade" -- confirm intent
paul_hs_science_summary = find_course_grade_periods_byteacher_noN(paul_hs_science_courses,
                                                                 "Primary.Teacher") %>%
                            mutate(strata_name = ifelse(grades %in% c(9, 10),
                                                  "9th and 10th grade",
                                                  "11th and 12th grade")) %>%
                            arrange(desc(strata_name))

paul_hs_science_summary

## randomize
set.seed(91988)
strata_levels = unique(paul_hs_science_summary$strata_name)
paul_hs_randomization = lapply(strata_levels,  blockrand_withstrata, 
                data = paul_hs_science_summary, 
               strata_varname = 'strata_name') 

paul_hs_randomization_df = do.call(rbind.data.frame, paul_hs_randomization)
paul_hs_randomization_df


In [None]:
## middle-school elective sections: PE/health and "other" subjects,
## excluding courses whose name contains "Algebra" or "Lunch"
paul_ms_electives = paul_ms %>% filter(Subject == "PE and health" | Subject == "other")
paul_ms_electives_courses = parse_course_info(paul_ms_electives, "Name") %>%
                                mutate(teacher = trimws(teacher)) %>%
                        filter(!grepl("Algebra", course_name) &
                              !grepl("Lunch", course_name))

## keep teachers with a course containing "Physical"; all in a single stratum
paul_ms_electives_summary = find_course_grade_periods_byteacher_noN(paul_ms_electives_courses,
                                                                 "Primary.Teacher") %>%
                            filter(grepl("Physical", courses)) %>%
                            mutate(strata_name = "one_strata")

## NOTE(review): no set.seed() in this cell, so the draw depends on the RNG
## state left by whichever cells ran before it
strata_levels = unique(paul_ms_electives_summary$strata_name)
paul_ms_randomization = lapply(strata_levels,  blockrand_withstrata, 
                data = paul_ms_electives_summary, 
               strata_varname = 'strata_name')

paul_ms_randomization



## Re-randomize for second-sem rollout

In [None]:
## re-draw the middle-school assignment with a different seed
## note: R reads the literal 040488 as the decimal number 40488
## reuses strata_levels and paul_ms_electives_summary from the previous cell
set.seed(040488)
paul_ms_randomization_spring = lapply(strata_levels,  blockrand_withstrata, 
                data = paul_ms_electives_summary, 
               strata_varname = 'strata_name')

paul_ms_randomization_spring

# 1. CHEC HS randomization

## 1.1 Load SIS data

In [None]:
# Load data
## ELA sections from the SIS and the list of teachers to exclude later
chec_SIS = read.csv("../data/chec_ELA_fromSIS.csv")
chec_teachersexclude = read.csv("../data/chec_teachersexclude.csv")

# look at distribution of grades by teacher
## before subsetting to HS teachers
chec_distgrades = get_grades_eachteacher(data = chec_SIS, teacher_identifier = "Teacher.Email")



In [None]:
## subset to high school teachers
## (rows with Grade 9-12)
chec_SIS_hs = chec_SIS %>% filter(Grade %in% c(9, 10, 11, 12))

## add separated cols to data
## (course_name, teacher, period parsed from the hyphen-delimited Name column)
chec_SIS_hs_wcourse= parse_course_info(chec_SIS_hs, "Name")

## 1.2 Explore data to figure out blocking (high school)

In [None]:
## one row per teacher (keyed by email, returned in column `teacher_id`)
teacher_summary = find_course_grade_periods_byteacher(data = chec_SIS_hs_wcourse, 
                                          teacher_identifier = "Teacher.Email")

In [None]:
## add per-teacher course counts and average grade to the teacher summary
chec_SIS_forblock  = aggregate_courses_grades(chec_SIS_hs_wcourse,
                               c("course_name", "Grade"),
                               "Teacher.Email",
                               teacher_summary)

In [None]:
## clean up names
## remove whitespace, "&" and the "course_name" prefix from the column names
## so that e.g. "course_nameEnglish I" becomes "EnglishI"
clean_colnames = gsub("\\s+|\\&|course\\_name", "", colnames(chec_SIS_forblock))
colnames(chec_SIS_forblock) = clean_colnames

sprintf("Data for blocking/randomization")
chec_SIS_forblock

## 1.3 Randomize using blocking

Vars to match on:

- Total students a teacher has. Why? Capacity/burnout
- Main courses: English I, English II, English III, English IV 

Why I didn't match on others:
- Didn't block on extended literacy and reading support, since those seem to occur at the same time as the English I-IV courses.
- Didn't block on AP Lang/Lit: the same teachers who teach English III teach AP Lang, and the same teachers who teach English IV teach AP Lit, so adding them as blocking variables would be collinear with the English III/IV indicators (singular matrix).
- Grade is the same as course.

In [None]:
## create diff matching formulas 
## three one-sided formulas, from most to least covariates:
## courses + students + grade, students + grade, grade only
match_formula_course_students_grade = formula(sprintf("~ total_students + 
                                EnglishI + EnglishII + EnglishIII + EnglishIV + avg_grade"))

match_formula_students_grade =  formula(sprintf("~ total_students + avg_grade"))

match_formula_grade =  formula(sprintf("~ avg_grade"))


In [None]:
## drop the teachers listed in the exclusion file (matched on email)
chec_SIS_forblock_final = chec_SIS_forblock %>%
                    filter(!teacher_id %in% chec_teachersexclude$teacher_email) 
nrow(chec_SIS_forblock_final)

chec_SIS_forblock_final


In [None]:
## randomize
## set seed
## the seed is set once; the three randomize() calls below then draw from the
## RNG one after another, so their order matters for reproducibility
## NOTE(review): randomize() presumably comes from the "experiment" package
## loaded at the top -- confirm the meaning of n.block / match / complete there
set.seed(91988)
randobj_match_coursestudentsgrade = randomize(chec_SIS_forblock_final,
                 group = c("Pilot", "No pilot"), # omitted ratio arg bc default is 0.5
                 n.block = 7,
                match = match_formula_course_students_grade ,
                complete = TRUE)
randobj_match_studentsgrade = randomize(chec_SIS_forblock_final,
                 group = c("Pilot", "No pilot"), # omitted ratio arg bc default is 0.5
                 n.block = 7,
                match = match_formula_students_grade ,
                complete = TRUE)
randobj_match_grade = randomize(chec_SIS_forblock_final,
                 group = c("Pilot", "No pilot"), # omitted ratio arg bc default is 0.5
                 n.block = 7,
                match = match_formula_grade ,
                complete = TRUE)





## collect the assignment and pair id from each of the three matching
## specifications (CSG = courses + students + grade, SG = students + grade,
## G = grade only) next to the data stored on the randomize() result
df_withpairs= randobj_match_studentsgrade$data
df_withpairs = df_withpairs %>%
             mutate(treatment_status_CSG = randobj_match_coursestudentsgrade[["treatment"]],
                    pair_id_CSG = randobj_match_coursestudentsgrade$match.id,
                    treatment_status_SG = randobj_match_studentsgrade[["treatment"]],
                    pair_id_SG = randobj_match_studentsgrade$match.id,
                    treatment_status_G = randobj_match_grade[["treatment"]],
                    pair_id_G = randobj_match_grade$match.id,
                    ## the teacher email lives in `teacher_id` (the key column
                    ## produced by aggregate_courses_grades); there is no
                    ## `Teacher.Email` column, so `$Teacher.Email` was NULL and
                    ## the column was silently never created
                   Teacher.Email = chec_SIS_forblock_final$teacher_id)



In [None]:
## merge back with teacher summary and wide form pairs
## one table per matching specification (CSG, SG, G), each sorted by pair id
## NOTE(review): teacher_summary as built earlier in this file is keyed by
## `teacher_id` and has no `Teacher.Email` or `excluded_in_handwritten`
## column; these merges presumably relied on an older version of
## teacher_summary -- confirm before re-running

### summarize pairs
df_withpairs_withattributes = merge(df_withpairs %>% dplyr::select(Teacher.Email, 
                                                                  contains("treatment_status_CSG"),
                                                                  contains("pair_id_CSG")),
                                   teacher_summary %>% dplyr::select(-excluded_in_handwritten), 
                                    by = "Teacher.Email", all.x = TRUE) %>%
                                arrange(pair_id_CSG, treatment_status_CSG)

df_withpairs_withattributes

df_withpairs_withattributes_SG = merge(df_withpairs %>% dplyr::select(Teacher.Email, 
                                                                  contains("treatment_status_SG"),
                                                                  contains("pair_id_SG")),
                                   teacher_summary %>% dplyr::select(-excluded_in_handwritten), 
                                    by = "Teacher.Email", all.x = TRUE) %>%
                                arrange(pair_id_SG, treatment_status_SG)

df_withpairs_withattributes_SG

df_withpairs_withattributes_G = merge(df_withpairs %>% dplyr::select(Teacher.Email, 
                                                                  contains("treatment_status_G"),
                                                                  contains("pair_id_G")),
                                   teacher_summary %>% dplyr::select(-excluded_in_handwritten), 
                                    by = "Teacher.Email", all.x = TRUE) %>%
                                arrange(pair_id_G, treatment_status_G)

df_withpairs_withattributes_G


# 1.3 Write results

In [None]:
## course sections of the teachers assigned to "Pilot" under the
## courses + students + grade (CSG) specification, sorted by teacher and period
pilot_teachers = chec_SIS_hs_wcourse %>%
                filter(Teacher.Email %in% 
                       (df_withpairs_withattributes %>% filter(treatment_status_CSG == "Pilot"))$Teacher.Email) %>%
                arrange(Primary.Teacher, period)
pilot_teachers


In [None]:
## merge back with teacher name
## keep the SIS rows of every teacher that appears in the paired table
## (the previous line used pandas syntax, `.isin`, which does not parse in R)
## NOTE(review): like the line it replaces, this keeps all teachers in
## df_withpairs_withattributes, not only those with status "Pilot", and
## overwrites the pilot_teachers built in the previous cell -- confirm intent
pilot_teachers = chec_SIS_hs[chec_SIS_hs[["Teacher.Email"]] %in%
                             df_withpairs_withattributes[["Teacher.Email"]], ]
pilot_teachers

In [None]:
## write two versions of the results

## version one for school: teacher names and emails for pilot group
## (Pilot under the CSG specification; one name per email from the SIS data)
teachers_in_pilot = df_withpairs_withattributes %>% filter(treatment_status_CSG == "Pilot") %>%
                        dplyr::select(Teacher.Email) %>%
                        left_join(chec_SIS_hs %>% dplyr::select(Teacher.Email, Primary.Teacher) %>%
                                         filter(!duplicated(Teacher.Email)),
                                 by = "Teacher.Email")
teachers_in_pilot
## both writes are commented out, so running this cell does not touch the
## files; a later cell reads teachers_in_pilot_FORSCHOOL.csv from disk
##write.csv(teachers_in_pilot,
  ##        "../randomization_status/teachers_in_pilot_FORSCHOOL.csv",
    ##      row.names = FALSE)

## version two for us: all name/info
##write.csv(df_withpairs_withattributes_SG, 
  ##        "../randomization_status/teachers_in_pilot_FORLAB.csv")



# 1.4 Use whole roster to create list of teachers to exclude


In [None]:
## read the previously written list of CHEC pilot teachers from disk
pilot_teachers_chec = read.csv("../randomization_status/teachers_in_pilot_FORSCHOOL.csv")

In [None]:
## full CHEC rosters: "yl" and "t1" files (labelled yearlong / term1_only
## where the same files are read again in the CHEC MS section)
chec_yl_all = read.csv("../data/chec_yl_all.csv")
chec_t1_all = read.csv("../data/chec_t1_all.csv")

In [None]:
## parse course/teacher/period out of Name, then keep grades 9-12 only
chec_yl_all_wcourses = parse_course_info(data = chec_yl_all, colname = "Name")
chec_yl_all_wcourses_hs = chec_yl_all_wcourses[chec_yl_all_wcourses[['Grade']] %in% c(9, 10, 11, 12), ]
chec_t1_all_wcourses = parse_course_info(data = chec_t1_all, colname = "Name")
chec_t1_all_wcourses_hs = chec_t1_all_wcourses[chec_t1_all_wcourses[['Grade']] %in% c(9, 10, 11, 12), ]


In [None]:
##bind into one
## (term-1 rows first, then yearlong rows)
chec_all = rbind.data.frame(chec_t1_all_wcourses_hs,
                           chec_yl_all_wcourses_hs)
head(chec_all)

In [None]:
## report the number of distinct teachers and distinct course names
sprintf("CHEC has %s unique teachers teaching %s unique courses (either half-year or full year) before removing pilot",
       length(unique(chec_all[['Primary.Teacher']])),
       length(unique(chec_all[['course_name']])))

In [None]:
## remove pilot teachers 
## (teacher names on the roster that are not in the pilot list; exact match)
nonpilot_teachers = setdiff(chec_all[['Primary.Teacher']], pilot_teachers_chec[['Primary.Teacher']])

In [None]:
## look at two intersections
## pilot teachers whose name does not appear on the roster (exact match);
## sprintf() returns one string per name
pilot_notin_roster = setdiff(pilot_teachers_chec[['Primary.Teacher']], chec_all[['Primary.Teacher']])
sprintf("Pilot teachers not on updated roster: %s", pilot_notin_roster)

## check if in there without a middle name
## NOTE(review): chec_alternatenames and chec_admin are not defined in the
## visible part of this file; they are used as grep() patterns, so presumably
## regex strings of names -- they must exist before this cell runs
alternate_names_pilot = unique(grep(chec_alternatenames, chec_all[['Primary.Teacher']], value = TRUE))
admin_find = unique(grep(chec_admin,
                 chec_all[['Primary.Teacher']], value = TRUE))

sprintf("Pilot teachers' alternate names to exclude: %s", alternate_names_pilot)
sprintf("Admins to exclude: %s", admin_find)


In [None]:
## teachers whose accounts stay active are: 1. pilot teachers found on the
## roster, 2. admins, and 3. alternate spellings of pilot teachers' names
teachers_dontblock= c(intersect(pilot_teachers_chec[['Primary.Teacher']],
                                   chec_all[['Primary.Teacher']]), admin_find, alternate_names_pilot)
sprintf("Teachers or admins whose accounts shouldnt be suspended: %s", paste(teachers_dontblock, collapse = ";"))

In [None]:
## read in file that has teacher emails
full_roster_emails = read.csv("../data/chec_fullroster_fromSIS.csv")

## get ids and names for teachers to suspend
## keep roster rows of teachers not in teachers_dontblock whose Teacher.ID is
## not the literal "#N/A" (`!Teacher.ID == "#N/A"` parses as
## !(Teacher.ID == "#N/A")); sort by last name (text after the last
## whitespace) and keep one row per Teacher.ID
teachers_tosuspend = chec_all %>% filter(!Primary.Teacher %in% teachers_dontblock &
                                        !Teacher.ID == "#N/A") %>%
                    dplyr::select(Primary.Teacher, Teacher.ID) %>%
                    mutate(last_name = gsub(".*\\s", "", Primary.Teacher)) %>%
                    arrange(last_name) %>%
                    dplyr::select(-last_name) %>%
                    filter(!duplicated(Teacher.ID)) 


In [None]:
## left join emails onto the suspend list by Teacher.ID, then sort by email
teachers_tosuspend_wemails = merge(teachers_tosuspend, 
                                 full_roster_emails %>% dplyr::select(Email, Teacher.ID),
                                 by = "Teacher.ID",
                                 all.x = TRUE) %>%
                            arrange(Email)

In [None]:


## save the list of teachers to suspend (with emails)
write.csv(teachers_tosuspend_wemails, "../randomization_status/CHEC_teacherstosuspend.csv", row.names = FALSE)

# 2. CHEC MS 


In [None]:
## reload the CHEC rosters, tagging each row with its course type
chec_t1 = read.csv("../data/chec_t1_all.csv") %>% mutate(course_type = "term1_only")
chec_yl = read.csv("../data/chec_yl_all.csv") %>% mutate(course_type = "yearlong")

## note: this overwrites the chec_all built in section 1.4
chec_all = rbind.data.frame(chec_t1, chec_yl) 
## middle-school ELA rows only
## NOTE(review): chec_ms_remove is not defined in the visible part of this
## file; presumably a vector of teacher names to drop
chec_ELA_MS = chec_all %>% filter(Subject == "english/language arts" &
                        Grade %in% c(6, 7, 8) &
                        !Primary.Teacher %in% chec_ms_remove)


## parse name col into course_name, teacher_period
chec_ELA_ms_wcourse =  parse_course_info(chec_ELA_MS, "Name")

## create wide form teacher summary
chec_ELA_ms_teachersummary = find_course_grade_periods_byteacher(data = chec_ELA_ms_wcourse, 
                                          teacher_identifier = "Primary.Teacher") %>%
                            arrange(teacher_id)

chec_ELA_ms_teachersummary

chec_SIS_forblock_ms  = aggregate_courses_grades(chec_ELA_ms_wcourse,
                               c("course_name", "Grade"),
                               "Primary.Teacher",
                               chec_ELA_ms_teachersummary)

chec_SIS_forblock_ms

## strip whitespace, "&" and the "course_name" prefix from column names, then
## assign strata from avg_grade: exactly 6 -> "6th grade only", strictly
## between 6 and 8 -> "Mixed grades", everything else -> "8th grade only"
## NOTE(review): a teacher who teaches only 7th grade has avg_grade == 7 and
## lands in "Mixed grades" -- confirm that is intended
clean_colnames = gsub("\\s+|\\&|course\\_name", "", colnames(chec_SIS_forblock_ms))
colnames(chec_SIS_forblock_ms) = clean_colnames
chec_SIS_forblock_ms = chec_SIS_forblock_ms %>% mutate(strata_name = 
                                ifelse(avg_grade == 6, 
                                "6th grade only",
                                ifelse(avg_grade > 6 & avg_grade < 8,
                                "Mixed grades",
                                "8th grade only"))) %>% arrange(avg_grade)

sprintf("Data for blocking/randomization")
chec_SIS_forblock_ms


In [None]:

## randomize middle-school ELA teachers within strata
set.seed(91988)

strata_levels= unique(chec_SIS_forblock_ms$strata_name)
chec_ms_randomization = lapply(strata_levels, 
                               blockrand_withstrata, 
                               data = chec_SIS_forblock_ms, 
                               strata_varname = 'strata_name')
## drop the bookkeeping columns (id, block.id, block.size) before stacking
chec_ms_randomization_tobind = lapply(chec_ms_randomization, function(x) x %>% dplyr::select(-id, -block.id,
                                                                                            -block.size))

chec_ms_randomization_all = do.call(rbind.data.frame, chec_ms_randomization_tobind)

## summarize
chec_ms_randomization_all %>% arrange(avg_grade, treatment)
write.csv(chec_ms_randomization_all,
          "../randomization_status/chec_MS_teacherpilotstatus.csv", row.names = FALSE)
                                      
## build a "First Last" key on the roster so emails can be attached
## NOTE(review): merge() is called without `by`, so it joins on every column
## name the two tables share -- presumably only teacher_id; confirm
full_roster_emails = read.csv("../data/chec_fullroster_fromSIS.csv") %>%
                                mutate(teacher_id = sprintf("%s %s", First, Last))
chec_ms_randomization_all_wemail = merge(chec_ms_randomization_all,
                                        full_roster_emails %>% dplyr::select(teacher_id, Email),
                                        all.x = TRUE)
chec_ms_randomization_all_wemail %>% filter(treatment == "Pilot") %>% dplyr::select(Email)
                                      

# 3. Anacostia

In [None]:
## load Anacostia rosters (tagged by course type) and the list of social
## studies teachers
anacostia_t1 = read.csv("../data/anacostia_t1_all.csv") %>% mutate(course_type = "term1_only")
anacostia_yl = read.csv("../data/anacostia_yl_all.csv") %>% mutate(course_type = "yearlong")
anacostia_socialstudies = read.csv("../data/socialstudies_teachers.csv")

## create a single dataset with
## both term1 and yl
anacostia_both = rbind.data.frame(anacostia_t1, anacostia_yl)


In [None]:
# look at distribution of grades by teacher
## before subsetting to HS teachers
## (computed on the yearlong file only, not on anacostia_both)
anacostia_distgrades = get_grades_eachteacher(data = anacostia_yl, 
                                              teacher_identifier = "Primary.Teacher")
#anacostia_distgrades

## already only contains HS teachers, which makes sense

In [None]:
## clean up course - teacher-period col so that 
## splitting on "-" delimiter produces correct columns
## each step removes a hyphen that is part of a course or teacher name; only
## the final result (name_clean_6) is kept
## NOTE(review): hyphen_name and hyphen_name_nohyphen are not defined in the
## visible part of this file -- presumably a hyphenated teacher name and its
## replacement
## NOTE(review): the character class "[A|B]" also matches a literal "|"
anacostia_both = anacostia_both %>% mutate(name_clean_1 = gsub(hyphen_name, 
                                                    hyphen_name_nohyphen, Name),
                                        name_clean_2 = gsub("JROTC\\s\\-\\sLET", 
                                                           "JROTC_LET",
                                                           name_clean_1),
                                        name_clean_3 = gsub("Lab\\-Self", "Lab Self",
                                                           name_clean_2),
                                        name_clean_4 = gsub("Geo\\sI\\-[A|B]", "Geo_I_AorB",
                                                           name_clean_3),
                                        name_clean_5 = gsub("Pre\\-AP", "PreAP",
                                                           name_clean_4),
                                        name_clean_6 = gsub("II\\-RE2", "II\\_RE2", name_clean_5)) %>%
                                dplyr::select(-name_clean_1, -name_clean_2, -name_clean_3, -name_clean_4,
                                             -name_clean_5)


In [None]:
## parse name col into course_name, teacher_period
## (uses the cleaned name so every value splits into exactly three parts)
anacostia_wcourse =  parse_course_info(anacostia_both, "name_clean_6")
head(anacostia_wcourse)

In [None]:
## subset to social studies teachers they provided
## and code electives
## code courses to requested or electives
## elective = 1 when the trimmed course name is one of `electives`;
## "Writing Workshop IA" rows are dropped
electives = c("African American History & Culture", 
             "Sociology")
anacostia_wcourse_socstudies = anacostia_wcourse %>% filter(Primary.Teacher %in% 
                                                           ## filter to social studies teachers school confirmed 
                                    anacostia_socialstudies$teachername) %>%
                            mutate(course_clean = trimws(course_name),
                                elective = ifelse(course_clean %in% electives, 1, 0)) %>%
                            filter(course_clean != "Writing Workshop IA")

## count of students of those teachers in electives
anacostia_wcourse_socstudies %>% group_by(elective) %>% summarise(count_students = sum(Students))


In [None]:
## teacher summary plus strata: exact grades string "12;11" first, then the
## special education list, everything else "Other"
## NOTE(review): `grades == "12;11"` is an exact string match, so a teacher
## whose grades were collected as "11;12" falls through -- confirm
## NOTE(review): anacostia_specialeducation is not defined in the visible part
## of this file; presumably a vector of teacher names
anacostia_teachersummary_socstudies = find_course_grade_periods_byteacher(data = anacostia_wcourse_socstudies, 
                                          teacher_identifier = "Primary.Teacher") %>%
                            mutate(strata_name = 
                                      ifelse(grades == "12;11",
                                    "11th and 12 graders",
                                    ifelse(teacher_id %in% anacostia_specialeducation,
                                    "Special education",
                                    "Other")))


## randomize within strata
strata_levels= unique(anacostia_teachersummary_socstudies$strata_name)
set.seed(91988)
anacostia_randomization  = lapply(strata_levels, 
                               blockrand_withstrata, 
                               data = anacostia_teachersummary_socstudies, 
                               strata_varname = 'strata_name')

anacostia_randomization_all = do.call(rbind.data.frame, anacostia_randomization)

## merge additional info
## inner join on teacher name to attach Teacher.ID (one row per Teacher.ID);
## teachers without a match in anacostia_both are dropped
anacostia_randomization_all_wid = merge(anacostia_randomization_all,
                                       anacostia_both %>% dplyr::select(Primary.Teacher,
                                                                       Teacher.ID) %>% filter(!duplicated(Teacher.ID)),
                                       by.x = "teacher_id",
                                       by.y = "Primary.Teacher")


In [None]:
## merge with emails from roster
## (this cell only loads the roster; the merge happens in the next cell)
anacostia_fullroster = read.csv("../data/anacostia_fullroster_fromSIS.csv")

In [None]:
## inner join emails by Teacher.ID (teachers missing from the roster are
## dropped), sorted by stratum
anacostia_randomization_all_wid_wemail = merge(anacostia_randomization_all_wid,
                                              anacostia_fullroster %>% dplyr::select(Teacher.ID, Email),
                                              by = "Teacher.ID") %>% arrange(strata_name)
anacostia_randomization_all_wid_wemail %>% arrange(strata_name)

In [None]:
## write to csv
## (assignment, ids and emails for the Anacostia social studies teachers)
write.csv(anacostia_randomization_all_wid_wemail, "../randomization_status/anacostia_teacherpilotstatus.csv",
          row.names = FALSE)

# 4. Dunbar

In [None]:
## load Dunbar rosters, tagging each row with its course type
dunbar_t1 = read.csv("../data/dunbar_t1_all.csv") %>% mutate(course_type = "term1_only")
dunbar_yl = read.csv("../data/dunbar_yl_all.csv") %>% mutate(course_type = "yearlong")

## create a single dataset with
## both term1 and yl
dunbar_both = rbind.data.frame(dunbar_t1, dunbar_yl)

In [None]:
## look at distribution of grades 
## within each teacher 
## (middle/high school flags per teacher, across both course types)
dunbar_distgrades = get_grades_eachteacher(data = dunbar_both, 
                                              teacher_identifier = "Primary.Teacher")

In [None]:
## subset to ELA only
## (plus all rows of one additional teacher)
## NOTE(review): dunbar_additionalteacher is not defined in the visible part
## of this file; compared with `==`, so presumably a single teacher name
dunbar_ela = dunbar_both %>% filter(Subject == "english/language arts" |
                                   Primary.Teacher == dunbar_additionalteacher) 

## keep all grades since some teachers teach
## across multiple grades
## parse name col into course_name, teacher_period
## (first remove hyphens that belong to course names so the split on "-"
## yields exactly three parts)
dunbar_ela = dunbar_ela %>% mutate(name_clean = gsub("Pre\\-AP", "PreAP",
                                                    Name),
                                  name_clean_2 = gsub("Pre\\-Algebra", "PreAlgebra",
                                                     name_clean))




In [None]:
## split the cleaned name into course_name / teacher / period
dunbar_ela_wcourses = parse_course_info(dunbar_ela, "name_clean_2")

## keep yearlong courses only
dunbar_ela_wcourses_onlyyl = dunbar_ela_wcourses %>% filter(course_type != "term1_only")

In [None]:
## one row per ELA teacher, sorted by the grades string
dunbar_teachersummary_ela = find_course_grade_periods_byteacher(data = dunbar_ela_wcourses_onlyyl, 
                                          teacher_identifier = "Primary.Teacher") %>%
                        arrange(grades)

## pool = teachers whose grades string is exactly "9" or "10", or who match
## the keep pattern, minus teachers matching the remove pattern
## NOTE(review): `grades` is a ";"-joined string, so a teacher with "9;10"
## only enters the pool through dunbar_teacherkeep -- confirm
## NOTE(review): dunbar_teacherkeep and dunbar_teachremove are not defined in
## the visible part of this file; used as grepl() patterns
dunbar_teachersinpool = dunbar_teachersummary_ela %>% filter((grades == 9 | grades == 10 |
                                    grepl(dunbar_teacherkeep, teacher_id)) &
                                    !grepl(dunbar_teachremove, teacher_id))



In [None]:
## display the full ELA teacher summary (before restricting to the pool)
dunbar_teachersummary_ela

In [None]:

## two strata based on a name pattern rather than on total_students directly
## NOTE(review): large_classteachers is not defined in the visible part of
## this file; used as a grepl() pattern against teacher_id
dunbar_teachersinpool = dunbar_teachersinpool %>% mutate(strata_name = ifelse(grepl(large_classteachers, 
                            teacher_id),
                                        "Many students",
                        "Few students"))
dunbar_teachersinpool



In [None]:
## randomize within strata
strata_levels= unique(dunbar_teachersinpool$strata_name)
set.seed(91988)
dunbar_randomization  = lapply(strata_levels, 
                               blockrand_withstrata, 
                               data = dunbar_teachersinpool, 
                               strata_varname = 'strata_name')

dunbar_randomization_all = do.call(rbind.data.frame, dunbar_randomization)

## merge additional info
## NOTE(review): the final select() does not keep Teacher.ID, so this inner
## join only drops teachers missing from dunbar_both; the "_wid" result has
## no id column -- confirm whether Teacher.ID should be kept
dunbar_randomization_all_wid = merge(dunbar_randomization_all,
                                       dunbar_both %>% dplyr::select(Primary.Teacher,
                                                                       Teacher.ID) %>% filter(!duplicated(Teacher.ID)),
                                       by.x = "teacher_id",
                                       by.y = "Primary.Teacher") %>% arrange(strata_name) %>%
                            dplyr::select(teacher_id, treatment, courses, grades, total_students, strata_name)
dunbar_randomization_all_wid

## merge with email from SIS
## (no merge happens here; this only displays the Pilot rows)
dunbar_randomization_all_wid %>% filter(treatment == "Pilot")


In [None]:
## save the Dunbar assignment table as CSV, without row names
write.csv(
  x = dunbar_randomization_all_wid,
  file = "../randomization_status/dunbar_teacherpilotstatus.csv",
  row.names = FALSE
)

# 5. Johnson

In [None]:
## Summarise each teacher's courses, grades, periods and student count from
## student-level rows.
##
## Arguments:
##   data               data frame holding the teacher identifier column plus
##                      course_name, StudentGrade, period and StudentID
##   teacher_identifier name (string) of the column that identifies a teacher
##
## Returns a data frame with one row per distinct teacher and the columns
##   teacher_id, courses ("; "-separated), grades and periods (";"-separated),
##   and total_students (number of distinct StudentID values).
##
## Side effect: prints one "Returning values for ..." line per teacher.
find_course_grade_periods_byteacher_studlevel <- function(data, teacher_identifier){

    ## get all identifiers
    all_teachers <- unique(data[[teacher_identifier]])

    ## preallocate storage instead of growing vectors inside the loop
    n_teachers <- length(all_teachers)
    unique_courses <- character(n_teachers)
    unique_grades <- character(n_teachers)
    unique_periods <- character(n_teachers)

    ## iterate through teachers; seq_along() yields no iterations when there
    ## are no teachers (1:length() would have iterated over 1 and 0)
    for (i in seq_along(all_teachers)) {

        ## subset to one teacher; %in% never returns NA, so rows with a
        ## missing identifier are not dragged into every other teacher's
        ## subset as all-NA rows (which `==` indexing would do)
        one_teacher <- data[data[[teacher_identifier]] %in% all_teachers[i], ]
        print(sprintf("Returning values for %s", all_teachers[i]))

        ## collapse the distinct values they teach
        unique_courses[i] <- paste(unique(one_teacher$course_name), collapse = "; ")
        unique_grades[i] <- paste(unique(one_teacher$StudentGrade), collapse = ";")
        unique_periods[i] <- paste(unique(one_teacher$period), collapse = ";")
    }

    ## get number of distinct students per teacher
    n_students_perteacher <- data %>%
        group_by_at(vars(one_of(teacher_identifier))) %>%
        summarise(total_students = length(unique(StudentID)))

    ## left join n students per teacher onto other teacher attributes
    teacher_summary <- merge(data.frame(teacher_id = all_teachers,
                                        courses = unique_courses,
                                        grades = unique_grades,
                                        periods = unique_periods),
                             n_students_perteacher,
                             by.x = "teacher_id",
                             by.y = teacher_identifier,
                             all.x = TRUE)

    ## return data
    return(teacher_summary)
}

## Extend a per-teacher summary with course counts and the average grade.
##
## Arguments:
##   longform_data        long data frame containing course_name, StudentGrade,
##                        the teacher identifier column and the variables named
##                        in vars_finddummies
##   vars_finddummies     character vector of variables to expand into
##                        indicator columns
##   teacher_identifier   name (string) of the teacher column in longform_data
##   wide_teacher_summary data frame with one row per teacher, keyed by a
##                        teacher_id column
##
## Returns wide_teacher_summary merged (matching teachers only) with the
## per-teacher indicator sums and an avg_grade column.
aggregate_courses_grades_studlevel <- function(longform_data, vars_finddummies, teacher_identifier,
                                    wide_teacher_summary){

    ## indicator columns for the requested variables (no intercept term)
    rhs_terms <- paste(vars_finddummies, collapse = "+")
    indicator_matrix <- model.matrix(formula(sprintf("~ %s - 1", rhs_terms)),
                                     data = longform_data)

    ## attach the indicators to the long data
    long_with_indicators <- cbind.data.frame(longform_data, indicator_matrix)

    ## per teacher, add up every remaining column whose name contains
    ## "course_name"; the raw course_name column itself is dropped first
    course_counts <- long_with_indicators %>%
        dplyr::select(-course_name) %>%
        group_by_at(vars(one_of(teacher_identifier))) %>%
        summarise_at(vars(contains("course_name")), sum)

    ## per teacher, mean of StudentGrade read as a number
    mean_grades <- longform_data %>%
        mutate(grade_numeric = as.numeric(as.character(StudentGrade))) %>%
        group_by_at(vars(one_of(teacher_identifier))) %>%
        summarise(avg_grade = mean(grade_numeric))

    ## join both per-teacher tables onto the wide summary
    summary_plus_courses <- merge(wide_teacher_summary,
                                  course_counts,
                                  by.x = "teacher_id",
                                  by.y = teacher_identifier)

    merge(summary_plus_courses,
          mean_grades,
          by.x = "teacher_id",
          by.y = teacher_identifier)
}

In [None]:
## load the Johnson MS roster file
johnson <- read.csv("../data/Johnson MS Rosters for Randomization.csv")

## keep only classes whose name mentions Advisory, Math or Science
johnson_interest <- johnson %>%
  filter(grepl("Advisory|Math|Science", ClassName))

## parse courses: split ClassName into course_name / teacher / period and
## tag each row with its class type
johnson_wcourses <- parse_course_info(johnson_interest, "ClassName") %>%
  mutate(type = case_when(grepl("Advisory", course_name) ~ "Advisory",
                          grepl("Math", course_name) ~ "Math",
                          grepl("Science", course_name) ~ "Science",
                          TRUE ~ "Other"))

## Per-teacher summary of advisory classes, computed once (the summary helper
## prints a line per teacher, so a repeated call only duplicates output).
## grades_coarse keeps a single grade "6"/"7"/"8" as is and labels every other
## grades string (e.g. several grades) as "Mixed".
johnson_teachersummary_advisory <- find_course_grade_periods_byteacher_studlevel(
    data = johnson_wcourses %>% filter(type == "Advisory"),
    teacher_identifier = "TeacherEmail1"
  ) %>%
  arrange(grades) %>%
  mutate(grades_coarse = ifelse(grades %in% c("6", "7", "8"),
                                as.character(grades),
                                "Mixed"))

head(johnson_teachersummary_advisory)

In [None]:
## fixed seed so the advisory assignment is reproducible
set.seed(21483)

## Randomize advisory teachers with experiment::randomize, matching on
## total_students (presumably forming matched pairs, given n.block = N/2 and
## the match.id used below -- TODO confirm against the package docs).
## NOTE(review): an earlier comment here said "separate by grade", but grade
## does not enter this call.
## NOTE(review): n.block is non-integer when the number of teachers is odd.
match_result = randomize(johnson_teachersummary_advisory,
                 group = c("Pilot", "No pilot"), # omitted ratio arg bc default is 0.5
                 n.block = nrow(johnson_teachersummary_advisory)/2,
                match = formula(sprintf("~ %s", "total_students")),
                complete = TRUE)

match_result

## attach assignment, pair id and teacher id to the data returned by randomize
## NOTE(review): assumes match_result$data keeps the row order of
## johnson_teachersummary_advisory -- verify.
df_withpairs = match_result$data %>%
             mutate(treatment_status = match_result[["treatment"]],
                    pair_id = match_result$match.id,
                   teacher_id = johnson_teachersummary_advisory$teacher_id)
#df_withpairs

## add back the remaining teacher attributes (all but total_students and
## periods) and order by pair, then by assignment
df_withpairs_withattributes = merge(df_withpairs,
                                   johnson_teachersummary_advisory %>% dplyr::select(-total_students, -periods), 
                                    by = "teacher_id", all.x = TRUE) %>%
                                arrange(pair_id, treatment_status)

df_withpairs_withattributes 

## filter to pilot
df_withpairs_withattributes %>% filter(treatment_status == "Pilot") %>% dplyr::select(teacher_id)

In [None]:
## Per-teacher summary of math classes, computed once (the summary helper
## prints a line per teacher, so a repeated call only duplicates output).
## grades_coarse keeps a single grade "6"/"7"/"8" and labels anything else
## "Mixed"; strata groups grades 6 and 7 versus grade 8 and mixed.
johnson_teachersummary_math <- find_course_grade_periods_byteacher_studlevel(
    data = johnson_wcourses %>% filter(type == "Math"),
    teacher_identifier = "TeacherEmail1"
  ) %>%
  arrange(grades) %>%
  mutate(grades_coarse = ifelse(grades %in% c("6", "7", "8"),
                                as.character(grades),
                                "Mixed"),
         strata = ifelse(grades_coarse %in% c("6", "7"),
                         "Sixth and seventh",
                         "8th and mixed"))

johnson_teachersummary_math


## create blocks for math (no formula since small N and randomize)
strata_levels <- unique(johnson_teachersummary_math$strata)
## fixed seed so the assignment is reproducible; keep it directly before
## the randomization call
set.seed(91988)
johnson_randomization <- lapply(strata_levels,
                                blockrand_withstrata,
                                data = johnson_teachersummary_math,
                                strata_varname = 'strata')

## stack the per-stratum results into a single data frame
johnson_randomization_all <- do.call(rbind.data.frame, johnson_randomization)

johnson_randomization_all %>% arrange(treatment)

johnson_randomization_all %>% filter(treatment == "Pilot") %>% dplyr::select(teacher_id)

## teacher ids by arm, as plain vectors
## NOTE(review): the control label "Not pilot" must match what
## blockrand_withstrata emits (the advisory cell uses "No pilot") -- confirm.
tx_teachers <- johnson_randomization_all %>%
  filter(treatment == "Pilot") %>%
  pull(teacher_id)
control_teachers <- johnson_randomization_all %>%
  filter(treatment == "Not pilot") %>%
  pull(teacher_id)

control_teachers

In [None]:
## look at overlap in students
johnson_wcourses_overlap = johnson_wcourses %>%
                    mutate(tx = ifelse(TeacherEmail1 %in%  tx_teachers, 1, 0),
                          control = ifelse(TeacherEmail1 %in% control_teachers, 1, 0)) %>%
                    group_by(StudentID) %>%
                    summarise(all_tx = paste(tx, collapse = ";"),
                             all_control = paste(control, collapse = ";"),
                             crossover = ifelse(grepl("1", all_tx) & grepl("1", all_control), 1, 0))

table(johnson_wcourses_overlap$crossover)

16/length(unique(johnson_wcourses_overlap$StudentID))

In [None]:
## Per-teacher summary of science classes, computed once (the summary helper
## prints a line per teacher, so a repeated call only duplicates output).
## grades_coarse keeps a single grade "6"/"7"/"8" as is and labels every other
## grades string (e.g. several grades) as "Mixed".
johnson_teachersummary_science <- find_course_grade_periods_byteacher_studlevel(
    data = johnson_wcourses %>% filter(type == "Science"),
    teacher_identifier = "TeacherEmail1"
  ) %>%
  arrange(grades) %>%
  mutate(grades_coarse = ifelse(grades %in% c("6", "7", "8"),
                                as.character(grades),
                                "Mixed"))

johnson_teachersummary_science