In [None]:
import warnings

import numpy as np
import pandas as pd

## graphing
from plotnine import *

from suso import abtesting
from suso.plotting_themes import standard_background
from suso.utils import here

# NOTE(review): this silences every warning for the rest of the notebook, not
# just a known noisy one — consider narrowing the filter to a category/module.
warnings.filterwarnings("ignore")

In [None]:
# Input and output locations, resolved through the project's `here` helper.
# NOTE(review): no random seed is set anywhere in this notebook; the
# `num_draws=10000` results below are presumably stochastic — confirm whether
# suso.abtesting seeds its own generator before relying on exact reproducibility.
DATA_DIR = here("data")
OUTPUT_DIR = here("output")

In [None]:
def create_plot_ofcertainty(
    probs_alldraws,
    single_draw_value,
    scale_y,
    direction,
    outcome,
    adjust_x=-0.05,
    treatment_color="#2B4888",
    control_color="#444444",
):
    """Plot the density of degree-of-certainty draws with the observed value annotated.

    Parameters
    ----------
    probs_alldraws : array-like
        One degree-of-certainty value per draw; becomes the ``certainty``
        column plotted on the x axis.
    single_draw_value : float
        Observed (single-draw) degree of certainty. Rounded to three decimals
        for the annotation text and, shifted by ``adjust_x``, used as the
        annotation's x position.
    scale_y : float
        Vertical scale for placing the annotation; the text is drawn at
        ``(scale_y / 4) * 3.8``, i.e. 95% of this value.
    direction : str
        Word interpolated into the x-axis label (e.g. ``"lower"``).
    outcome : str
        Outcome description interpolated into the x-axis label.
    adjust_x : float, default -0.05
        Horizontal offset added to ``single_draw_value`` for the annotation.
    treatment_color : str, default "#2B4888"
        Outline color of the density curve.
    control_color : str, default "#444444"
        Currently unused; kept so existing callers that pass it keep working.

    Returns
    -------
    plotnine.ggplot
        The assembled plot (not drawn or saved here).
    """
    prob_df = pd.DataFrame({"certainty": probs_alldraws})
    annotation_label = "Observed degree\nof certainty:\n{}".format(
        round(single_draw_value, 3)
    )
    x_label = "Degree of certainty that treatment students\nhad {direction} {outcome} than control students".format(
        direction=direction, outcome=outcome
    )
    plot = (
        ggplot(prob_df, aes(x="certainty"))
        # Fill is a constant, not a mapped aesthetic, so no fill scale is needed.
        + geom_density(fill="white", color=treatment_color)
        + standard_background
        + ylab("Density of draws")
        # Ticks every 0.1 from 0 to 1 inclusive; the previous
        # np.arange(0, 1, 0.1) stopped at 0.9 and left 1.0 without a tick.
        + scale_x_continuous(breaks=np.linspace(0, 1, 11))
        + annotate(
            "text",
            x=single_draw_value + adjust_x,
            y=(scale_y / 4) * 3.8,
            label=annotation_label,
        )
        + xlab(x_label)
    )
    return plot

# 1. Load data

In [None]:
# Read the analytic attendance file prepared for the A/B tests.
attendance_analytic = pd.read_pickle(DATA_DIR / "attendance_readyforAB.pkl")
attendance_analytic.head()

# Split by treatment arm. Rows whose is_treatment equals neither True nor
# False (e.g. missing) end up in neither frame.
treat_data = attendance_analytic.loc[
    attendance_analytic["is_treatment"].eq(True)
].copy()
control_data = attendance_analytic.loc[
    attendance_analytic["is_treatment"].eq(False)
].copy()

# 2. A/B testing of binary year-end attendance outcomes

## 2.1 Truancy

### 2.1.1 One draw version

In [None]:
# Column used as the truancy outcome; the next cell compares it against 1 and 0.
outcome_varname = "truant_indicator"

In [None]:
# Unique students (by usi) with and without the outcome, per arm.
successes_treatment = treat_data.loc[treat_data[outcome_varname] == 1, "usi"].nunique()
failures_treatment = treat_data.loc[treat_data[outcome_varname] == 0, "usi"].nunique()
successes_control = control_data.loc[control_data[outcome_varname] == 1, "usi"].nunique()
failures_control = control_data.loc[control_data[outcome_varname] == 0, "usi"].nunique()

# Single-draw degree of certainty (stored under the "treatment greater than
# control" name) and its complement for the "lower" direction.
truancy_prob_txgreaterthancontrol = abtesting.degree_of_certainty(
    successes_control,
    failures_control,
    successes_treatment,
    failures_treatment,
)
truancy_prob_txlessthancontrol = 1 - truancy_prob_txgreaterthancontrol

# Summary printout: counts and percentages per arm, then the headline number.
outcome = "truant"
direction = "LOWER"
print(f"Count {outcome} in treatment group {successes_treatment}")
print(
    f"Percent {outcome} in treatment group {successes_treatment / (successes_treatment + failures_treatment) * 100}"
)
print(f"Count {outcome} in control group {successes_control}")
print(
    f"Percent {outcome} in control group {(successes_control / (successes_control + failures_control) * 100)!s}"
)
print("--------------------------------")
print(
    f"A/B test single draw prob. that treatment group had {direction} rate than control group: {truancy_prob_txlessthancontrol}"
)

In [None]:
# Group sizes and raw outcome rates, rebuilt from the single-draw counts of
# the previous cell.
num_treatment = successes_treatment + failures_treatment
num_control = successes_control + failures_control
base_rate = successes_control / num_control
treatment_rate = successes_treatment / num_treatment

# Many-draw version of the truancy comparison (10,000 draws). The call returns
# three values: the *_alldraws value feeds the certainty plot and the posterior
# is exported at the end of the notebook.
# NOTE(review): tx_greater=False presumably selects the "treatment lower than
# control" direction — confirm against suso.abtesting.
(
    truancy_prob_txlessthancontrol_perc,
    truancy_prob_txlessthancontrol_alldraws,
    truancy_posterior,
) = abtesting.degree_of_certainty_draws_forwriteup(
    base_rate=base_rate,
    treatment_rate=treatment_rate,
    successes_control=successes_control,
    successes_treatment=successes_treatment,
    failures_control=failures_control,
    failures_treatment=failures_treatment,
    num_control=num_control,
    num_treatment=num_treatment,
    num_draws=10000,
    tx_greater=False,
)

In [None]:
# Density of the truancy certainty draws, annotated with the single-draw value.
# scale_y only positions the annotation text (at 95% of its value).
certainty_truancy = create_plot_ofcertainty(
    probs_alldraws=truancy_prob_txlessthancontrol_alldraws,
    single_draw_value=truancy_prob_txlessthancontrol,
    scale_y=2,
    direction="lower",
    outcome="year-end truancy",
)
# Write the figure to the output directory as a 12 x 8 PDF.
ggsave(
    certainty_truancy,
    OUTPUT_DIR / "certainty_truancy.pdf",
    width=12,
    height=8,
)

## 2.2 Chronic absenteeism

### 2.2.1 One draw version

In [None]:
# Column used as the chronic-absenteeism outcome; the next cell compares it
# against 1 and 0.
outcome_varname = "chronicabsent_indicator"

In [None]:
# Unique students (by usi) with and without the outcome, per arm.
# len(unique()) is kept on purpose: unlike nunique(), it counts a missing usi
# as one distinct value.
successes_treatment = len(
    treat_data.loc[treat_data[outcome_varname] == 1, "usi"].unique()
)
failures_treatment = len(
    treat_data.loc[treat_data[outcome_varname] == 0, "usi"].unique()
)
successes_control = len(
    control_data.loc[control_data[outcome_varname] == 1, "usi"].unique()
)
failures_control = len(
    control_data.loc[control_data[outcome_varname] == 0, "usi"].unique()
)

# Single-draw degree of certainty, stored under the "treatment greater than
# control" name; its complement is printed below.
ca_prob_txgreaterthancontrol = abtesting.degree_of_certainty(
    successes_control,
    failures_control,
    successes_treatment,
    failures_treatment,
)

# Summary printout: counts and percentages per arm, then the headline number.
outcome = "chronically absent"
direction = "LOWER"
print(f"Count {outcome} in treatment group {successes_treatment!s}")
print(
    f"Percent {outcome} in treatment group {(successes_treatment / (successes_treatment + failures_treatment) * 100)!s}"
)
print(f"Count {outcome} in control group {successes_control!s}")
print(
    f"Percent {outcome} in control group {(successes_control / (successes_control + failures_control) * 100)!s}"
)
print("--------------------------------")
print(
    f"A/B test single draw prob. that treatment group had {direction} rate than control group: {(1 - ca_prob_txgreaterthancontrol)!s}"
)

### 2.2.2 Many draw version

In [None]:
# Group sizes and raw outcome rates, rebuilt from the chronic-absenteeism
# counts of the previous cell.
num_treatment = successes_treatment + failures_treatment
num_control = successes_control + failures_control
base_rate = successes_control / num_control
treatment_rate = successes_treatment / num_treatment

# Many-draw version (10,000 draws). Three values are returned: *_alldraws feeds
# the certainty plot and the posterior is exported at the end of the notebook.
# NOTE(review): tx_greater=False presumably selects the "treatment lower than
# control" direction — confirm against suso.abtesting.
(
    ca_prob_txlessthancontrol_perc,
    ca_prob_txlessthancontrol_alldraws,
    ca_posterior,
) = abtesting.degree_of_certainty_draws_forwriteup(
    base_rate=base_rate,
    treatment_rate=treatment_rate,
    successes_control=successes_control,
    successes_treatment=successes_treatment,
    failures_control=failures_control,
    failures_treatment=failures_treatment,
    num_control=num_control,
    num_treatment=num_treatment,
    num_draws=10000,
    tx_greater=False,
)

In [None]:
# Density of the chronic-absenteeism certainty draws, annotated with the
# single-draw "treatment lower than control" value.
certainty_ca = create_plot_ofcertainty(
    probs_alldraws=ca_prob_txlessthancontrol_alldraws,
    single_draw_value=(1 - ca_prob_txgreaterthancontrol),
    scale_y=2,
    direction="lower",
    outcome="year-end chronic absenteeism",
)
# Save the figure like every other certainty plot in this notebook; this one
# was previously built but never written out or displayed.
ggsave(
    certainty_ca,
    OUTPUT_DIR / "certainty_ca.pdf",
    width=12,
    height=8,
)

# 3. A/B testing: count outcomes 

Repeat the same process (single-draw test, many-draw test, certainty plot) for the count outcomes.

## 3.1 unexcused in two week period: median delivery date

### 3.1.1: single draws

In [None]:
# Columns holding the event count and the exposure it is measured against.
outcome_varname = "diff_unexcused_mediandelivery"
exposure_varname = "diff_schooldays_mediandelivery"

# Totals per arm.
events_treatment = treat_data[outcome_varname].sum()
exposure_treatment = treat_data[exposure_varname].sum()
events_control = control_data[outcome_varname].sum()
exposure_control = control_data[exposure_varname].sum()


# Single-draw test, stored under the "treatment greater than control" name,
# followed by its complement for the "lower" direction.
unex_mediandelivery_prob_txgreaterthancontrol = abtesting.degree_of_certainty_counts(
    events_control=events_control,
    exposure_control=exposure_control,
    events_tx=events_treatment,
    exposure_tx=exposure_treatment,
)
unex_mediandelivery_prob_txlessthancontrol = (
    1 - unex_mediandelivery_prob_txgreaterthancontrol
)

# Summary printout: counts and rates per arm, then the headline number.
outcome = "of unexcused absences in two weeks after letter (median delivery day)"
direction = "lower"
print(f"Count {outcome} in treatment group {events_treatment!s}")
print(f"Rate {outcome} in treatment group {(events_treatment / (exposure_treatment))!s}")
print(f"Count {outcome} in control group {events_control!s}")
print(f"Rate {outcome} in control group {(events_control / (exposure_control))!s}")
print("--------------------------------")
print(
    f"A/B test single draw prob. that treatment group had {direction} count than control group: {unex_mediandelivery_prob_txlessthancontrol!s}"
)

### 3.1.2 multiple draws

In [None]:
# Many-draw version of the count comparison (10,000 draws), using the event and
# exposure totals from the previous cell. Three values are returned: *_alldraws
# feeds the certainty plot and the posterior is exported at the end.
# NOTE(review): tx_greater=False presumably selects the "treatment lower than
# control" direction — confirm against suso.abtesting.
(
    unex_mediandelivery_perc,
    unex_mediandelivery_alldraws,
    unex_mediandelivery_posterior,
) = abtesting.degree_of_certainty_count_draws_forwriteup(
    control_events=events_control,
    treatment_events=events_treatment,
    control_exposure=exposure_control,
    treatment_exposure=exposure_treatment,
    num_draws=10000,
    tx_greater=False,
)

In [None]:
# Inspect the per-draw values.
# NOTE(review): this displays the whole object (presumably one entry per draw);
# consider showing only a slice or summary once its type is confirmed.
unex_mediandelivery_alldraws

In [None]:
# Density of the unexcused-absence certainty draws (median delivery date),
# annotated with the single-draw value. scale_y only positions the annotation.
# The outcome text previously read "absencesover" (missing space) in the
# rendered x-axis label.
certainty_unex_itt = create_plot_ofcertainty(
    probs_alldraws=unex_mediandelivery_alldraws,
    single_draw_value=unex_mediandelivery_prob_txlessthancontrol,
    scale_y=40,
    direction="lower",
    outcome="count of unexcused absences over two weeks\n(assume 7 day delivery timeline)\n",
    adjust_x=-0.15,
)

# Write the figure to the output directory as a 12 x 8 PDF.
ggsave(
    certainty_unex_itt,
    OUTPUT_DIR / "certainty_unex_itt.pdf",
    width=12,
    height=8,
)

## 3.2 unexcused in two week period: observed delivery

In [None]:
# Columns holding the event count and the exposure it is measured against.
outcome_varname = "diff_unexcused_observeddelivery"
exposure_varname = "diff_schooldays_observeddelivery"

# Totals per arm.
events_treatment = treat_data[outcome_varname].sum()
exposure_treatment = treat_data[exposure_varname].sum()
events_control = control_data[outcome_varname].sum()
exposure_control = control_data[exposure_varname].sum()


# Single-draw test, stored under the "treatment greater than control" name,
# followed by its complement for the "lower" direction.
unex_obsdelivery_prob_txgreaterthancontrol = abtesting.degree_of_certainty_counts(
    events_control=events_control,
    exposure_control=exposure_control,
    events_tx=events_treatment,
    exposure_tx=exposure_treatment,
)
unex_obsdelivery_prob_txlessthancontrol = 1 - unex_obsdelivery_prob_txgreaterthancontrol

# Summary printout: counts, exposures and rates per arm, then the headline number.
outcome = "of unexcused absences in two weeks after letter (observed delivery day)"
direction = "lower"
print(f"Count {outcome} in treatment group {events_treatment!s}")
print(f"Exposure {outcome} in treatment group {exposure_treatment!s}")
print(f"Rate {outcome} in treatment group {(events_treatment / (exposure_treatment))!s}")
print(f"Count {outcome} in control group {events_control!s}")
print(f"Exposure {outcome} in control group {exposure_control!s}")
print(f"Rate {outcome} in control group {(events_control / (exposure_control))!s}")
print("--------------------------------")
print(
    f"A/B test single draw prob. that treatment group had {direction} count than control group: {unex_obsdelivery_prob_txlessthancontrol!s}"
)

In [None]:
# Many-draw version of the count comparison (10,000 draws) for the
# observed-delivery totals computed in the previous cell.
# NOTE(review): tx_greater=False presumably selects the "treatment lower than
# control" direction — confirm against suso.abtesting.
(
    unex_obsdelivery_perc,
    unex_obsdelivery_alldraws,
    unex_obsdelivery_posterior,
) = abtesting.degree_of_certainty_count_draws_forwriteup(
    control_events=events_control,
    treatment_events=events_treatment,
    control_exposure=exposure_control,
    treatment_exposure=exposure_treatment,
    num_draws=10000,
    tx_greater=False,
)

# Density of the draws, annotated with the single-draw value; scale_y only
# positions the annotation text.
certainty_unex_observed = create_plot_ofcertainty(
    probs_alldraws=unex_obsdelivery_alldraws,
    single_draw_value=unex_obsdelivery_prob_txlessthancontrol,
    scale_y=20,
    direction="lower",
    outcome="count of unexcused absences over two weeks\n(using observed delivery date)\n",
    adjust_x=-0.15,
)

# Write the figure to the output directory as a 12 x 8 PDF.
ggsave(
    certainty_unex_observed,
    OUTPUT_DIR / "certainty_unex_observed.pdf",
    width=12,
    height=8,
)

## 3.3 unexcused and excused in two-week period: median delivery date


In [None]:
# Columns holding the event count (excused or unexcused absences, median
# delivery date) and the exposure it is measured against.
outcome_varname = "diff_excusedorunexcused_mediandelivery"
exposure_varname = "diff_schooldays_mediandelivery"
events_treatment = np.sum(treat_data[outcome_varname])
exposure_treatment = np.sum(treat_data[exposure_varname])
events_control = np.sum(control_data[outcome_varname])
exposure_control = np.sum(control_data[exposure_varname])


## single draw test
unexandex_mediandelivery_prob_txgreaterthancontrol = (
    abtesting.degree_of_certainty_counts(
        events_control=events_control,
        exposure_control=exposure_control,
        events_tx=events_treatment,
        exposure_tx=exposure_treatment,
    )
)

# Complement of the stored "treatment greater than control" value.
unexandex_mediandelivery_prob_txlessthancontrol = (
    1 - unexandex_mediandelivery_prob_txgreaterthancontrol
)

# The label now matches the columns analysed in this cell; it was previously
# copy-pasted from the unexcused-only / observed-delivery section.
outcome = (
    "of unexcused or excused absences in two weeks after letter (median delivery day)"
)
direction = "lower"
print("Count {} in treatment group ".format(outcome) + str(events_treatment))
print(
    "Rate {} in treatment group ".format(outcome)
    + str(events_treatment / (exposure_treatment))
)
print("Count {} in control group ".format(outcome) + str(events_control))
print(
    "Rate {} in control group ".format(outcome)
    + str(events_control / (exposure_control))
)
print("--------------------------------")
print(
    "A/B test single draw prob. that treatment group had {} count than control group: ".format(
        direction
    )
    + str(unexandex_mediandelivery_prob_txlessthancontrol)
)

In [None]:
# Many-draw version of the count comparison (10,000 draws) for the
# excused-or-unexcused, median-delivery totals from the previous cell.
# NOTE(review): tx_greater=False presumably selects the "treatment lower than
# control" direction — confirm against suso.abtesting.
(
    unexandex_mediandelivery_perc,
    unexandex_mediandelivery_alldraws,
    unexandex_mediandelivery_posterior,
) = abtesting.degree_of_certainty_count_draws_forwriteup(
    control_events=events_control,
    treatment_events=events_treatment,
    control_exposure=exposure_control,
    treatment_exposure=exposure_treatment,
    num_draws=10000,
    tx_greater=False,
)

In [None]:
# Density of the excused-or-unexcused certainty draws (median delivery date),
# annotated with the single-draw value; scale_y only positions the annotation.
certainty_both_itt = create_plot_ofcertainty(
    probs_alldraws=unexandex_mediandelivery_alldraws,
    single_draw_value=unexandex_mediandelivery_prob_txlessthancontrol,
    scale_y=1.5,
    direction="lower",
    outcome="count of unexcused OR excused absences over two weeks\n(assume 7 day delivery timeline)\n",
    adjust_x=-0.15,
)

# Write the figure to the output directory as a 12 x 8 PDF.
ggsave(
    certainty_both_itt,
    OUTPUT_DIR / "certainty_both_itt.pdf",
    width=12,
    height=8,
)

## 3.4 unexcused and excused in two-week period: observed delivery

In [None]:
# Columns holding the event count (excused or unexcused absences, observed
# delivery date) and the exposure it is measured against.
outcome_varname = "diff_excusedorunexcused_observeddelivery"
exposure_varname = "diff_schooldays_observeddelivery"
events_treatment = np.sum(treat_data[outcome_varname])
exposure_treatment = np.sum(treat_data[exposure_varname])
events_control = np.sum(control_data[outcome_varname])
exposure_control = np.sum(control_data[exposure_varname])


## single draw test
unexandex_obsdelivery_prob_txgreaterthancontrol = abtesting.degree_of_certainty_counts(
    events_control=events_control,
    exposure_control=exposure_control,
    events_tx=events_treatment,
    exposure_tx=exposure_treatment,
)

# Complement of the stored "treatment greater than control" value.
unexandex_obsdelivery_prob_txlessthancontrol = (
    1 - unexandex_obsdelivery_prob_txgreaterthancontrol
)

# The label now names both absence types; it was previously copy-pasted from
# the unexcused-only section.
outcome = (
    "of unexcused or excused absences in two weeks after letter (observed delivery day)"
)
direction = "lower"
print("Count {} in treatment group ".format(outcome) + str(events_treatment))
print("Exposure {} in treatment group ".format(outcome) + str(exposure_treatment))
print(
    "Rate {} in treatment group ".format(outcome)
    + str(events_treatment / (exposure_treatment))
)
print("Count {} in control group ".format(outcome) + str(events_control))
print("Exposure {} in control group ".format(outcome) + str(exposure_control))
print(
    "Rate {} in control group ".format(outcome)
    + str(events_control / (exposure_control))
)
print("--------------------------------")
print(
    "A/B test single draw prob. that treatment group had {} count than control group: ".format(
        direction
    )
    + str(unexandex_obsdelivery_prob_txlessthancontrol)
)

In [None]:
# Many-draw version of the count comparison (10,000 draws) for the
# excused-or-unexcused, observed-delivery totals from the previous cell.
# NOTE(review): tx_greater=False presumably selects the "treatment lower than
# control" direction — confirm against suso.abtesting.
(
    unexandex_obsdelivery_perc,
    unexandex_obsdelivery_alldraws,
    unexandex_obsdelivery_posterior,
) = abtesting.degree_of_certainty_count_draws_forwriteup(
    control_events=events_control,
    treatment_events=events_treatment,
    control_exposure=exposure_control,
    treatment_exposure=exposure_treatment,
    num_draws=10000,
    tx_greater=False,
)

In [None]:
# Density of the excused-or-unexcused certainty draws for the OBSERVED delivery
# date. This previously passed the median-delivery draws and single-draw value,
# so the saved figure duplicated certainty_both_itt under an
# "observed delivery" label.
certainty_both_obs = create_plot_ofcertainty(
    probs_alldraws=unexandex_obsdelivery_alldraws,
    single_draw_value=unexandex_obsdelivery_prob_txlessthancontrol,
    scale_y=1.5,
    direction="lower",
    outcome="count of unexcused OR excused absences over two weeks\n(using observed delivery date)\n",
    adjust_x=-0.15,
)

# Write the figure to the output directory as a 12 x 8 PDF.
ggsave(
    certainty_both_obs,
    OUTPUT_DIR / "certainty_both_obs.pdf",
    width=12,
    height=8,
)

# 4. Engagement in 14 days (rerunning here to standardize)

In [None]:
%%capture

# Engagement data for the A/B test.
engage_df = pd.read_csv(DATA_DIR / "engage_forAB.csv")
# 0 only where engaged_14days equals False; every other value (including a
# missing one) is coded 1.
# NOTE(review): confirm that missing engaged_14days should count as engaged.
engage_df["engaged_binary"] = np.where(engage_df.engaged_14days == False, 0, 1)
# NOTE: rebinds treat_data / control_data, which held the attendance split up
# to this point; cells above must not be re-run after this one without
# reloading the attendance data.
treat_data = engage_df.loc[engage_df.is_treatment == True].copy()
control_data = engage_df.loc[engage_df.is_treatment == False].copy()
outcome_varname = "engaged_binary"
# Output of this preview is suppressed by the cell's %%capture magic.
engage_df.head()

In [None]:
# Unique students (by student_id) with and without the outcome, per arm.
successes_treatment = len(
    treat_data.student_id[treat_data[outcome_varname] == 1].unique()
)
failures_treatment = len(
    treat_data.student_id[treat_data[outcome_varname] == 0].unique()
)
successes_control = len(
    control_data.student_id[control_data[outcome_varname] == 1].unique()
)
failures_control = len(
    control_data.student_id[control_data[outcome_varname] == 0].unique()
)

# Single-draw degree of certainty, stored under the "treatment greater than
# control" name.
engaged_prob_txgreaterthancontrol = abtesting.degree_of_certainty(
    successes_control, failures_control, successes_treatment, failures_treatment
)
# Complement of the ENGAGEMENT result. This was previously derived from
# truancy_prob_txgreaterthancontrol, so the engagement plot was annotated with
# the truancy number.
engaged_prob_txlessthancontrol = 1 - engaged_prob_txgreaterthancontrol

outcome = "engaged"
direction = "LOWER"
print("Count {} in treatment group ".format(outcome) + str(successes_treatment))
print(
    "Percent {} in treatment group ".format(outcome)
    + str(successes_treatment / (successes_treatment + failures_treatment) * 100)
)
print("Count {} in control group ".format(outcome) + str(successes_control))
print(
    "Percent {} in control group ".format(outcome)
    + str(successes_control / (successes_control + failures_control) * 100)
)
print("--------------------------------")
# Report the value that matches the "LOWER" wording (and the plot that
# follows); the tx-greater probability was printed here before.
print(
    "A/B test single draw prob. that treatment group had {} rate than control group: ".format(
        direction
    )
    + str(engaged_prob_txlessthancontrol)
)

In [None]:
## Many-draw version: group sizes and raw rates rebuilt from the single-draw
## counts of the previous cell.
num_treatment = successes_treatment + failures_treatment
num_control = successes_control + failures_control
base_rate = successes_control / num_control
treatment_rate = successes_treatment / num_treatment

# 10,000 draws; three values are returned, of which *_alldraws feeds the plot
# below and the posterior is exported at the end of the notebook.
# NOTE(review): tx_greater=False presumably selects the "treatment lower than
# control" direction — confirm against suso.abtesting.
(
    engaged_prob_txlessthancontrol_perc,
    engaged_prob_txlessthancontrol_alldraws,
    engaged_posterior,
) = abtesting.degree_of_certainty_draws_forwriteup(
    base_rate=base_rate,
    treatment_rate=treatment_rate,
    successes_control=successes_control,
    successes_treatment=successes_treatment,
    failures_control=failures_control,
    failures_treatment=failures_treatment,
    num_control=num_control,
    num_treatment=num_treatment,
    num_draws=10000,
    tx_greater=False,
)

# Density of the draws, annotated with the single-draw value; scale_y only
# positions the annotation text.
certainty_engaged = create_plot_ofcertainty(
    probs_alldraws=engaged_prob_txlessthancontrol_alldraws,
    single_draw_value=engaged_prob_txlessthancontrol,
    scale_y=1.5,
    direction="lower",
    outcome="engagement rate",
    adjust_x=-0.15,
)

# NOTE: a bare expression only renders when it is the last statement of a
# cell, so this line does not display the figure here.
certainty_engaged

# Write the figure to the output directory as a 12 x 8 PDF.
ggsave(
    certainty_engaged,
    OUTPUT_DIR / "certainty_engaged.pdf",
    width=12,
    height=8,
)

# 5. Export posteriors to plot in R

In [None]:
## create df with all posteriors
posterior_arrays = [
    truancy_posterior,
    ca_posterior,
    unex_obsdelivery_posterior,
    unex_mediandelivery_posterior,
    unexandex_obsdelivery_posterior,
    unexandex_mediandelivery_posterior,
    engaged_posterior,
]

In [None]:
# One frame per posterior, placed side by side in list order. The seven labels
# are applied by position, so they must stay in the same order as
# posterior_arrays (and each posterior must contribute exactly one column).
posterior_array_df_list = [pd.DataFrame(posterior) for posterior in posterior_arrays]
posterior_array_df = pd.concat(posterior_array_df_list, axis=1).set_axis(
    [
        "truancy",
        "ca",
        "unex_obs",
        "unex_median",
        "unexandex_obs",
        "unexandex_median",
        "engagement",
    ],
    axis=1,
)
posterior_array_df.head()

In [None]:
# Write the combined posteriors to the data directory (without the index
# column) for plotting in R.
posterior_array_df.to_csv(
    DATA_DIR / "attendanceandengageoutcomes_posteriors_toplot.csv", index=False
)