### Import

In [1]:
import pandas as pd
import hauer

In [2]:
# Notebook-wide display settings:
#   - never truncate the text shown inside a cell of a rendered frame
#   - put the float formatter back to the pandas default
pd.options.display.max_colwidth = None
pd.reset_option("display.float_format")

### Naïve BA

In [23]:
# Numerical Example 7.2. Differing ratios of duration (pg 78)
# Dataset: one row per treated entity, with its before/after durations and counts
df = pd.DataFrame(
    {
        'treatment_before_duration': [3, 3, 2, 2, 1],
        'treatment_after_duration': [1, 1, 1, 1, 1],
        'treatment_before_count': [31, 23, 7, 8, 5],
        'treatment_after_count': [7, 4, 1, 5, 7],
    }
)

# Run: returns the augmented frame and the summary table (displayed below)
df_modified, result = hauer.NaiveBeforeAfter(df)

result

  std_teta_vte = var_teta_vte**0.5


Unnamed: 0,NAive BA
lambda,24.0
var_lambda,24.0
std_lambda,4.898979
pi,30.5
var_pi,14.75
std_pi,3.840573
delta,6.5
delta_norm,1.3
var_delta,38.75
std_delta,6.22495


### Comparison Group BA

In [25]:
# Numerical Example 9.3. R.I.D.E. with comparison groups.
# Treatment group: a single entity with equal before/after durations
df_t = pd.DataFrame(
    {
        'treatment_before_duration': [1],
        'treatment_after_duration': [1],
        'treatment_before_count': [173],
        'treatment_after_count': [144],
    }
)

# Comparison group observed over the same two periods
df_cg = pd.DataFrame(
    {
        'comparison_group_before_duration': [1],
        'comparison_group_after_duration': [1],
        'comparison_group_before_count': [897],
        'comparison_group_after_count': [870],
    }
)

# var_w_par is passed through unchanged and shows up as `var_w` in the summary
df_t_modified, df_cg_modified, result = hauer.ComparisonGroupBeforeAfter(
    df_t,
    df_cg,
    var_w_par=0.0055,
)

result

Unnamed: 0,CG BA
K,173.0
L,144.0
M,897.0
N,870.0
rt,0.96882
var_rt/rt2,0.007764
var_w,0.0055
lambda,144.0
var_lambda,144.0
std_lambda,12.0


### Comparison Methods

In [35]:
# Numerical Example 9.3. R.I.D.E. with comparison groups.
# The same treatment data is run through both estimators and the two
# summaries are placed side by side.

# Naive BA
df = pd.DataFrame(
    {
        'treatment_before_duration': [1],
        'treatment_after_duration': [1],
        'treatment_before_count': [173],
        'treatment_after_count': [144],
    }
)
_, result_Naive_BA = hauer.NaiveBeforeAfter(df)

# CG BA
df_t = pd.DataFrame(
    {
        'treatment_before_duration': [1],
        'treatment_after_duration': [1],
        'treatment_before_count': [173],
        'treatment_after_count': [144],
    }
)
df_cg = pd.DataFrame(
    {
        'comparison_group_before_duration': [1],
        'comparison_group_after_duration': [1],
        'comparison_group_before_count': [897],
        'comparison_group_after_count': [870],
    }
)
_, _, result_CG_BA = hauer.ComparisonGroupBeforeAfter(
    df_t,
    df_cg,
    var_w_par=0.0055,
)

# Transpose each summary, stack them as rows, then transpose back so that
# each method ends up as one column
result = pd.concat(
    [result_Naive_BA.T, result_CG_BA.T],
    ignore_index=True,
).T
result.columns = ["Naive BA", "CG BA"]
result

Unnamed: 0,Naive BA,CG BA
lambda,144.0,144.0
var_lambda,144.0,144.0
std_lambda,12.0,12.0
pi,173.0,167.605791
var_pi,173.0,380.490835
std_pi,13.152946,19.506174
delta,29.0,23.605791
delta_norm,29.0,23.605791
var_delta,317.0,524.490835
std_delta,17.804494,22.901765


### Class Examples

In [3]:
"""
Coelho et al, 2008
https://www.sinaldetransito.com.br/artigos/semaforos_x_acidentes.pdf
"""
# Column names are in Portuguese: "Semáforo" = traffic signal id,
# "Período Antes"/"Período Depois" = before/after period length,
# "Sinistros Antes"/"Sinistros Depois" = crash counts before/after.
df = pd.DataFrame(
    {
        "Semáforo": [470, 472, 473, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488],
        "Período Antes": [2] * 16,
        "Sinistros Antes": [20, 15, 1, 13, 8, 11, 5, 12, 8, 6, 3, 1, 10, 10, 11, 2],
        "Período Depois": [2] * 16,
        "Sinistros Depois": [16, 8, 1, 11, 16, 33, 10, 10, 17, 15, 13, 7, 11, 6, 20, 3],
    }
)

# Positional arguments map the custom column names onto
# (before duration, after duration, before count, after count)
_, result = hauer.NaiveBeforeAfter(
    df,
    'Período Antes',
    'Período Depois',
    'Sinistros Antes',
    'Sinistros Depois',
)

result

0.3043375222522672


Unnamed: 0,NAive BA
lambda,197.0
var_lambda,197.0
std_lambda,14.035669
pi,136.0
var_pi,136.0
std_pi,11.661904
delta,-61.0
delta_norm,-1.90625
var_delta,333.0
std_delta,18.248288


In [43]:
# Treated entity: before/after durations and observed counts.
# NOTE(review): 56 and 38 look like months (they match the summed df_SPF
# durations of 4 8/12 and 3 2/12 years) - confirm the units.
df = pd.DataFrame()
df["treatment_before_duration"] = [56]
df["treatment_after_duration"] = [38]
df['treatment_before_count'] = [34]
df['treatment_after_count'] = [14]

# SPF inputs: one row per reference period
df_SPF = pd.DataFrame()
# Duration of each reference period, expressed as a fraction of a year
df_SPF["reference"] = ["1990","1991","1992","1993","Jan-Aug 1994","Nov-Dec 1994","1995","1996","1997"]
df_SPF["period"] = ["before","before","before","before","before","after","after","after","after"]
df_SPF["duration"] = [1,1,1,1,8/12,2/12,1,1,1]


def SPF(alpha, major_AADT, minor_AADT, duration=1):
    """
    Evaluate the prediction function for a single reference period.

    Returns a two-item list:
      [duration * alpha * major_AADT**0.256 * minor_AADT**0.831, 0.25]
    The second item is the constant stored in column "b".
    NOTE(review): 0.25 is presumably the dispersion parameter of the
    fitted model - confirm against the model source.
    """
    return [duration*alpha*(major_AADT**0.256)*(minor_AADT**0.831), 0.25]


df_SPF["alpha"]= [0.000383, 0.000388, 0.000392, 0.000358, 0.000391, 0.000391, 0.000389, 0.000362, 0.000367]
df_SPF["major_AADT"] = [10228, 10441, 10761, 10867, 10974, 12076, 11597, 11836, 12315]
df_SPF["minor_AADT"] = [4503, 4597, 4738, 4785, 4832, 5317, 5106, 5211, 5422]

# Per-period prediction, then totals per "before"/"after" period
df_SPF[["tau_par","b"]] = df_SPF.apply(lambda row: SPF(row["alpha"],row["major_AADT"],row["minor_AADT"],row["duration"]),axis=1,result_type="expand")
df_SPF_agg = df_SPF.groupby("period").agg({"tau_par":"sum","b":"mean"})
df_SPF_agg["var_tau_par"] = df_SPF_agg["b"]*(df_SPF_agg["tau_par"]**2)

# Predicted total for the before period and the variance attached to it
k_par = df_SPF_agg.loc["before","tau_par"]
k_var_var = df_SPF_agg.loc["before","var_tau_par"]

# Weight given to the prediction: 1 / (1 + variance / mean)
alpha_weight = 1/(1+(k_var_var/k_par))


def E_k_K(alpha_weight, k, K):
    """Weighted average of the prediction k and the observed count K."""
    return (alpha_weight*k) + ((1-alpha_weight)*K)


# The original cell stopped mid-edit here (a lambda without its colon and a
# whole-frame assignment). K is taken to be the observed before count.
# NOTE(review): confirm that "treatment_before_count" is the intended K.
df["E_k_K_par"] = E_k_K(alpha_weight, k_par, df["treatment_before_count"])

alpha_weight

np.float64(0.15711932111125093)

In [45]:
# Inspect the SPF table: one row per reference period
df_SPF

Unnamed: 0,reference,period,duration,alpha,major_AADT,minor_AADT,tau_par,b
0,1990,before,1.0,0.000383,10228,4503,4.423493,0.25
1,1991,before,1.0,0.000388,10441,4597,4.582959,0.25
2,1992,before,1.0,0.000392,10761,4738,4.784756,0.25
3,1993,before,1.0,0.000358,10867,4785,4.416813,0.25
4,Jan-Aug 1994,before,0.666667,0.000391,10974,4832,3.250337,0.25
5,Nov-Dec 1994,after,0.166667,0.000391,12076,5317,0.901627,0.25
6,1995,after,1.0,0.000389,11597,5106,5.150356,0.25
7,1996,after,1.0,0.000362,11836,5211,4.900162,0.25
8,1997,after,1.0,0.000367,12315,5422,5.186852,0.25


In [None]:
def EmpiricalBayes(
    df_treatment=None,
    before_duration="treatment_before_duration",
    after_duration="treatment_after_duration",
    before_count="treatment_before_count",
    after_count="treatment_after_count",
    ):

    """
    Before-after summary for a set of treated entities (4-step layout).

    NOTE(review): despite its name, this function contains no Empirical
    Bayes step yet. The prediction `pi` is built only from the before
    counts scaled by the duration ratio, and the summary column is still
    labelled "NAive BA". Treat it as scaffolding until the EB estimate
    is wired in.

    Based on:
    Chapter 7, "The Naive before-after study"
    Chapter 10, "The variability of treatment effect"

    Parameters
    ----------
    df_treatment : pandas.DataFrame
        One row per entity. Must contain the four columns named by the
        other arguments. The frame is copied; the caller's object is not
        modified.
    before_duration, after_duration : str
        Names of the columns holding the before / after durations.
    before_count, after_count : str
        Names of the columns holding the before / after counts.

    Returns
    -------
    (df_treatment, result)
        df_treatment : copy of the input restricted to the input columns,
            the intermediate `rdj*` columns, the per-entity
            lambda/pi columns and the `teta_par`, `var_teta_par`,
            `std_teta_par` columns (the latter three are expected to be
            added by `VariabilityTreatmentEffect`).
        result : one-column DataFrame named "NAive BA", indexed by
            statistic name.

    Raises
    ------
    ValueError
        If `df_treatment` is None.
    """

    if df_treatment is None:
        raise ValueError(
            "df_treatment is required: pass a DataFrame with the "
            "before/after duration and count columns"
        )

    # Copy dataset
    df_treatment = df_treatment.copy()

    # For composite entities
    # rdj: after/before duration ratio of each entity
    df_treatment["rdj"] = df_treatment[after_duration]/df_treatment[before_duration]
    df_treatment["rdj_x_before_count"] = df_treatment["rdj"]*df_treatment[before_count]
    df_treatment["rdj2_x_before_count"] = df_treatment["rdj"]*df_treatment["rdj_x_before_count"]

    # Step 1
    lambda_par = sum(df_treatment[after_count]) # estimated after with treatment

    pi_par = sum(df_treatment["rdj_x_before_count"])# predict without treatment

    # Step 2
    var_lambda_par = sum(df_treatment[after_count]) # assumed to be Poisson distributed
    std_lambda_par = var_lambda_par**0.5

    var_pi_par = sum(df_treatment["rdj2_x_before_count"]) # assumed to be Poisson distributed
    std_pi_par = var_pi_par**0.5

    # Step 3
    delta_par = pi_par - lambda_par
    delta_norm_par = delta_par/sum(df_treatment[after_duration])

    # The ratio lambda/pi is divided by (1 + var_pi/pi^2)
    pi_correction = 1+(var_pi_par/(pi_par**2))
    teta_par = (lambda_par/pi_par) / pi_correction

    # Step 4
    var_delta_par = var_lambda_par + var_pi_par
    std_delta_par = var_delta_par**0.5

    var_teta_par = (teta_par**2)*((var_lambda_par/(lambda_par**2))+(var_pi_par/(pi_par**2)))/(pi_correction**2)
    std_teta_par = var_teta_par**0.5

    # For single entities (variability of treatment effect)
    df_treatment["lambda_par"] = df_treatment[after_count]
    df_treatment["var_lambda_par"] = df_treatment[after_count]
    df_treatment["pi_par"] = df_treatment["rdj_x_before_count"]
    df_treatment["var_pi_par"] = df_treatment["rdj2_x_before_count"]

    # NOTE(review): VariabilityTreatmentEffect is neither defined nor
    # imported in this notebook; it must be in scope (presumably it lives
    # in the `hauer` module - confirm) or this call raises NameError.
    df_treatment,s2_teta,avg_V,var_teta_vte,std_teta_vte = VariabilityTreatmentEffect(
        df_treatment,
        lambda_par="lambda_par",
        var_lambda_par="var_lambda_par",
        pi_par="pi_par",
        var_pi_par="var_pi_par",
        )

    result = {
        "lambda":lambda_par,
        "var_lambda":var_lambda_par,
        "std_lambda":std_lambda_par,
        "pi":pi_par,
        "var_pi":var_pi_par,
        "std_pi":std_pi_par,
        "delta":delta_par,
        "delta_norm":delta_norm_par,
        "var_delta":var_delta_par,
        "std_delta":std_delta_par,
        "teta":teta_par,
        "var_teta":var_teta_par,
        "std_teta":std_teta_par,
        "s2_teta":s2_teta,
        "avg_V":avg_V,
        "var_teta_vte":var_teta_vte,
        "std_teta_vte":std_teta_vte
      }
    # Label kept as-is: it matches the column name shown by hauer.NaiveBeforeAfter
    result = pd.DataFrame(pd.Series(result,name="NAive BA"))

    # Keep a fixed column order for the returned per-entity frame
    df_treatment = df_treatment[[
        before_duration,after_duration,
        before_count,after_count,
        "rdj","rdj_x_before_count","rdj2_x_before_count",
        "lambda_par","var_lambda_par","pi_par","var_pi_par",
        "teta_par","var_teta_par","std_teta_par"
        ]]

    return df_treatment,result