In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Setting up paths

In [2]:
# Folder that holds the cleaned lab data.
# NOTE(review): absolute, machine-specific Windows path — adjust to the local copy before running.
lab_path = r"D:\Accesos directos\Trabajo\World Bank\Peru Amag\peru-amag-stats\Data"
# Raw string: "\C" is not a valid escape sequence, so the non-raw literal only worked by
# accident and raises a DeprecationWarning/SyntaxWarning on newer Python versions.
# The leading backslash is kept because the full path is built as lab_path + lab_data_name.
lab_data_name = r"\Clean_Full_Data12.dta"

In [3]:
# Full path = folder + file name (the file name carries its own leading separator).
lab_data_file = lab_path + lab_data_name
# Load the Stata (.dta) file into a DataFrame.
lab_data = pd.read_stata(lab_data_file)

In [4]:
# The gender column arrives with a mis-encoded (mojibake) label; swap that text for
# the ASCII name "Gender" in every column label that contains it.
lab_data = lab_data.rename(columns=lambda label: label.replace('GÃ©nero', 'Gender'))

In [5]:
# Columns describing each participant. The gender column is referenced by the name it
# carries after the rename step ("Gender"); the previous entry "Género" matched no column.
judges_characteristics = ["Age_rounded", "Cargo", "Gender"]

# Preparing data for OLS

## Generating dummies

In [6]:
# One-hot encode the categorical covariates.
# dtype=float keeps the indicators numeric: newer pandas versions return boolean dummies
# by default, and mixing those with float covariates yields an object-typed design
# matrix that statsmodels' OLS may refuse to convert.
# In each set the empty-string category is dropped (presumably missing values — TODO confirm),
# together with one category that serves as the omitted reference group.
position_dummies = pd.get_dummies(lab_data["Cargo"], dtype=float).drop(["", "ASIS"], axis=1)
gender_dummies = pd.get_dummies(lab_data["Gender"], dtype=float).drop(["", "Masculino"], axis=1)
course_dummies = pd.get_dummies(lab_data["Curso"], dtype=float).drop([""], axis=1)
# Short labels are assigned by position, so they rely on the column order produced by
# get_dummies. NOTE(review): verify the order against the raw "Curso" categories.
course_dummies.columns = ["Control", "Interpretacion", "Jurisprudencia", "Razonamiento", "Virtudes", "Etica"]
# "Control" is the omitted reference course.
course_dummies = course_dummies.drop(columns=["Control"])

## Creating outcomes

In [7]:
# Attach every dummy set to the data column-wise, then derive the quadratic age term
# and the outcome "endline IAT score minus baseline IAT score".
lab_reg_data = pd.concat(
    [lab_data, position_dummies, gender_dummies, course_dummies],
    axis="columns",
).assign(
    Age_squared=lambda frame: frame["Age_rounded"] ** 2,
    iat_score_change=lambda frame: frame["en_iat_score"] - frame["bs_iat_score"],
)

## OLS regression

In [8]:
# Regressors: age (linear and squared), baseline IAT score, then every dummy column
# in the order position, gender, course.
covariates = [
    "Age_rounded",
    "Age_squared",
    "bs_iat_score",
    *position_dummies.columns,
    *gender_dummies.columns,
    *course_dummies.columns,
]

In [9]:
# Both models use the same design matrix: the covariates plus an intercept column.
ols_design_matrix = sm.add_constant(lab_reg_data[covariates])

# Outcome 1: change in IAT score. Rows with missing values are dropped by statsmodels.
score_change_reg_output = sm.OLS(
    lab_reg_data["iat_score_change"],
    ols_design_matrix,
    missing="drop",
).fit()

# Outcome 2: endline IAT score, same regressors and same missing-value handling.
en_score_reg_output = sm.OLS(
    lab_reg_data["en_iat_score"],
    ols_design_matrix,
    missing="drop",
).fit()

In [10]:
# Display the regression table of the endline IAT score model.
en_score_reg_output.summary()

0,1,2,3
Dep. Variable:,en_iat_score,R-squared:,0.139
Model:,OLS,Adj. R-squared:,0.042
Method:,Least Squares,F-statistic:,1.439
Date:,"Fri, 09 Sep 2022",Prob (F-statistic):,0.16
Time:,12:24:05,Log-Likelihood:,-14.574
No. Observations:,120,AIC:,55.15
Df Residuals:,107,BIC:,91.39
Df Model:,12,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.2378,0.594,-0.400,0.690,-1.416,0.941
Age_rounded,0.0115,0.030,0.388,0.699,-0.047,0.071
Age_squared,-0.0002,0.000,-0.493,0.623,-0.001,0.001
bs_iat_score,0.0417,0.086,0.485,0.628,-0.128,0.212
AUX,0.0385,0.089,0.435,0.665,-0.137,0.214
FISCAL,-0.1294,0.090,-1.438,0.153,-0.308,0.049
JUEZ,-0.0663,0.098,-0.678,0.499,-0.260,0.128
Femenino,0.0054,0.058,0.094,0.926,-0.110,0.121
Interpretacion,-0.0691,0.095,-0.727,0.469,-0.258,0.119

0,1,2,3
Omnibus:,0.419,Durbin-Watson:,2.098
Prob(Omnibus):,0.811,Jarque-Bera (JB):,0.299
Skew:,0.122,Prob(JB):,0.861
Kurtosis:,3.0,Cond. No.,40900.0


### Subtracting the variables that have a significant impact on our outcome

- For the IAT Score Change, only the Baseline IAT Score is significant
- For the IAT Endline Score, there are no significant covariates

In [11]:
# Coefficient on the baseline IAT score from the score-change regression.
bs_score_coefficient = score_change_reg_output.params["bs_iat_score"]
# Score change net of the part explained by the baseline score.
# The column name is kept exactly as spelled because later cells select it by this key.
lab_reg_data["iat_sc_residuzalized"] = (
    lab_reg_data["iat_score_change"]
    - bs_score_coefficient * lab_reg_data["bs_iat_score"]
)

# Preparing Data for A1 and A2 calculations

## Baseline

### Filtering the data for calculating A1 and A2

- A1_{baseline} = E[Y| request between 0.1 and 0.5_{baseline} ; seeing for sure_{baseline}]
- A2_{baseline} = E[Y| request between 0.1 and 0.5_{baseline} ; seeing according to demand_{baseline}]

In [12]:
# Participants whose baseline feedback request level lies in 1..5 (both ends included).
requested_1_to_5_bs = lab_reg_data["bs_iat_feedback_level"].between(1, 5)
# Participants for whom the baseline feedback was actually shown.
shown_feedback_bs = lab_reg_data["bs_iat_show_feedback"] == 1

# A1 sample: requested level 1..5 AND feedback shown; A2 sample: requested level 1..5.
a1_data_bs = lab_reg_data[shown_feedback_bs & requested_1_to_5_bs]
a2_data_bs = lab_reg_data[requested_1_to_5_bs]

In [35]:
# Number of non-missing observations of each outcome in the baseline A1 subsample.
a1_data_bs[["iat_sc_residuzalized", "en_iat_score"]].count()

iat_sc_residuzalized    5
en_iat_score            5
dtype: int64

In [36]:
# Number of non-missing observations of each outcome in the baseline A2 subsample.
a2_data_bs[["iat_sc_residuzalized", "en_iat_score"]].count()

iat_sc_residuzalized    10
en_iat_score            10
dtype: int64

## Endline

### Filtering the data for calculating A1 and A2

- A1_{endline} = E[Y| request between 0.1 and 0.5_{endline} ; seeing for sure_{endline}]
- A2_{endline} = E[Y| request between 0.1 and 0.5_{endline} ; seeing according to demand_{endline}]

In [15]:
# Participants whose endline feedback request level lies in 1..5 (both ends included).
requested_1_to_5_en = lab_reg_data["en_iat_feedback_level"].between(1, 5)
# Participants for whom the endline feedback was actually shown.
shown_feedback_en = lab_reg_data["en_iat_show_feedback"] == 1

# A1 sample: requested level 1..5 AND feedback shown; A2 sample: requested level 1..5.
a1_data_en = lab_reg_data[shown_feedback_en & requested_1_to_5_en]
a2_data_en = lab_reg_data[requested_1_to_5_en]

### Displaying the data

In [33]:
# Number of non-missing observations of each outcome in the endline A1 subsample.
a1_data_en[["iat_sc_residuzalized", "en_iat_score"]].count()

iat_sc_residuzalized    5
en_iat_score            7
dtype: int64

In [34]:
# Number of non-missing observations of each outcome in the endline A2 subsample.
a2_data_en[["iat_sc_residuzalized", "en_iat_score"]].count()

iat_sc_residuzalized     9
en_iat_score            19
dtype: int64

# IAT Algebra for E2

- A1_{stage} = E1_{stage}
- A2_{stage} = p_{stage} * E1_{stage} + (1 - p_{stage}) * E2_{stage}

## Baseline

### Obtaining A1 and A2

In [18]:
# Residualized IAT score change: mean within each baseline subsample.
a1_iat_sc_bs = a1_data_bs.loc[:, "iat_sc_residuzalized"].mean()
a2_iat_sc_bs = a2_data_bs.loc[:, "iat_sc_residuzalized"].mean()

# Endline IAT score: mean within each baseline subsample.
a1_iat_bs = a1_data_bs.loc[:, "en_iat_score"].mean()
a2_iat_bs = a2_data_bs.loc[:, "en_iat_score"].mean()

### Obtaining probabilities

In [19]:
# Number of participants at each baseline feedback request level within the A2 subsample.
freq_iat_score_bs = a2_data_bs["bs_iat_feedback_level"].value_counts()
freq_iat_score_bs

5.0    20
1.0    12
2.0     3
3.0     2
4.0     2
Name: bs_iat_feedback_level, dtype: int64

In [20]:
# Number of participants whose baseline request level is between 1 and 5.
total_participants_bs_1_5 = freq_iat_score_bs.sum()
# Average probability of seeing the feedback: request level k stands for probability
# k/10, weighted by the share of participants at that level.
# The shares are read from freq_iat_score_bs instead of being copied by hand from a
# displayed table, so the value stays correct whenever the data changes.
# level / 10 (rather than level * 0.1) reproduces the literals 0.1 ... 0.5 exactly, and
# iterating in ascending level order keeps the summation order of the manual formula.
p_bs = sum(
    (count / total_participants_bs_1_5) * (level / 10)
    for level, count in freq_iat_score_bs.sort_index().items()
)

In [21]:
# Display the baseline average probability of seeing the feedback.
p_bs

0.3384615384615384

### Calculating E2

- E2_{baseline} = ( A2_{baseline} - p_{baseline} * E1_{baseline} ) / ( 1 - p_{baseline} ) 

In [22]:
# E2 = (A2 - p * A1) / (1 - p), where A1 stands in for E1.
complement_p_bs = 1 - p_bs

# Residualized IAT score change.
e2_iat_sc_bs = (a2_iat_sc_bs - p_bs * a1_iat_sc_bs) / complement_p_bs
# Endline IAT score.
e2_iat_bs = (a2_iat_bs - p_bs * a1_iat_bs) / complement_p_bs

### iat score change

In [23]:
# Display baseline E2 for the residualized IAT score change.
e2_iat_sc_bs

-0.2964768058992315

### endline iat score

In [24]:
# Display baseline E2 for the endline IAT score.
e2_iat_bs

-0.31052600679442627

## Endline

### Obtaining A1 and A2

In [25]:
# Residualized IAT score change: mean within each endline subsample.
a1_iat_sc_en = a1_data_en.loc[:, "iat_sc_residuzalized"].mean()
a2_iat_sc_en = a2_data_en.loc[:, "iat_sc_residuzalized"].mean()

# Endline IAT score: mean within each endline subsample.
a1_iat_en = a1_data_en.loc[:, "en_iat_score"].mean()
a2_iat_en = a2_data_en.loc[:, "en_iat_score"].mean()

### Obtaining probabilities

In [26]:
# Number of participants at each endline feedback request level within the A2 subsample.
freq_iat_score_en = a2_data_en["en_iat_feedback_level"].value_counts()
freq_iat_score_en

1.0    7
5.0    6
4.0    3
3.0    2
2.0    1
Name: en_iat_feedback_level, dtype: int64

In [27]:
# Number of participants whose endline request level is between 1 and 5.
total_participants_en_1_5 = freq_iat_score_en.sum()
# Average probability of seeing the feedback: request level k stands for probability
# k/10, weighted by the share of participants at that level.
# The shares are read from freq_iat_score_en instead of being copied by hand from a
# displayed table, so the value stays correct whenever the data changes.
# level / 10 (rather than level * 0.1) reproduces the literals 0.1 ... 0.5 exactly, and
# iterating in ascending level order keeps the summation order of the manual formula.
p_en = sum(
    (count / total_participants_en_1_5) * (level / 10)
    for level, count in freq_iat_score_en.sort_index().items()
)

In [28]:
# Display the endline average probability of seeing the feedback.
p_en

0.3

### Calculating E2

- E2_{endline} = ( A2_{endline} - p_{endline} * E1_{endline} ) / ( 1 - p_{endline} ) 

In [29]:
# E2 = (A2 - p * A1) / (1 - p), where A1 stands in for E1.
complement_p_en = 1 - p_en

# Residualized IAT score change.
e2_iat_sc_en = (a2_iat_sc_en - p_en * a1_iat_sc_en) / complement_p_en
# Endline IAT score.
e2_iat_en = (a2_iat_en - p_en * a1_iat_en) / complement_p_en

In [30]:
# Display endline E2 for the residualized IAT score change.
e2_iat_sc_en

-0.18215627154310238

In [31]:
# Display endline E2 for the endline IAT score.
e2_iat_en

-0.18287389752656125