In [43]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

### Setting up paths

In [23]:
# Folder that holds the project data files. Raw string so the Windows
# backslashes are taken literally.
# NOTE(review): machine-specific absolute path — consider making it configurable.
lab_path = r"D:\Accesos directos\Trabajo\World Bank\Peru Amag\peru-amag-stats\Data"
# Data file name, including its leading backslash separator; it is concatenated
# directly onto lab_path. Written as a raw string because "\C" is not a valid
# escape sequence and triggers a SyntaxWarning on recent Python versions.
lab_data_name = r"\Clean_Full_Data12.dta"

In [24]:
# Read the Stata file found at lab_path followed by lab_data_name into a DataFrame.
lab_data = pd.read_stata(f"{lab_path}{lab_data_name}")

In [25]:
# The gender column label appears to be mis-encoded ('GÃ©nero'); swap that text
# for 'Gender' in every column label that contains it.
renamed_columns = lab_data.columns.str.replace('GÃ©nero', 'Gender')
lab_data.columns = renamed_columns

In [26]:
# Columns describing each participant: age, position ("Cargo") and gender.
# The gender column is called "Gender" once the mis-encoded label has been
# renamed, and that is the name the dummy-generation cell reads, so the same
# name is used here.
judges_characteristics = ["Age_rounded", "Cargo", "Gender"]

### Generating dummies

In [107]:
# Indicator (dummy) columns for position, gender and course.
#
# dtype is pinned to uint8, the historical pandas default. pandas >= 2.0 returns
# boolean dummies unless told otherwise, and a frame mixing float and bool
# columns becomes object-typed when converted to an array for the regression.
#
# The empty-string category and one named category ("ASIS", "Masculino") are
# dropped from each set — presumably the omitted reference group; confirm.
position_dummies = pd.get_dummies(lab_data["Cargo"], dtype=np.uint8).drop(columns=["", "ASIS"])
gender_dummies = pd.get_dummies(lab_data["Gender"], dtype=np.uint8).drop(columns=["", "Masculino"])
course_dummies = pd.get_dummies(lab_data["Curso"], dtype=np.uint8).drop(columns=[""])
# Labels are assigned by position, so this only works when exactly six course
# columns remain.
# NOTE(review): assumes the remaining categories come in this order — TODO confirm.
course_dummies.columns = ["Control", "Interpretacion", "Jurisprudencia", "Razonamiento", "Virtudes", "Etica"]
# "Control" is left out of the course dummies.
course_dummies = course_dummies.drop(columns=["Control"])

### Preparing data for OLS

In [111]:
# Regression frame: the loaded data placed side by side with each block of dummies.
# NOTE(review): department_dummies must already exist when this cell runs —
# confirm it is created earlier in the notebook.
lab_reg_data = pd.concat(
    [lab_data, position_dummies, gender_dummies, department_dummies, course_dummies],
    axis="columns",
)

# quadratic age term
lab_reg_data["Age_squared"] = lab_reg_data["Age_rounded"].pow(2)

# outcome: en_iat_score minus bs_iat_score (presumably endline minus baseline — confirm)
lab_reg_data["iat_score_change"] = lab_reg_data["en_iat_score"].sub(lab_reg_data["bs_iat_score"])

### OLS regression

In [112]:
# Regressor names: age, age squared, bs_iat_score, then every position,
# gender and course dummy column.
covariates = [
    "Age_rounded",
    "Age_squared",
    "bs_iat_score",
    *position_dummies.columns,
    *gender_dummies.columns,
    *course_dummies.columns,
]

In [117]:
# Both models share the same regressors plus a constant term; observations
# with missing values are dropped by statsmodels (missing="drop").
design_matrix = sm.add_constant(lab_reg_data[covariates])
score_change_reg_output = sm.OLS(lab_reg_data["iat_score_change"], design_matrix, missing="drop").fit()
en_score_reg_output = sm.OLS(lab_reg_data["en_iat_score"], design_matrix, missing="drop").fit()

In [119]:
# Subtract from the score change the part attributed to bs_iat_score,
# using that variable's coefficient from the score-change regression.
bs_iat_coefficient = score_change_reg_output.params["bs_iat_score"]
lab_reg_data["iat_sc_residuzalized"] = (
    lab_reg_data["iat_score_change"] - bs_iat_coefficient * lab_reg_data["bs_iat_score"]
)

### Filtering the data

In [142]:
# Row masks built from the feedback columns.
feedback_level = lab_reg_data["en_iat_feedback_level"]
level_1_to_5 = feedback_level.between(1, 5)  # inclusive on both ends

# e1: feedback shown, or feedback level 10, or feedback level between 1 and 5.
e1_data = lab_reg_data[lab_reg_data["en_iat_show_feedback"].eq(1) | feedback_level.eq(10) | level_1_to_5]
# e2: feedback level between 1 and 5 only.
e2_data = lab_reg_data[level_1_to_5]

In [143]:
# Display the subsample with feedback level between 1 and 5.
# NOTE(review): the rendered table includes the DNI column, which looks like a
# personal identifier — consider dropping it or clearing the output before sharing.
e2_data

Unnamed: 0,en_participant_index_in_pages,en_participant_current_app_name,en_participant_current_page_name,en_participant_time_started,en_participant_payoff,DNI,en_survey_qvsr1playerqvsr_4,en_survey_qvsr1playerqvsr_8,en_survey_qvsr1playerqvsr_6,en_survey_qvsr1playerqvsr_5,...,TUMBES,UCAYALI,Interpretacion,Jurisprudencia,Razonamiento,Virtudes,Etica,Age_squared,iat_score_change,iat_sc_residuzalized
23,40.0,prize,ThankYou,2021-07-14 04:45:17.169171+00:00,10.0,41632419.0,1.0,1.0,1.0,1.0,...,0,0,0,0,1,0,0,1444.0,,
35,40.0,prize,ThankYou,2021-07-13 02:31:22.323671+00:00,5.0,46874623.0,1.0,1.0,1.0,1.0,...,0,0,0,1,0,0,0,900.0,-0.09807,-0.172974
107,31.0,motivated_reasoning,CaseDescription,2021-07-14 03:09:26.317158+00:00,10.0,45758347.0,1.0,1.0,1.0,1.0,...,0,0,1,0,0,0,0,1024.0,,
126,40.0,prize,ThankYou,2021-07-14 21:35:00.221102+00:00,5.0,16716284.0,0.0,0.0,1.0,1.0,...,0,0,0,0,0,0,1,2209.0,,
141,40.0,prize,ThankYou,2021-07-14 15:36:56.520682+00:00,5.0,1888713.0,1.0,1.0,1.0,1.0,...,0,0,0,0,0,0,0,1936.0,-0.18305,-0.697558
204,40.0,prize,ThankYou,2021-07-11 16:47:47.907876+00:00,5.0,41888215.0,1.0,1.0,1.0,1.0,...,0,0,0,0,1,0,0,1444.0,-0.295152,-0.421948
209,40.0,prize,ThankYou,2021-07-10 19:38:28.674139+00:00,8.0,43719141.0,1.0,1.0,1.0,1.0,...,0,0,0,0,0,0,0,1225.0,,
211,40.0,prize,ThankYou,2021-07-04 14:49:12.848518+00:00,9.0,10542219.0,1.0,1.0,1.0,1.0,...,0,0,0,1,0,0,0,2809.0,0.134916,0.202687
254,40.0,prize,ThankYou,2021-07-03 15:09:59.021212+00:00,7.0,8571704.0,0.0,1.0,1.0,1.0,...,0,0,0,0,0,1,0,3721.0,,
275,40.0,prize,ThankYou,2021-07-04 00:19:30.430183+00:00,5.0,23946540.0,1.0,1.0,1.0,1.0,...,0,0,0,0,0,1,0,2304.0,0.287112,-0.261897


In [139]:
# Keep only the e2 rows that have a value for iat_score_change.
e2_data = e2_data.dropna(subset=["iat_score_change"])

In [130]:
# Average iat_score_change over the e1 sample; missing values are skipped
# (the pandas default, spelled out here).
e1 = e1_data["iat_score_change"].mean(skipna=True)