In [2]:
import numpy as np
import pandas as pd

%matplotlib inline

# Functions

**Confusion matrix:** by definition, a confusion matrix $C$ is such that $C_{i,j}$ is equal to the number of observations known to be in group $i$ and predicted to be in group $j$.

In [3]:
def delta_method(beta, grad_beta, sd_beta):
    """Return the delta-method variance of a transformed coefficient.

    The result is ``grad_beta**2 * sd_beta**2``: the squared gradient of the
    transformation multiplied by the variance of the original estimate.

    Parameters
    ----------
    beta : number
        Original coefficient. It is not used in the computation.
    grad_beta : number
        Gradient of the transformation with respect to the coefficient.
    sd_beta : number
        Standard deviation (standard error) of the original coefficient.

    Returns
    -------
    number
        Variance of the transformed coefficient (take the square root to get
        a standard error).
    """
    squared_gradient = grad_beta ** 2
    return squared_gradient * (sd_beta ** 2)

In [4]:
def transf_v1(P, P_hat, r0, r1, termo_erro):
    """Return the version-1 transformation ratio.

    Computes::

        (1 - r0 - r1) * P * (1 - P) - termo_erro
        ----------------------------------------
             P_hat * (1 - P_hat) - termo_erro

    Parameters
    ----------
    P : number
        Proportion used in the numerator (callers pass ``P_true``).
    P_hat : number
        Proportion used in the denominator (callers pass ``P_est``).
    r0, r1 : number
        Rates whose sum shrinks the numerator via ``1 - r0 - r1``
        (presumably misclassification rates -- confirm).
    termo_erro : number
        Term subtracted from both numerator and denominator.

    Returns
    -------
    number
        The ratio above. Callers take its reciprocal to rescale coefficients.
    """
    numerator_core = (1 - r0 - r1) * P * (1 - P)
    denominator_core = P_hat * (1 - P_hat)
    return (numerator_core - termo_erro) / (denominator_core - termo_erro)

In [5]:
def transf_v2(P, r0, r1, termo_cov):
    """Return the version-2 transformation factor.

    Computes ``1 - (var_u + cov_x_u) * termo_cov`` where::

        var_u   = r1 + (r0 - r1) * P - (r1 - (r0 + r1) * P) ** 2
        cov_x_u = -(r1 + r0) * P * (1 - P)

    Parameters
    ----------
    P : number
        Proportion entering both terms (callers pass ``P_true``).
    r0, r1 : number
        Rates entering both terms (presumably misclassification rates --
        confirm).
    termo_cov : number
        Multiplier applied to ``var_u + cov_x_u``. It is expected to already
        be an inverse, which is why it multiplies instead of dividing.

    Returns
    -------
    number
        The factor above. Callers take its reciprocal to rescale coefficients.
    """
    inner = r1 - (r0 + r1) * P
    var_u = r1 + (r0 - r1) * P - inner ** 2
    cov_x_u = -(r1 + r0) * P * (1 - P)

    # termo_cov is already the inverse, so multiply rather than divide.
    return 1 - ((var_u + cov_x_u) * termo_cov)

## Testing

In [11]:
# % of schools in municipalities with corruption
var_c = 0.25004973120490126
termo = 5.36

# Term passed to transf_v1 as termo_erro: var_c minus the reciprocal of termo.
covs = var_c - 1 / termo
covs

0.0634825670257968

In [12]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira
r_0, r_1 = 0.05540628, 0.1860478   # both marked "R" (presumably computed in R -- confirm)

In [13]:
# Reciprocal of the v1 ratio, i.e. the factor later applied to the coefficients.
1 / transf_v1(P=P_true, P_hat=P_est, r0=r_0, r1=r_1, termo_erro=covs)

1.6562451958336055

In [14]:
# Reciprocal of the v2 factor, i.e. the factor later applied to the coefficients.
1 / transf_v2(P=P_true, r0=r_0, r1=r_1, termo_cov=termo)

1.6734756132693105

### Negative Binomial

In [15]:
# 2x2 matrix of counts (presumably a confusion matrix with rows = known group
# and columns = predicted group -- confirm).
matrix_total = np.array([
    [903, 50],
    [46, 252],
])
print(matrix_total)

[[903  50]
 [ 46 252]]


In [16]:
# Grand total of all four cells.
matrix_total.sum()

1251

In [17]:
# Cell shares: every count divided by the grand total.
# ravel() walks the 2x2 array row by row, giving [0,0], [0,1], [1,0], [1,1].
pi_00, pi_01, pi_10, pi_11 = (matrix_total / matrix_total.sum()).ravel()

In [18]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira
r_0, r_1 = 0.05540628, 0.1860478   # both marked "R" (presumably computed in R -- confirm)

termo_bin_neg_1 = transf_v1(P=P_true, P_hat=P_est, r0=r_0, r1=r_1, termo_erro=covs)
termo_bin_neg_2 = transf_v2(P=P_true, r0=r_0, r1=r_1, termo_cov=termo)

# Show the reciprocals of both transformations; only the first is rounded.
print(round(1 / termo_bin_neg_1, 2), '\n', 1 / termo_bin_neg_2)

1.66 
 1.6734756132693105


### Classification

In [19]:
# 2x2 matrix of counts (presumably a confusion matrix with rows = known group
# and columns = predicted group -- confirm).
matrix_educ = np.array([
    [200, 17],
    [10, 86],
])
print(matrix_educ)

[[200  17]
 [ 10  86]]


In [20]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira

# Off-diagonal count of each row divided by that row's total.
r_0 = matrix_educ[0, 1] / matrix_educ[0].sum()
r_1 = matrix_educ[1, 0] / matrix_educ[1].sum()

termo_class_1 = transf_v1(P=P_true, P_hat=P_est, r0=r_0, r1=r_1, termo_erro=covs)
termo_class_2 = transf_v2(P=P_true, r0=r_0, r1=r_1, termo_cov=termo)

# Show the reciprocals of both transformations; only the first is rounded.
print(round(1 / termo_class_1, 2), '\n', 1 / termo_class_2)

1.47 
 1.38625919645


## Betas

In [6]:
# Parallel lists: entry k of each list belongs to outcome variables[k].
variables = ['mat', 'port', 'fail', 'drop']
betas = [-0.1098, -0.1105, 0.0059, 0.0030]   # coefficient estimates
sd = [0.025, 0.024, 0.003, 0.001]            # their standard errors
# Term passed to transf_v2 as termo_cov: one value shared by 'mat'/'port',
# another shared by 'fail'/'drop'.
vec_termos = [5.3444757158] * 2 + [5.35881757256] * 2

In [9]:
# % of schools in municipalities with corruption
var_c = 0.25004973120490126
# NOTE(review): the "Testing" section sets termo = 5.36, here it is 5.35 --
# confirm which value is intended.
termo = 5.35

# Term passed to transf_v1 as termo_erro: var_c minus the reciprocal of termo.
covs = var_c - 1 / termo
covs

0.06313384335443395

### Negative Binomial

In [7]:
# 2x2 matrix of counts used below to derive r_0 and r_1 for "Classification".
matrix_educ = np.array([
    [200, 17],
    [10, 86],
])

In [10]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira
r_0, r_1 = 0.05540628, 0.1860478   # both marked "R" (presumably computed in R -- confirm)

# The v1 factor does not depend on the individual coefficient, so it is
# evaluated once and reused for every outcome.
grad_beta_nb = 1 / transf_v1(P=P_true, P_hat=P_est, r0=r_0, r1=r_1, termo_erro=covs)

betas_mm_nb, sd_mm_nb, test_nb = [], [], []
for _, beta_i, sd_i in zip(variables, betas, sd):
    var = delta_method(beta_i, grad_beta_nb, sd_i)
    coef_i = grad_beta_nb * beta_i
    se_i = np.sqrt(var)

    betas_mm_nb.append(coef_i)
    sd_mm_nb.append(se_i)
    # Ratio of rescaled coefficient to rescaled standard error.
    test_nb.append(coef_i / se_i)

print('coef:', betas_mm_nb,
      '\n sd:', sd_mm_nb,
      '\n t-test:', test_nb)

coef: [-0.1816261117295699, -0.1827840195456965, 0.009759508735924067, 0.004962462069113932] 
 sd: [0.041353850575949438, 0.039699696552911459, 0.0049624620691139324, 0.0016541540230379773] 
 t-test: [-4.3919999999999995, -4.604166666666667, 1.9666666666666666, 3.0000000000000004]


#### We used this transformation:

In [11]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira
r_0, r_1 = 0.05540628, 0.1860478   # both marked "R" (presumably computed in R -- confirm)

betas_mm_nb, sd_mm_nb, test_nb = [], [], []
# Each outcome has its own termo, so the v2 factor is recomputed per outcome.
for _, beta_i, sd_i, termo in zip(variables, betas, sd, vec_termos):
    grad_beta_nb = 1 / transf_v2(P=P_true, r0=r_0, r1=r_1, termo_cov=termo)
    print(grad_beta_nb)

    var = delta_method(beta_i, grad_beta_nb, sd_i)
    coef_i = grad_beta_nb * beta_i
    se_i = np.sqrt(var)

    betas_mm_nb.append(coef_i)
    sd_mm_nb.append(se_i)
    # Ratio of rescaled coefficient to rescaled standard error.
    test_nb.append(coef_i / se_i)

print('coef:', betas_mm_nb,
      '\n sd:', sd_mm_nb,
      '\n t-test:', test_nb)

1.6702176832840263
1.6702176832840263
1.6732270216668244
1.6732270216668244
coef: [-0.18338990162458607, -0.18455905400288491, 0.009872039427834264, 0.0050196810650004735] 
 sd: [0.041755442082100663, 0.040085224398816628, 0.0050196810650004735, 0.0016732270216668244] 
 t-test: [-4.3919999999999986, -4.604166666666667, 1.9666666666666666, 3.0000000000000004]


### Classification

In [12]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira

# Off-diagonal count of each row divided by that row's total.
r_0 = matrix_educ[0, 1] / matrix_educ[0].sum()
r_1 = matrix_educ[1, 0] / matrix_educ[1].sum()

# The v1 factor does not depend on the individual coefficient, so it is
# evaluated once and reused for every outcome.
grad_beta_class = 1 / transf_v1(P=P_true, P_hat=P_est, r0=r_0, r1=r_1, termo_erro=covs)

betas_mm_class, sd_mm_class, test_class = [], [], []
for _, beta_i, sd_i in zip(variables, betas, sd):
    var = delta_method(beta_i, grad_beta_class, sd_i)
    coef_i = grad_beta_class * beta_i
    se_i = np.sqrt(var)

    betas_mm_class.append(coef_i)
    sd_mm_class.append(se_i)
    # Ratio of rescaled coefficient to rescaled standard error.
    test_class.append(coef_i / se_i)

print('coef:', betas_mm_class,
      '\n sd:', sd_mm_class,
      '\n t-test:', test_class)

coef: [-0.16179910498970762, -0.16283061112352182, 0.0086941231278622501, 0.0044207405734892798] 
 sd: [0.036839504779077333, 0.035365924587914238, 0.0044207405734892798, 0.0014735801911630933] 
 t-test: [-4.3919999999999995, -4.604166666666667, 1.9666666666666666, 3.0]


#### We used this transformation:

In [13]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira

# Off-diagonal count of each row divided by that row's total.
r_0 = matrix_educ[0, 1] / matrix_educ[0].sum()
r_1 = matrix_educ[1, 0] / matrix_educ[1].sum()

betas_mm_class, sd_mm_class, test_class = [], [], []
# Each outcome has its own termo, so the v2 factor is recomputed per outcome.
for _, beta_i, sd_i, termo in zip(variables, betas, sd, vec_termos):
    grad_beta_class = 1 / transf_v2(P=P_true, r0=r_0, r1=r_1, termo_cov=termo)
    print(grad_beta_class)

    var = delta_method(beta_i, grad_beta_class, sd_i)
    coef_i = grad_beta_class * beta_i
    se_i = np.sqrt(var)

    betas_mm_class.append(coef_i)
    sd_mm_class.append(se_i)
    # Ratio of rescaled coefficient to rescaled standard error.
    test_class.append(coef_i / se_i)

print('coef:', betas_mm_class,
      '\n sd:', sd_mm_class,
      '\n t-test:', test_class)

1.38471007852
1.38471007852
1.38614108391
1.38614108391
coef: [-0.1520411666210354, -0.15301046367599647, 0.008178232395098229, 0.0041584232517448624] 
 sd: [0.034617751962895127, 0.033233041884379325, 0.0041584232517448624, 0.0013861410839149542] 
 t-test: [-4.3920000000000003, -4.6041666666666661, 1.9666666666666666, 3.0]


### Private schools

In [15]:
# Parallel lists for the private-schools estimates: entry k of each list
# belongs to outcome variables[k].
variables = ['fail', 'drop']
betas = [0.0007, 0.0891]   # coefficient estimates
sd = [0.002, 0.076]        # their standard errors
# The same termo is used for every outcome in this section.
vec_termos = [74.516442587] * len(variables)

In [16]:
P_est = 0.423559   # municipalities audited in Education
P_true = 0.35      # paper: Ferraz, Finan, Moreira
r_0, r_1 = 0.05540628, 0.1860478   # both marked "R" (presumably computed in R -- confirm)

betas_mm_nb, sd_mm_nb, test_nb = [], [], []
# NOTE(review): with termo = 74.516442587 the recorded factor is negative
# (-0.2176...), which flips the sign of every coefficient and t-statistic
# relative to the inputs -- confirm that termo is correct for this sample.
for _, beta_i, sd_i, termo in zip(variables, betas, sd, vec_termos):
    grad_beta_nb = 1 / transf_v2(P=P_true, r0=r_0, r1=r_1, termo_cov=termo)
    print(grad_beta_nb)

    var = delta_method(beta_i, grad_beta_nb, sd_i)
    coef_i = grad_beta_nb * beta_i
    se_i = np.sqrt(var)

    betas_mm_nb.append(coef_i)
    sd_mm_nb.append(se_i)
    # Ratio of rescaled coefficient to rescaled standard error.
    test_nb.append(coef_i / se_i)

print('coef:', betas_mm_nb,
      '\n sd:', sd_mm_nb,
      '\n t-test:', test_nb)

-0.21763412929866913
-0.21763412929866913
coef: [-0.00015234389050906838, -0.019391200920511418] 
 sd: [0.00043526825859733825, 0.016540193826698853] 
 t-test: [-0.34999999999999998, -1.1723684210526315]
