In [2]:
import numpy as np
import pandas as pd

%matplotlib inline

# Functions

Confusion matrix: by definition, a confusion matrix $C$ is such that $C_{i,j}$ equals the number of observations known to be in group $i$ but predicted to be in group $j$.

In [6]:
def delta_method(beta, grad_beta, sd_beta):
    """Return the delta-method variance of a rescaled coefficient.

    Parameters
    ----------
    beta
        Not used in the computation; kept so existing calls keep working.
    grad_beta
        Factor (gradient of the transformation) applied to the coefficient.
    sd_beta
        Standard error of the original coefficient.

    Returns
    -------
    ``grad_beta**2 * sd_beta**2``. This is a variance; callers take the
    square root to get a standard error.
    """
    return (grad_beta ** 2) * (sd_beta ** 2)

In [7]:
def transf_v1(P, P_hat, r0, r1, termo_erro):
    """Return the ratio used by the first version of the coefficient correction.

    Evaluates::

        ((1 - r0 - r1) * P * (1 - P) - termo_erro)
        / (P_hat * (1 - P_hat) - termo_erro)

    The cells in this notebook pass ``P_true`` as ``P``, ``P_est`` as
    ``P_hat``, the two misclassification rates as ``r0``/``r1`` and ``covs``
    as ``termo_erro``, and use the reciprocal of the result as the multiplier
    applied to the estimated coefficients.

    No guard is applied for a zero denominator.
    """
    numerator = (1 - r0 - r1) * P * (1 - P) - termo_erro
    denominator = P_hat * (1 - P_hat) - termo_erro
    return numerator / denominator

In [8]:
def transf_v2(P, r0, r1, termo_cov):
    """Return the ratio used by the second version of the coefficient correction.

    Computes ``1 - (var_u + cov_x_u) * termo_cov`` where ``var_u`` and
    ``cov_x_u`` are built from ``P`` and the two misclassification rates.
    ``termo_cov`` multiplies the numerator, i.e. it is expected to already be
    an inverse. The cells in this notebook use the reciprocal of the result as
    the multiplier applied to the estimated coefficients.
    """
    # NOTE(review): with u = observed - true, r0 = P(obs=1 | true=0),
    # r1 = P(obs=0 | true=1) and P = P(true=1), Var(u) works out to
    # r0 + (r1 - r0)*P - (r0 - (r0 + r1)*P)**2, i.e. r0 and r1 swapped
    # relative to the expression below. Confirm which convention is intended.
    error_variance = r1 + (r0 - r1) * P - (r1 - (r0 + r1) * P) ** 2
    error_covariance = -(r1 + r0) * P * (1 - P)

    # In this case the term is already the inverse, hence the product
    # rather than a division.
    scaled_bias = (error_variance + error_covariance) * termo_cov

    return 1 - scaled_bias

## Testing

In [11]:
# Inputs for the quick checks below: `covs` is passed to transf_v1 as
# `termo_erro`, and `termo` is passed to transf_v2 as `termo_cov`.
# NOTE(review): the name `var_c` suggests a variance while its comment
# describes a share — confirm which one this value is.
var_c = 0.25004973120490126 # % of schools in municipalities with corruption
termo = 5.36
covs = var_c - (1/termo)
covs

0.0634825670257968

In [12]:
# Parameters shared by the two transformation checks below.
P_est = 0.423559 # municipalities audited in Education
P_true = 0.35 # Paper Ferraz, Finan, Moreira
# NOTE(review): "R" presumably means these rates were estimated in R — confirm the source.
r_0 = 0.05540628 # R
r_1 = 0.1860478 # R

In [13]:
# Reciprocal of the transf_v1 ratio; the "Betas" cells use this quantity as
# the multiplier applied to each coefficient.
1/transf_v1(P_true, P_est, r_0, r_1, covs)

1.6562451958336055

In [14]:
# Reciprocal of the transf_v2 ratio, evaluated with the scalar `termo`
# defined above.
1/transf_v2(P_true, r_0, r_1, termo)

1.6734756132693105

### Negative Binomial

In [15]:
# 2x2 confusion matrix of counts. Presumably laid out as in the definition
# given in the "Functions" section (row = known group, column = predicted
# group) — confirm.
matrix_total = np.array([[903, 50],[46, 252]])
print(matrix_total)

[[903  50]
 [ 46 252]]


In [16]:
# Total number of observations in the confusion matrix. ndarray.sum() adds
# every entry directly instead of nesting the builtin sum() over rows.
matrix_total.sum()

1251

In [17]:
# Cell shares of the confusion matrix: each count divided by the grand total.
# The total is computed once; ravel() walks the matrix row by row, giving the
# order [0,0], [0,1], [1,0], [1,1].
pi_00, pi_01, pi_10, pi_11 = (matrix_total / matrix_total.sum()).ravel()

In [18]:
# Same parameter values as in the "Testing" section, restated for this cell.
P_est = 0.423559 # municipalities audited in Education
P_true = 0.35 # Paper Ferraz, Finan, Moreira
r_0 = 0.05540628 # R
r_1 = 0.1860478 # R
# Ratios from both transformations; the multipliers printed below are their
# reciprocals (only the first one is rounded to two decimals).
termo_bin_neg_1 = transf_v1(P_true, P_est, r_0, r_1, covs)
termo_bin_neg_2 = transf_v2(P_true, r_0, r_1, termo)
print(round(1/termo_bin_neg_1,2),'\n', 1/termo_bin_neg_2)

1.66 
 1.6734756132693105


### Classification

In [19]:
# 2x2 confusion matrix of counts used to derive r_0 and r_1 in the
# classification cells. Presumably row = known group, column = predicted
# group, as in the definition given in the "Functions" section — confirm.
matrix_educ = np.array([[200, 17],[10, 86]])
print(matrix_educ)

[[200  17]
 [ 10  86]]


In [20]:
P_est = 0.423559 # municipalities audited in Education
P_true = 0.35 # Paper Ferraz, Finan, Moreira
# Misclassification rates read off the confusion matrix: each off-diagonal
# count divided by the total of its own row.
r_0 = matrix_educ[0, 1] / matrix_educ[0].sum()
r_1 = matrix_educ[1, 0] / matrix_educ[1].sum()
# Ratios from both transformations; the multipliers printed below are their
# reciprocals (only the first one is rounded to two decimals).
termo_class_1 = transf_v1(P_true, P_est, r_0, r_1, covs)
termo_class_2 = transf_v2(P_true, r_0, r_1, termo)
print(round(1/termo_class_1,2),'\n', 1/termo_class_2)

1.47 
 1.38625919645


## Betas

In [21]:
# Parallel lists: position k in each list refers to the same outcome variable.
variables = ['mat' ,'port', 'fail', 'drop']
# Estimated coefficients and their standard errors, one per variable.
betas = [-0.1101, -0.1110, 0.0059, 0.0030]
sd = [0.025, 0.024, 0.003, 0.001]
# Per-variable value passed to transf_v2 as `termo_cov` in the cells below.
vec_termos = [5.34589020938, 5.34589020938, 5.36021844346, 5.36021844346]

### Negative Binomial

In [36]:
# Same values as the matrix_educ defined in the earlier "Classification"
# cell; restated here so the classification cells below have it in scope.
matrix_educ = np.array([[200, 17],[10, 86]])

In [30]:
P_est = 0.423559 # municipalities audited in Education
P_true = 0.35 # Paper Ferraz, Finan, Moreira
r_0 = 0.05540628 # R
r_1 = 0.1860478 # R

# The per-variable inputs are parallel lists; fail loudly if they drift apart.
assert len(variables) == len(betas) == len(sd)

# The multiplier does not depend on the variable, so it is computed once
# instead of on every loop iteration.
grad_beta_nb = 1/transf_v1(P_true, P_est, r_0, r_1, covs)

betas_mm_nb = []
sd_mm_nb = []
test_nb = []
for beta_i, sd_i in zip(betas, sd):
    # delta_method returns a variance; its square root is the rescaled
    # standard error.
    var = delta_method(beta_i, grad_beta_nb, sd_i)
    sd_i_mm = np.sqrt(var)

    betas_mm_nb.append(grad_beta_nb*beta_i)
    sd_mm_nb.append(sd_i_mm)
    # Coefficient and standard error are scaled by the same factor, so for a
    # positive multiplier this equals beta_i / sd_i up to rounding.
    test_nb.append(grad_beta_nb*beta_i/sd_i_mm)

print('coef:', betas_mm_nb,
      '\n sd:', sd_mm_nb,
      '\n t-test:', test_nb)

coef: [-0.18235259606128, -0.1838432167375302, 0.009771846655418272, 0.004968735587500816] 
 sd: [0.041406129895840145, 0.039749884700006531, 0.0049687355875008164, 0.0016562451958336056] 
 t-test: [-4.4039999999999999, -4.625, 1.9666666666666668, 2.9999999999999996]


#### We used this transformation:

In [37]:
P_est = 0.423559 # municipalities audited in Education
P_true = 0.35 # Paper Ferraz, Finan, Moreira
r_0 = 0.05540628 # R
r_1 = 0.1860478 # R

# The per-variable inputs are parallel lists; fail loudly if they drift apart.
assert len(variables) == len(betas) == len(sd) == len(vec_termos)

betas_mm_nb = []
sd_mm_nb = []
test_nb = []
# A loop-local name (termo_i) is used so the module-level `termo` defined in
# the "Testing" section is not overwritten; re-running the earlier transf_v2
# cells after this one would otherwise silently use a different value.
for beta_i, sd_i, termo_i in zip(betas, sd, vec_termos):
    grad_beta_nb = 1/transf_v2(P_true, r_0, r_1, termo_i)
    print(grad_beta_nb)
    # delta_method returns a variance; its square root is the rescaled
    # standard error.
    var = delta_method(beta_i, grad_beta_nb, sd_i)
    sd_i_mm = np.sqrt(var)

    betas_mm_nb.append(grad_beta_nb*beta_i)
    sd_mm_nb.append(sd_i_mm)
    # Coefficient and standard error are scaled by the same factor, so for a
    # positive multiplier this equals beta_i / sd_i up to rounding.
    test_nb.append(grad_beta_nb*beta_i/sd_i_mm)

print('coef:', betas_mm_nb,
      '\n sd:', sd_mm_nb,
      '\n t-test:', test_nb)

1.6705140039052677
1.6705140039052677
1.6735215465473279
1.6735215465473279
coef: [-0.18392359182996998, -0.18542705443348473, 0.009873777124629234, 0.005020564639641983] 
 sd: [0.041762850097631699, 0.040092336093726429, 0.0050205646396419834, 0.0016735215465473279] 
 t-test: [-4.4039999999999999, -4.625, 1.9666666666666668, 3.0]


### Classification

In [33]:
P_est = 0.423559 # municipalities audited in Education
P_true = 0.35 # Paper Ferraz, Finan, Moreira
# Misclassification rates read off the confusion matrix: each off-diagonal
# count divided by the total of its own row.
r_0 = matrix_educ[0, 1] / matrix_educ[0].sum()
r_1 = matrix_educ[1, 0] / matrix_educ[1].sum()

# The per-variable inputs are parallel lists; fail loudly if they drift apart.
assert len(variables) == len(betas) == len(sd)

# The multiplier does not depend on the variable, so it is computed once
# instead of on every loop iteration.
grad_beta_class = 1/transf_v1(P_true, P_est, r_0, r_1, covs)

betas_mm_class = []
sd_mm_class = []
test_class = []
for beta_i, sd_i in zip(betas, sd):
    # delta_method returns a variance; its square root is the rescaled
    # standard error.
    var = delta_method(beta_i, grad_beta_class, sd_i)
    sd_i_mm = np.sqrt(var)

    betas_mm_class.append(grad_beta_class*beta_i)
    sd_mm_class.append(sd_i_mm)
    # Coefficient and standard error are scaled by the same factor, so for a
    # positive multiplier this equals beta_i / sd_i up to rounding.
    test_class.append(grad_beta_class*beta_i/sd_i_mm)

print('coef:', betas_mm_class,
      '\n sd:', sd_mm_class,
      '\n t-test:', test_class)

coef: [-0.16238961429801332, -0.16371704983723412, 0.0087020774237809122, 0.0044247851307360579] 
 sd: [0.036873209422800479, 0.035398281045888463, 0.0044247851307360579, 0.0014749283769120191] 
 t-test: [-4.4040000000000008, -4.6249999999999991, 1.9666666666666663, 3.0000000000000004]


#### We used this transformation:

In [38]:
P_est = 0.423559 # municipalities audited in Education
P_true = 0.35 # Paper Ferraz, Finan, Moreira
# Misclassification rates read off the confusion matrix: each off-diagonal
# count divided by the total of its own row.
r_0 = matrix_educ[0, 1] / matrix_educ[0].sum()
r_1 = matrix_educ[1, 0] / matrix_educ[1].sum()

# The per-variable inputs are parallel lists; fail loudly if they drift apart.
assert len(variables) == len(betas) == len(sd) == len(vec_termos)

betas_mm_class = []
sd_mm_class = []
test_class = []
# A loop-local name (termo_i) is used so the module-level `termo` defined in
# the "Testing" section is not overwritten; re-running the earlier transf_v2
# cells after this one would otherwise silently use a different value.
for beta_i, sd_i, termo_i in zip(betas, sd, vec_termos):
    grad_beta_class = 1/transf_v2(P_true, r_0, r_1, termo_i)
    print(grad_beta_class)
    # delta_method returns a variance; its square root is the rescaled
    # standard error.
    var = delta_method(beta_i, grad_beta_class, sd_i)
    sd_i_mm = np.sqrt(var)

    betas_mm_class.append(grad_beta_class*beta_i)
    sd_mm_class.append(sd_i_mm)
    # Coefficient and standard error are scaled by the same factor, so for a
    # positive multiplier this equals beta_i / sd_i up to rounding.
    test_class.append(grad_beta_class*beta_i/sd_i_mm)

print('coef:', betas_mm_class,
      '\n sd:', sd_mm_class,
      '\n t-test:', test_class)

1.38485108287
1.38485108287
1.38628101894
1.38628101894
coef: [-0.15247210422354662, -0.15371847019812601, 0.0081790580117448494, 0.0041588430568194152] 
 sd: [0.034621277071650004, 0.033236425988784006, 0.0041588430568194152, 0.0013862810189398051] 
 t-test: [-4.4039999999999999, -4.6249999999999991, 1.9666666666666666, 3.0]
