In [1]:
from sys import path

import numpy as np
import pandas as pd 
from scipy.stats import norm
import matplotlib.pyplot as plt 
import seaborn as sns 
sns.set_theme()

%load_ext autoreload
%autoreload 2

# user-written 
import w8_estimation as est 
import w8_LinearModel as lm
import w8_probit as probit
import w8_logit as logit

In [2]:
# Name of the dependent-variable column.
y_lab = 'anyuseofforce_coded'

# Columns of the raw dataset, assembled group by group (outcome first).
rawdat_columns = (
    [y_lab]
    # Subject (civilian) characteristics
    + ['sblack', 'shisp', 'swhite', 'smale', 'sother',
       'sage', 'sempl', 'sincome', 'spop', 'sbehavior']
    # Officer characteristics
    + ['omajblack', 'omajhisp', 'omajwhite', 'omajother', 'osplit']
    # Encounter characteristics
    + ['daytime', 'inctype_lin', 'year']
)

# Labels of the regressors that make up the final X-matrix, in column order.
# NOTE(review): no constant column is listed here; unless the w8_* estimators
# add an intercept themselves, every model below is fitted without one -- confirm.
x_lab = (
    # Subject vars (white is reference)
    ['sblack', 'shisp', 'smale', 'sage',
     'sincome', 'sempl', 'spop', 'sbehavior']
    # Officer vars (white is reference)
    + ['omajblack', 'omajhisp', 'omajother']
    # Encounter vars
    + ['daytime']
)

In [3]:
# Load the dataset from CSV (path is relative to the notebook's working directory).
dat = pd.read_csv('ppcs_cc.csv')

# Number of observations (rows) in the loaded file.
N = dat.shape[0]

# Keep only the outcome and the regressors, with the outcome as first column.
# .copy() gives an independent frame rather than a view of the full dataset.
dat = dat[[y_lab] + x_lab].copy()

# Fail fast if any retained column contains a missing value: the arrays built
# from this frame are passed straight to the estimators.
assert dat.notnull().all(axis=1).all(), 'Missings in the dataset, take them out!'

# Preview goes last: only the final expression of a cell is rendered, so a
# mid-cell `dat.head(5)` is silently discarded.
dat.head(5)

In [4]:
# Regressor matrix and outcome vector as plain NumPy arrays.
# NOTE(review): x is built from x_lab only, which has no constant column -- confirm
# whether the estimators are meant to run without an intercept.
x = dat[x_lab].to_numpy()
y = dat[y_lab].to_numpy()

# Number of regressors (columns of x).
K = x.shape[1]

# Sanity check: the rank should equal the number of columns, otherwise the
# regressors are perfectly collinear.
print("Shape x:", x.shape)
print("Rank x:", np.linalg.matrix_rank(x))
y.shape

Shape x: (3799, 12)
Rank x: 12


(3799,)

In [5]:
# OLS estimates
# Fit y on x with the user-written linear-model module; robust_se=True
# presumably requests heteroskedasticity-robust standard errors (confirm in
# w8_LinearModel).
# NOTE(review): the recorded output reports `sigma2 = nan` -- check how
# lm.estimate fills sigma2 when robust_se=True.
ols_results = lm.estimate(y, x, robust_se=True)
# Build the results table titled 'LPM results'; the bare name on the last line
# makes the notebook render it.
ols_tab = lm.print_table((y_lab, x_lab), ols_results, title='LPM results')
ols_tab

LPM results
Dependent variable: anyuseofforce_coded

R2 = 0.027
sigma2 = nan


Unnamed: 0,b_hat,se,t
sblack,0.0021,0.0044,0.4782
shisp,0.0092,0.0058,1.5706
smale,0.0053,0.0022,2.346
sage,-0.0001,0.0001,-2.3887
sincome,0.0014,0.0014,1.0163
sempl,-0.0057,0.003,-1.9011
spop,0.005,0.0021,2.4229
sbehavior,0.0371,0.0124,2.9945
omajblack,-0.0061,0.0019,-3.2668
omajhisp,-0.0002,0.0101,-0.0242


In [6]:
# Starting values for the probit optimizer, computed by the user-written module.
theta0 = probit.starting_values(y, x)

# Enforce the expected shape instead of merely displaying a boolean: a bare
# `theta0.ndim==1` that evaluates to False would go unnoticed.
assert theta0.ndim == 1, 'probit starting values must be a 1-D parameter vector'

True

In [14]:
# Evaluate the probit log-likelihood at the starting values. The result is only
# stored, not displayed or checked, so this cell acts as a smoke test that the
# call runs without error.
ll = probit.loglikelihood(theta0, y, x)


In [16]:
# Estimate the probit model: est.estimate receives the criterion function
# probit.q, the starting values theta0 and the data (y, x).
# NOTE(review): the recorded run stopped after a single iteration and the
# results table reports se = 0.0 and t = +/-inf for every coefficient, so these
# estimates should not be trusted until probit.q / est.estimate are checked.
probit_results = est.estimate(probit.q, theta0, y, x)

Optimization terminated successfully.
         Current function value: 0.092128
         Iterations: 1
         Function evaluations: 26
         Gradient evaluations: 2


  't': result.x / se,


In [17]:
# Tabulate the probit estimates, labelling rows with x_lab; the title embeds the
# outcome name. The bare name on the last line makes the notebook render the table.
probit_tab = est.print_table(x_lab, probit_results, title=f'Probit, y = {y_lab}')
probit_tab

Optimizer succeeded after 1 iter. (26 func. evals.). Final criterion:  0.09213.
Probit, y = anyuseofforce_coded


Unnamed: 0,theta,se,t
sblack,0.0056,0.0,inf
shisp,0.0341,0.0,inf
smale,0.008,0.0,inf
sage,-1.0082,0.0,-inf
sincome,-0.0476,0.0,-inf
sempl,-0.0399,0.0,-inf
spop,-0.0136,0.0,-inf
sbehavior,0.1467,0.0,inf
omajblack,-0.0257,0.0,-inf
omajhisp,-0.0016,0.0,-inf


In [18]:
# Starting values for the logit optimizer, computed by the user-written module.
# NOTE(review): this rebinds `theta0`, which previously held the probit starting
# values; re-running the probit cells after this one would silently use the
# logit values instead. Distinct names would avoid that.
theta0 = logit.starting_values(y, x)
# Display the starting vector.
theta0 

array([ 0.00832938,  0.03663035,  0.02105539, -0.00053807,  0.00569725,
       -0.02281856,  0.01998726,  0.14832425, -0.02425561, -0.00097422,
       -0.0283307 , -0.00948134])

In [19]:
# Evaluate the logit log-likelihood at the starting values.
ll = logit.loglikelihood(theta0, y, x)
# Compare the mean log-likelihood contribution with a hard-coded reference value.
# NOTE(review): the recorded output of this check is False. Either the reference
# number belongs to a different specification/dataset or logit.loglikelihood
# differs from the reference implementation -- resolve before relying on the fit.
np.isclose(np.mean(ll),-0.9974267061091704)

False

In [20]:
# Estimate the logit model: est.estimate receives the criterion function
# logit.q, the starting values theta0 (logit version) and the data (y, x).
logit_results = est.estimate(logit.q, theta0, y, x)

Optimization terminated successfully.
         Current function value: 0.027627
         Iterations: 133
         Function evaluations: 2041
         Gradient evaluations: 157


  '''


In [21]:
# Tabulate the logit estimates, labelling rows with x_lab; the title embeds the
# outcome name. The bare name on the last line makes the notebook render the table.
# NOTE(review): the recorded table shows omajblack = -28.0034 with missing se/t,
# which looks like a (quasi-)separation or Hessian problem -- investigate.
logit_tab = est.print_table(x_lab, logit_results, title=f'Logit, y = {y_lab}')
logit_tab

Optimizer succeeded after 133 iter. (2041 func. evals.). Final criterion:  0.02763.
Logit, y = anyuseofforce_coded


Unnamed: 0,theta,se,t
sblack,0.061,0.808,0.0755
shisp,0.4417,0.6179,0.7148
smale,0.1569,0.7079,0.2217
sage,-0.1139,0.0203,-5.6121
sincome,-0.7069,0.2356,-3.0
sempl,-1.1254,0.513,-2.1938
spop,0.1164,0.2884,0.4037
sbehavior,2.3453,0.4814,4.8722
omajblack,-28.0034,,
omajhisp,0.0023,1.5649,0.0015
