In [1]:
from sys import path

import numpy as np
import pandas as pd 
from scipy.stats import norm
import matplotlib.pyplot as plt 
import seaborn as sns 
sns.set_theme()

%load_ext autoreload
%autoreload 2

# user-written 
import w8_estimation as est 
import w8_LinearModel as lm
import w8_probit as probit
import w8_logit as logit
import marginal_effects as me


In [2]:
# Outcome label
y_lab = 'anyuseofforce_coded'

# Dataset columns
rawdat_columns = [
    'anyuseofforce_coded',
    
    # Subject (civilian) characteristics
    'sblack',
    'shisp',
    'swhite',
    'smale',
    'sother', 
    'sage',
    'sempl', 
    'sincome',
    'spop', 
    'sbehavior',
    
    # Officer characteristics
    'omajblack',
    'omajhisp',
    'omajwhite',
    'omajother', 
    'osplit', 
    
    # Encounter characteristics
    'daytime',
    'inctype_lin', 
    'year'
]


In [3]:
# Final X-matrix variable labels 
x_lab = [
    # Subject vars (white is reference)
    'sblack',
    'shisp',
    'smale',
    'sage',
    'sincome',

    # Officer vars (white is reference)
    #'omajblack', --> Udelukkes fordi der ikke er nogen
    'omajhisp',
    #'omajother',

    # Encounter vars
    'daytime'
]


In [4]:
dat = pd.read_csv('ppcs_cc.csv')

N = dat.shape[0]

# reorder columns 
dat = dat[[y_lab] + x_lab].copy()

dat.head(5)

assert dat.notnull().all(axis=1).all(), 'Missings in the dataset, take them out!'

In [5]:
y = dat[y_lab].values
x = dat[x_lab].values
K = x.shape[1]

print("Shape x:", x.shape)
print("Rank x:", np.linalg.matrix_rank(x))
y.shape

Shape x: (3799, 7)
Rank x: 7


(3799,)

In [6]:
# OLS estimates
ols_results = lm.estimate(y, x, robust_se=True)
ols_tab = lm.print_table((y_lab, x_lab), ols_results, title='LPM results')
ols_tab

LPM results
Dependent variable: anyuseofforce_coded

R2 = 0.005
sigma2 = nan


Unnamed: 0,b_hat,se,t
sblack,0.0046,0.0042,1.1116
shisp,0.0123,0.0063,1.9493
smale,0.0061,0.0023,2.6734
sage,-0.0001,0.0,-1.372
sincome,0.0015,0.0012,1.2573
omajhisp,0.0041,0.0107,0.3799
daytime,-0.0018,0.0025,-0.7288


In [7]:
theta0 = probit.starting_values(y, x)
theta0.ndim==1

True

In [8]:
ll = probit.loglikelihood(theta0, y, x)
np.isclose(np.mean(ll), -1.0411283428047824)

False

In [9]:
#probit_results = est.estimate(probit.q, theta0, y, x)

In [10]:
#probit_tab = est.print_table(x_lab, probit_results, title=f'Probit, y = {y_lab}')
#probit_tab

In [11]:
theta0 = logit.starting_values(y, x)
theta0 

array([ 0.01851166,  0.04912049,  0.02430241, -0.00025994,  0.00592229,
        0.01625578, -0.00735115])

In [12]:
ll = logit.loglikelihood(theta0, y, x)
print("Logit log-likelihood at starting values:", ll)

Logit log-likelihood at starting values: [-0.71166872 -0.71818285 -0.71415777 ... -0.68261107 -0.68810016
 -0.68567423]


In [13]:
logit_results = est.estimate(logit.q, theta0, y, x)

Optimization terminated successfully.
         Current function value: 0.031885
         Iterations: 74
         Function evaluations: 776
         Gradient evaluations: 97


In [14]:
logit_tab = est.print_table(x_lab, logit_results, title=f'Logit, y = {y_lab}')
logit_tab

Optimizer succeeded after 74 iter. (776 func. evals.). Final criterion:  0.03189.
Logit, y = anyuseofforce_coded


Unnamed: 0,theta,se,t
sblack,0.01,0.7271,0.0138
shisp,0.5832,0.5309,1.0985
smale,0.2462,0.6762,0.3641
sage,-0.1136,0.0189,-6.005
sincome,-0.7457,0.2088,-3.5714
omajhisp,0.4801,1.318,0.3642
daytime,-0.9084,0.5815,-1.5622


In [15]:
# Reference covariate profile used for marginal effects
# sincome takes values 1 (low) - 3 (high); use 2 as the midpoint reference
x_ref = np.array([0, 0, 1, 25, 2, 1, 1], dtype=float)
pd.DataFrame(x_ref.reshape(1, -1), columns=x_lab, index=['x_ref'])


Unnamed: 0,sblack,shisp,smale,sage,sincome,omajhisp,daytime
x_ref,0.0,0.0,1.0,25.0,2.0,1.0,1.0


In [16]:
# Delta-method partial effects for logit across all regressors
b_lg = logit_results['theta']
cov_lg = logit_results['cov']

binary_vars = ['sblack', 'shisp', 'smale', 'omajhisp', 'daytime']
continuous_vars = ['sage', 'sincome']

me_rows = []

for var in binary_vars:
    idx = x_lab.index(var)
    x0 = x_ref.copy()
    x1 = x_ref.copy()
    x0[idx] = 0.0
    x1[idx] = 1.0
    effect, se = me.discrete_effect_delta(b_lg, cov_lg, x0, x1, logit.G)
    me_rows.append({
        'Var': var,
        'Effect (LPM)': ols_results['b_hat'][idx],
        's.e. (LPM)': ols_results['se'][idx],
        'Effect (Logit)': effect,
        's.e. (Logit)': se,
    })

for var in continuous_vars:
    idx = x_lab.index(var)
    effect, se = me.continuous_effect_delta(b_lg, cov_lg, x_ref, idx, logit.Gprime)
    me_rows.append({
        'Var': var,
        'Effect (LPM)': ols_results['b_hat'][idx],
        's.e. (LPM)': ols_results['se'][idx],
        'Effect (Logit)': effect,
        's.e. (Logit)': se,
    })

me_tab = pd.DataFrame(me_rows).set_index('Var')
me_tab['t (LPM)'] = me_tab['Effect (LPM)'] / me_tab['s.e. (LPM)']
me_tab['t (Logit)'] = me_tab['Effect (Logit)'] / me_tab['s.e. (Logit)']


In [17]:
me_tab.round(4)


Unnamed: 0_level_0,Effect (LPM),s.e. (LPM),Effect (Logit),s.e. (Logit),t (LPM),t (Logit)
Var,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
sblack,0.0046,0.0042,0.0001,0.0079,1.1116,0.0137
shisp,0.0123,0.0063,0.0084,0.0104,1.9493,0.8078
smale,0.0061,0.0023,0.0023,0.0079,2.6734,0.2982
omajhisp,0.0041,0.0107,0.0041,0.014,0.3799,0.2934
daytime,-0.0018,0.0025,-0.0156,0.0274,-0.7288,-0.5691
sage,-0.0001,0.0,-0.0012,0.0017,-1.372,-0.7333
sincome,0.0015,0.0012,-0.008,0.0109,1.2573,-0.7354
