# Final Project - NOPD Misconduct Complaints

Source: City of New Orleans Open Data, https://catalog.data.gov/dataset/nopd-misconduct-complaints



In [1]:
# Reading & cleaning the data

In [2]:
import pandas as pd
import statsmodels.formula.api as smf
import numpy as np

pd.set_option("display.max_columns", 100)
pd.set_option("display.max_colwidth", 100)



In [3]:
df = pd.read_csv('NOPD_Misconduct_Complaints.csv')
df.head()

Unnamed: 0,Incident Type,Complaint Tracking Number,Date Complaint Occurred,Date Complaint Received by NOPD (PIB),Date Complaint Investigation Complete,Complaint classification,Investigation status,Disposition,Bureau of Complainant,Division of Complainant,Unit of Complainant,Unit Additional Details of Complainant,Working Status of Complainant,Shift of Complainant,Rule Violation,Paragraph Violation,Unique Officer Allegation ID,Officer Race Ethnicity,Officer Gender,Officer Age,Officer years of service,Complainant Gender,Complainant Ethnicity,Complainant Age
0,Public Initiated,2016-0001-P,2016-01-01,2016-01-01,2016-07-21,DI-1,Completed,Unfounded,,8th District,,,,,RULE 3: PROF CONDUCT,PARAGRAPH 01 - Professionalism,30664.0,,,,,Male,Black,
1,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30667.0,Black,Male,60.0,,Female,White,
2,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30669.0,Black,Male,44.0,,Female,White,
3,Public Initiated,2016-0009-P,2016-01-04,2016-01-04,2017-03-20,DI-1,Completed,Unfounded,FOB - Field Operations Bureau,8th District,8th District,Patrol,Regular Working,Between 3pm-11pm,RULE 2: MORAL CONDUCT,PARAGRAPH 01 - ADHERENCE TO LAW,30671.0,White,Male,,,,,
4,Public Initiated,2016-0006-P,2016-12-30,2016-01-04,2016-07-25,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,Command Staff,Admin,,,,RULE 4: PERF OF DUTY,PARAGRAPH 02 - INSTRUCTIONS FROM AUTHORITATIVE SOURCE,30674.0,Black,Male,54.0,,Female,Black,50.0


In [4]:
df['Date Complaint Investigation Complete'] = pd.to_datetime(df["Date Complaint Investigation Complete"], format='%Y-%m-%d')

In [5]:
df['year_complete'] = df['Date Complaint Investigation Complete'].dt.year

In [6]:
df['complainant_race_clean'] = df["Complainant Ethnicity"].replace({
    'B':'Black',
    'b' : 'Black',
    'w' : 'White',
    'W':'White',
    'BLACK':'Black',
    'white':'White',
    'black':'Black',
    'Race-Unknown': np.nan,
    'Unknown': np.nan,
    'Unkown':np.nan
})
df.complainant_race_clean.value_counts()

Black       2092
White        684
Hispanic      72
Asian         11
Indian         1
Name: complainant_race_clean, dtype: int64

In [7]:
df['officer_race_clean'] = df['Officer Race Ethnicity'].replace({
    'Black':'Black',
    'White':'White',
    'Hispanic':'Hispanic',
    'Asian/Pacifi':'Asian',
    'Not Specifie':np.nan,
    'Race-Unknown':np.nan,
    'American Ind':'Indigenous',
    'Asian/Pacif':'Asian',
    ' Giving Anything of Value':np.nan,
    'PARAGRAPH 01 - Professionalism':np.nan  
})
df.officer_race_clean.value_counts()

Black         2595
White         1817
Hispanic       238
Asian           77
Indigenous       9
Name: officer_race_clean, dtype: int64

In [8]:
df['officer_age_clean'] = df['Officer Age'].replace({
    '-38': np.nan,
    '-8': np.nan,
    'Female': np.nan,
    'Male': np.nan
})
# df['officer_age_clean'].value_counts()

In [9]:
df['officer_age_clean'] = df['officer_age_clean'].astype(float)

In [10]:
df['officer_gender_clean'] = df['Officer Gender'].replace({
    'Male':'Male',
    'Female':'Female',
    'N': np.nan,
    'Black': np.nan,
    'White': np.nan
})
df.officer_gender_clean.value_counts()

Male      3779
Female     996
Name: officer_gender_clean, dtype: int64

In [11]:
df['incident_type'] = df['Incident Type']
df.incident_type.value_counts()

Public Initiated    3460
Rank Initiated      1830
Name: incident_type, dtype: int64

In [12]:
df['minority'] = df['officer_race_clean'].replace({
    'Black':'M',
    'White':'W',
    'Hispanic':'M',
    'Asian':'M',
    'Indigenous':'M'
})
df.minority.value_counts()

M    2919
W    1817
Name: minority, dtype: int64

In [13]:
#df.head()

# Brainstorming the regression

Definition of each disposition (pg. 18): https://www.nola.gov/getattachment/NOPD/Policies/Chapter-52-1-1-Misconduct-Intake-and-Complaint-Investigation-EFFECTIVE-3-18-18.pdf/

- Is the police department's discplinary board racist? 
     - Were complaints filed against Black officers marked as sustained more often than others? 
         - Controlling for gender, age? 7th district? Year?
         - Public initiated vs. rank initiated? Does who made the complaint factor into the decision? Is the Public more likely to accuse a Black officer?
         - Control for race of the complainant
     - Are they dismissing complaints against white police officers more often than Black?
     - Use unfounded vs. sustained?
       - Unfounded—the investigation determines by a preponderance of the evidence that the alleged misconduct did not occur or did not involve the accused officer.
       - Sustained—the investigation determines by a preponderance of the evidence that the alleged misconduct did occur.
- Are Black officers more often reported for "serious" offenses? By who (public vs. rank)? Are complaints against Black officers more likely to result in a sustained conviction?

In [14]:
# df.Disposition.value_counts()


In [15]:
# df['Division of Complainant'].value_counts()

In [16]:
# df.year_complete.value_counts()

In [17]:
# df['incident_type'].value_counts()

In [18]:
# df['Officer Race Ethnicity'].value_counts()

In [19]:
# df['Complainant Ethnicity'].value_counts()

In [20]:
# df['Officer Age'].value_counts().tail(40)

# Preparing DataFrame for logistic regression

In [21]:
df2 = df[df['Disposition'] != 'Pending']
# df2.shape

In [22]:
df2.Disposition.value_counts()

Unfounded                       1121
Sustained                        801
Not Sustained                    582
Other                            540
Exonerated                       526
NFIM                             359
Withdrawn - Mediation            177
Negotiated Settlement            106
Resigned under investigation       2
Name: Disposition, dtype: int64

In [23]:
df2['dispostion_new'] = df2.Disposition.replace({
    'Sustained':'S',
    'Unfounded': 'O',
    'Not Sustained':'O',
    'Other':'O',
    'Exonerated':'O',
    'NFIM':'O',
    'Withdrawn - Mediation':'O',
    'Negotiated Settlement':'O',
    'Resigned under investigation':'O'
})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['dispostion_new'] = df2.Disposition.replace({


In [24]:
# df2.head()

In [25]:
df2.dispostion_new.value_counts()

O    3413
S     801
Name: dispostion_new, dtype: int64

In [26]:
df2['sustained'] = df2.dispostion_new.replace({'S':1,'O': 0})
df2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['sustained'] = df2.dispostion_new.replace({'S':1,'O': 0})


Unnamed: 0,Incident Type,Complaint Tracking Number,Date Complaint Occurred,Date Complaint Received by NOPD (PIB),Date Complaint Investigation Complete,Complaint classification,Investigation status,Disposition,Bureau of Complainant,Division of Complainant,Unit of Complainant,Unit Additional Details of Complainant,Working Status of Complainant,Shift of Complainant,Rule Violation,Paragraph Violation,Unique Officer Allegation ID,Officer Race Ethnicity,Officer Gender,Officer Age,Officer years of service,Complainant Gender,Complainant Ethnicity,Complainant Age,year_complete,complainant_race_clean,officer_race_clean,officer_age_clean,officer_gender_clean,incident_type,minority,dispostion_new,sustained
0,Public Initiated,2016-0001-P,2016-01-01,2016-01-01,2016-07-21,DI-1,Completed,Unfounded,,8th District,,,,,RULE 3: PROF CONDUCT,PARAGRAPH 01 - Professionalism,30664.0,,,,,Male,Black,,2016.0,Black,,,,Public Initiated,,O,0
1,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30667.0,Black,Male,60.0,,Female,White,,2016.0,White,Black,60.0,Male,Public Initiated,M,O,0
2,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30669.0,Black,Male,44.0,,Female,White,,2016.0,White,Black,44.0,Male,Public Initiated,M,O,0
3,Public Initiated,2016-0009-P,2016-01-04,2016-01-04,2017-03-20,DI-1,Completed,Unfounded,FOB - Field Operations Bureau,8th District,8th District,Patrol,Regular Working,Between 3pm-11pm,RULE 2: MORAL CONDUCT,PARAGRAPH 01 - ADHERENCE TO LAW,30671.0,White,Male,,,,,,2017.0,,White,,Male,Public Initiated,W,O,0
4,Public Initiated,2016-0006-P,2016-12-30,2016-01-04,2016-07-25,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,Command Staff,Admin,,,,RULE 4: PERF OF DUTY,PARAGRAPH 02 - INSTRUCTIONS FROM AUTHORITATIVE SOURCE,30674.0,Black,Male,54.0,,Female,Black,50.0,2016.0,Black,Black,54.0,Male,Public Initiated,M,O,0


In [27]:
labels = [
    'under 25',
    '25-38',
    '39-54',
    '55-69',
    'over 70'
]
breaks = [0, 25, 39, 55, 70, 999]
df2['officer_age_bin'] = pd.cut(df2['officer_age_clean'], bins=breaks, labels=labels)
df2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['officer_age_bin'] = pd.cut(df2['officer_age_clean'], bins=breaks, labels=labels)


Unnamed: 0,Incident Type,Complaint Tracking Number,Date Complaint Occurred,Date Complaint Received by NOPD (PIB),Date Complaint Investigation Complete,Complaint classification,Investigation status,Disposition,Bureau of Complainant,Division of Complainant,Unit of Complainant,Unit Additional Details of Complainant,Working Status of Complainant,Shift of Complainant,Rule Violation,Paragraph Violation,Unique Officer Allegation ID,Officer Race Ethnicity,Officer Gender,Officer Age,Officer years of service,Complainant Gender,Complainant Ethnicity,Complainant Age,year_complete,complainant_race_clean,officer_race_clean,officer_age_clean,officer_gender_clean,incident_type,minority,dispostion_new,sustained,officer_age_bin
0,Public Initiated,2016-0001-P,2016-01-01,2016-01-01,2016-07-21,DI-1,Completed,Unfounded,,8th District,,,,,RULE 3: PROF CONDUCT,PARAGRAPH 01 - Professionalism,30664.0,,,,,Male,Black,,2016.0,Black,,,,Public Initiated,,O,0,
1,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30667.0,Black,Male,60.0,,Female,White,,2016.0,White,Black,60.0,Male,Public Initiated,M,O,0,55-69
2,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30669.0,Black,Male,44.0,,Female,White,,2016.0,White,Black,44.0,Male,Public Initiated,M,O,0,39-54
3,Public Initiated,2016-0009-P,2016-01-04,2016-01-04,2017-03-20,DI-1,Completed,Unfounded,FOB - Field Operations Bureau,8th District,8th District,Patrol,Regular Working,Between 3pm-11pm,RULE 2: MORAL CONDUCT,PARAGRAPH 01 - ADHERENCE TO LAW,30671.0,White,Male,,,,,,2017.0,,White,,Male,Public Initiated,W,O,0,
4,Public Initiated,2016-0006-P,2016-12-30,2016-01-04,2016-07-25,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,Command Staff,Admin,,,,RULE 4: PERF OF DUTY,PARAGRAPH 02 - INSTRUCTIONS FROM AUTHORITATIVE SOURCE,30674.0,Black,Male,54.0,,Female,Black,50.0,2016.0,Black,Black,54.0,Male,Public Initiated,M,O,0,39-54


In [28]:
df2.officer_age_bin.value_counts()

25-38       1659
39-54       1450
55-69        227
under 25     179
over 70        8
Name: officer_age_bin, dtype: int64

In [29]:
new_df = df2.drop(columns = ['Incident Type', 'Date Complaint Received by NOPD (PIB)', 'Complaint classification',
                  'Bureau of Complainant','Division of Complainant','Unit of Complainant','Date Complaint Occurred',
                  'Unit Additional Details of Complainant','Working Status of Complainant','Shift of Complainant',
                 'Unique Officer Allegation ID','Officer Race Ethnicity','Officer Age','Officer years of service',
                 'Officer Gender','Complainant Gender','Complainant Ethnicity','Complainant Age'])

In [30]:
new_df = new_df.dropna()
new_df.shape

(2050, 16)

In [31]:
new_df.head()

Unnamed: 0,Complaint Tracking Number,Date Complaint Investigation Complete,Investigation status,Disposition,Rule Violation,Paragraph Violation,year_complete,complainant_race_clean,officer_race_clean,officer_age_clean,officer_gender_clean,incident_type,minority,dispostion_new,sustained,officer_age_bin
1,2016-0002-P,2016-08-03,Completed,Exonerated,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,2016.0,White,Black,60.0,Male,Public Initiated,M,O,0,55-69
2,2016-0002-P,2016-08-03,Completed,Exonerated,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,2016.0,White,Black,44.0,Male,Public Initiated,M,O,0,39-54
4,2016-0006-P,2016-07-25,Completed,Exonerated,RULE 4: PERF OF DUTY,PARAGRAPH 02 - INSTRUCTIONS FROM AUTHORITATIVE SOURCE,2016.0,Black,Black,54.0,Male,Public Initiated,M,O,0,39-54
5,2016-0007-P,2016-07-25,Completed,Unfounded,RULE 4: PERF OF DUTY,PARAGRAPH 02 - INSTRUCTIONS FROM AUTHORITATIVE SOURCE,2016.0,Black,Black,53.0,Male,Public Initiated,M,O,0,39-54
8,2016-0003-P,2016-06-09,Completed,Unfounded,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,2016.0,White,Black,41.0,Male,Public Initiated,M,O,0,39-54


# Testing logistic regressions

In [33]:
model = smf.logit("""
    sustained ~ 
        C(officer_race_clean, Treatment('White'))       
""", data=df2)
results = model.fit()
results.summary()

Optimization terminated successfully.
         Current function value: 0.510232
         Iterations 5


0,1,2,3
Dep. Variable:,sustained,No. Observations:,3817.0
Model:,Logit,Df Residuals:,3812.0
Method:,MLE,Df Model:,4.0
Date:,"Wed, 07 Apr 2021",Pseudo R-squ.:,6.371e-05
Time:,12:15:38,Log-Likelihood:,-1947.6
converged:,True,LL-Null:,-1947.7
Covariance Type:,nonrobust,LLR p-value:,0.9929

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.3558,0.065,-20.940,0.000,-1.483,-1.229
"C(officer_race_clean, Treatment('White'))[T.Asian]",-0.0094,0.330,-0.029,0.977,-0.656,0.637
"C(officer_race_clean, Treatment('White'))[T.Black]",0.0294,0.084,0.349,0.727,-0.135,0.194
"C(officer_race_clean, Treatment('White'))[T.Hispanic]",-0.0435,0.192,-0.226,0.821,-0.421,0.333
"C(officer_race_clean, Treatment('White'))[T.Indigenous]",0.1031,0.804,0.128,0.898,-1.474,1.680


In [34]:
coefs = pd.DataFrame({
    'coef': results.params.values,
    'odds ratio': np.exp(results.params.values),
    'pvalue': results.pvalues,
    'name': results.params.index
})
coefs

Unnamed: 0,coef,odds ratio,pvalue,name
Intercept,-1.355835,0.257732,2.322119e-97,Intercept
"C(officer_race_clean, Treatment('White'))[T.Asian]",-0.009406,0.990638,0.9772512,"C(officer_race_clean, Treatment('White'))[T.Asian]"
"C(officer_race_clean, Treatment('White'))[T.Black]",0.029383,1.029819,0.726845,"C(officer_race_clean, Treatment('White'))[T.Black]"
"C(officer_race_clean, Treatment('White'))[T.Hispanic]",-0.043531,0.957403,0.8209657,"C(officer_race_clean, Treatment('White'))[T.Hispanic]"
"C(officer_race_clean, Treatment('White'))[T.Indigenous]",0.103072,1.108571,0.898041,"C(officer_race_clean, Treatment('White'))[T.Indigenous]"


In [35]:
df2.head()

Unnamed: 0,Incident Type,Complaint Tracking Number,Date Complaint Occurred,Date Complaint Received by NOPD (PIB),Date Complaint Investigation Complete,Complaint classification,Investigation status,Disposition,Bureau of Complainant,Division of Complainant,Unit of Complainant,Unit Additional Details of Complainant,Working Status of Complainant,Shift of Complainant,Rule Violation,Paragraph Violation,Unique Officer Allegation ID,Officer Race Ethnicity,Officer Gender,Officer Age,Officer years of service,Complainant Gender,Complainant Ethnicity,Complainant Age,year_complete,complainant_race_clean,officer_race_clean,officer_age_clean,officer_gender_clean,incident_type,minority,dispostion_new,sustained,officer_age_bin
0,Public Initiated,2016-0001-P,2016-01-01,2016-01-01,2016-07-21,DI-1,Completed,Unfounded,,8th District,,,,,RULE 3: PROF CONDUCT,PARAGRAPH 01 - Professionalism,30664.0,,,,,Male,Black,,2016.0,Black,,,,Public Initiated,,O,0,
1,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30667.0,Black,Male,60.0,,Female,White,,2016.0,White,Black,60.0,Male,Public Initiated,M,O,0,55-69
2,Public Initiated,2016-0002-P,2016-01-02,2016-01-01,2016-08-03,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,7th District,Night Watch,Patrol,Regular Working,Between 11pm-7am,RULE 4: PERF OF DUTY,PARAGRAPH 04 - NEGLECT OF DUTY,30669.0,Black,Male,44.0,,Female,White,,2016.0,White,Black,44.0,Male,Public Initiated,M,O,0,39-54
3,Public Initiated,2016-0009-P,2016-01-04,2016-01-04,2017-03-20,DI-1,Completed,Unfounded,FOB - Field Operations Bureau,8th District,8th District,Patrol,Regular Working,Between 3pm-11pm,RULE 2: MORAL CONDUCT,PARAGRAPH 01 - ADHERENCE TO LAW,30671.0,White,Male,,,,,,2017.0,,White,,Male,Public Initiated,W,O,0,
4,Public Initiated,2016-0006-P,2016-12-30,2016-01-04,2016-07-25,DI-1,Completed,Exonerated,FOB - Field Operations Bureau,Command Staff,Admin,,,,RULE 4: PERF OF DUTY,PARAGRAPH 02 - INSTRUCTIONS FROM AUTHORITATIVE SOURCE,30674.0,Black,Male,54.0,,Female,Black,50.0,2016.0,Black,Black,54.0,Male,Public Initiated,M,O,0,39-54


In [36]:
model = smf.logit("""
    sustained ~ 
        C(minority, Treatment('W'))
        + C(officer_gender_clean, Treatment('Female'))
        + C(incident_type, Treatment('Public Initiated'))
""", data=df2)
results = model.fit()
results.summary()

Optimization terminated successfully.
         Current function value: 0.473037
         Iterations 6


0,1,2,3
Dep. Variable:,sustained,No. Observations:,3817.0
Model:,Logit,Df Residuals:,3813.0
Method:,MLE,Df Model:,3.0
Date:,"Wed, 07 Apr 2021",Pseudo R-squ.:,0.07296
Time:,12:15:38,Log-Likelihood:,-1805.6
converged:,True,LL-Null:,-1947.7
Covariance Type:,nonrobust,LLR p-value:,2.619e-61

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-2.1208,0.123,-17.193,0.000,-2.363,-1.879
"C(minority, Treatment('W'))[T.M]",0.0177,0.087,0.203,0.839,-0.153,0.188
"C(officer_gender_clean, Treatment('Female'))[T.Male]",0.2653,0.106,2.512,0.012,0.058,0.472
"C(incident_type, Treatment('Public Initiated'))[T.Rank Initiated]",1.3862,0.083,16.612,0.000,1.223,1.550


In [37]:
coefs = pd.DataFrame({
    'coef': results.params.values,
    'odds ratio': np.exp(results.params.values),
    'pvalue': results.pvalues,
    'name': results.params.index
})
coefs

Unnamed: 0,coef,odds ratio,pvalue,name
Intercept,-2.12077,0.119939,3.00027e-66,Intercept
"C(minority, Treatment('W'))[T.M]",0.017657,1.017814,0.8389938,"C(minority, Treatment('W'))[T.M]"
"C(officer_gender_clean, Treatment('Female'))[T.Male]",0.265258,1.303767,0.01202003,"C(officer_gender_clean, Treatment('Female'))[T.Male]"
"C(incident_type, Treatment('Public Initiated'))[T.Rank Initiated]",1.386168,3.999495,5.730806e-62,"C(incident_type, Treatment('Public Initiated'))[T.Rank Initiated]"


In [42]:
model = smf.logit("""
    sustained ~ 
        C(minority, Treatment('W'))
        + C(officer_gender_clean, Treatment('Male'))
        + C(incident_type, Treatment('Public Initiated'))
        + C(officer_age_bin, Treatment('25-38'))
""", data=df2)
results = model.fit()
results.summary()

         Current function value: 0.472642
         Iterations: 35




0,1,2,3
Dep. Variable:,sustained,No. Observations:,3498.0
Model:,Logit,Df Residuals:,3490.0
Method:,MLE,Df Model:,7.0
Date:,"Wed, 07 Apr 2021",Pseudo R-squ.:,0.07999
Time:,12:23:54,Log-Likelihood:,-1653.3
converged:,False,LL-Null:,-1797.1
Covariance Type:,nonrobust,LLR p-value:,2.824e-58

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.8492,0.092,-20.118,0.000,-2.029,-1.669
"C(minority, Treatment('W'))[T.M]",0.0128,0.092,0.139,0.890,-0.168,0.194
"C(officer_gender_clean, Treatment('Male'))[T.Female]",-0.2332,0.112,-2.085,0.037,-0.452,-0.014
"C(incident_type, Treatment('Public Initiated'))[T.Rank Initiated]",1.4482,0.087,16.622,0.000,1.277,1.619
"C(officer_age_bin, Treatment('25-38'))[T.under 25]",0.0146,0.201,0.073,0.942,-0.379,0.408
"C(officer_age_bin, Treatment('25-38'))[T.39-54]",-0.0650,0.094,-0.693,0.488,-0.249,0.119
"C(officer_age_bin, Treatment('25-38'))[T.55-69]",0.0484,0.180,0.269,0.788,-0.304,0.400
"C(officer_age_bin, Treatment('25-38'))[T.over 70]",-17.6936,8762.720,-0.002,0.998,-1.72e+04,1.72e+04


In [43]:
coefs = pd.DataFrame({
    'coef': results.params.values,
    'odds ratio': np.exp(results.params.values),
    'pvalue': results.pvalues,
    'name': results.params.index
})
coefs

Unnamed: 0,coef,odds ratio,pvalue,name
Intercept,-1.849183,0.1573657,5.177685999999999e-90,Intercept
"C(minority, Treatment('W'))[T.M]",0.012795,1.012877,0.8897253,"C(minority, Treatment('W'))[T.M]"
"C(officer_gender_clean, Treatment('Male'))[T.Female]",-0.233205,0.7919913,0.03703136,"C(officer_gender_clean, Treatment('Male'))[T.Female]"
"C(incident_type, Treatment('Public Initiated'))[T.Rank Initiated]",1.448157,4.255267,4.856171000000001e-62,"C(incident_type, Treatment('Public Initiated'))[T.Rank Initiated]"
"C(officer_age_bin, Treatment('25-38'))[T.under 25]",0.014646,1.014754,0.9418631,"C(officer_age_bin, Treatment('25-38'))[T.under 25]"
"C(officer_age_bin, Treatment('25-38'))[T.39-54]",-0.064998,0.9370691,0.4884349,"C(officer_age_bin, Treatment('25-38'))[T.39-54]"
"C(officer_age_bin, Treatment('25-38'))[T.55-69]",0.048351,1.049539,0.7877151,"C(officer_age_bin, Treatment('25-38'))[T.55-69]"
"C(officer_age_bin, Treatment('25-38'))[T.over 70]",-17.693635,2.068959e-08,0.9983889,"C(officer_age_bin, Treatment('25-38'))[T.over 70]"


# Testing the regression with new dataframe

In [None]:
model = smf.logit("""
    sustained ~ 
        C(minority, Treatment('W'))
        + C(officer_gender_clean, Treatment('Male'))
        + C(incident_type, Treatment('Public Initiated'))
        + C(officer_age_bin, Treatment('25-38'))
""", data=new_df)
results = model.fit()
results.summary()

In [None]:
coefs = pd.DataFrame({
    'coef': results.params.values,
    'odds ratio': np.exp(results.params.values),
    'pvalue': results.pvalues,
    'name': results.params.index
})
coefs