# Set Up Data

In [2]:
from google.colab import files
uploaded = files.upload()

Saving ML_2007-2017_Arrestee.csv to ML_2007-2017_Arrestee.csv
Saving ML_2018_demographics_arrestee.csv to ML_2018_demographics_arrestee.csv


In [3]:
import pandas as pd
import io

df = pd.read_csv(io.StringIO(uploaded['ML_2007-2017_Arrestee.csv'].decode('utf-8')))
df.head()

Unnamed: 0,a_age,a_gender,a_race,crime,weapon
0,29.0,F,Black or African American,Negligent Manslaughter,Fire/Incendiary Device
1,29.0,F,Black or African American,Negligent Manslaughter,Fire/Incendiary Device
2,30.0,M,Black or African American,Murder and Nonnegligent Manslaughter,Other
3,49.0,M,White,Murder and Nonnegligent Manslaughter,Handgun
4,33.0,M,White,Murder and Nonnegligent Manslaughter,Firearm


# Create Pipeline

In [0]:
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV

numeric_features = ['a_age']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')), 
    ('scaler', StandardScaler())])
categorical_features = ['a_gender', 'a_race']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)])

clf_weapon = Pipeline(steps=[('preprocessor', preprocessor), 
                             ('classifier', LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=1000))])
clf_crime = Pipeline(steps=[('preprocessor', preprocessor),
                            ('classifier', LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=1000))])

# Train, Test, Split

#### Define x and y's

In [0]:
y1 = df['weapon']
y2 = df['crime']
X = df.drop(['weapon'], axis=1).drop(['crime'], axis=1)

#### Train, Test, Split: Weapon Model

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2)
print(X_train) 
print(y_train)
model_weapon = clf_weapon.fit(X_train, y_train)
print("model score: %.3f" % clf_weapon.score(X_test, y_test))

      a_age a_gender                     a_race
396    27.0        M  Black or African American
4458   22.0        M  Black or African American
7122   17.0        M  Black or African American
4652   18.0        M  Black or African American
8255   21.0        M                      White
...     ...      ...                        ...
1371   59.0        M                      White
2280   49.0        M                      White
7801   36.0        M                      White
6552   29.0        M                    Unknown
4096   20.0        M  Black or African American

[8607 rows x 3 columns]
396                              Rifle
4458                           Firearm
7122                           Handgun
4652                           Handgun
8255                           Handgun
                     ...              
1371                  Personal Weapons
2280    Drugs/Narcotics/Sleeping Pills
7801                           Firearm
6552                     Other Firearm
4096     

#### Train, Test, Split: Crime Model

In [8]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y2, test_size=0.2)
print(X_train2) 
print(y_train2)
model_crime = clf_crime.fit(X_train2, y_train2)
print("model score: %.3f" % clf_crime.score(X_test2, y_test2))

       a_age a_gender                     a_race
1871    18.0        M  Black or African American
22      22.0        M  Black or African American
5514    24.0        M  Black or African American
9223    55.0        F                      White
4298    43.0        M  Black or African American
...      ...      ...                        ...
10354   21.0        M  Black or African American
4077    22.0        M                      White
10559   18.0        M                      White
5651    22.0        M  Black or African American
1640    32.0        M  Black or African American

[8607 rows x 3 columns]
1871     Murder and Nonnegligent Manslaughter
22       Murder and Nonnegligent Manslaughter
5514     Murder and Nonnegligent Manslaughter
9223     Murder and Nonnegligent Manslaughter
4298     Murder and Nonnegligent Manslaughter
                         ...                 
10354    Murder and Nonnegligent Manslaughter
4077     Murder and Nonnegligent Manslaughter
10559              

# Predict 2018

#### Import Demographic-Only Data

In [11]:
MLdf = pd.read_csv(io.StringIO(uploaded['ML_2018_demographics_arrestee.csv'].decode('utf-8')))
MLdf.head()

Unnamed: 0,a_age,a_gender,a_race
0,31,M,Black or African American
1,31,M,Black or African American
2,37,M,White
3,31,M,White
4,34,M,White


#### Predict Which Weapon an Arrestee Might Use

In [0]:
MLdf_weapon = model_weapon.predict(MLdf)

#### Predict Which Type of Murder an Arrestee Might Commit

In [0]:
MLdf_crime = model_crime.predict(MLdf)

#### Combine Final Demographic Data and Weapon/Crime Predictions

In [14]:
MLdf["weapon"] = MLdf_weapon.tolist()
MLdf["crime"] = MLdf_crime.tolist()
MLdf.head()

Unnamed: 0,a_age,a_gender,a_race,weapon,crime
0,31,M,Black or African American,Handgun,Murder and Nonnegligent Manslaughter
1,31,M,Black or African American,Handgun,Murder and Nonnegligent Manslaughter
2,37,M,White,Handgun,Murder and Nonnegligent Manslaughter
3,31,M,White,Handgun,Murder and Nonnegligent Manslaughter
4,34,M,White,Handgun,Murder and Nonnegligent Manslaughter


In [0]:
MLdf.to_csv("ML_predictions_arrestee.csv", index=False)