# Set Up Data

In [3]:
from google.colab import files
uploaded = files.upload()

Saving ML_2007-2017_Victim.csv to ML_2007-2017_Victim.csv
Saving ML_2018_demographics_victim.csv to ML_2018_demographics_victim.csv


In [4]:
import pandas as pd
import io

df = pd.read_csv(io.StringIO(uploaded['ML_2007-2017_Victim.csv'].decode('utf-8')))
df.head()

Unnamed: 0,v_age,v_gender,v_race,crime,weapon
0,7.0,M,Black or African American,Negligent Manslaughter,Fire/Incendiary Device
1,4.0,M,Black or African American,Negligent Manslaughter,Fire/Incendiary Device
2,22.0,M,Black or African American,Murder and Nonnegligent Manslaughter,Other
3,49.0,M,White,Murder and Nonnegligent Manslaughter,Handgun
4,69.0,M,White,Murder and Nonnegligent Manslaughter,Firearm


# Create Pipeline

In [0]:
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV

numeric_features = ['v_age']
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')), 
    ('scaler', StandardScaler())])
categorical_features = ['v_gender', 'v_race']
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))])

preprocessor = ColumnTransformer(transformers=[
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features)])

clf_weapon = Pipeline(steps=[('preprocessor', preprocessor), 
                             ('classifier', LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=1000))])
clf_crime = Pipeline(steps=[('preprocessor', preprocessor),
                            ('classifier', LogisticRegression(solver='lbfgs', multi_class='multinomial', max_iter=1000))])

# Train, Test, Split

#### Define x and y's

In [0]:
y1 = df['weapon']
y2 = df['crime']
X = df.drop(['weapon'], axis=1).drop(['crime'], axis=1)

#### Train, Test, Split: Weapon Model

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y1, test_size=0.2)
print(X_train) 
print(y_train)
model_weapon = clf_weapon.fit(X_train, y_train)
print("model score: %.3f" % clf_weapon.score(X_test, y_test))

      v_age v_gender                     v_race
9858   51.0        F                      White
7476   18.0        M                      White
2444   55.0        M                      White
9235    8.0        M                      White
7111   22.0        M  Black or African American
...     ...      ...                        ...
3551    3.0        M  Black or African American
4860   22.0        M  Black or African American
649    30.0        M  Black or African American
4720    1.0        F                      White
826    38.0        F                      White

[8607 rows x 3 columns]
9858                Blunt Object
7476                     Handgun
2444    Knife/Cutting Instrument
9235                     Handgun
7111                     Handgun
                  ...           
3551            Personal Weapons
4860                     Handgun
649                      Handgun
4720            Personal Weapons
826             Personal Weapons
Name: weapon, Length: 8607, dtype: o

#### Train, Test, Split: Crime Model

In [8]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(X, y2, test_size=0.2)
print(X_train2) 
print(y_train2)
model_crime = clf_crime.fit(X_train2, y_train2)
print("model score: %.3f" % clf_crime.score(X_test2, y_test2))

       v_age v_gender                     v_race
7647    18.0        F  Black or African American
4128    18.0        M                      White
8063    15.0        M  Black or African American
4973     3.0        M                      White
10548   52.0        M                      White
...      ...      ...                        ...
6104    18.0        M  Black or African American
6271     4.0        M  Black or African American
5367    26.0        M  Black or African American
9038    26.0        M  Black or African American
9762    20.0        M  Black or African American

[8607 rows x 3 columns]
7647     Murder and Nonnegligent Manslaughter
4128     Murder and Nonnegligent Manslaughter
8063     Murder and Nonnegligent Manslaughter
4973     Murder and Nonnegligent Manslaughter
10548    Murder and Nonnegligent Manslaughter
                         ...                 
6104     Murder and Nonnegligent Manslaughter
6271     Murder and Nonnegligent Manslaughter
5367     Murder and

# Predict 2018

#### Import Demographic-Only Data

In [9]:
MLdf = pd.read_csv(io.StringIO(uploaded['ML_2018_demographics_victim.csv'].decode('utf-8')))
MLdf.head()

Unnamed: 0,v_age,v_gender,v_race
0,29.0,F,Black or African American
1,29.0,F,Black or African American
2,31.0,F,White
3,60.0,M,White
4,60.0,M,White


#### Predict Which Weapon an Arrestee Might Use

In [0]:
MLdf_weapon = model_weapon.predict(MLdf)

#### Predict Which Type of Murder an Arrestee Might Commit

In [0]:
MLdf_crime = model_crime.predict(MLdf)

#### Combine Final Demographic Data and Weapon/Crime Predictions

In [12]:
MLdf["weapon"] = MLdf_weapon.tolist()
MLdf["crime"] = MLdf_crime.tolist()
MLdf.head()

Unnamed: 0,v_age,v_gender,v_race,weapon,crime
0,29.0,F,Black or African American,Handgun,Murder and Nonnegligent Manslaughter
1,29.0,F,Black or African American,Handgun,Murder and Nonnegligent Manslaughter
2,31.0,F,White,Handgun,Murder and Nonnegligent Manslaughter
3,60.0,M,White,Handgun,Murder and Nonnegligent Manslaughter
4,60.0,M,White,Handgun,Murder and Nonnegligent Manslaughter


In [0]:
MLdf.to_csv("ML_predictions_victim.csv", index=False)