In [429]:
import warnings
warnings.filterwarnings('ignore')

In [430]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import tensorflow as tf
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split

In [431]:
cleaned_crime = pd.read_csv('Resources/crime_chip_for_model.csv')
cleaned_crime.head()

Unnamed: 0.1,Unnamed: 0,ZIP,Ward,Primary_Type,rankings,Latitude,Longitude,Arrest,Domestic,Population,...,National_Rank,state,location,address,chip_latitude,chip_longitude,chipotle,Safety,Arrest_1,Domestic_1
0,0,60601,27,THEFT,3,,,0,0,5591,...,#271,Illinois,Chicago,"316 N Michigan Ave Chicago, IL 60601 US",41.887288,-87.624848,1,1,False,False
1,1,60601,27,CRIMINAL DAMAGE,4,41.883932,-87.679964,0,0,5591,...,#271,Illinois,Chicago,"316 N Michigan Ave Chicago, IL 60601 US",41.887288,-87.624848,1,0,False,False
2,2,60601,27,THEFT,3,41.896569,-87.636063,0,0,5591,...,#271,Illinois,Chicago,"316 N Michigan Ave Chicago, IL 60601 US",41.887288,-87.624848,1,1,False,False
3,3,60601,27,SEX OFFENSE,3,41.883937,-87.683368,1,0,5591,...,#271,Illinois,Chicago,"316 N Michigan Ave Chicago, IL 60601 US",41.887288,-87.624848,1,1,True,False
4,4,60601,27,NARCOTICS,4,41.892856,-87.710137,1,0,5591,...,#271,Illinois,Chicago,"316 N Michigan Ave Chicago, IL 60601 US",41.887288,-87.624848,1,0,True,False


In [432]:
cleaned_crime = cleaned_crime.drop(columns=['Unnamed: 0', 'state', 'location', 'address' ,'chip_latitude', 'chip_longitude', 'Arrest_1', 'Domestic_1'])
cleaned_crime = cleaned_crime.dropna()
cleaned_crime['National_Rank'] = cleaned_crime['National_Rank'].str.replace('#', '')
cleaned_crime['National_Rank'] = cleaned_crime['National_Rank'].str.replace(',', '')
cleaned_crime['National_Rank'] = cleaned_crime['National_Rank'].astype('int')
cleaned_crime['People/Sq.Mile'] = cleaned_crime['People/Sq.Mile'].str.replace(',', '')
cleaned_crime['People/Sq.Mile'] = cleaned_crime['People/Sq.Mile'].astype('float')
cleaned_crime['Population'] = cleaned_crime['Population'].str.replace(',', '')
cleaned_crime['Population'] = cleaned_crime['Population'].astype('int')
cleaned_crime

Unnamed: 0,ZIP,Ward,Primary_Type,rankings,Latitude,Longitude,Arrest,Domestic,Population,People/Sq.Mile,National_Rank,chipotle,Safety
1,60601,27,CRIMINAL DAMAGE,4,41.883932,-87.679964,0,0,5591,17101.15,271,1,0
2,60601,27,THEFT,3,41.896569,-87.636063,0,0,5591,17101.15,271,1,1
3,60601,27,SEX OFFENSE,3,41.883937,-87.683368,1,0,5591,17101.15,271,1,1
4,60601,27,NARCOTICS,4,41.892856,-87.710137,1,0,5591,17101.15,271,1,0
5,60601,27,OFFENSE INVOLVING CHILDREN,7,41.901683,-87.718962,0,1,5591,17101.15,271,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
121078,60661,30,ASSAULT,7,41.931096,-87.778958,0,0,4382,15455.28,318,1,0
121081,60661,30,OTHER OFFENSE,3,41.941430,-87.737110,0,1,4382,15455.28,318,1,1
121083,60661,30,OFFENSE INVOLVING CHILDREN,3,41.940857,-87.728919,0,0,4382,15455.28,318,1,1
121084,60661,30,BATTERY,4,41.940049,-87.719873,0,1,4382,15455.28,318,1,0


In [433]:
primary_type = cleaned_crime.Primary_Type.value_counts()

In [434]:
# Determine which values to replace if counts are less than ...?
replace_primary = list(primary_type[primary_type < 1000].index)

# Replace in dataframe
for primary in replace_primary:
    cleaned_crime.Primary_Type = cleaned_crime.Primary_Type.replace(primary,"OTHER OFFENSE")
    
# Check to make sure binning was successful
cleaned_crime.Primary_Type.value_counts()

BATTERY                       25230
THEFT                         23517
CRIMINAL DAMAGE               14638
OTHER OFFENSE                 11487
ASSAULT                       10578
DECEPTIVE PRACTICE             7339
BURGLARY                       5386
MOTOR VEHICLE THEFT            5107
NARCOTICS                      4712
WEAPONS VIOLATION              4314
ROBBERY                        4164
CRIMINAL TRESPASS              2646
OFFENSE INVOLVING CHILDREN     1149
Name: Primary_Type, dtype: int64

In [435]:
# Generate our categorical variable lists
crime_obj = list(cleaned_crime.dtypes[cleaned_crime.dtypes == 'object'].index)

In [436]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(cleaned_crime[crime_obj]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(crime_obj)
encode_df.head()

Unnamed: 0,Primary_Type_ASSAULT,Primary_Type_BATTERY,Primary_Type_BURGLARY,Primary_Type_CRIMINAL DAMAGE,Primary_Type_CRIMINAL TRESPASS,Primary_Type_DECEPTIVE PRACTICE,Primary_Type_MOTOR VEHICLE THEFT,Primary_Type_NARCOTICS,Primary_Type_OFFENSE INVOLVING CHILDREN,Primary_Type_OTHER OFFENSE,Primary_Type_ROBBERY,Primary_Type_THEFT,Primary_Type_WEAPONS VIOLATION
0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [437]:
# Merge one-hot encoded features and drop the originals
# cleaned_crime = cleaned_crime.merge(encode_df, left_index=True, right_index=True)
cleaned_crime = cleaned_crime.drop(columns=crime_obj,axis=1)
cleaned_crime.head()

Unnamed: 0,ZIP,Ward,rankings,Latitude,Longitude,Arrest,Domestic,Population,People/Sq.Mile,National_Rank,chipotle,Safety
1,60601,27,4,41.883932,-87.679964,0,0,5591,17101.15,271,1,0
2,60601,27,3,41.896569,-87.636063,0,0,5591,17101.15,271,1,1
3,60601,27,3,41.883937,-87.683368,1,0,5591,17101.15,271,1,1
4,60601,27,4,41.892856,-87.710137,1,0,5591,17101.15,271,1,0
5,60601,27,7,41.901683,-87.718962,0,1,5591,17101.15,271,1,0


In [438]:
# Split our preprocessed data into our features and target arrays
X = cleaned_crime.drop(columns=['chipotle','Ward','Population','rankings','Latitude','Longitude'],axis=1).values
X_raw = cleaned_crime.drop(columns=['chipotle'],axis=1)
y = cleaned_crime['chipotle'].values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .3, random_state=78, shuffle=True)

# Neural Network

In [439]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [440]:
print(X_raw.columns)

Index(['ZIP', 'Ward', 'rankings', 'Latitude', 'Longitude', 'Arrest',
       'Domestic', 'Population', 'People/Sq.Mile', 'National_Rank', 'Safety'],
      dtype='object')


In [441]:
X_raw.corr()

Unnamed: 0,ZIP,Ward,rankings,Latitude,Longitude,Arrest,Domestic,Population,People/Sq.Mile,National_Rank,Safety
ZIP,1.0,-0.200817,0.009385,-0.186629,0.234126,-0.037836,0.035799,0.256096,0.539912,-0.34416,-0.018259
Ward,-0.200817,1.0,-0.030639,0.673083,-0.543381,-0.016582,-0.075179,-0.121844,-0.434486,0.334451,0.057294
rankings,0.009385,-0.030639,1.0,-0.049223,-0.006258,0.023873,-0.041675,0.047103,0.024505,-0.042541,-0.741688
Latitude,-0.186629,0.673083,-0.049223,1.0,-0.612932,-0.022643,-0.106606,-0.117427,-0.203547,0.209745,0.08608
Longitude,0.234126,-0.543381,-0.006258,-0.612932,1.0,-0.032044,0.017651,0.009771,0.30432,-0.021037,0.016633
Arrest,-0.037836,-0.016582,0.023873,-0.022643,-0.032044,1.0,-0.035036,0.037339,0.003531,-0.020695,-0.129618
Domestic,0.035799,-0.075179,-0.041675,-0.106606,0.017651,-0.035036,1.0,0.032034,0.033582,-0.066224,-0.158891
Population,0.256096,-0.121844,0.047103,-0.117427,0.009771,0.037339,0.032034,1.0,0.554773,-0.54514,-0.069454
People/Sq.Mile,0.539912,-0.434486,0.024505,-0.203547,0.30432,0.003531,0.033582,0.554773,1.0,-0.63502,-0.038486
National_Rank,-0.34416,0.334451,-0.042541,0.209745,-0.021037,-0.020695,-0.066224,-0.54514,-0.63502,1.0,0.077296


In [442]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 15
hidden_nodes_layer2 = 10

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)


# Second hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu")
)

# Third hidden layer
nn.add(
    tf.keras.layers.Dropout(.2)
)

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_60 (Dense)            (None, 15)                105       
                                                                 
 dense_61 (Dense)            (None, 10)                160       
                                                                 
 dropout_18 (Dropout)        (None, 10)                0         
                                                                 
 dense_62 (Dense)            (None, 1)                 11        
                                                                 
Total params: 276
Trainable params: 276
Non-trainable params: 0
_________________________________________________________________


In [443]:
# Import checkpoint dependencies
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Define the checkpoint path and filenames
os.makedirs("checkpoints/",exist_ok=True)
checkpoint_path = "checkpoints/weights_opt_1.{epoch:02d}.hdf5"

In [444]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Create a callback that saves the model's weights every epoch
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq='epoch',
    period=5
)

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50, batch_size = 32, callbacks=[cp_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 5: saving model to checkpoints\weights_opt_1.05.hdf5
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 10: saving model to checkpoints\weights_opt_1.10.hdf5
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 15: saving model to checkpoints\weights_opt_1.15.hdf5
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 20: saving model to checkpoints\weights_opt_1.20.hdf5
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 25: saving model to checkpoints\weights_opt_1.25.hdf5
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 30: saving model to checkpoints\weights_opt_1.30.hdf5
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 35: saving model to checkpoints\weights_opt_1.35.hdf5
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 40: saving model to checkpoints\weights_opt_1.40.hdf5
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Ep

In [445]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1128/1128 - 2s - loss: 0.0023 - accuracy: 0.9996 - 2s/epoch - 2ms/step
Loss: 0.0023187659680843353, Accuracy: 0.9996119737625122
