# Neural Network

In [95]:
import warnings
warnings.filterwarnings('ignore')

In [96]:
import numpy as np
import pandas as pd
import csv
import os
import psycopg2
import boto3
from config import *
from pprint import pprint
from collections import Counter
import tensorflow as tf
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.model_selection import train_test_split

In [97]:
def download_file_using_client(bucket='cagurr-capstone',
                               key='final_data.csv',
                               destination='Resources/final_data.csv'):
    """Download a single object from S3 to a local file.

    Parameters
    ----------
    bucket : str
        Name of the S3 bucket to read from.
    key : str
        Object key inside the bucket.
    destination : str
        Local path the object is written to. Missing parent folders are
        created first.

    Notes
    -----
    Credentials are read from the module-level names ``ACCESS_ID`` and
    ``ACCESS_KEY`` (presumably supplied by the star-import of ``config``;
    confirm they are not committed to version control).
    """
    # The download is written to `destination` directly, so make sure the
    # target folder exists; otherwise a fresh checkout fails here.
    target_dir = os.path.dirname(destination)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    s3 = boto3.client('s3',
                      aws_access_key_id=ACCESS_ID,
                      aws_secret_access_key=ACCESS_KEY)
    s3.download_file(bucket, key, destination)


download_file_using_client()

In [98]:
# Read the CSV fetched in the previous cell and preview its first five rows.
data_path = 'Resources/final_data.csv'
cleaned_crime = pd.read_csv(data_path)
cleaned_crime.head(n=5)

Unnamed: 0,index,zip,ward,primary_type,rankings,latitude,longitude,population,population_density,national_rank,arrest,domestic,chipotle,safe_status
0,0,60626,1.0,THEFT,3.0,41.896025,-87.67021,59251,34520.21,98.0,0.0,0.0,1.0,1.0
1,1,60626,1.0,THEFT,3.0,41.920451,-87.687502,59251,34520.21,98.0,0.0,0.0,1.0,1.0
2,2,60626,1.0,CRIMINAL DAMAGE,4.0,41.908246,-87.678015,59251,34520.21,98.0,0.0,0.0,1.0,0.0
3,3,60626,1.0,THEFT,3.0,41.906276,-87.670012,59251,34520.21,98.0,1.0,0.0,1.0,1.0
4,4,60626,1.0,OTHER OFFENSE,3.0,41.904148,-87.675588,59251,34520.21,98.0,0.0,0.0,1.0,1.0


In [99]:
# Every column except `chipotle`, kept aside for the correlation check below.
X_raw = cleaned_crime.drop(columns='chipotle')

In [100]:
# Pairwise correlation of the numeric columns only. X_raw still contains
# text columns (e.g. `primary_type`); recent pandas versions raise on those
# instead of silently skipping them, so select the numeric/bool columns
# explicitly before calling corr().
X_raw.select_dtypes(include=['number', 'bool']).corr()

Unnamed: 0,index,zip,ward,rankings,latitude,longitude,national_rank,arrest,domestic,safe_status
index,1.0,0.150236,0.908705,-0.023632,0.553632,-0.517904,0.260325,-0.029731,-0.04029,0.038623
zip,0.150236,1.0,0.092869,-0.015519,0.067237,0.067864,-0.354315,-0.037788,-0.015392,0.028684
ward,0.908705,0.092869,1.0,-0.036288,0.662226,-0.487775,0.358652,-0.017782,-0.075267,0.064919
rankings,-0.023632,-0.015519,-0.036288,1.0,-0.052943,-0.007955,-0.045349,0.023143,-0.040169,-0.743631
latitude,0.553632,0.067237,0.662226,-0.052943,1.0,-0.556226,0.232252,-0.022301,-0.108847,0.091793
longitude,-0.517904,0.067864,-0.487775,-0.007955,-0.556226,1.0,-0.009724,-0.02394,0.010789,0.019663
national_rank,0.260325,-0.354315,0.358652,-0.045349,0.232252,-0.009724,1.0,-0.023331,-0.072154,0.083658
arrest,-0.029731,-0.037788,-0.017782,0.023143,-0.022301,-0.02394,-0.023331,1.0,-0.032899,-0.125806
domestic,-0.04029,-0.015392,-0.075267,-0.040169,-0.108847,0.010789,-0.072154,-0.032899,1.0,-0.161053
safe_status,0.038623,0.028684,0.064919,-0.743631,0.091793,0.019663,0.083658,-0.125806,-0.161053,1.0


In [101]:
# Columns that are not carried forward into the cleaned frame.
unused_columns = ['index', 'primary_type', 'latitude', 'longitude', 'ward']

# Built as one chain so the cell can be re-executed safely:
#   * errors='ignore' -> the drop is a no-op once the columns are already gone
#   * astype(str)     -> the comma stripping also works when a previous run has
#                        already converted the column to a numeric dtype
#   * regex=False     -> the comma is always treated as a literal character
cleaned_crime = (
    cleaned_crime
    .drop(columns=unused_columns, errors='ignore')
    .dropna()
    .assign(
        population_density=lambda frame: (
            frame['population_density']
            .astype(str)
            .str.replace(',', '', regex=False)
            .astype('float')
        ),
        population=lambda frame: (
            frame['population']
            .astype(str)
            .str.replace(',', '', regex=False)
            .astype('int')
        ),
    )
)
cleaned_crime

Unnamed: 0,zip,rankings,population,population_density,national_rank,arrest,domestic,chipotle,safe_status
0,60626,3.0,59251,34520.21,98.0,0.0,0.0,1.0,1.0
1,60626,3.0,59251,34520.21,98.0,0.0,0.0,1.0,1.0
2,60626,4.0,59251,34520.21,98.0,0.0,0.0,1.0,0.0
3,60626,3.0,59251,34520.21,98.0,1.0,0.0,1.0,1.0
4,60626,3.0,59251,34520.21,98.0,0.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...
110923,60659,2.0,39155,19859.02,213.0,0.0,0.0,1.0,1.0
110924,60659,7.0,39155,19859.02,213.0,0.0,1.0,1.0,0.0
110925,60659,3.0,39155,19859.02,213.0,0.0,0.0,1.0,1.0
110926,60659,7.0,39155,19859.02,213.0,0.0,0.0,1.0,0.0


In [102]:
# Target array: the `chipotle` column.
y = cleaned_crime['chipotle'].values

# Feature array: all remaining columns except `population` and `rankings`.
excluded_from_features = ['chipotle', 'population', 'rankings']
X = cleaned_crime.drop(columns=excluded_from_features).values

# Hold out 30% of the rows for testing; the fixed seed keeps the shuffled
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.3,
    random_state=78,
    shuffle=True,
)

In [103]:
# Fit the scaler on the training split only; fit() returns the scaler
# itself, so `scaler` and `X_scaler` refer to the same fitted object.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the training-set statistics to both splits.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [104]:
# Network dimensions: one input per feature column, followed by two hidden
# layers of 15 and 10 units.
number_input_features = X_train.shape[1]
hidden_nodes_layer1 = 15
hidden_nodes_layer2 = 10

# Assemble the full stack in a single constructor call.
nn = tf.keras.models.Sequential([
    # First hidden layer (also declares the input width)
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        input_dim=number_input_features,
        activation="relu",
    ),
    # Second hidden layer
    tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"),
    # Dropout with rate 0.2 between the hidden layers and the output
    # (a regularisation layer with no trainable parameters, not a hidden layer)
    tf.keras.layers.Dropout(.2),
    # Output layer: a single sigmoid unit
    tf.keras.layers.Dense(units=1, activation="sigmoid"),
])

# Check the structure of the model
nn.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_18 (Dense)            (None, 15)                105       
                                                                 
 dense_19 (Dense)            (None, 10)                160       
                                                                 
 dropout_6 (Dropout)         (None, 10)                0         
                                                                 
 dense_20 (Dense)            (None, 1)                 11        
                                                                 
Total params: 276
Trainable params: 276
Non-trainable params: 0
_________________________________________________________________


In [105]:
# Dependencies for writing checkpoints during training
import os
from tensorflow.keras.callbacks import ModelCheckpoint

# Create the output folder if it is missing (no error when it already exists).
checkpoint_dir = "checkpoints/"
os.makedirs(checkpoint_dir, exist_ok=True)

# Checkpoint filename template; `{epoch:02d}` is a format placeholder for
# the zero-padded epoch number.
checkpoint_path = "checkpoints/weights_opt_1.{epoch:02d}.hdf5"

In [106]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Training settings, named once so the checkpoint cadence below stays in
# sync with the values handed to fit().
batch_size = 32
checkpoint_every_n_epochs = 5

# ModelCheckpoint's `period` argument is deprecated and rejected by newer
# Keras releases. The supported way to save every N epochs is an integer
# `save_freq`, which is counted in batches, so convert epochs to batches.
steps_per_epoch = -(-len(X_train_scaled) // batch_size)  # ceiling division

# Create a callback that saves the model's weights every 5 epochs
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_weights_only=True,
    save_freq=checkpoint_every_n_epochs * steps_per_epoch,
)

# Train the model
fit_model = nn.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=batch_size,
    callbacks=[cp_callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 5: saving model to checkpoints\weights_opt_1.05.hdf5
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 10: saving model to checkpoints\weights_opt_1.10.hdf5
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 15: saving model to checkpoints\weights_opt_1.15.hdf5
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 20: saving model to checkpoints\weights_opt_1.20.hdf5
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 25: saving model to checkpoints\weights_opt_1.25.hdf5
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 30: saving model to checkpoints\weights_opt_1.30.hdf5
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 35: saving model to checkpoints\weights_opt_1.35.hdf5
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 40: saving model to checkpoints\weights_opt_1.40.hdf5
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Ep

In [107]:
# Score the trained network on the held-out, scaled test split. With
# metrics=["accuracy"], evaluate() returns the loss followed by the accuracy.
evaluation = nn.evaluate(X_test_scaled, y_test, verbose=2)
model_loss, model_accuracy = evaluation
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

991/991 - 1s - loss: 0.0414 - accuracy: 0.9846 - 1s/epoch - 1ms/step
Loss: 0.04135726019740105, Accuracy: 0.9846289753913879
