In [1]:
import pandas as pd
import requests
import json
import datetime
import pickle
import math
import numpy as np
from sklearn.cluster import KMeans
from sklearn.linear_model import LinearRegression

In [2]:
# define the current and previous year
current_year = datetime.datetime.now().year
last_year = current_year - 1

# function that calls api and returns the features for a given year
def callAndStore(year):
    url = f'https://services.arcgis.com/afSMGVsC7QlRK1kZ/arcgis/rest/services/Police_Incidents_{year}/FeatureServer/0/query?where=1%3D1&outFields=reportedDateTime,offense,description,centerLong,centerLat&outSR=4326&f=json'
    # make api request
    response = requests.get(url).json()

    features = [x for x in response['features']]

    return features

crime_list=[]

# for all features in the current and prior year.....
for crime in (callAndStore(current_year)+callAndStore(last_year)):
    clean_crime={
        'date':datetime.datetime.fromtimestamp(crime['attributes']['reportedDateTime']/1000).strftime("%m/%d/%Y"),
        'time':datetime.datetime.fromtimestamp(crime['attributes']['reportedDateTime']/1000).strftime("%H:%M:%S"),
        'centerLong': crime['attributes']['centerLong'],
        'centerLat':crime['attributes']['centerLat'],
        'description':crime['attributes']['description'].strip()
        }
    crime_list.append(clean_crime)

In [3]:
incidents = pd.DataFrame(crime_list)

Clusters=500

# Initialize and Fit KMeans Model
clusterer = KMeans(n_clusters=Clusters,random_state=42).fit(incidents[["centerLong","centerLat"]])

# Run Predictions
predictions = clusterer.predict(incidents[["centerLong","centerLat"]])

# Add column for clusters to incidents dataframe
incidents["cluster"] = predictions

# Save Model using Pickle
# pickle.dump(clusterer, open("../models/clusterer.pkl", "wb"))

In [4]:
crime_severity={
"AUTOMOBILE THEFT": 4,
"THEFT-MOTR VEH PARTS": 2.5,
"OTHER THEFT": 2.5,
"THEFT FROM MOTR VEHC": 2,
"BURGLARY OF DWELLING": 5,
"BURGLARY OF BUSINESS": 4,
"ROBBERY PER AGG": 8,
"ASSLT W/DNGRS WEAPON": 6,
"ROBBERY INCLUDING AUTO THEFT": 4,
"ROBBERY OF PERSON": 4,
"BIKE THEFT": 2,
"CSC - RAPE": 9,
"SHOPLIFTING": 4,
"2ND DEG DOMES ASLT": 4,
"THEFT BY SWINDLE": 3,
"DOMESTIC ASSAULT/STRANGULATION": 4,
"ROBBERY OF BUSINESS": 4,
"ASLT-SGNFCNT BDLY HM": 4,
"ASLT4-LESS THAN SUBST HARM": 4,
"THEFT FROM PERSON SNATCH/GRAB": 4,
"ARSON": 8,
"MURDER (GENERAL)": 11,
"CSC - SODOMY": 9,
"THEFT FROM BUILDING": 4,
"3RD DEG DOMES ASLT": 6,
"CSC - PENETRATE WITH OBJECT": 9,
"ASLT-GREAT BODILY HM": 9,
"OTHER VEHICLE THEFT": 4,
"ASLT4-SUBST HARM OR WEAPON": 6,
"OBS - PETTY THEFT": 2,
"ON-LINE THEFT": 2.5,
"FAIL TO PAY - TAXI/HOTEL/REST": 2.5,
"ARSON-3RD DEGREE": 3,
"OBS-CSCR - USE EXT 1, 2 OR 3": 8,
"POCKET-PICKING": 4,
"LOOTING": 5,
"SCRAPPING-RECYCLING THEFT": 2,
"1ST DEG DOMES ASLT": 4,
"MURDER - 2ND DEGREE": 11,
"HACKING - THEFT OF SERVICE": 3,
"ARSON-1ST DEGREE": 8,
"ACCESS/ALTER SYSTEM/NETWORK": 3,
"ARSON-5TH DEGREE": 3,
"GAS STATION DRIV-OFF": 2.5,
"DO NOT USE": 0
}

In [5]:
#Set parameters for machine learning algorithm
PriorDays=120
# today=datetime.date.today()
today=datetime.date(2021,1,1)
InitDay=today-datetime.timedelta(days=PriorDays)

#This assigns a danger value to each cluster that is not normalized
Cluster_Danger=[[0 for x in range(Clusters)] for y in range(PriorDays)]
#Only used for testing
Today_Danger=[0 for x in range(Clusters)]

for crime in crime_list:
    MDY = [int(x) for x in crime["date"].split("/")]
    date = datetime.date(MDY[2],MDY[0],MDY[1])
    if date == InitDay:
        try:
            Cluster_Danger[0][clusterer.predict([[crime["centerLong"],crime["centerLat"]]])[0]]+=crime_severity[crime["description"]]
        except KeyError:
            print("An error occured on the keys")
            print(crime["description"])
            print("")
    elif date > InitDay and date < today:
        num=int(str(date-InitDay).split(",")[0].split()[0])
        try:
            Cluster_Danger[num][clusterer.predict([[crime["centerLong"],crime["centerLat"]]])[0]]+=crime_severity[crime["description"]]
        except KeyError:
            print("An error occured on the keys")
            print(crime["description"])
            print("")
#Only used for testing
    elif date == today:
        try:
            Today_Danger[clusterer.predict([[crime["centerLong"],crime["centerLat"]]])[0]]+=crime_severity[crime["description"]]
        except KeyError:
            print("An error occured on the keys")
            print(crime["description"])
            print("")


MaxDanger=0
for day in Cluster_Danger:
    if MaxDanger<max(day):
        MaxDanger=max(day)
#This creates a normalized danger value for each cluster between 0 and 10
Normal_Cluster_Danger=[[] for y in range(PriorDays)]

for day in range (PriorDays):
    for cluster in Cluster_Danger[day]:
        Normal_Cluster_Danger[day].append(math.ceil(cluster/MaxDanger*10))

#Only used for testing
Normal_Today_Danger=[]
for cluster in Today_Danger:
    Normal_Today_Danger.append(math.ceil(cluster/MaxDanger*10))

In [6]:
del Cluster_Danger, MaxDanger, clean_crime, crime, crime_list, crime_severity, date, current_year, incidents, last_year, num, predictions, MDY, cluster, day

In [7]:
Training_Data=[]
for d,day in enumerate(Normal_Cluster_Danger):
    for c,cluster in enumerate(day):
        Training_Data.append({
            "Day": d,
            "Cluster": c,
            "Danger": cluster
        })
Training=pd.DataFrame(Training_Data)

#Only used for testing
Testing_Data=[]
for c,cluster in enumerate(Normal_Today_Danger):
    Testing_Data.append({
        "Day": PriorDays,
        "Cluster": c,
        "Danger": cluster
    })
Testing=pd.DataFrame(Testing_Data)


del Training_Data, Normal_Cluster_Danger, d, day, c, cluster

In [8]:
#Setting up X and y to train our linear model
X_train = Training[["Day", "Cluster"]].values
y_train = Training["Danger"].values.reshape(-1, 1)

#Only used for testing
X_test = Testing[["Day", "Cluster"]].values
y_test = Testing["Danger"].values.reshape(-1, 1)

#Create the model
model = LinearRegression()

#Fit the model to the training data. 
model.fit(X_train, y_train)

# Use our model to predict a value
predicted = model.predict(X_test)

In [9]:
print(predicted)

[[0.23177479]
 [0.23156817]
 [0.23136156]
 [0.23115494]
 [0.23094832]
 [0.23074171]
 [0.23053509]
 [0.23032848]
 [0.23012186]
 [0.22991524]
 [0.22970863]
 [0.22950201]
 [0.2292954 ]
 [0.22908878]
 [0.22888216]
 [0.22867555]
 [0.22846893]
 [0.22826232]
 [0.2280557 ]
 [0.22784908]
 [0.22764247]
 [0.22743585]
 [0.22722924]
 [0.22702262]
 [0.226816  ]
 [0.22660939]
 [0.22640277]
 [0.22619616]
 [0.22598954]
 [0.22578292]
 [0.22557631]
 [0.22536969]
 [0.22516308]
 [0.22495646]
 [0.22474984]
 [0.22454323]
 [0.22433661]
 [0.22413   ]
 [0.22392338]
 [0.22371676]
 [0.22351015]
 [0.22330353]
 [0.22309692]
 [0.2228903 ]
 [0.22268368]
 [0.22247707]
 [0.22227045]
 [0.22206383]
 [0.22185722]
 [0.2216506 ]
 [0.22144399]
 [0.22123737]
 [0.22103075]
 [0.22082414]
 [0.22061752]
 [0.22041091]
 [0.22020429]
 [0.21999767]
 [0.21979106]
 [0.21958444]
 [0.21937783]
 [0.21917121]
 [0.21896459]
 [0.21875798]
 [0.21855136]
 [0.21834475]
 [0.21813813]
 [0.21793151]
 [0.2177249 ]
 [0.21751828]
 [0.21731167]
 [0.21