In [1]:
import pickle
from crime_clustering import CrimeCluster
import pandas as pd
import json

In [2]:
# Load the cleaned crime records and keep only the rows whose 'Year' value
# is below 2021.
df = (
    pd.read_csv('data/crime-clean.csv')
    .loc[lambda records: records['Year'].lt(2021)]
)

In [3]:
# Parse the neighborhood GeoJSON and start `geodict`, the dictionary that
# collects the parsed geography files under short keys.
# The encoding is given explicitly: GeoJSON is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding.
with open('data/geo/Neighborhoods.geojson', encoding='utf-8') as neigh_file:
    geodict = {'json_neigh': json.load(neigh_file)}

In [4]:
# Parse the ZIP GeoJSON and store it in `geodict` under 'json_zip'.
# The encoding is given explicitly: GeoJSON is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding.
with open('data/geo/ZIP.geojson', encoding='utf-8') as zip_file:
    geodict['json_zip'] = json.load(zip_file)

In [5]:
# Build the clustering helper from the filtered crime records and the two
# parsed GeoJSON documents (neighborhoods first, then ZIP).
neighborhood_geojson = geodict['json_neigh']
zip_geojson = geodict['json_zip']
cCluster = CrimeCluster(df, neighborhood_geojson, zip_geojson)

In [6]:
# Every distinct 'Crime Type' label in the data, preceded by the extra label
# 'ALL' (presumably treated by GMM_Chicago as "all crimes" -- TODO confirm).
# NOTE(review): the labels are used exactly as they appear in the data, so
# spelling variants of the same category are clustered separately.
crime_types = ['ALL', *df['Crime Type'].unique()]

In [7]:
# Display the full list of labels that will be passed to the clustering loop.
crime_types

['ALL',
 'BATTERY',
 'THEFT',
 'PUBLIC PEACE VIOLATION',
 'BURGLARY',
 'ROBBERY',
 'OTHER OFFENSE',
 'MOTOR VEHICLE THEFT',
 'NARCOTICS',
 'CRIMINAL DAMAGE',
 'ASSAULT',
 'WEAPONS VIOLATION',
 'CRIMINAL TRESPASS',
 'DECEPTIVE PRACTICE',
 'STALKING',
 'OFFENSE INVOLVING CHILDREN',
 'LIQUOR LAW VIOLATION',
 'INTERFERENCE WITH PUBLIC OFFICER',
 'SEX OFFENSE',
 'CRIM SEXUAL ASSAULT',
 'PROSTITUTION',
 'ARSON',
 'GAMBLING',
 'KIDNAPPING',
 'INTIMIDATION',
 'OTHER NARCOTIC VIOLATION',
 'OBSCENITY',
 'PUBLIC INDECENCY',
 'CRIMINAL SEXUAL ASSAULT',
 'NON-CRIMINAL',
 'CONCEALED CARRY LICENSE VIOLATION',
 'HOMICIDE',
 'NON-CRIMINAL (SUBJECT SPECIFIED)',
 'NON - CRIMINAL',
 'HUMAN TRAFFICKING',
 'RITUALISM']

In [8]:
# Column-oriented accumulator for the clustering results: one independent,
# initially empty list per output column, in the order the columns should
# appear in the final table.
cluster_data = {
    column: []
    for column in (
        'Crime Type',
        'num centers',
        'centers',
        'covariances',
        'Successful',
    )
}

In [9]:
# Run the Gaussian-mixture clustering for every crime type and record one row
# per type in `cluster_data`.

# Start from empty columns so that re-running this cell replaces the results
# instead of appending a second copy of every row.
for column in cluster_data.values():
    column.clear()

for crime in crime_types:
    print(crime)
    try:
        # Only the fit itself is guarded. Appending happens once, after the
        # try/except, so a failure can never leave the columns with
        # different lengths.
        gmm_data = cCluster.GMM_Chicago(crime)
    except ValueError:
        # GMM_Chicago raised ValueError for this crime type: keep a
        # placeholder row so the failure stays visible in the results.
        num_centers, centers, covariances = None, None, None
        successful = False
    else:
        # The first three items of the result are stored as the number of
        # centers, the centers and the covariances, in that order.
        num_centers = gmm_data[0]
        centers = gmm_data[1]
        covariances = gmm_data[2]
        successful = True

    cluster_data['Crime Type'].append(crime)
    cluster_data['num centers'].append(num_centers)
    cluster_data['centers'].append(centers)
    cluster_data['covariances'].append(covariances)
    cluster_data['Successful'].append(successful)

ALL
BATTERY
THEFT
PUBLIC PEACE VIOLATION
BURGLARY
ROBBERY
OTHER OFFENSE
MOTOR VEHICLE THEFT
NARCOTICS
CRIMINAL DAMAGE
ASSAULT
WEAPONS VIOLATION
CRIMINAL TRESPASS
DECEPTIVE PRACTICE
STALKING
OFFENSE INVOLVING CHILDREN
LIQUOR LAW VIOLATION
INTERFERENCE WITH PUBLIC OFFICER
SEX OFFENSE
CRIM SEXUAL ASSAULT
PROSTITUTION
ARSON
GAMBLING
KIDNAPPING
INTIMIDATION
OTHER NARCOTIC VIOLATION
OBSCENITY
PUBLIC INDECENCY
CRIMINAL SEXUAL ASSAULT
NON-CRIMINAL
CONCEALED CARRY LICENSE VIOLATION
HOMICIDE
NON-CRIMINAL (SUBJECT SPECIFIED)
NON - CRIMINAL
HUMAN TRAFFICKING
RITUALISM


In [10]:
# Persist the collected clustering results to disk with the newest pickle
# protocol available.
with open('data/clusters/chicago/crime_types_clusters.pickle', 'wb') as pickle_out:
    pickle.dump(cluster_data, pickle_out, pickle.HIGHEST_PROTOCOL)

In [11]:
# Read the saved clustering results back from disk.
# NOTE: unpickling can execute arbitrary code, so only load files you trust;
# this path is the one this notebook writes its own results to.
with open('data/clusters/chicago/crime_types_clusters.pickle', 'rb') as pickle_in:
    data = pickle.load(pickle_in)

In [12]:
# Turn the reloaded column dictionary into a table (one column per key).
# NOTE(review): this rebinds `df`, which held the crime records until now;
# cells above that use `df` need the CSV reloaded before they are re-run.
df = pd.DataFrame(data)

In [13]:
# Preview the first rows of the cluster table rebuilt from the pickle file.
df.head()

Unnamed: 0,Crime Type,num centers,centers,covariances,Successful
0,ALL,3.0,"[[41.91711227863076, -87.68435037388016], [41....","[[[0.002331452306958472, -0.000919354105798237...",True
1,BATTERY,3.0,"[[41.74370059308402, -87.61236831426683], [41....","[[[0.0014780444723145117, -6.310274189314449e-...",True
2,THEFT,3.0,"[[41.91090539478702, -87.70717608929465], [41....","[[[0.001825855961936727, -0.000816415207915068...",True
3,PUBLIC PEACE VIOLATION,3.0,"[[41.83833037485503, -87.68785686693771], [41....","[[[0.002824335771503128, -0.000375614913996201...",True
4,BURGLARY,3.0,"[[41.741514279389456, -87.61156340980443], [41...","[[[0.0014202456001993914, 0.000116370230504126...",True
