# Data Cleaning

### Imports and Initial Checks

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import gmaps
from sodapy import Socrata
import requests
import json
import dateutil
import pytz
import datetime

%matplotlib inline

In [2]:
# Load the crime export, using the "ID" column as the row index, and preview it.
df = pd.read_csv(
    "Crimes_-_2001_to_present.csv",
    index_col="ID",
)
df.head()

  mask |= (ar1 == a)


Unnamed: 0_level_0,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000092,HY189866,03/18/2015 07:44:00 PM,047XX W OHIO ST,041A,BATTERY,AGGRAVATED: HANDGUN,STREET,False,False,1111,...,28.0,25.0,04B,1144606.0,1903566.0,2015,02/10/2018 03:50:01 PM,41.891399,-87.744385,"(41.891398861, -87.744384567)"
10000094,HY190059,03/18/2015 11:00:00 PM,066XX S MARSHFIELD AVE,4625,OTHER OFFENSE,PAROLE VIOLATION,STREET,True,False,725,...,15.0,67.0,26,1166468.0,1860715.0,2015,02/10/2018 03:50:01 PM,41.773372,-87.665319,"(41.773371528, -87.665319468)"
10000095,HY190052,03/18/2015 10:45:00 PM,044XX S LAKE PARK AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,222,...,4.0,39.0,08B,1185075.0,1875622.0,2015,02/10/2018 03:50:01 PM,41.813861,-87.596643,"(41.81386068, -87.596642837)"
10000096,HY190054,03/18/2015 10:30:00 PM,051XX S MICHIGAN AVE,0460,BATTERY,SIMPLE,APARTMENT,False,False,225,...,3.0,40.0,08B,1178033.0,1870804.0,2015,02/10/2018 03:50:01 PM,41.800802,-87.622619,"(41.800802415, -87.622619343)"
10000097,HY189976,03/18/2015 09:00:00 PM,047XX W ADAMS ST,031A,ROBBERY,ARMED: HANDGUN,SIDEWALK,False,False,1113,...,28.0,25.0,03,1144920.0,1898709.0,2015,02/10/2018 03:50:01 PM,41.878065,-87.743354,"(41.878064761, -87.743354013)"


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6583286 entries, 10000092 to 9780607
Data columns (total 21 columns):
Case Number             object
Date                    object
Block                   object
IUCR                    object
Primary Type            object
Description             object
Location Description    object
Arrest                  bool
Domestic                bool
Beat                    int64
District                float64
Ward                    float64
Community Area          float64
FBI Code                object
X Coordinate            float64
Y Coordinate            float64
Year                    int64
Updated On              object
Latitude                float64
Longitude               float64
Location                object
dtypes: bool(2), float64(7), int64(2), object(10)
memory usage: 1017.1+ MB


In [4]:
df.isnull().sum()

Case Number                  4
Date                         0
Block                        0
IUCR                         0
Primary Type                 0
Description                  0
Location Description      3398
Arrest                       0
Domestic                     0
Beat                         0
District                    51
Ward                    614854
Community Area          616029
FBI Code                     0
X Coordinate             67651
Y Coordinate             67651
Year                         0
Updated On                   0
Latitude                 67651
Longitude                67651
Location                 67651
dtype: int64

### Typos and Redundancies

We first look at just the primary type and description fields, using value counts to detect possible typos. 

#### Primary Type

In [5]:
df["Primary Type"].value_counts()

THEFT                                1377760
BATTERY                              1201381
CRIMINAL DAMAGE                       754293
NARCOTICS                             703168
OTHER OFFENSE                         408322
ASSAULT                               405870
BURGLARY                              380748
MOTOR VEHICLE THEFT                   308451
DECEPTIVE PRACTICE                    250560
ROBBERY                               249926
CRIMINAL TRESPASS                     189367
PROSTITUTION                           67828
WEAPONS VIOLATION                      67228
PUBLIC PEACE VIOLATION                 46948
OFFENSE INVOLVING CHILDREN             43884
CRIM SEXUAL ASSAULT                    25980
SEX OFFENSE                            24288
INTERFERENCE WITH PUBLIC OFFICER       14337
GAMBLING                               14245
LIQUOR LAW VIOLATION                   13875
ARSON                                  10917
HOMICIDE                                9041
KIDNAPPING

The single "DOMESTIC VIOLENCE" case seems like an error, and "NON - CRIMINAL" has a low incidence count and is redundant with "NON-CRIMINAL". 

In [6]:
# Fold the spaced spelling "NON - CRIMINAL" into the "NON-CRIMINAL" label.
spaced_label_rows = df["Primary Type"] == "NON - CRIMINAL"
df.loc[spaced_label_rows, "Primary Type"] = "NON-CRIMINAL"

In [7]:
df[df["Primary Type"] == "DOMESTIC VIOLENCE"]

Unnamed: 0_level_0,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1326195,G021609,01/11/2001 02:30:41 AM,087XX S ESCANABA AV,9901,DOMESTIC VIOLENCE,DOMESTIC VIOLENCE,APARTMENT,True,True,423,...,,,08B,1196869.0,1847416.0,2001,08/17/2015 03:03:40 PM,41.736176,-87.55432,"(41.73617608, -87.554319607)"


In [8]:
df[df["FBI Code"] == "08B"]

Unnamed: 0_level_0,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000095,HY190052,03/18/2015 10:45:00 PM,044XX S LAKE PARK AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,222,...,4.0,39.0,08B,1185075.0,1875622.0,2015,02/10/2018 03:50:01 PM,41.813861,-87.596643,"(41.81386068, -87.596642837)"
10000096,HY190054,03/18/2015 10:30:00 PM,051XX S MICHIGAN AVE,0460,BATTERY,SIMPLE,APARTMENT,False,False,225,...,3.0,40.0,08B,1178033.0,1870804.0,2015,02/10/2018 03:50:01 PM,41.800802,-87.622619,"(41.800802415, -87.622619343)"
10000098,HY190032,03/18/2015 10:00:00 PM,049XX S DREXEL BLVD,0460,BATTERY,SIMPLE,APARTMENT,False,False,223,...,4.0,39.0,08B,1183018.0,1872537.0,2015,02/10/2018 03:50:01 PM,41.805443,-87.604284,"(41.805443345, -87.604283976)"
10000099,HY190047,03/18/2015 11:00:00 PM,070XX S MORGAN ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,733,...,17.0,68.0,08B,1170859.0,1858210.0,2015,02/10/2018 03:50:01 PM,41.766403,-87.649296,"(41.766402779, -87.649296123)"
10000100,HY189988,03/18/2015 09:35:00 PM,042XX S PRAIRIE AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,213,...,3.0,38.0,08B,1178746.0,1876914.0,2015,02/10/2018 03:50:01 PM,41.817553,-87.619819,"(41.817552577, -87.619818523)"
10000104,HY189964,03/18/2015 09:25:00 PM,097XX S PRAIRIE AVE,0460,BATTERY,SIMPLE,RESIDENCE PORCH/HALLWAY,False,False,511,...,6.0,49.0,08B,1179637.0,1840444.0,2015,02/10/2018 03:50:01 PM,41.717455,-87.617663,"(41.71745472, -87.617663257)"
10000110,HY190056,03/18/2015 10:50:00 PM,014XX S ASHLAND AVE,0460,BATTERY,SIMPLE,SIDEWALK,False,False,1233,...,2.0,28.0,08B,1165950.0,1893388.0,2015,02/10/2018 03:50:01 PM,41.863041,-87.666289,"(41.86304084, -87.666288555)"
10000124,HY190060,03/18/2015 11:30:00 PM,024XX S BELL AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,True,True,1034,...,25.0,31.0,08B,1161687.0,1887883.0,2015,02/10/2018 03:50:01 PM,41.848024,-87.682091,"(41.848024395, -87.682090877)"
10000126,HY190035,03/18/2015 10:45:00 PM,003XX E 59TH ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,True,True,232,...,20.0,40.0,08B,1179208.0,1865959.0,2015,02/10/2018 03:50:01 PM,41.787481,-87.618458,"(41.787480544, -87.618458018)"
10000129,HY190096,03/19/2015 01:20:00 AM,078XX S EMERALD AVE,0460,BATTERY,SIMPLE,APARTMENT,False,False,621,...,17.0,71.0,08B,1172661.0,1852870.0,2015,02/10/2018 03:50:01 PM,41.751710,-87.642848,"(41.751709641, -87.642848244)"


So going off the FBI Code, the actual crime for the entry is most likely simple domestic battery. 

In [9]:
# Relabel rows typed "DOMESTIC VIOLENCE" as battery, setting both the type and
# the description in one assignment.
domestic_violence_rows = df["Primary Type"] == "DOMESTIC VIOLENCE"
df.loc[domestic_violence_rows, ["Primary Type", "Description"]] = (
    "BATTERY",
    "SIMPLE DOMESTIC BATTERY",
)

#### Description

In [10]:
df.groupby("Primary Type")["Description"].value_counts()

Primary Type       Description                                    
ARSON              BY FIRE                                              7095
                   AGGRAVATED                                           1819
                   ATTEMPT ARSON                                        1789
                   BY EXPLOSIVE                                           95
                   POS: EXPLOSIVE/INCENDIARY DEV                          79
                   POS: CHEMICAL/DRY-ICE DEVICE                           40
ASSAULT            SIMPLE                                             282173
                   AGGRAVATED: HANDGUN                                 44157
                   AGGRAVATED:KNIFE/CUTTING INSTR                      28696
                   AGGRAVATED: OTHER DANG WEAPON                       24290
                   PRO EMP HANDS NO/MIN INJURY                         13894
                   AGG PO HANDS NO/MIN INJURY                           6135
         

There are a few backwards phrases, but otherwise, nothing immediately stands out and most of the low frequency counts are just clearly uncommon crimes. A strategy for automatically detecting probable typos in the future is to group attributes by frequency, and compare the Levenshtein distance between high frequency and low frequency groups; attributes below a certain distance threshold, or attributes that "fuzzily match" and are not common are more likely to be typos.  

In [11]:
# Rewrite descriptions whose word order is reversed; the pairs are applied in order.
for reversed_label, canonical_label in (
    ("THEFT RETAIL", "RETAIL THEFT"),
    ("SIMPLE DOMESTIC BATTERY", "DOMESTIC BATTERY SIMPLE"),
):
    df.loc[df["Description"] == reversed_label, "Description"] = canonical_label

### Missing Values

The general approach here is to first impute matching values, then geocode still missing location coordinates, and finally use those coordinates and Chicago shapefiles to fill in the rest of missing fields.  

We assume that rows sharing the same block field also share the same location fields (and, by extension, the same community area, ward, etc.). This is justified by the dataset description:
>In order to protect the privacy of crime victims, addresses are shown at the block level only and specific locations are not identified. The intent is to not make crimes exactly traceable.

In other words, blocks are the lowest level of geographical specificity given. Since the block field is not an actual address, a block may straddle a community area or ward boundary, which makes those fields ambiguous; in practice, however, such cases are infrequent enough not to affect the distributions (as shown later in the validation section, where we compare values imputed by block matching with values that were already present). 

#### Internal Imputation

In [12]:
# get counts of missing values again
df.isnull().sum()

Case Number                  4
Date                         0
Block                        0
IUCR                         0
Primary Type                 0
Description                  0
Location Description      3398
Arrest                       0
Domestic                     0
Beat                         0
District                    51
Ward                    614854
Community Area          616029
FBI Code                     0
X Coordinate             67651
Y Coordinate             67651
Year                         0
Updated On                   0
Latitude                 67651
Longitude                67651
Location                 67651
dtype: int64

Then let's see how many missing values belong to unique blocks, ignoring the ward field for now as ward boundaries switched and thus require an extra conditional on the year field. 

In [13]:
# Fields to impute (Location and the X-Y coordinates are left out since they are redundant).
missing_fields = ["Community Area", "Ward", "Latitude", "Longitude"]

# For each field, count the distinct blocks among the rows where that field is null.
for field in missing_fields:
    blocks_missing_field = df.loc[df[field].isnull(), "Block"].unique()
    print("{} has {} unique missing values.".format(field, blocks_missing_field.shape[0]))

Community Area has 31321 unique missing values.
Ward has 31222 unique missing values.
Latitude has 22409 unique missing values.
Longitude has 22409 unique missing values.


So we just have to impute these missing values by matching on block.

In [14]:
for field in missing_fields:
    # Rows where this field is populated act as the lookup reference.
    known_field = df[df[field].notnull()]

    # Block -> field value. A block that occurs in several reference rows keeps
    # the value of its last occurrence (plain dict semantics).
    block_to_field = pd.Series(known_field[field].values, index=known_field.Block).to_dict()

    # Set of reference blocks, used for the membership test below.
    known_blocks = set(known_field.Block)

    # Rows that are missing the field but whose block has a known value.
    fillable = df[field].isnull() & df["Block"].isin(known_blocks)

    # Fill those rows from the block lookup.
    df.loc[fillable, field] = df.loc[fillable, "Block"].map(block_to_field)

In [15]:
df.isnull().sum()

Case Number                  4
Date                         0
Block                        0
IUCR                         0
Primary Type                 0
Description                  0
Location Description      3398
Arrest                       0
Domestic                     0
Beat                         0
District                    51
Ward                    438000
Community Area          438399
FBI Code                     0
X Coordinate             67651
Y Coordinate             67651
Year                         0
Updated On                   0
Latitude                  2472
Longitude                 2472
Location                 67651
dtype: int64

In [22]:
# Same field list as before the imputation (Location and the X-Y coordinates are
# left out since they are redundant).
missing_fields = ["Community Area", "Ward", "Latitude", "Longitude"]

# Re-count the distinct blocks that still have a null in each field.
for field in missing_fields:
    still_missing = df[field].isnull()
    n_blocks = df.loc[still_missing, "Block"].unique().shape[0]
    print("{} has {} unique missing values.".format(field, n_blocks))

Community Area has 24233 unique missing values.
Ward has 24185 unique missing values.
Latitude has 1189 unique missing values.
Longitude has 1189 unique missing values.


So we're now well within the limit of most free geocoders. 

#### Fuzzy Match Blocks

We fuzzy match block names with the official Chicago street name open dataset, looking only at values that are missing their longitude and latitude so as to improve the accuracy/consistency of future geocoding. 

In [23]:
from sodapy import Socrata

# The SODA app token is kept in a local file; only a trailing newline is stripped.
with open("app_token") as token_file:
    app_token = token_file.read().rstrip("\n")

# Socrata client for the City of Chicago data portal.
client = Socrata("data.cityofchicago.org", app_token)

# Download the street-name dataset, requesting up to 5,000,000 rows in one call.
street_names = client.get("pasq-g8mx", limit=5000000)

In [24]:
from fuzzywuzzy import process
from fuzzywuzzy import fuzz

In [39]:
# Wrap the downloaded street records in a DataFrame.
street_names = pd.DataFrame(street_names)

# For blocks that still lack a latitude: drop the first token (the block
# number), upper-case what remains, and keep each street name once.
blocks_without_coords = df.loc[df.Latitude.isnull(), "Block"]
missing_streets = pd.Series(
    blocks_without_coords
    .apply(lambda block: " ".join(block.split()[1:]).upper())
    .unique()
)

In [51]:
# Candidate pool: every official street name, as a plain array.
official_names = street_names["full_street_name"].values

# Only streets that are not already an exact official name need fuzzy matching.
imperfect_streets = missing_streets[~missing_streets.isin(street_names["full_street_name"])]

# Best candidate per street using the default scorer.
street_matches = [
    process.extractOne(street, official_names)
    for street in imperfect_streets
]

# Second opinion with the token-set scorer, which works better with highways.
street_matches_set = [
    process.extractOne(street, official_names, scorer=fuzz.token_set_ratio)
    for street in imperfect_streets
]

In [94]:
# Pair the matched name (element 0 of each extractOne result) from the default
# scorer with the one from the token-set scorer, street by street.
matches = [
    (default_hit[0], token_set_hit[0])
    for default_hit, token_set_hit in zip(street_matches, street_matches_set)
]

In [100]:
# One row per unmatched street, with both candidate names as columns.
imperfect_matches = (
    pd.DataFrame(dict(zip(imperfect_streets, matches)))
    .T
    .rename(columns={0: "Candidate", 1: "Alt Candidate"})
    .rename_axis("Street")
)

We now export these matches to a csv to be manually checked in Excel. The candidate column is edited to be correct; the edited version can then be imported to construct a dictionary mapping streets to their proper official names. 

In [102]:
imperfect_matches.to_csv("fuzzy_matches.csv")

In [107]:
# Read back the manually corrected match table, indexed by the original street name.
matched = pd.read_csv(
    "fuzzy_matches_edited.csv",
    index_col="Street",
)

# Street -> corrected official name, taken from the "Candidate" column.
matched_streets = matched["Candidate"].to_dict()

In [158]:
def apply_matching(row, street_map=None):
    ''' Return the row's block with its street name replaced by the corrected name.

    The first whitespace-separated token of ``row.Block`` is treated as the
    block number and kept as-is; the remaining tokens, joined by single spaces
    and upper-cased, form the lookup key.

    Parameters
    ----------
    row : object with a string ``Block`` attribute (e.g. a DataFrame row).
    street_map : dict, optional
        Mapping of upper-cased street name -> corrected street name.
        Defaults to the notebook-level ``matched_streets`` dictionary.

    Returns
    -------
    str
        "<block number> <corrected street>" when the street has a usable
        correction, otherwise ``row.Block`` unchanged.
    '''
    lookup = matched_streets if street_map is None else street_map

    parts = row.Block.split()
    if len(parts) < 2:
        # no street portion to look up
        return row.Block

    street = " ".join(parts[1:]).upper()
    corrected = lookup.get(street)

    # A blank cell in the edited CSV is read back as NaN; treat any non-string
    # or empty correction as "no correction" instead of failing.
    if not isinstance(corrected, str) or not corrected:
        return row.Block

    # Rebuild the block rather than str.replace-ing the upper-cased key into the
    # original text: replace silently does nothing when the original block is
    # lower-case or has irregular spacing.
    return "{} {}".format(parts[0], corrected)


In [159]:
# Rewrite the block of every row that still lacks a latitude using the corrected street names.
lacks_latitude = df["Latitude"].isnull()
df.loc[lacks_latitude, "Block"] = df[lacks_latitude].apply(apply_matching, axis=1)

In [160]:
# Upper-case every block so the formatting is consistent.
df["Block"] = df["Block"].str.upper()

#### Geocoding

In [161]:
# Distinct blocks among the rows that still have no latitude; these are the ones to geocode.
blocks_to_geocode = df.loc[df["Latitude"].isnull(), "Block"].unique()

We use both Google Maps and ArcGIS to geocode, for some level of validation. 

In [181]:
# Load the Google Maps API key: first line of the local "api_key" file.
# `with` closes the file even if the read fails.
with open("api_key") as f:
    gmaps_key = f.read().split("\n")[0]

# Load the ArcGIS password: first line of the local "arc_pass" file.
with open("arc_pass") as f:
    arc_pass = f.read().split("\n")[0]

In [186]:
import googlemaps
from arcgis.gis import GIS  # GIS is used below; without this import the cell raises NameError
from arcgis.geocoding import get_geocoders, geocode

# Google Maps client.
# NOTE: this rebinds `gmaps`, shadowing the `gmaps` module imported in the
# imports cell. The name is kept because the Google geocoding cell calls
# `gmaps.geocode(...)`.
gmaps = googlemaps.Client(key=gmaps_key)

# ArcGIS Online connection; https so the password is not sent in clear text.
gis = GIS("https://www.arcgis.com", "neurite", arc_pass)

# use the first of GIS's configured geocoders
geocoder = get_geocoders(gis)[0]

In [188]:
# Geocode every outstanding block with ArcGIS. The ", " separator matters:
# without it the query reads e.g. "047XX W OHIO STChicago, IL".
arc_geocoded = [geocode(block + ", Chicago, IL") for block in blocks_to_geocode]

KeyboardInterrupt: 

In [192]:
# Geocode every outstanding block with Google Maps. The ", " separator matters:
# without it the query reads e.g. "047XX W OHIO STChicago, IL".
gmaps_geocoded = [gmaps.geocode(block + ", Chicago, IL") for block in blocks_to_geocode]

KeyboardInterrupt: 

The vast majority of failed geocodes are from highways. At the very least, the fuzzy matching above standardized these entries so that future work can be done to either manually geocode them, or to locally store location values for all highways/exits to use for lookups. 

A possible strategy here would be to use either OpenStreetMap, which has this information but is incomplete, or to construct an approximation algorithm using the Google Maps Nearest Roads API: i.e., to segment a highway into individual boundaries (the size of which should be experimentally determined), then use the nearest-road function restricted to that boundary to geocode a given block number, and then construct a shortest-distance line from that road to the highway to approximate a location.  

#### Date Formatting

For ease of comparison, we convert the dates to pandas datetime objects. 

In [189]:
# Month/day/year with a 12-hour clock and AM/PM marker, e.g. "03/18/2015 07:44:00 PM".
datetime_format = "%m/%d/%Y %I:%M:%S %p"

# Parse the "Date" strings with that explicit format.
df["Date"] = pd.to_datetime(
    df["Date"],
    format=datetime_format,
)

#### Missing Community Areas

With filled in location values, we can now use the city of Chicago's shapefiles to fill in the community area values. 

In [198]:
import geopip

# Point-in-polygon index built from the community-area boundary file.
community_bounds = geopip.GeoPIP("Boundaries - Community Areas (current).geojson")

# Rows with no community area but with a latitude to look up.
needs_area = df["Community Area"].isnull() & df["Latitude"].notnull()

# NOTE(review): whatever `search` returns is stored as-is; confirm it is the
# community-area number rather than a richer record for the matched polygon.
df.loc[needs_area, "Community Area"] = df[needs_area].apply(
    lambda row: community_bounds.search(row.Longitude, row.Latitude),
    axis=1,
)

#### Missing Ward Areas

In [205]:
# Point-in-polygon indexes for the two ward maps.
current_ward_bounds = geopip.GeoPIP("Boundaries - Wards (2015-).geojson")
old_ward_bounds = geopip.GeoPIP("Boundaries - Wards (2003-2015).geojson")

# Only rows that are missing a ward and have coordinates to look up are filled;
# wards already present in the data are left untouched.
needs_ward = df["Ward"].isnull() & df["Latitude"].notnull() & df["Longitude"].notnull()

# Each comparison is evaluated on its own before being combined with `&`.
# Written as `df.Year >= 2015 & mask`, Python evaluates `2015 & mask` first
# because `&` binds tighter than `>=`, which selects the wrong rows.
ward_lookups = (
    (df["Year"] >= 2015, current_ward_bounds),
    (df["Year"] < 2015, old_ward_bounds),
)

for in_period, ward_bounds in ward_lookups:
    target = needs_ward & in_period

    # Skip empty selections: a row-wise apply on zero rows yields an empty
    # DataFrame, which cannot be assigned to a single column.
    if not target.any():
        continue

    # NOTE(review): whatever `search` returns is stored as-is; confirm it is the
    # ward number rather than a richer record for the matched polygon.
    df.loc[target, "Ward"] = df.loc[target].apply(
        lambda row: ward_bounds.search(row.Longitude, row.Latitude),
        axis=1,
    )

Unnamed: 0_level_0,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,Beat,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10000092,HY189866,2015-03-18 19:44:00,047XX W OHIO ST,041A,BATTERY,AGGRAVATED: HANDGUN,STREET,False,False,1111,...,28.0,25.0,04B,1144606.0,1903566.0,2015,02/10/2018 03:50:01 PM,41.891399,-87.744385,"(41.891398861, -87.744384567)"
10000094,HY190059,2015-03-18 23:00:00,066XX S MARSHFIELD AVE,4625,OTHER OFFENSE,PAROLE VIOLATION,STREET,True,False,725,...,15.0,67.0,26,1166468.0,1860715.0,2015,02/10/2018 03:50:01 PM,41.773372,-87.665319,"(41.773371528, -87.665319468)"
10000095,HY190052,2015-03-18 22:45:00,044XX S LAKE PARK AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,222,...,4.0,39.0,08B,1185075.0,1875622.0,2015,02/10/2018 03:50:01 PM,41.813861,-87.596643,"(41.81386068, -87.596642837)"
10000096,HY190054,2015-03-18 22:30:00,051XX S MICHIGAN AVE,0460,BATTERY,SIMPLE,APARTMENT,False,False,225,...,3.0,40.0,08B,1178033.0,1870804.0,2015,02/10/2018 03:50:01 PM,41.800802,-87.622619,"(41.800802415, -87.622619343)"
10000097,HY189976,2015-03-18 21:00:00,047XX W ADAMS ST,031A,ROBBERY,ARMED: HANDGUN,SIDEWALK,False,False,1113,...,28.0,25.0,03,1144920.0,1898709.0,2015,02/10/2018 03:50:01 PM,41.878065,-87.743354,"(41.878064761, -87.743354013)"
10000098,HY190032,2015-03-18 22:00:00,049XX S DREXEL BLVD,0460,BATTERY,SIMPLE,APARTMENT,False,False,223,...,4.0,39.0,08B,1183018.0,1872537.0,2015,02/10/2018 03:50:01 PM,41.805443,-87.604284,"(41.805443345, -87.604283976)"
10000099,HY190047,2015-03-18 23:00:00,070XX S MORGAN ST,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,733,...,17.0,68.0,08B,1170859.0,1858210.0,2015,02/10/2018 03:50:01 PM,41.766403,-87.649296,"(41.766402779, -87.649296123)"
10000100,HY189988,2015-03-18 21:35:00,042XX S PRAIRIE AVE,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,213,...,3.0,38.0,08B,1178746.0,1876914.0,2015,02/10/2018 03:50:01 PM,41.817553,-87.619819,"(41.817552577, -87.619818523)"
10000101,HY190020,2015-03-18 22:09:00,036XX S WOLCOTT AVE,1811,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,STREET,True,False,912,...,11.0,59.0,18,1164279.0,1880656.0,2015,02/10/2018 03:50:01 PM,41.828138,-87.672782,"(41.828138428, -87.672782106)"
10000104,HY189964,2015-03-18 21:25:00,097XX S PRAIRIE AVE,0460,BATTERY,SIMPLE,RESIDENCE PORCH/HALLWAY,False,False,511,...,6.0,49.0,08B,1179637.0,1840444.0,2015,02/10/2018 03:50:01 PM,41.717455,-87.617663,"(41.71745472, -87.617663257)"
