Library imports

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np

Importing data

In [2]:
crime_chicago = pd.read_csv('Crimes_-_2001_to_present.csv')

`head(x)` displays first `x` rows of data

In [3]:
crime_chicago.head(1)

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Ward,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location
0,10361459,HY553232,12/28/2015 11:39:00 PM,025XX S STEWART AVE,1811,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,STREET,True,False,...,11,34,18,1174099,1887116,2015,01/04/2016 04:02:14 PM,41.845652,-87.636561,"(41.845652363, -87.636561415)"


### Cleaning up the data

Dropping the data outside of our defined boundaries of Chicago

In [4]:
max_lat = 42.017888
min_lat = 41.646487
max_lon = -87.525492
min_lon = -87.821101

In [5]:
crime_chicago.loc[(crime_chicago['Latitude'] > max_lat),['Latitude']] = np.NaN
crime_chicago.loc[(crime_chicago['Longitude'] > max_lon),['Longitude']] = np.NaN
crime_chicago.loc[(crime_chicago['Latitude'] < min_lat),['Latitude']] = np.NaN
crime_chicago.loc[(crime_chicago['Longitude'] < min_lon),['Longitude']] = np.NaN

In [6]:
crime_chicago = crime_chicago.dropna(axis=0, how='any', subset=['Latitude','Longitude'], inplace=False)

In [7]:
len(crime_chicago)

5846691

In [8]:
max_x = crime_chicago['X Coordinate'].max()
max_x

1204865.0

In [9]:
min_x = crime_chicago['X Coordinate'].min()
min_x

1123470.0

In [10]:
max_y = crime_chicago['Y Coordinate'].max()
max_y

1949814.0

In [11]:
min_y = crime_chicago['Y Coordinate'].min()
min_y

1814593.0

Make X Coordinate and Y Coordinate absolute

In [12]:
crime_chicago.loc[:,['X Coordinate']] = crime_chicago['X Coordinate'] - min_x
crime_chicago.loc[:,['Y Coordinate']] = crime_chicago['Y Coordinate'] - min_y

Minimum and maximum dates for the data

In [13]:
crime_chicago['Date'].min()

'01/01/2001 01:00:00 AM'

In [14]:
crime_chicago['Date'].max()

'12/31/2014 12:59:00 PM'

### Manipulating data

Adding `datetime` field and parsing `Date` directly using format inference

In [None]:
#test['datetime'] = pd.to_datetime(test['datetime'], format='%m/%d/%Y ')

In [16]:
crime_chicago['datetime'] = pd.to_datetime(crime_chicago['Date'], infer_datetime_format=True)

In [17]:
crime_chicago

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,datetime
0,10361459,HY553232,12/28/2015 11:39:00 PM,025XX S STEWART AVE,1811,NARCOTICS,POSS: CANNABIS 30GMS OR LESS,STREET,True,False,...,34,18,50629,72523,2015,01/04/2016 04:02:14 PM,41.845652,-87.636561,"(41.845652363, -87.636561415)",2015-12-28 23:39:00
1,10361538,HY553215,12/28/2015 11:38:00 PM,001XX E 46TH ST,0560,ASSAULT,SIMPLE,APARTMENT,False,True,...,38,08A,54615,59967,2015,01/04/2016 04:02:14 PM,41.811108,-87.622315,"(41.811108036, -87.622314691)",2015-12-28 23:38:00
3,10361475,HY553226,12/28/2015 11:19:00 PM,009XX W 86TH ST,143A,WEAPONS VIOLATION,UNLAWFUL POSS OF HANDGUN,STREET,False,False,...,71,15,47905,33211,2015,01/04/2016 04:02:14 PM,41.737836,-87.647709,"(41.737836084, -87.647708744)",2015-12-28 23:19:00
4,10361479,HY553214,12/28/2015 11:17:00 PM,031XX N NARRAGANSETT AVE,4387,OTHER OFFENSE,VIOLATE ORDER OF PROTECTION,RESIDENCE,False,True,...,19,26,9721,105761,2015,01/04/2016 04:02:14 PM,41.937675,-87.785914,"(41.937674564, -87.785913655)",2015-12-28 23:17:00
5,10361452,HY553222,12/28/2015 11:15:00 PM,079XX S PAXTON AVE,0610,BURGLARY,FORCIBLE ENTRY,RESIDENCE,False,False,...,46,05,68773,38174,2015,01/04/2016 04:02:14 PM,41.750973,-87.571094,"(41.750973348, -87.571093674)",2015-12-28 23:15:00
6,10361455,HY553224,12/28/2015 11:12:00 PM,085XX S MORGAN ST,1812,NARCOTICS,POSS: CANNABIS MORE THAN 30GMS,STREET,True,False,...,71,18,47670,33573,2015,01/04/2016 04:02:14 PM,41.738835,-87.648559,"(41.738834596, -87.64855917)",2015-12-28 23:12:00
7,10362148,HY553207,12/28/2015 11:08:00 PM,016XX S HOMAN AVE,0820,THEFT,$500 AND UNDER,VEHICLE NON-COMMERCIAL,False,False,...,29,06,30499,77017,2015,01/04/2016 04:02:14 PM,41.858409,-87.710317,"(41.858408833, -87.710317158)",2015-12-28 23:08:00
8,10361478,HY553227,12/28/2015 11:08:00 PM,001XX W ELM ST,033A,ROBBERY,ATTEMPT: ARMED-HANDGUN,SIDEWALK,False,False,...,8,03,51682,93477,2015,01/04/2016 04:02:14 PM,41.903128,-87.632069,"(41.903127946, -87.632068763)",2015-12-28 23:08:00
9,10361480,HY553219,12/28/2015 11:05:00 PM,009XX W 19TH PL,0486,BATTERY,DOMESTIC BATTERY SIMPLE,APARTMENT,False,True,...,31,08B,47278,76270,2015,01/04/2016 04:02:14 PM,41.856008,-87.648750,"(41.856008391, -87.648749565)",2015-12-28 23:05:00
10,10361487,HY553263,12/28/2015 11:00:00 PM,059XX S CARPENTER ST,0560,ASSAULT,SIMPLE,RESIDENCE,False,True,...,68,08A,46861,50754,2015,01/04/2016 04:02:14 PM,41.785999,-87.651024,"(41.785999075, -87.651023882)",2015-12-28 23:00:00


Save timestamped data into `pickle` file

In [18]:
crime_chicago.to_pickle('crime_chicago_with_better_timestamp.pkl')