### Import Modules

In [17]:
import geographic as geo
import importlib
import numpy as np
import pandas as pd

### Stations Data Investigation

In [2]:
#Load the station list; the relative path is resolved against the kernel's working directory
stations = pd.read_csv(filepath_or_buffer="../../Data/all_stations.csv")

In [4]:
#Hand the coordinate columns to the geo helper and keep the frame it returns
#NOTE(review): geo.addGeographic is not shown here; presumably it is what adds the
#neighbourhood/suburb/city/county/zip_code columns seen in later outputs - confirm
lat, long = stations["latitude"], stations["longitude"]
stations = geo.addGeographic(stations, lat, long)

In [5]:
#View sample of stations
#A fixed random_state makes the same ten rows appear on every Restart & Run All;
#without it each run draws a different sample and the saved output cannot be reproduced
#(the seed value itself is arbitrary)
stations.sample(n=10, random_state=42)

Unnamed: 0.1,Unnamed: 0,id,name,latitude,longitude,is_relocated,neighbourhood,suburb,city,county,zip_code
216,169,237,MLK Jr Dr & 29th St,41.842052,-87.617,0,,Douglas,Chicago,Cook County,60616
487,558,527,Western Ave & Howard St,42.018901,-87.690048,0,,West Ridge,Chicago,Cook County,60645
336,336,365,Halsted St & North Branch St,41.899368,-87.64848,0,River North,Near North Side,Chicago,Cook County,60622
246,253,268,Lake Shore Dr & North Blvd,41.911722,-87.626804,0,,Lincoln Park,Chicago,Cook County,60610
51,136,55,Halsted St & James M Rochford St,41.865861,-87.646611,1,,Near West Side,Chicago,Cook County,60607
72,29,84,Milwaukee Ave & Grand Ave,41.891578,-87.648384,1,,West Town,Chicago,Cook County,60642
317,164,340,Clark St & Wrightwood Ave,41.929546,-87.643118,0,,Lincoln Park,Chicago,Cook County,60614
431,429,466,Ridge Blvd & Touhy Ave,42.012132,-87.682913,0,,West Ridge,Chicago,Cook County,60645
102,60,116,Western Ave & Winnebago Ave,41.915533,-87.687051,0,Bucktown,Logan Square,Chicago,Cook County,60647
219,55,240,Sheridan Rd & Irving Park Rd,41.954245,-87.654406,0,,Lake View,Chicago,Cook County,60626


In [14]:
#Number of distinct neighbourhood values
#dropna=False counts a missing value as one extra category, so the result is
#one more than the number of named neighbourhoods whenever any entry is missing
stations["neighbourhood"].nunique(dropna=False)

68

In [15]:
#Number of distinct suburb values
#unique() keeps a missing value as its own entry, so it is included in this count
len(stations["suburb"].unique())

44

In [25]:
#List the distinct city values (a missing entry appears in the array as well)
print(stations["city"].unique())

#Display the stations whose city value is missing
stations.loc[stations["city"].isna()]

['Chicago' None]


Unnamed: 0.1,Unnamed: 0,id,name,latitude,longitude,is_relocated,neighbourhood,suburb,city,county,zip_code
556,520,596,Benson Ave & Church St,42.048214,-87.683485,0,Downtown,,,Cook County,60201
557,522,597,Chicago Ave & Washington St,42.032562,-87.679101,0,South Evanston,,,Cook County,60202
558,201,598,Elmwood Ave & Austin St,42.025784,-87.684107,0,,,,Cook County,60202
559,522,599,Valli Produce - Evanston Plaza,42.039742,-87.699413,0,,,,Cook County,60201
560,523,600,Dodge Ave & Church St,42.048308,-87.698224,0,,,,Cook County,60201
561,521,601,Central St Metra,42.063598,-87.69873,0,,,,Cook County,60201
562,91,602,Central St & Girard Ave,42.063999,-87.685962,0,,,,Cook County,60201
563,94,603,Chicago Ave & Sheridan Rd,42.050491,-87.677821,0,Downtown,,,Cook County,60208
564,470,604,Sheridan Rd & Noyes St (NU),42.058239,-87.677432,0,Downtown,,,Cook County,60208
565,521,605,University Library (NU),42.052939,-87.673447,0,Downtown,,,Cook County,60208


In [26]:
#Number of distinct county values (dropna=False: a missing value would count as one)
stations["county"].nunique(dropna=False)

1

In [27]:
#Number of distinct zip code values (a missing value, if present, is counted once)
len(pd.unique(stations["zip_code"]))

58

In [30]:
#How many stations have no neighbourhood value
stations["neighbourhood"].isna().sum()

301

In [31]:
#Inspect the stations that have no neighbourhood value
stations.loc[stations["neighbourhood"].isna()]

Unnamed: 0.1,Unnamed: 0,id,name,latitude,longitude,is_relocated,neighbourhood,suburb,city,county,zip_code
2,2,4,Burnham Harbor,41.856268,-87.613348,0,,Near South Side,Chicago,Cook County,60605
4,4,6,DuSable Harbor,41.885042,-87.612795,0,,Loop,Chicago,Cook County,60603
7,7,11,Jeffery Blvd & 71st St,41.766638,-87.576450,0,,South Shore,Chicago,Cook County,60619
8,8,12,South Shore Dr & 71st St,41.766409,-87.565688,0,,South Shore,Chicago,Cook County,60649
9,58,13,Wilton Ave & Diversey Pkwy,41.932500,-87.652681,0,,Lincoln Park,Chicago,Cook County,60614
...,...,...,...,...,...,...,...,...,...,...,...
577,525,617,East Ave & Garfield St,41.871312,-87.788938,0,,,,Cook County,60304
578,529,618,Lombard Ave & Garfield St,41.870550,-87.779190,0,,,,Cook County,60304
579,279,619,Keystone Ave & Fullerton Ave,41.924361,-87.727752,0,,Hermosa,Chicago,Cook County,60641
581,60,622,California Ave & Cortez St,41.900363,-87.696704,0,,West Town,Chicago,Cook County,60625


### Stations Data

In [2]:
#Load a fresh copy of the station list under its own name (stations_data)
stations_data = pd.read_csv(filepath_or_buffer="../../Data/all_stations.csv")

In [3]:
#Attach zip code info: pass the station coordinates to geo.addZip and keep its result
#NOTE(review): geo.addZip is not shown here; the name of the column it adds is assumed
#to be zip_code, as used elsewhere in this notebook - confirm
lat, long = stations_data["latitude"], stations_data["longitude"]
stations_data = geo.addZip(stations_data, lat, long)

In [4]:
#Impute missing zip codes
#The frame is passed through geo.imputeStationsZip and the name is rebound to the result,
#so re-running this cell feeds the helper its own earlier output.
#NOTE(review): the helper is not shown here; presumably it fills zip codes that
#geo.addZip left empty - confirm how the replacement values are chosen
stations_data = geo.imputeStationsZip(stations_data)

### Landmarks Data

In [6]:
#Load the landmarks table; the relative path is resolved against the kernel's working directory
landmarks = pd.read_csv(filepath_or_buffer="../../Data/landmarks.csv")

In [8]:
#Attach zip code info to the landmarks
#Note the upper-case coordinate column names in this dataset (LATITUDE / LONGITUDE)
lat, long = landmarks["LATITUDE"], landmarks["LONGITUDE"]
landmarks = geo.addZip(landmarks, lat, long)

In [22]:
#Impute missing zip code
#The frame is passed through geo.imputeLandmarksZip and the name is rebound to the result,
#so re-running this cell feeds the helper its own earlier output.
#NOTE(review): the helper is not shown here; presumably it fills zip codes that
#geo.addZip left empty - confirm how the replacement values are chosen
landmarks = geo.imputeLandmarksZip(landmarks)

In [33]:
#Build a two-column frame (zip_code, num_landmarks) for merging with stations
#groupby drops rows whose zip_code is missing, so the size of each group equals
#the number of landmarks recorded for that zip code
landmarks_merge = (
    landmarks
    .groupby("zip_code")
    .size()
    .reset_index(name="num_landmarks")
)

### Crimes Data


In [79]:
#Load the crimes table; the relative path is resolved against the kernel's working directory
crimes = pd.read_csv(filepath_or_buffer="../../Data/crimes.csv")

In [80]:
#Restrict to the years 2013 through 2019 (between() includes both end points)
crimes = crimes.loc[crimes["Year"].between(2013, 2019)]

In [81]:
#Drop observations with missing location value
#subset is given as a list: a list-like of labels is accepted by every pandas version,
#whereas a bare string label is only accepted by newer releases
crimes = crimes.dropna(subset=["Location"])

In [84]:
#List the distinct crime categories in order of first appearance
#The recorded output contains near-duplicate labels (e.g. 'CRIM SEXUAL ASSAULT' vs
#'CRIMINAL SEXUAL ASSAULT', 'NON - CRIMINAL' vs 'NON-CRIMINAL') worth reviewing
pd.unique(crimes["Primary Type"])

array(['BATTERY', 'THEFT', 'NARCOTICS', 'ASSAULT', 'BURGLARY', 'ROBBERY',
       'OTHER OFFENSE', 'CRIMINAL DAMAGE', 'WEAPONS VIOLATION',
       'DECEPTIVE PRACTICE', 'CRIMINAL TRESPASS', 'MOTOR VEHICLE THEFT',
       'SEX OFFENSE', 'INTERFERENCE WITH PUBLIC OFFICER',
       'OFFENSE INVOLVING CHILDREN', 'PUBLIC PEACE VIOLATION',
       'PROSTITUTION', 'GAMBLING', 'CRIM SEXUAL ASSAULT',
       'LIQUOR LAW VIOLATION', 'ARSON', 'STALKING', 'KIDNAPPING',
       'INTIMIDATION', 'CONCEALED CARRY LICENSE VIOLATION',
       'NON - CRIMINAL', 'HUMAN TRAFFICKING', 'OBSCENITY',
       'CRIMINAL SEXUAL ASSAULT', 'PUBLIC INDECENCY',
       'OTHER NARCOTIC VIOLATION', 'NON-CRIMINAL', 'HOMICIDE',
       'NON-CRIMINAL (SUBJECT SPECIFIED)'], dtype=object)