In [30]:
import geopy
import geopy.distance
import pandas as pd
from scipy.spatial import cKDTree
from geopy.geocoders import Nominatim

In [2]:
coord = (41.8781, -87.6298)

In [3]:
start = geopy.Point(coord[0], coord[1])

In [4]:
d = geopy.distance.geodesic(miles=1)

In [5]:
d.destination(point=start, bearing=0)

Point(41.89258931911811, -87.6298, 0.0)

In [6]:
df = pd.read_csv('data/crime-clean.csv')
df = df[(df['Year'] < 2021)]

In [7]:
df.head()

Unnamed: 0,Crime Type,Latitude,Longitude,Neighborhood,Zip Code,Adult Population,Crime Score,Neigh Score,CSperCapita,Year,Month
0,BATTERY,41.815117,-87.67,New City,60609,45031,0.4678,15798.9836,0.350847,2015,9
1,THEFT,41.803227,-87.65836,New City,60609,45031,0.2839,15798.9836,0.350847,2015,9
2,THEFT,41.800729,-87.667199,New City,60609,45031,0.284,15798.9836,0.350847,2015,9
3,PUBLIC PEACE VIOLATION,41.796919,-87.662233,New City,60609,45031,0.0167,15798.9836,0.350847,2015,9
4,BURGLARY,41.800627,-87.670846,New City,60609,45031,0.401,15798.9836,0.350847,2015,9


In [8]:
len(df.index)

3149595

In [9]:
tree = cKDTree(df[['Latitude', 'Longitude']])

In [10]:
end = d.destination(point=start, bearing=0)

In [11]:
end.latitude - start.latitude

0.01448931911810547

In [12]:
dist = end.latitude - start.latitude

In [13]:
points = tree.query_ball_point(coord, dist)

In [14]:
len(points)

128121

In [15]:
points[:5]

[2517944, 2516656, 2521991, 2524683, 2527377]

In [16]:
test = df.iloc[points]

In [17]:
test.head()

Unnamed: 0,Crime Type,Latitude,Longitude,Neighborhood,Zip Code,Adult Population,Crime Score,Neigh Score,CSperCapita,Year,Month
2547869,CRIMINAL TRESPASS,41.874361,-87.643134,West Loop,60607,26197,0.2064,11080.4543,0.422967,2017,10
2546581,THEFT,41.874361,-87.643164,West Loop,60607,26197,0.3719,11080.4543,0.422967,2017,6
2551916,THEFT,41.874361,-87.643164,West Loop,60607,26197,0.4584,11080.4543,0.422967,2019,3
2554608,CRIMINAL TRESPASS,41.874361,-87.643164,West Loop,60607,26197,0.0109,11080.4543,0.422967,2010,5
2557302,NARCOTICS,41.874362,-87.643222,West Loop,60607,26197,0.0837,11080.4543,0.422967,2011,3


In [18]:
def calc_dist(row):
    return geopy.distance.distance(coord, (row['Latitude'], row['Longitude'])).miles

In [19]:
test = df.iloc[:100].copy()

In [20]:
test['Distance'] = test.apply(calc_dist, axis=1)

In [21]:
test.head()

Unnamed: 0,Crime Type,Latitude,Longitude,Neighborhood,Zip Code,Adult Population,Crime Score,Neigh Score,CSperCapita,Year,Month,Distance
0,BATTERY,41.815117,-87.67,New City,60609,45031,0.4678,15798.9836,0.350847,2015,9,4.816452
1,THEFT,41.803227,-87.65836,New City,60609,45031,0.2839,15798.9836,0.350847,2015,9,5.373519
2,THEFT,41.800729,-87.667199,New City,60609,45031,0.284,15798.9836,0.350847,2015,9,5.677949
3,PUBLIC PEACE VIOLATION,41.796919,-87.662233,New City,60609,45031,0.0167,15798.9836,0.350847,2015,9,5.847496
4,BURGLARY,41.800627,-87.670846,New City,60609,45031,0.401,15798.9836,0.350847,2015,9,5.751225


In [22]:
test2 = df.iloc[points].copy()

In [23]:
test2.head()

Unnamed: 0,Crime Type,Latitude,Longitude,Neighborhood,Zip Code,Adult Population,Crime Score,Neigh Score,CSperCapita,Year,Month
2547869,CRIMINAL TRESPASS,41.874361,-87.643134,West Loop,60607,26197,0.2064,11080.4543,0.422967,2017,10
2546581,THEFT,41.874361,-87.643164,West Loop,60607,26197,0.3719,11080.4543,0.422967,2017,6
2551916,THEFT,41.874361,-87.643164,West Loop,60607,26197,0.4584,11080.4543,0.422967,2019,3
2554608,CRIMINAL TRESPASS,41.874361,-87.643164,West Loop,60607,26197,0.0109,11080.4543,0.422967,2010,5
2557302,NARCOTICS,41.874362,-87.643222,West Loop,60607,26197,0.0837,11080.4543,0.422967,2011,3


In [24]:
test2['Distance'] = test2.apply(calc_dist, axis=1)

In [25]:
test2.head()

Unnamed: 0,Crime Type,Latitude,Longitude,Neighborhood,Zip Code,Adult Population,Crime Score,Neigh Score,CSperCapita,Year,Month,Distance
2547869,CRIMINAL TRESPASS,41.874361,-87.643134,West Loop,60607,26197,0.2064,11080.4543,0.422967,2017,10,0.734599
2546581,THEFT,41.874361,-87.643164,West Loop,60607,26197,0.3719,11080.4543,0.422967,2017,6,0.736013
2551916,THEFT,41.874361,-87.643164,West Loop,60607,26197,0.4584,11080.4543,0.422967,2019,3,0.736013
2554608,CRIMINAL TRESPASS,41.874361,-87.643164,West Loop,60607,26197,0.0109,11080.4543,0.422967,2010,5,0.736013
2557302,NARCOTICS,41.874362,-87.643222,West Loop,60607,26197,0.0837,11080.4543,0.422967,2011,3,0.738843


In [26]:
test3 = test2[test2['Distance'] > 1]

In [27]:
test3.head()

Unnamed: 0,Crime Type,Latitude,Longitude,Neighborhood,Zip Code,Adult Population,Crime Score,Neigh Score,CSperCapita,Year,Month,Distance


In [28]:
test4 = test2.sort_values(by='Distance', ascending=False)

In [29]:
test4.head()

Unnamed: 0,Crime Type,Latitude,Longitude,Neighborhood,Zip Code,Adult Population,Crime Score,Neigh Score,CSperCapita,Year,Month,Distance
2445895,THEFT,41.863619,-87.629384,Near South Side,60605,25954,0.3551,5218.9478,0.201085,2017,2,0.999644
2445569,BURGLARY,41.863619,-87.629384,Near South Side,60605,25954,0.4584,5218.9478,0.201085,2016,6,0.999644
2450280,THEFT,41.863619,-87.629384,Near South Side,60605,25954,0.154,5218.9478,0.201085,2013,1,0.999644
2449853,BURGLARY,41.863619,-87.629384,Near South Side,60605,25954,0.1828,5218.9478,0.201085,2012,8,0.999644
2449157,THEFT,41.863619,-87.629384,Near South Side,60605,25954,0.0762,5218.9478,0.201085,2011,7,0.999644


In [47]:
address = '4149 N Western Ave Chicago'

In [48]:
geolocator = Nominatim(user_agent='team175')
location = geolocator.geocode(address)

In [49]:
location.address

'4149, North Western Avenue, North Center, Chicago, Lake View Township, Cook County, Illinois, 60618-9997, United States'