In [2]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from tqdm import tqdm
import pandas as pd


In [7]:
# Nominatim client used for every reverse-geocoding request in this notebook.
# An explicit timeout is set because slow responses otherwise surface as
# exceptions, which the rate limiter below swallows and turns into `None`
# (the notebook records such an all-None result for a coordinate that resolves
# fine when queried directly).
geolocator = Nominatim(
    user_agent="lumi-poi-enrichment",  # REQUIRED
    timeout=10,
)

# Rate-limited wrapper around `geolocator.reverse`:
#   * at least 1 second between calls,
#   * service errors are retried after a pause before giving up,
#   * after the final failed attempt the exception is swallowed and `None`
#     is returned, so callers must handle a `None` location.
reverse = RateLimiter(
    geolocator.reverse,
    min_delay_seconds=1,
    max_retries=2,
    error_wait_seconds=5.0,
    swallow_exceptions=True,
    return_value_on_exception=None,
)

In [9]:
def get_poi_info(lat, lon):
    """Reverse-geocode one coordinate pair into a POI name, type and category.

    Parameters
    ----------
    lat, lon : scalar
        Latitude and longitude passed to the rate-limited ``reverse`` callable
        as a ``(lat, lon)`` tuple, with ``zoom=18`` and ``addressdetails=True``.

    Returns
    -------
    dict
        Always has the keys ``"poi_name"``, ``"poi_type"`` and
        ``"poi_category"``. All three are ``None`` when a coordinate is
        missing or when ``reverse`` returns ``None`` (no match, or a failure
        swallowed by the rate limiter). Otherwise ``poi_type`` and
        ``poi_category`` are the response's ``type`` and ``class`` fields, and
        ``poi_name`` is the first non-empty value among the response ``name``,
        a list of POI-like address fields, and finally ``display_name``.
    """
    empty = {
        "poi_name": None,
        "poi_type": None,
        "poi_category": None
    }

    # A missing coordinate can never resolve; skip the rate-limited request.
    if pd.isna(lat) or pd.isna(lon):
        return empty

    location = reverse((lat, lon), zoom=18, addressdetails=True)

    if location is None:
        return empty

    raw = location.raw
    # `or {}` also covers an explicit null "address" entry in the payload.
    address = raw.get("address") or {}

    # Address fields tried, in priority order, when the top-level name is empty.
    name_keys = (
        "attraction",
        "amenity",
        "shop",
        "tourism",
        "leisure",
        "office",
        "healthcare",
        "education",
        "public_transport",
        "highway",
    )

    # POI name (best effort): first truthy candidate, else the display name.
    poi_name = (
        raw.get("name")
        or next((address[key] for key in name_keys if address.get(key)), None)
        or raw.get("display_name")
    )

    return {
        "poi_name": poi_name,
        "poi_type": raw.get("type"),
        "poi_category": raw.get("class")
    }

In [63]:
# Load the check-in table (with per-crime-type count columns) to be enriched.
df = pd.read_csv(
    filepath_or_buffer="../../data/Chicago_checkins/raw/checkins_with_crimes.csv",
)

In [12]:
# One row per distinct (latitude, longitude) pair, so each location is
# geocoded only once; `poi_id` is simply the row's position in this table.
poi_df = (
    df.loc[:, ["latitude", "longitude"]]
    .drop_duplicates(ignore_index=True)
    .assign(poi_id=lambda coords: coords.index)
)

In [13]:
# Register `progress_apply` on pandas objects so the slow lookups show a bar.
tqdm.pandas()

# Chunk 1: reverse-geocode positions 0-1499 of `poi_df`
# (about 26 minutes in the recorded run).
poi_data_1 = poi_df.iloc[:1500].progress_apply(
    lambda row: get_poi_info(row["latitude"], row["longitude"]),
    axis=1,
    result_type="expand",
)

100%|██████████| 1500/1500 [25:59<00:00,  1.04s/it]


In [14]:
# Chunk 2: reverse-geocode positions 1500-2999 of `poi_df`.
poi_data_2 = poi_df.iloc[slice(1500, 3000)].progress_apply(
    lambda row: get_poi_info(row["latitude"], row["longitude"]),
    axis=1,
    result_type="expand",
)

100%|██████████| 1500/1500 [26:34<00:00,  1.06s/it]


In [15]:
# Chunk 3: reverse-geocode positions 3000-4499 of `poi_df`.
poi_data_3 = poi_df.iloc[slice(3000, 4500)].progress_apply(
    lambda row: get_poi_info(row["latitude"], row["longitude"]),
    axis=1,
    result_type="expand",
)

100%|██████████| 1500/1500 [25:40<00:00,  1.03s/it]


In [16]:
# Chunk 4: reverse-geocode positions 4500-5999 of `poi_df`.
poi_data_4 = poi_df.iloc[slice(4500, 6000)].progress_apply(
    lambda row: get_poi_info(row["latitude"], row["longitude"]),
    axis=1,
    result_type="expand",
)

100%|██████████| 1500/1500 [26:17<00:00,  1.05s/it]


In [17]:
# Chunk 5: reverse-geocode positions 6000-7499 of `poi_df`.
poi_data_5 = poi_df.iloc[slice(6000, 7500)].progress_apply(
    lambda row: get_poi_info(row["latitude"], row["longitude"]),
    axis=1,
    result_type="expand",
)

100%|██████████| 1500/1500 [26:15<00:00,  1.05s/it]


In [18]:
# Chunk 6: reverse-geocode positions 7500-8999 of `poi_df`.
poi_data_6 = poi_df.iloc[slice(7500, 9000)].progress_apply(
    lambda row: get_poi_info(row["latitude"], row["longitude"]),
    axis=1,
    result_type="expand",
)

100%|██████████| 1500/1500 [25:42<00:00,  1.03s/it]


In [19]:
# Final chunk: reverse-geocode everything from position 9000 onwards
# (142 rows in the recorded run).
poi_data_7 = poi_df.iloc[slice(9000, None)].progress_apply(
    lambda row: get_poi_info(row["latitude"], row["longitude"]),
    axis=1,
    result_type="expand",
)

100%|██████████| 142/142 [02:33<00:00,  1.08s/it]


In [20]:
# Stack the seven geocoded chunks back into one frame. Each chunk keeps the
# `poi_df` index of its rows, so sorting by index restores the original order.
poi_data_all = pd.concat(
    [
        poi_data_1,
        poi_data_2,
        poi_data_3,
        poi_data_4,
        poi_data_5,
        poi_data_6,
        poi_data_7,
    ],
    axis="index",
).sort_index()

In [21]:
# Display the combined geocoding results (the notebook renders a truncated view).
poi_data_all

Unnamed: 0,poi_name,poi_type,poi_category
0,Car Rentals,car_rental,amenity
1,Riccardo Trattoria,restaurant,amenity
2,South State Street,secondary,highway
3,Field Museum,museum,tourism
4,"640, North Wells Street, River North, Near Nor...",apartments,building
...,...,...,...
9137,"6407-6409, North Newgard Avenue, Glenwood Arts...",yes,building
9138,Great Clips,hairdresser,shop
9139,North Greenwood Avenue,tertiary,highway
9140,Pret A Manger,fast_food,amenity


In [22]:
# Count missing values per POI column; non-zero counts mean failed lookups.
poi_data_all.isna().sum()

poi_name        0
poi_type        0
poi_category    0
dtype: int64

In [23]:
# Frequency of each `poi_type` value, most common first.
poi_data_all["poi_type"].value_counts()

poi_type
yes               1612
parking            771
restaurant         696
house              483
apartments         420
                  ... 
milestone            1
armory               1
collector            1
fishing              1
administrative       1
Name: count, Length: 319, dtype: int64

In [24]:
# Frequency of each (poi_category, poi_type) combination, most common first.
poi_data_all.value_counts(["poi_category", "poi_type"])

poi_category  poi_type               
building      yes                        1598
amenity       parking                     771
              restaurant                  696
place         house                       446
building      apartments                  420
                                         ... 
amenity       stripclub                     1
emergency     emergency_ward_entrance       1
shop          florist                       1
amenity       taxi                          1
leisure       fishing                       1
Name: count, Length: 334, dtype: int64

In [25]:
# Number of distinct non-null `poi_category` values.
poi_data_all["poi_category"].nunique(dropna=True)

18

In [26]:
# Number of distinct non-null `poi_type` values.
poi_data_all["poi_type"].nunique(dropna=True)

319

In [27]:
# Number of distinct *frequency values* among the (poi_category, poi_type) pairs:
# `value_counts` yields one count per pair and `nunique` is applied to those counts.
# NOTE(review): this is not the number of distinct pairs; if that was intended,
# use `len(poi_data_all.value_counts(["poi_category", "poi_type"]))` instead.
poi_data_all.value_counts(["poi_category", "poi_type"]).nunique(dropna=True)

65

In [28]:
# Frequency of each `poi_category` value, most common first.
poi_data_all["poi_category"].value_counts()

poi_category
amenity       3244
building      2607
highway        823
shop           790
place          446
leisure        306
tourism        230
man_made       186
aeroway        180
office         160
railway         90
emergency       27
historic        23
craft           13
healthcare      11
club             3
military         2
boundary         1
Name: count, dtype: int64

In [29]:
# Frequency of each `poi_type` value, most common first.
# NOTE(review): identical to an earlier cell in this notebook; one can be removed.
poi_data_all["poi_type"].value_counts()

poi_type
yes               1612
parking            771
restaurant         696
house              483
apartments         420
                  ... 
milestone            1
armory               1
collector            1
fishing              1
administrative       1
Name: count, Length: 319, dtype: int64

In [30]:
# Distribution of pair frequencies: for each occurrence count, how many
# (poi_category, poi_type) pairs appear exactly that many times.
poi_data_all.value_counts(["poi_category", "poi_type"]).value_counts()

count
1     88
2     36
3     24
4     22
6     12
      ..
58     1
57     1
50     1
49     1
48     1
Name: count, Length: 65, dtype: int64

In [42]:
# Attach the geocoded columns to the coordinate table; rows are matched on the
# shared index and every `poi_df` row is kept.
poi_df_enriched = poi_df.join(other=poi_data_all, how="left")

In [43]:
# Preview the first rows of the enriched coordinate table.
poi_df_enriched.head()

Unnamed: 0,latitude,longitude,poi_id,poi_name,poi_type,poi_category
0,41.8787,-87.639656,0,Car Rentals,car_rental,amenity
1,41.920592,-87.637466,1,Riccardo Trattoria,restaurant,amenity
2,41.872821,-87.627609,2,South State Street,secondary,highway
3,41.865485,-87.616997,3,Field Museum,museum,tourism
4,41.893874,-87.634483,4,"640, North Wells Street, River North, Near Nor...",apartments,building


In [45]:
# Checkpoint the raw geocoding results (index not written) so the slow
# lookups need not be repeated.
poi_data_all.to_csv(
    path_or_buf="poi_data_all_temp.csv",
    index=False,
)

In [44]:
# Persist the coordinate -> POI lookup table (index not written).
poi_df_enriched.to_csv(
    path_or_buf="poi_with_category.csv",
    index=False,
)

In [64]:
# Attach POI attributes to every check-in by matching on its coordinates.
#
# * Any POI columns already present are dropped first so the cell can be re-run
#   safely; merging twice would otherwise create `_x`/`_y` suffixed duplicates.
# * `validate="many_to_one"` asserts that each coordinate pair appears only once
#   in the lookup table, so the left merge cannot multiply check-in rows.
df = df.drop(
    columns=["poi_name", "poi_type", "poi_category"],
    errors="ignore",
).merge(
    poi_df_enriched[
        ["latitude", "longitude", "poi_name", "poi_type", "poi_category"]
    ],
    on=["latitude", "longitude"],
    how="left",
    validate="many_to_one",
)

In [65]:
# Preview the check-ins after the merge; the POI columns are appended at the end.
df.head()

Unnamed: 0,user_id,utc_time,latitude,longitude,poi_id,local_time,DECEPTIVE PRACTICE,OFFENSE INVOLVING CHILDREN,NARCOTICS,THEFT,...,STALKING,OBSCENITY,OTHER NARCOTIC VIOLATION,PUBLIC INDECENCY,NON-CRIMINAL,CRIMINAL SEXUAL ASSAULT,HOMICIDE,poi_name,poi_type,poi_category
0,17,2010-09-06T00:38:52Z,41.8787,-87.639656,0,2010-09-05 19:38:52-05:00,9,0,6,50,...,0,1,0,0,0,0,0,Car Rentals,car_rental,amenity
1,17,2010-09-05T00:58:36Z,41.920592,-87.637466,1,2010-09-04 19:58:36-05:00,3,0,0,47,...,0,0,0,0,0,0,0,Riccardo Trattoria,restaurant,amenity
2,17,2010-09-01T23:29:34Z,41.872821,-87.627609,2,2010-09-01 18:29:34-05:00,9,0,4,48,...,0,0,0,0,0,0,0,South State Street,secondary,highway
3,17,2010-09-01T17:25:27Z,41.865485,-87.616997,3,2010-09-01 12:25:27-05:00,5,0,3,3,...,0,0,0,0,0,0,0,Field Museum,museum,tourism
4,17,2010-08-29T22:20:38Z,41.893874,-87.634483,4,2010-08-29 17:20:38-05:00,13,0,2,77,...,0,0,0,0,0,0,0,"640, North Wells Street, River North, Near Nor...",apartments,building


In [66]:
# Move the three POI columns so they sit immediately after `poi_id`,
# leaving the relative order of all other columns unchanged.
poi_cols = ["poi_name", "poi_type", "poi_category"]
cols = list(df.columns)
insert_at = cols.index("poi_id") + 1

new_cols = [
    *cols[:insert_at],
    *poi_cols,
    *(name for name in cols[insert_at:] if name not in poi_cols),
]

df = df.loc[:, new_cols]

In [67]:
# Preview the check-ins after reordering; POI columns now follow `poi_id`.
df.head()

Unnamed: 0,user_id,utc_time,latitude,longitude,poi_id,poi_name,poi_type,poi_category,local_time,DECEPTIVE PRACTICE,...,GAMBLING,KIDNAPPING,INTIMIDATION,STALKING,OBSCENITY,OTHER NARCOTIC VIOLATION,PUBLIC INDECENCY,NON-CRIMINAL,CRIMINAL SEXUAL ASSAULT,HOMICIDE
0,17,2010-09-06T00:38:52Z,41.8787,-87.639656,0,Car Rentals,car_rental,amenity,2010-09-05 19:38:52-05:00,9,...,0,0,0,0,1,0,0,0,0,0
1,17,2010-09-05T00:58:36Z,41.920592,-87.637466,1,Riccardo Trattoria,restaurant,amenity,2010-09-04 19:58:36-05:00,3,...,0,0,0,0,0,0,0,0,0,0
2,17,2010-09-01T23:29:34Z,41.872821,-87.627609,2,South State Street,secondary,highway,2010-09-01 18:29:34-05:00,9,...,0,0,0,0,0,0,0,0,0,0
3,17,2010-09-01T17:25:27Z,41.865485,-87.616997,3,Field Museum,museum,tourism,2010-09-01 12:25:27-05:00,5,...,0,0,0,0,0,0,0,0,0,0
4,17,2010-08-29T22:20:38Z,41.893874,-87.634483,4,"640, North Wells Street, River North, Near Nor...",apartments,building,2010-08-29 17:20:38-05:00,13,...,0,0,0,0,0,0,0,0,0,0


In [69]:
# Write the enriched check-in table (index not written).
df.to_csv(
    path_or_buf="../data/poi_type_checkin_crime.csv",
    index=False,
)

---

In [69]:
# Spot check: reverse-geocode the coordinate at position 9141 directly.
location = reverse(
    tuple(poi_df.iloc[9141][["latitude", "longitude"]]),
    zoom=18,
    addressdetails=True,
)

In [70]:
# Show the Location object returned for position 9141.
location

Location(1800, West Roscoe Street, Roscoe Village, North Center, Chicago, Lake View Township, Cook County, Illinois, 60657, United States, (41.9435174, -87.6748716, 0.0))

In [71]:
# Inspect the raw response payload: this result has an empty `name`, so
# `get_poi_info` would fall back to `display_name` for it.
location.raw

{'place_id': 342522411,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
 'osm_type': 'way',
 'osm_id': 209903693,
 'lat': '41.9435174',
 'lon': '-87.6748716',
 'class': 'building',
 'type': 'yes',
 'place_rank': 30,
 'importance': 8.51537346455637e-05,
 'addresstype': 'building',
 'name': '',
 'display_name': '1800, West Roscoe Street, Roscoe Village, North Center, Chicago, Lake View Township, Cook County, Illinois, 60657, United States',
 'address': {'house_number': '1800',
  'road': 'West Roscoe Street',
  'neighbourhood': 'Roscoe Village',
  'suburb': 'North Center',
  'city': 'Chicago',
  'municipality': 'Lake View Township',
  'county': 'Cook County',
  'state': 'Illinois',
  'ISO3166-2-lvl4': 'US-IL',
  'postcode': '60657',
  'country': 'United States',
  'country_code': 'us'},
 'boundingbox': ['41.9433463', '41.9437094', '-87.6757916', '-87.6739863']}

In [66]:
# Spot check: reverse-geocode the coordinate at position 9140 directly.
location = reverse(
    tuple(poi_df.iloc[9140][["latitude", "longitude"]]),
    zoom=18,
    addressdetails=True,
)

In [67]:
# Show the Location object returned for position 9140.
location

Location(Pret A Manger, 225, South Canal Street, West Loop Gate, Near West Side, Chicago, West Chicago Township, Cook County, Illinois, 60606, United States, (41.8786577, -87.6389762, 0.0))

In [68]:
# Inspect the raw response payload: here `name` is populated and the address
# carries an `amenity` entry.
location.raw

{'place_id': 342355797,
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
 'osm_type': 'node',
 'osm_id': 6320242859,
 'lat': '41.8786577',
 'lon': '-87.6389762',
 'class': 'amenity',
 'type': 'fast_food',
 'place_rank': 30,
 'importance': 8.51537346455637e-05,
 'addresstype': 'amenity',
 'name': 'Pret A Manger',
 'display_name': 'Pret A Manger, 225, South Canal Street, West Loop Gate, Near West Side, Chicago, West Chicago Township, Cook County, Illinois, 60606, United States',
 'address': {'amenity': 'Pret A Manger',
  'house_number': '225',
  'road': 'South Canal Street',
  'neighbourhood': 'West Loop Gate',
  'suburb': 'Near West Side',
  'city': 'Chicago',
  'municipality': 'West Chicago Township',
  'county': 'Cook County',
  'state': 'Illinois',
  'ISO3166-2-lvl4': 'US-IL',
  'postcode': '60606',
  'country': 'United States',
  'country_code': 'us'},
 'boundingbox': ['41.8786077', '41.8787077', '-87.6390262', '-87.6389262']}

In [47]:
# Spot check: run the full helper on the coordinate at position 9141.
d = get_poi_info(*poi_df.iloc[9141][["latitude", "longitude"]])

In [48]:
# Show the helper's result. All-None values mean `reverse` returned None for
# this call (no match, or a failure swallowed by the rate limiter).
d

{'poi_name': None, 'poi_type': None, 'poi_category': None}

---