## Use OverPy to collect POI data

In [1]:
import overpy

In [2]:
CITY = 'New Delhi'

query = f'''[out:json];
area[name="{CITY}"];
node["amenity"](area);
out center; '''

In [3]:
api = overpy.Overpass()
res = api.query(query)

<overpy.Result at 0x7f54908ee150>

In [4]:
nodes = []
for node in res.nodes:
    if node.tags:
        nodes.append({
            'id': node.id,
            'lon': float(node.lon),
            'lat': float(node.lat),
            'amenity': node.tags['amenity']
        })
nodes

[{'id': 248852574, 'lon': 77.1518947, 'lat': 28.533492, 'amenity': 'cafe'},
 {'id': 248852583, 'lon': 77.152001, 'lat': 28.5336472, 'amenity': 'bank'},
 {'id': 266442982, 'lon': 77.164493, 'lat': 28.5573088, 'amenity': 'cinema'},
 {'id': 266443204, 'lon': 77.1645488, 'lat': 28.5578459, 'amenity': 'bank'},
 {'id': 266443228,
  'lon': 77.1637622,
  'lat': 28.5573346,
  'amenity': 'fast_food'},
 {'id': 271335768, 'lon': 77.1378574, 'lat': 28.489358, 'amenity': 'school'},
 {'id': 277420968, 'lon': 77.1070198, 'lat': 28.5336932, 'amenity': 'fuel'},
 {'id': 279161182, 'lon': 77.1570574, 'lat': 28.525047, 'amenity': 'bank'},
 {'id': 279161187, 'lon': 77.1561648, 'lat': 28.5262695, 'amenity': 'fuel'},
 {'id': 301037300, 'lon': 77.1850155, 'lat': 28.5499496, 'amenity': 'parking'},
 {'id': 308894803,
  'lon': 77.2299189,
  'lat': 28.6069461,
  'amenity': 'restaurant'},
 {'id': 309861440, 'lon': 77.185421, 'lat': 28.5983754, 'amenity': 'school'},
 {'id': 312102918, 'lon': 77.2089741, 'lat': 28.57

## Load Data to Pandas Dataframe

In [5]:
import pandas as pd

df = pd.DataFrame(nodes)
df

Unnamed: 0,id,lon,lat,amenity
0,248852574,77.151895,28.533492,cafe
1,248852583,77.152001,28.533647,bank
2,266442982,77.164493,28.557309,cinema
3,266443204,77.164549,28.557846,bank
4,266443228,77.163762,28.557335,fast_food
...,...,...,...,...
964,6895185773,77.141847,28.532759,blood_bank
965,6895185775,77.200700,28.626484,blood_bank
966,6898010185,77.217419,28.630726,fast_food
967,6908305664,77.184154,28.570543,place_of_worship


### Data Cleaning

In [7]:
df.dropna(inplace=True)
df

Unnamed: 0,id,lon,lat,amenity
0,248852574,77.151895,28.533492,cafe
1,248852583,77.152001,28.533647,bank
2,266442982,77.164493,28.557309,cinema
3,266443204,77.164549,28.557846,bank
4,266443228,77.163762,28.557335,fast_food
...,...,...,...,...
964,6895185773,77.141847,28.532759,blood_bank
965,6895185775,77.200700,28.626484,blood_bank
966,6898010185,77.217419,28.630726,fast_food
967,6908305664,77.184154,28.570543,place_of_worship


In [8]:
df['amenity'].unique()

array(['cafe', 'bank', 'cinema', 'fast_food', 'school', 'fuel', 'parking',
       'restaurant', 'fire_station', 'pharmacy', 'post_box', 'hospital',
       'toilets', 'atm', 'club', 'bus_station', 'embassy',
       'Netaji Nagar Market', 'Suvidha Market, Netaji Nagar',
       'place_of_worship', 'college', 'police', 'Ayurvedic Hospital',
       'House', 'public_building', 'marketplace', 'library', 'bar',
       'kindergarten', 'taxi', 'waste_basket', 'bench', 'doctors',
       'bureau_de_change', 'pub', 'fast_food;bar', 'post_office',
       'theatre', 'community_centre', 'courthouse', 'car_wash',
       'food_court', 'fountain', 'veterinary', 'training',
       'drinking_water', 'bicycle_parking', 'bicycle_rental',
       'vending_machine', 'money_transfer', 'car_rental', 'telephone',
       'waste_disposal', 'bicycle_repair_station', 'nightclub', 'dentist',
       'arts_centre', 'music_school', 'internet_cafe', 'coworking_space',
       'charging_station', 'shelter', 'conference_centr