In [1]:
import requests
import pandas as pd
import h3

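H3 is a hierarchical geospatial indexing system that partitions the Earth's surface into hexagonal cells at several resolutions. Every cell is identified by a unique index (for example `8829a1d757fffff`), which lets us use hexagons as the spatial unit of this analysis.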

First of all, we will create a DataFrame with all hexagons where a trip started or ended. These hexagons are of interest to us, as demand to or from an area may be influenced by the number and type of buildings and facilities.

In [2]:
# Load the trip records; later cells read their start/end latitude and
# longitude columns to derive hexagons.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
trips_df = pd.read_pickle('../00_data/trips.pkl')

In [3]:
def convert_to_hex(latitude, longitude, resolution):
    """Return the H3 cell that contains a geographic coordinate.

    Parameters
    ----------
    latitude : latitude of the point, forwarded to ``h3.geo_to_h3`` as ``lat``.
    longitude : longitude of the point, forwarded as ``lng``.
    resolution : H3 resolution, forwarded unchanged.

    Returns
    -------
    The value produced by ``h3.geo_to_h3`` for the given point and resolution.
    """
    hexagon = h3.geo_to_h3(lat=latitude, lng=longitude, resolution=resolution)
    return hexagon

In [4]:
# H3 resolution used for the hexagons derived from the trips.
resolution = 8

# Map every trip's start and end coordinate to its H3 cell.
# Iterating over the coordinate columns directly avoids DataFrame.apply(axis=1),
# which builds an intermediate Series for every single trip.
trips_df['start_hex'] = [
    convert_to_hex(latitude, longitude, resolution)
    for latitude, longitude in zip(
        trips_df['start_latitude'], trips_df['start_longitude']
    )
]

trips_df['end_hex'] = [
    convert_to_hex(latitude, longitude, resolution)
    for latitude, longitude in zip(
        trips_df['end_latitude'], trips_df['end_longitude']
    )
]

In [5]:
# Count the distinct hexagons that appear as a trip start or a trip end.
number_of_unique_hexagons = len(
    pd.concat([trips_df['start_hex'], trips_df['end_hex']]).unique()
)
print(f"We have identified {number_of_unique_hexagons} for h3 hexagons with resolution {resolution}.")

We have identified 75 for h3 hexagons with resolution 8.


In [6]:
# One row per H3 hexagon in LA.
# Only hexagons in which at least one trip started or ended are included.
hexagons_df = pd.DataFrame(
    {'hex': pd.concat([trips_df['start_hex'], trips_df['end_hex']]).unique()}
)
hexagons_df.head(2)

Unnamed: 0,hex
0,8829a1d757fffff
1,8829a1d755fffff


In the next step we will retrieve information about all hexagons in LA, Santa Monica and Burbank. To do so we query OpenStreetMap (OSM) data through the Overpass API, which is free of charge. Queries can be tried out interactively with Overpass Turbo: https://overpass-turbo.eu/

In [7]:
# Overpass API endpoint used by every query below; HTTPS keeps the requests
# and responses from travelling in clear text.
overpass_url = "https://overpass-api.de/api/interpreter"

In [8]:
# Overpass QL: collect every food-and-drink amenity node located in the areas
# named Los Angeles (.a), Santa Monica (.b) and Burbank (.c).
overpass_query = """
  [out:json]; 
  area[name = "Los Angeles"]->.a;
	area[name = "Santa Monica"]->.b; 
	area[name = "Burbank"]->.c; 
    (   
      node(area.a)[amenity=bar];
      node(area.a)[amenity=restaurant]; 
      node(area.a)[amenity=pub];
      node(area.a)[amenity=ice_cream];
      node(area.a)[amenity=food_court];
      node(area.a)[amenity=fast_food];
      node(area.a)[amenity=biergarten];
      node(area.a)[amenity=cafe];
      
      node(area.b)[amenity=bar];
      node(area.b)[amenity=restaurant]; 
      node(area.b)[amenity=pub];
      node(area.b)[amenity=ice_cream];
      node(area.b)[amenity=food_court];
      node(area.b)[amenity=fast_food];
      node(area.b)[amenity=biergarten];
      node(area.b)[amenity=cafe];

      node(area.c)[amenity=bar];
      node(area.c)[amenity=restaurant]; 
      node(area.c)[amenity=pub];
      node(area.c)[amenity=ice_cream];
      node(area.c)[amenity=food_court];
      node(area.c)[amenity=fast_food];
      node(area.c)[amenity=biergarten];
      node(area.c)[amenity=cafe];
      
    ); 
    out;  
"""
# A client-side timeout keeps a stalled connection from blocking the notebook
# indefinitely.
response = requests.get(overpass_url, params={"data": overpass_query}, timeout=180)
# Fail with a clear HTTP error instead of a confusing JSON decoding error when
# the server answers with a non-2xx status.
response.raise_for_status()
sustenance_data = response.json()


In [9]:
# Inspect the parsed Overpass response.
# NOTE(review): this displays the complete payload; showing only a slice of
# sustenance_data["elements"] would keep the cell output short.
sustenance_data

{'version': 0.6,
 'generator': 'Overpass API 0.7.57 93a4d346',
 'osm3s': {'timestamp_osm_base': '2021-12-11T15:36:44Z',
  'timestamp_areas_base': '2021-12-11T15:13:26Z',
  'copyright': 'The data included in this document is from www.openstreetmap.org. The data is made available under ODbL.'},
 'elements': [{'type': 'node',
   'id': 72448982,
   'lat': 34.0762169,
   'lon': -118.2160197,
   'tags': {'amenity': 'fast_food',
    'cuisine': 'japanese',
    'drive_through': 'yes',
    'name': 'Yoshinoya',
    'source': 'usgs_imagery;survey;image',
    'source_ref': 'AM909_DSCV1655'}},
  {'type': 'node',
   'id': 72448995,
   'lat': 34.0766932,
   'lon': -118.2160126,
   'tags': {'amenity': 'fast_food',
    'cuisine': 'burger',
    'drive_through': 'yes',
    'name': 'Jack in the Box',
    'source': 'usgs_imagery;survey;image',
    'source_ref': 'AM909_DSCV1656'}},
  {'type': 'node',
   'id': 77741928,
   'lat': 34.190943,
   'lon': -118.3305106,
   'tags': {'amenity': 'restaurant',
    'cui

In [10]:
# Turn the list under "elements" into a table with one row per returned node.
sustenance_df = pd.DataFrame(sustenance_data["elements"])
sustenance_df.head(2)

Unnamed: 0,type,id,lat,lon,tags
0,node,72448982,34.076217,-118.21602,"{'amenity': 'fast_food', 'cuisine': 'japanese'..."
1,node,72448995,34.076693,-118.216013,"{'amenity': 'fast_food', 'cuisine': 'burger', ..."


In [11]:
# Label every row with the shared category and pull the concrete amenity type
# out of each node's tag dictionary.
sustenance_df = sustenance_df.assign(
    category='sustenance',
    amenity=sustenance_df['tags'].map(lambda tags: tags['amenity']),
)
sustenance_df.head()

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,72448982,34.076217,-118.21602,"{'amenity': 'fast_food', 'cuisine': 'japanese'...",sustenance,fast_food
1,node,72448995,34.076693,-118.216013,"{'amenity': 'fast_food', 'cuisine': 'burger', ...",sustenance,fast_food
2,node,77741928,34.190943,-118.330511,"{'amenity': 'restaurant', 'cuisine': 'american...",sustenance,restaurant
3,node,344723703,34.185572,-118.316496,"{'amenity': 'fast_food', 'brand': 'Baja Fresh'...",sustenance,fast_food
4,node,344723888,34.185624,-118.316329,"{'amenity': 'fast_food', 'brand': 'Jersey Mike...",sustenance,fast_food


In [12]:
# Overpass QL: collect every bus stop node in the areas named Los Angeles (.a),
# Santa Monica (.b) and Burbank (.c).
# The .b and .c areas must be defined here as well: each query is evaluated on
# its own, so without these definitions the area.b / area.c clauses have no
# area to search.
overpass_query = """
    [out:json]; 
    area[name = "Los Angeles"]->.a; 
    area[name = "Santa Monica"]->.b; 
    area[name = "Burbank"]->.c; 
    (   
      node(area.a)[highway=bus_stop];
      node(area.b)[highway=bus_stop];
      node(area.c)[highway=bus_stop];
    ); 
    out; 
"""
# A client-side timeout keeps a stalled connection from blocking the notebook
# indefinitely.
response = requests.get(overpass_url, params={"data": overpass_query}, timeout=180)
# Fail with a clear HTTP error instead of a confusing JSON decoding error when
# the server answers with a non-2xx status.
response.raise_for_status()
bus_data = response.json()


In [13]:
# Turn the list under "elements" into a table with one row per bus stop node.
bus_df = pd.DataFrame(bus_data["elements"])
bus_df.head(2)

Unnamed: 0,type,id,lat,lon,tags
0,node,349443839,34.079103,-118.291534,"{'bus': 'yes', 'highway': 'bus_stop', 'public_..."
1,node,368062538,34.054432,-118.237308,"{'bus': 'yes', 'highway': 'bus_stop', 'name': ..."


In [14]:
# Every bus stop gets the same category and amenity label.
bus_df = bus_df.assign(category='public transport', amenity='bus stop')
bus_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,349443839,34.079103,-118.291534,"{'bus': 'yes', 'highway': 'bus_stop', 'public_...",public transport,bus stop
1,node,368062538,34.054432,-118.237308,"{'bus': 'yes', 'highway': 'bus_stop', 'name': ...",public transport,bus stop


In [16]:
# Overpass QL: collect every railway station node in the areas named
# Los Angeles (.a), Santa Monica (.b) and Burbank (.c).
# The .b and .c areas must be defined here as well: each query is evaluated on
# its own, so without these definitions the area.b / area.c clauses have no
# area to search.
overpass_query = """
    [out:json]; 
    area[name = "Los Angeles"]->.a; 
    area[name = "Santa Monica"]->.b; 
    area[name = "Burbank"]->.c; 
    (   
      node(area.a)[railway=station];
      node(area.b)[railway=station];
      node(area.c)[railway=station];
    ); 
    out; 
"""
# A client-side timeout keeps a stalled connection from blocking the notebook
# indefinitely.
response = requests.get(overpass_url, params={"data": overpass_query}, timeout=180)
# Fail with a clear HTTP error instead of a confusing JSON decoding error when
# the server answers with a non-2xx status.
response.raise_for_status()
railway_data = response.json()

In [17]:
# Turn the list under "elements" into a table with one row per station node.
railway_df = pd.DataFrame(railway_data["elements"])
railway_df.head(2)

Unnamed: 0,type,id,lat,lon,tags
0,node,123386730,34.026363,-118.372152,"{'addr:city': 'Los Angeles', 'addr:housenumber..."
1,node,267072333,34.087521,-118.475692,"{'name': 'Lower Tram Station', 'railway': 'sta..."


In [18]:
# Every railway station gets the same category and amenity label.
railway_df = railway_df.assign(category='public transport', amenity='railway stop')
railway_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity
0,node,123386730,34.026363,-118.372152,"{'addr:city': 'Los Angeles', 'addr:housenumber...",public transport,railway stop
1,node,267072333,34.087521,-118.475692,"{'name': 'Lower Tram Station', 'railway': 'sta...",public transport,railway stop


In [19]:
# Stack the three POI tables into a single frame.
# NOTE(review): each input keeps its own row labels, so the combined index
# contains repeated labels; pass ignore_index=True if unique labels are needed.
pois_df = pd.concat([sustenance_df, bus_df, railway_df])

In [20]:
# Assign every POI to its H3 cell. The shared `resolution` is used instead of a
# repeated literal so POI hexagons always match the trip hexagons.
# Iterating over the coordinate columns avoids the per-row Series that
# DataFrame.apply(axis=1) creates.
pois_df['hex'] = [
    convert_to_hex(latitude, longitude, resolution)
    for latitude, longitude in zip(pois_df['lat'], pois_df['lon'])
]
pois_df.head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity,hex
0,node,72448982,34.076217,-118.21602,"{'amenity': 'fast_food', 'cuisine': 'japanese'...",sustenance,fast_food,8829a1d73dfffff
1,node,72448995,34.076693,-118.216013,"{'amenity': 'fast_food', 'cuisine': 'burger', ...",sustenance,fast_food,8829a1d73dfffff


In [21]:
# Spot-check the bus stop rows after hexagon assignment.
pois_df.loc[pois_df['amenity'].eq('bus stop')].head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity,hex
0,node,349443839,34.079103,-118.291534,"{'bus': 'yes', 'highway': 'bus_stop', 'public_...",public transport,bus stop,8829a1d459fffff
1,node,368062538,34.054432,-118.237308,"{'bus': 'yes', 'highway': 'bus_stop', 'name': ...",public transport,bus stop,8829a1d757fffff


In [22]:
# Spot-check the railway stop rows after hexagon assignment.
pois_df.loc[pois_df['amenity'].eq('railway stop')].head(2)

Unnamed: 0,type,id,lat,lon,tags,category,amenity,hex
0,node,123386730,34.026363,-118.372152,"{'addr:city': 'Los Angeles', 'addr:housenumber...",public transport,railway stop,8829a1990bfffff
1,node,267072333,34.087521,-118.475692,"{'name': 'Lower Tram Station', 'railway': 'sta...",public transport,railway stop,8829a198b5fffff


In [23]:
# Number of POIs per (hexagon, category) pair, as a flat table with the columns
# 'hex', 'category' and 'number of pois'.
all_hexagons_with_pois = (
    pois_df
    .groupby(['hex', 'category'])
    .size()
    .reset_index(name='number of pois')
)
all_hexagons_with_pois.head(2)

Unnamed: 0,hex,category,number of pois
0,881f9c344dfffff,sustenance,1
1,882664501bfffff,sustenance,1


In [24]:
# For every hexagon, store the list returned by h3.k_ring with distance 1
# (the hexagon together with its neighbours).
hexagons_df["hex_and_neighbors"] = hexagons_df["hex"].map(
    lambda hexagon: list(h3.k_ring(hexagon, 1))
)
hexagons_df.head(2)

Unnamed: 0,hex,hex_and_neighbors
0,8829a1d757fffff,"[8829a1d755fffff, 8829a1d753fffff, 8829a1d719f..."
1,8829a1d755fffff,"[8829a1d755fffff, 8829a1d709fffff, 8829a1d757f..."


In [25]:
# Show the full neighbourhood list stored for the first hexagon.
hexagons_df.loc[0, 'hex_and_neighbors']

['8829a1d755fffff',
 '8829a1d753fffff',
 '8829a1d719fffff',
 '8829a1d757fffff',
 '8829a1d71dfffff',
 '8829a1d751fffff',
 '8829a1d70bfffff']

In [26]:
def calculate_pois(hex_and_neighbors, category, poi_counts=None):
    """Sum the POIs of one category over a set of hexagons.

    Parameters
    ----------
    hex_and_neighbors : iterable of hexagon ids to include in the sum.
    category : value of the 'category' column to count.
    poi_counts : optional DataFrame with the columns 'hex', 'category' and
        'number of pois'. Defaults to the notebook-level
        ``all_hexagons_with_pois`` table, so existing two-argument calls keep
        working.

    Returns
    -------
    The sum of 'number of pois' over all matching rows (0 when none match).
    """
    if poi_counts is None:
        # Resolved at call time, so the function can be defined before the
        # notebook-level table exists.
        poi_counts = all_hexagons_with_pois

    in_neighborhood = poi_counts["hex"].isin(hex_and_neighbors)
    in_category = poi_counts["category"] == category
    return poi_counts.loc[in_neighborhood & in_category, "number of pois"].sum()

In [27]:
# Attach, per hexagon, the POI totals of its neighbourhood for each category.
hexagons_df['sustenance_pois'] = hexagons_df["hex_and_neighbors"].apply(
    calculate_pois, args=('sustenance',)
)
hexagons_df['public_transport_pois'] = hexagons_df["hex_and_neighbors"].apply(
    calculate_pois, args=('public transport',)
)
hexagons_df.head(2)

Unnamed: 0,hex,hex_and_neighbors,sustenance_pois,public_transport_pois
0,8829a1d757fffff,"[8829a1d755fffff, 8829a1d753fffff, 8829a1d719f...",250,133
1,8829a1d755fffff,"[8829a1d755fffff, 8829a1d709fffff, 8829a1d757f...",115,54


In [28]:
# Persist the results of this notebook as pickle files.
pois_df.to_pickle('../00_data/pois.pkl')
hexagons_df.to_pickle('../00_data/hexagons.pkl')
# NOTE(review): this overwrites the same trips.pkl that was loaded at the top
# of the notebook, now including the start_hex / end_hex columns.
trips_df.to_pickle('../00_data/trips.pkl')