# Singapore Road Data
This is a notebook that will guide you through:
1. Obtaining data from OpenStreetMap
2. Generating useful output from the data
  - A visualisation of all the points
  - A text file with the coordinates, road names and number of lanes
  - A dumped dictionary and kd-tree, which can be used to quickly get information regarding a single point


In [12]:
# External imports
import overpy as op
import sys
import concurrent.futures
from tqdm import tqdm

# Internal imports (classes, functions)
from road_point import RoadPoint
from output_text import export_road_points
from output_scatter import make_gmap
from output_kd_tree import make_kdtree
from speed_limit_scraper import get_speed_limit

In [13]:
# Get API
api = op.Overpass()

# Query
# Read the Overpass query from disk; the `with` block makes sure the
# file handle is closed as soon as the text has been read.
with open('query.xml') as query_file:
    query = query_file.read()
result = api.query(query)

Get all the ways (roads), remove duplicate nodes, and convert each node to a `RoadPoint`. `RoadPoint` is a class that holds all the information we want about a coordinate.

In [14]:
# Get ways (roads or part of roads) 
# which have road names and lane count.
# Note that if we exclude the filters, there are many more 
# points to process (or even just lane count).
all_ways = result.get_ways()
ways = [w for w in all_ways if 'name' in w.tags and 'lanes' in w.tags]

# Remove duplicates in nodes (e.g 2 connected ways sharing a node).
# Alongside each unique node we remember a way it belongs to, so that the
# road name and lane count can be looked up per node later on.
node_dict = {}
way_by_node_id = {}
for way in ways:
    for node in way.nodes:
        node_dict[node.id] = node
        # A node shared by several ways keeps the first way it was seen on.
        way_by_node_id.setdefault(node.id, way)


# Converts a node to a RoadPoint, without speed_limit
# This is done to prevent overloading the concurrent
# step with the large overpy node object.
def node_to_RoadPoint(node, way=None):
    """Build a RoadPoint (without speed limit) from a node.

    Args:
        node: Node providing `id`, `lat` and `lon`.
        way: Way whose 'name' and 'lanes' tags describe the node. When
            omitted, the way recorded for this node in `way_by_node_id`
            is used.

    Returns:
        A RoadPoint built from the node id, the latitude and longitude
        converted to float, and the way's 'name' and 'lanes' tag values.

    Raises:
        KeyError: if `way` is omitted and the node has no recorded way.
    """
    # Resolve the way explicitly rather than relying on whatever the
    # module-level loop variable `way` was last bound to; otherwise every
    # point would get the name and lane count of the final way processed.
    if way is None:
        way = way_by_node_id[node.id]
    lat = float(node.lat)
    lon = float(node.lon)
    road_name = way.tags.get('name')
    lane_count = way.tags.get('lanes')
    return RoadPoint(node.id, lat, lon, road_name, lane_count)

unique_nodes = list(node_dict.values())
road_points_without_speedlimit = [node_to_RoadPoint(n) for n in unique_nodes]

Now, we use concurrency to add the speed limit to every coordinate. We do this because fetching each speed limit takes a while, so processing all the points sequentially would be too slow.

**Proceed to the next step once the success statement prints**

In [None]:
# Process a road point and add a speed limit property.
# Abstracted to enable concurrency for processing nodes.
def append_speed_limit(road_point):
    """Attach a speed limit to `road_point` and advance the progress bar.

    Looks up the speed limit for the point's `road_name` via
    `get_speed_limit`, stores it on `road_point.speed_limit`, and ticks the
    module-level progress bar `pbar` by one.

    Returns:
        The same `road_point` object, now carrying `speed_limit`.
    """
    road_point.speed_limit = get_speed_limit(road_point.road_name)
    pbar.update(1)
    return road_point


# The progress bar and the thread pool are both managed by `with` blocks so
# that the bar is closed and the worker threads are shut down even if one of
# the lookups raises.
with tqdm(total=len(road_points_without_speedlimit)) as pbar:
    print("Starting to scrape speed limits!")
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # list() drains the lazy map iterator, so every point has been
        # processed (or an exception re-raised) before the pool is left.
        road_points = list(executor.map(append_speed_limit, road_points_without_speedlimit))
print("Success!")


  0%|          | 0/2880 [00:00<?, ?it/s][A

Starting to scrape speed limits!



  0%|          | 1/2880 [00:00<19:37,  2.44it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it/s][A
  1%|          | 20/2880 [00:01<05:58,  7.97it

  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 162/2880 [00:14<06:45,  6.69it/s][A
  6%|▌         | 163/2880 [00:14<06:49,  6.64it/s][A
  6%|▌         | 170/2880 [00:14<06:19,  7.14it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [00:15<14:15,  3.15it/s][A
  6%|▋         | 183/2880 [0

 11%|█         | 306/2880 [00:44<06:28,  6.63it/s][A
 11%|█         | 306/2880 [00:44<06:28,  6.63it/s][A
 11%|█         | 308/2880 [00:44<08:47,  4.87it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [00:45<15:19,  2.78it/s][A
 11%|█▏        | 326/2880 [0

 16%|█▌        | 466/2880 [01:00<13:04,  3.08it/s][A
 16%|█▌        | 466/2880 [01:00<13:04,  3.08it/s][A
 16%|█▌        | 466/2880 [01:00<13:04,  3.08it/s][A
 16%|█▌        | 466/2880 [01:00<13:04,  3.08it/s][A
 16%|█▌        | 466/2880 [01:00<13:04,  3.08it/s][A
 16%|█▌        | 466/2880 [01:00<13:04,  3.08it/s][A
 16%|█▌        | 466/2880 [01:00<13:04,  3.08it/s][A
 16%|█▋        | 469/2880 [01:00<13:30,  2.97it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [01:01<19:18,  2.07it/s][A
 17%|█▋        | 483/2880 [0

 21%|██        | 611/2880 [01:15<17:35,  2.15it/s][A
 21%|██        | 611/2880 [01:15<17:35,  2.15it/s][A
 21%|██        | 611/2880 [01:15<17:35,  2.15it/s][A
 21%|██▏       | 612/2880 [01:15<15:11,  2.49it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [01:16<03:50,  9.77it/s][A
 22%|██▏       | 631/2880 [0

 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 773/2880 [01:31<03:28, 10.09it/s][A
 27%|██▋       | 775/2880 [01:31<04:38,  7.56it/s][A
 28%|██▊       | 793/2880 [01:32<12:22,  2.81it/s][A
 28%|██▊       | 794/2880 [01:32<14:04,  2.47it/s][A
 28%|██▊       | 794/2880 [01:32<14:04,  2.47it/s][A
 28%|██▊       | 794/2880 [01:32<14:04,  2.47it/s][A
 28%|██▊       | 794/2880 [0

 32%|███▏      | 917/2880 [01:46<04:42,  6.95it/s][A
 32%|███▏      | 918/2880 [01:46<05:35,  5.85it/s][A
 32%|███▏      | 920/2880 [01:46<06:42,  4.86it/s][A
 32%|███▎      | 936/2880 [01:47<06:47,  4.77it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [01:47<05:22,  6.02it/s][A
 33%|███▎      | 938/2880 [0

Now, we export all the points, in the different formats.

In [None]:
# Export the road points as a text file (coordinates.txt)
export_road_points(road_points)
# Dictionary and KD tree dump (kdtree.txt and coords_to_roadpoints.txt)
kdtree = make_kdtree(road_points)
# Map visualisation of the points as an HTML file (my_map.html)
make_gmap(road_points)
