# Notes

* Bringing in data directly from .osm files looks possible
* Nodes are simple; edges need to be reconstructed from ways
* Relations may contain additional required metadata
* Need to look into the spec for .osm files in more detail to check for any potential blockers

## Required Information

### Nodes

* id
* lat
* lon

### Edges

* src
* dst
* type
* way_name
* way_pos
* TBC

# Decompress .bz2 file

In [1]:
# Location of the compressed OSM extract. The file path is built from the
# folder so the directory is only written down once.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# configurable before sharing the notebook.
source_folder = "/home/ross/repos/fell_finder/data/extracts/osm"
source_file = f"{source_folder}/hampshire-latest.osm.bz2"

# The decompressed copy lives next to the archive, minus the archive suffix.
# Checking the suffix explicitly (instead of blindly slicing off four
# characters) fails loudly if a differently named archive is ever used.
archive_suffix = ".bz2"
if not source_file.endswith(archive_suffix):
    raise ValueError(f"Expected a {archive_suffix} archive, got: {source_file}")
target_file = source_file[: -len(archive_suffix)]

In [28]:
import bz2

In [29]:
# Decompress the whole archive into memory and decode it as UTF-8 text.
# bz2.open is given the path directly and used as the context manager, so the
# decompressor object is closed along with the underlying file when the block
# exits (wrapping an already-open file handle inline left it unclosed).
with bz2.open(source_file, 'rb') as fobj:
    data = fobj.read().decode('utf8')

In [None]:
# Write the decompressed XML to target_file. The encoding is pinned to UTF-8,
# matching how the data was decoded and the encoding named in the XML
# declaration; without it open() uses the platform default encoding.
with open(target_file, 'w', encoding='utf8') as fobj:
    fobj.write(data)

In [None]:
# Show the first 10,000 characters to get a feel for how the XML is laid out.
preview_chars = 10000
print(data[:preview_chars])

<?xml version='1.0' encoding='UTF-8'?>
<osm version="0.6" generator="osmium/1.14.0">
  <bounds minlat="50.69041" minlon="-1.959101" maxlat="51.38712" maxlon="-0.726306"/>
  <node id="132647" version="1" timestamp="2007-02-11T17:49:32Z" lat="50.7689062" lon="-1.8071861"/>
  <node id="132648" version="1" timestamp="2007-02-11T17:49:11Z" lat="50.771157" lon="-1.8065046"/>
  <node id="132649" version="1" timestamp="2005-07-18T10:07:30Z" lat="50.7749443" lon="-1.8077408"/>
  <node id="132652" version="1" timestamp="2006-09-02T12:06:26Z" lat="50.7825406" lon="-1.811734"/>
  <node id="132653" version="1" timestamp="2007-02-11T17:49:23Z" lat="50.78739" lon="-1.8139438"/>
  <node id="132654" version="1" timestamp="2007-02-11T17:49:33Z" lat="50.7922247" lon="-1.8146951"/>
  <node id="132655" version="1" timestamp="2007-02-11T17:49:17Z" lat="50.800057" lon="-1.8156341"/>
  <node id="132656" version="2" timestamp="2022-01-21T14:45:45Z" lat="50.8031635" lon="-1.8152223"/>
  <node id="132657" versio

# Read into element tree

In [3]:
from xml.etree.ElementTree import ElementTree

In [4]:
# Parse the decompressed .osm file at target_file into an ElementTree.
tree = ElementTree(file=target_file)

In [6]:
# Root element of the parsed tree; the node, way and relation lookups below
# all search from here.
root = tree.getroot()

## Extract node details

In [37]:
# Collect every <node> element that is a direct child of the root, then show
# how many there are.
all_nodes = list(root.iterfind('node'))
len(all_nodes)

5322080

In [19]:
# Element tag of the first entry in all_nodes.
all_nodes[0].tag

'node'

In [20]:
# XML attributes of the first node, as a dict.
all_nodes[0].attrib

{'id': '132647',
 'version': '1',
 'timestamp': '2007-02-11T17:49:32Z',
 'lat': '50.7689062',
 'lon': '-1.8071861'}

## Extract way details

In [36]:
# Collect every <way> element that is a direct child of the root, then show
# how many there are.
all_ways = list(root.iterfind('way'))
len(all_ways)

781612

In [31]:
# XML attributes of the first way, as a dict.
all_ways[0].attrib

{'id': '130', 'version': '16', 'timestamp': '2021-02-26T22:24:10Z'}

In [33]:
# List the tag and attributes of each child element of the first way.
first_way = all_ways[0]
for child in first_way:
    print(child.tag, child.attrib)

nd {'ref': '630299'}
nd {'ref': '2058592714'}
nd {'ref': '630302'}
nd {'ref': '2058592713'}
nd {'ref': '2058592712'}
nd {'ref': '630303'}
nd {'ref': '3077175482'}
nd {'ref': '630291'}
nd {'ref': '2058592711'}
nd {'ref': '8462451968'}
nd {'ref': '25504336'}
nd {'ref': '8462451970'}
nd {'ref': '8462451957'}
nd {'ref': '25504337'}
nd {'ref': '25504338'}
nd {'ref': '25504339'}
nd {'ref': '8462451951'}
nd {'ref': '25504340'}
nd {'ref': '8462451949'}
nd {'ref': '1365953'}
tag {'k': 'bicycle', 'v': 'designated'}
tag {'k': 'designation', 'v': 'public_bridleway'}
tag {'k': 'foot', 'v': 'designated'}
tag {'k': 'highway', 'v': 'bridleway'}
tag {'k': 'lit', 'v': 'no'}
tag {'k': 'name', 'v': 'Castle Lane'}
tag {'k': 'source', 'v': 'survey'}
tag {'k': 'source:name', 'v': 'OS_OpenData_StreetView'}
tag {'k': 'surface', 'v': 'unpaved'}


## Examine relations

In [35]:
# Collect every <relation> element that is a direct child of the root, then
# show how many there are.
all_relations = list(root.iterfind('relation'))
len(all_relations)

7712

In [39]:
# XML attributes of the first relation, as a dict.
all_relations[0].attrib

{'id': '2197', 'version': '22', 'timestamp': '2022-10-10T13:28:30Z'}

In [40]:
# List the tag and attributes of each child element of the first relation.
first_relation = all_relations[0]
for child in first_relation:
    print(child.tag, child.attrib)

member {'type': 'way', 'ref': '8584816', 'role': ''}
member {'type': 'way', 'ref': '58793897', 'role': ''}
member {'type': 'way', 'ref': '170869257', 'role': ''}
member {'type': 'way', 'ref': '58793901', 'role': ''}
member {'type': 'way', 'ref': '215102514', 'role': ''}
member {'type': 'way', 'ref': '215102515', 'role': ''}
member {'type': 'way', 'ref': '215102513', 'role': ''}
member {'type': 'way', 'ref': '215102516', 'role': ''}
member {'type': 'way', 'ref': '215102512', 'role': ''}
member {'type': 'way', 'ref': '895507581', 'role': ''}
member {'type': 'way', 'ref': '1102615019', 'role': ''}
tag {'k': 'name', 'v': 'Winchester Road'}
tag {'k': 'ref', 'v': 'B2177'}
tag {'k': 'route', 'v': 'road'}
tag {'k': 'type', 'v': 'route'}


In [41]:
# First child element of the first relation (indexing an Element selects a
# child by position).
all_relations[0][0]

[]