From 2a8a642402ad1e118548317bc1338cae0c0b0037 Mon Sep 17 00:00:00 2001 From: S4Adam Date: Tue, 23 May 2023 19:57:33 +0200 Subject: [PATCH 1/2] Fetch/transform xml sequence --- python/overpass-api-fetch/README.md | 50 +++++++++++++++++++ .../fetch_structures/main.py | 27 ++++++++++ .../fetch_structures/package.json | 15 ++++++ .../fetch_structures/requirements.txt | 2 + python/overpass-api-fetch/package.json | 21 ++++++++ .../overpass-api-fetch/transform_xml/main.py | 23 +++++++++ .../transform_xml/package.json | 15 ++++++ .../transform_xml/requirements.txt | 1 + .../transform_xml/transform_xml.py | 27 ++++++++++ 9 files changed, 181 insertions(+) create mode 100644 python/overpass-api-fetch/README.md create mode 100644 python/overpass-api-fetch/fetch_structures/main.py create mode 100644 python/overpass-api-fetch/fetch_structures/package.json create mode 100644 python/overpass-api-fetch/fetch_structures/requirements.txt create mode 100644 python/overpass-api-fetch/package.json create mode 100644 python/overpass-api-fetch/transform_xml/main.py create mode 100644 python/overpass-api-fetch/transform_xml/package.json create mode 100644 python/overpass-api-fetch/transform_xml/requirements.txt create mode 100644 python/overpass-api-fetch/transform_xml/transform_xml.py diff --git a/python/overpass-api-fetch/README.md b/python/overpass-api-fetch/README.md new file mode 100644 index 0000000..5679ede --- /dev/null +++ b/python/overpass-api-fetch/README.md @@ -0,0 +1,50 @@ +# Overpass API fetch + +The **fetch_structures** Sequence retrieves structures with `height>50m` from OpenStreetMap using **[Overpass interpreter](https://overpass-api.de/)**. +The **transform_xml** Sequence takes the fetched data as input and strips all tags except for: *coordinates, height* and *building type*. + +Data is exchanged using the `structures-xml` Topic with content-type `text/plain`. + +___ + + +## Running +> 💡**NOTE:** Packaging of Python Sequences is not very "pythonic" for now. 
If you have any ideas on how we should resolve this for your convenience, please let us know [here](https://github.com/scramjetorg/transform-hub/issues/598). + +> ❗ Remember to [set up transform-hub locally](https://docs.scramjet.org/platform/self-hosted-installation) or use the [platform's environment](https://docs.scramjet.org/platform/quick-start) for the sequence deployment. + +Open the terminal and run the following commands: + +```bash +# Install dependencies +npm run build + +# Deploy samples to Hub +npm run deploy:fetch +npm run deploy:transform +``` + +To get it going, pass 4 coordinates to the fetch_structures Sequence: +*Min Latitude, Min Longitude, Max Latitude, Max Longitude* + +The fifth argument, *height*, is optional and defaults to **50**. +```bash +si inst input +# type in e.g. 52.1485 20.7917 52.3667 21.2816 80 +# Warsaw buildings with height>80m +``` +>You can get the coordinates using **[bbox tool](https://norbertrenner.de/osm/bbox.html)** 🔧. + +You can also +Data will then be fetched and passed to the Topic. + +The **transform_xml** Sequence will parse the XML document from the Topic and remove unnecessary tags. +You can edit the `transform_xml/main.py` to save the resulting XML to e.g. a database. +```py + xml_file = transform_xml_data(''.join(chunks)) + context.logger.info('Transformed xml file') + # save to e.g. database... 
+``` +___ + + diff --git a/python/overpass-api-fetch/fetch_structures/main.py b/python/overpass-api-fetch/fetch_structures/main.py new file mode 100644 index 0000000..3625f82 --- /dev/null +++ b/python/overpass-api-fetch/fetch_structures/main.py @@ -0,0 +1,27 @@ +import requests + +# topic to send data to +provides = { + 'provides': 'structures-xml', + 'contentType': 'text/plain' +} +import time + +def fetch_tall_structures(min_lat, min_lon, max_lat, max_lon, height=50): + overpass_url = "http://overpass-api.de/api/interpreter" + query = f""" + [out:xml]; + ( + way(if:number(t["height"]) > {int(height)})({min_lat},{min_lon},{max_lat},{max_lon}); + ); + out geom; + """ + resp = requests.get(overpass_url, params={'data': query}) + print(resp.text) + return resp.text + + +async def run(context, input): + context.logger.info('Waiting for coordinates...') + + return input.map(lambda x: fetch_tall_structures(*(x.split()))) diff --git a/python/overpass-api-fetch/fetch_structures/package.json b/python/overpass-api-fetch/fetch_structures/package.json new file mode 100644 index 0000000..586c859 --- /dev/null +++ b/python/overpass-api-fetch/fetch_structures/package.json @@ -0,0 +1,15 @@ +{ + "name": "geo-data-xml", + "version": "1.0.0", + "main": "main.py", + "author": "AM", + "license": "GPL-3.0", + "description": "Fetch data from OpenStreetMap in a xml format.", + "engines": { + "python3": "3.8.0" + }, + "scripts": { + "build": "mkdir -p dist/__pypackages__/ && pip3 install -t dist/__pypackages__/ -r requirements.txt && cp -t ./dist/ *.py *.json", + "clean": "rm -rf ./dist" + } +} diff --git a/python/overpass-api-fetch/fetch_structures/requirements.txt b/python/overpass-api-fetch/fetch_structures/requirements.txt new file mode 100644 index 0000000..ab8d194 --- /dev/null +++ b/python/overpass-api-fetch/fetch_structures/requirements.txt @@ -0,0 +1,2 @@ +scramjet-framework-py +requests \ No newline at end of file diff --git a/python/overpass-api-fetch/package.json 
b/python/overpass-api-fetch/package.json new file mode 100644 index 0000000..e6973f8 --- /dev/null +++ b/python/overpass-api-fetch/package.json @@ -0,0 +1,21 @@ +{ + "name": "fetch-stuctures-xml", + "version": "1.0.0", + "main": "main.py", + "author": "AM", + "license": "GPL-3.0", + "description": "Removes certain XML tags from a valid xml file", + "engines": { + "python3": "3.8.0" + }, + "scripts": { + "clean": "rm -rf ./dist", + "build": "npm run build:fetch && npm run build:transform", + "build:fetch": "( cd fetch_structures && npm run build && npx si seq pack dist -o ../dist/fetch.tar.gz)", + "build:transform": "(cd transform_xml && npm run build && npx si seq pack dist -o ../dist/transform.tar.gz)", + "prebuild": "mkdir -p dist", + "deploy:fetch": "si seq deploy dist/fetch.tar.gz", + "deploy:transform": "si seq deploy dist/transform.tar.gz" + }, + "dependencies": { "@scramjet/cli": "^0.30.0" } +} diff --git a/python/overpass-api-fetch/transform_xml/main.py b/python/overpass-api-fetch/transform_xml/main.py new file mode 100644 index 0000000..2108f6e --- /dev/null +++ b/python/overpass-api-fetch/transform_xml/main.py @@ -0,0 +1,23 @@ +from transform_xml import transform_xml_data + +# topic to get data from +requires = { + 'requires': 'structures-xml', + 'contentType': 'text/plain' +} +end_tag = '' + + +async def run(context, input): + while True: + chunks = [] + + async for chunk in input: + chunks.append(chunk) + + if end_tag in chunk: + break + + xml_file = transform_xml_data(''.join(chunks)) + context.logger.info('Transformed xml file') + # save to e.g. database... 
diff --git a/python/overpass-api-fetch/transform_xml/package.json b/python/overpass-api-fetch/transform_xml/package.json new file mode 100644 index 0000000..3e98c4f --- /dev/null +++ b/python/overpass-api-fetch/transform_xml/package.json @@ -0,0 +1,15 @@ +{ + "name": "transform-xml", + "version": "1.0.0", + "main": "main.py", + "author": "AM", + "license": "GPL-3.0", + "description": "Removes certain XML tags from a valid xml file", + "engines": { + "python3": "3.8.0" + }, + "scripts": { + "build": "mkdir -p dist/__pypackages__/ && pip3 install -t dist/__pypackages__/ -r requirements.txt && cp -t ./dist/ *.py *.json", + "clean": "rm -rf ./dist" + } +} diff --git a/python/overpass-api-fetch/transform_xml/requirements.txt b/python/overpass-api-fetch/transform_xml/requirements.txt new file mode 100644 index 0000000..f16f13c --- /dev/null +++ b/python/overpass-api-fetch/transform_xml/requirements.txt @@ -0,0 +1 @@ +scramjet-framework-py \ No newline at end of file diff --git a/python/overpass-api-fetch/transform_xml/transform_xml.py b/python/overpass-api-fetch/transform_xml/transform_xml.py new file mode 100644 index 0000000..5259f89 --- /dev/null +++ b/python/overpass-api-fetch/transform_xml/transform_xml.py @@ -0,0 +1,27 @@ +import xml.etree.ElementTree as ET + + +def transform_xml_data(xml_string): + tags_to_keep = ['bounds', 'nd'] + attr_to_keep = ['height', 'building'] + xml = ET.fromstring(xml_string) + + for way in xml.findall('way'): + # remove all 'ref=' elements + for nd in way.findall('nd'): + del nd.attrib['ref'] + + # remove all other tags except the specified ones + for tag in way.findall('tag'): + if tag.attrib.get('k') not in attr_to_keep: + way.remove(tag) + else: + tags_to_keep.append(tag.tag) + + for tag in way: + if tag.tag not in tags_to_keep: + way.remove(tag) + + # convert the modified XML to string + out_xml = ET.tostring(xml, encoding='utf-8') + return out_xml From a10dd62eb54dfa21cc7d2cb9e3297ca17d40e33d Mon Sep 17 00:00:00 2001 From: S4Adam 
Date: Tue, 23 May 2023 20:06:11 +0200 Subject: [PATCH 2/2] Remove leftovers --- python/overpass-api-fetch/fetch_structures/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/overpass-api-fetch/fetch_structures/main.py b/python/overpass-api-fetch/fetch_structures/main.py index 3625f82..8d4c9e1 100644 --- a/python/overpass-api-fetch/fetch_structures/main.py +++ b/python/overpass-api-fetch/fetch_structures/main.py @@ -5,7 +5,7 @@ 'provides': 'structures-xml', 'contentType': 'text/plain' } -import time + def fetch_tall_structures(min_lat, min_lon, max_lat, max_lon, height=50): overpass_url = "http://overpass-api.de/api/interpreter" @@ -17,7 +17,6 @@ def fetch_tall_structures(min_lat, min_lon, max_lat, max_lon, height=50): out geom; """ resp = requests.get(overpass_url, params={'data': query}) - print(resp.text) return resp.text