In [1]:
import requests

from database import *

from geoalchemy2 import WKTElement
from shapely.geometry import shape
from tqdm import tqdm

In [2]:
ACCEPTED_JSON_TYPES = ['application/json', 'application/geo+json']

def get_api_collections(base_api):
    """
    Returns the collections endpoint from the API base URL
    For example:
    https://api.hamburg.de/datasets/v1/fahrradhaeuschen/
    returns
    https://api.hamburg.de/datasets/v1/fahrradhaeuschen/collections
    """
    # the header we send with our requests
    headers = {'Content-Type': 'application/json'}

    base_response = requests.get(base_api, headers=headers)

    collection_links = []

    # check if the request was successful
    if base_response.status_code == 200:

        base_json = base_response.json()
        base_links = base_json['links']

        # find the link with the rel 'data'
        for base_link in base_links:
            if base_link['rel'] == 'data':
                collections_api = base_link['href']

                collections_response = requests.get(collections_api, headers=headers)

                # check if the request was successful
                if collections_response.status_code == 200:
                    collections_json = collections_response.json()

                    return collections_json['collections']

def get_items_endpoint(collection):
    """
    Returns the correct items endpoint from a collection response
    """

    # check if the request was successful
    collection_links = collection['links']

    # find the link with the rel 'items'
    # and the type 'application/json' or 'application/geo+json'
    for collection_link in collection_links:
        if collection_link['rel'] == 'items' and collection_link['type'] in ACCEPTED_JSON_TYPES:
            return collection_link['href']

In [3]:
links = [
    'https://api.hamburg.de/datasets/v1/perspektive_wohnen',
    'https://api.hamburg.de/datasets/v1/schulen',
    'https://api.hamburg.de/datasets/v1/pflegeeinrichtungen',
    'https://api.hamburg.de/datasets/v1/krankenhaeuser',
    'https://api.hamburg.de/datasets/v1/notunterkuenfte',
    'https://api.hamburg.de/datasets/v1/waermekataster_stromverbrauch',
    'https://api.hamburg.de/datasets/v1/feuerwehrstandorte',
    'https://api.hamburg.de/datasets/v1/deichinformationen',
    'https://api.hamburg.de/datasets/v1/wasserrettungspunkte',
    'https://api.hamburg.de/datasets/v1/uesg',
    'https://api.hamburg.de/datasets/v1/deichinformationen',
    'https://api.hamburg.de/datasets/v1/hvv_einzugsbereiche'
]

In [10]:
get_api_collections(links[0])[0]

{'title': 'Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen',
 'id': 'perspektive_wohnen_bestehend',
 'extent': {'spatial': {'bbox': [[9.770895066763483,
     53.47530752046935,
     10.139275162346298,
     53.67115976219237]],
   'crs': 'http://www.opengis.net/def/crs/OGC/1.3/CRS84'}},
 'itemType': 'feature',
 'crs': ['#/crs'],
 'storageCrs': 'http://www.opengis.net/def/crs/EPSG/0/25832',
 'links': [{'rel': 'items',
   'type': 'text/html',
   'title': "Access the features in the collection 'Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen' as HTML",
   'href': 'https://api.hamburg.de/datasets/v1/perspektive_wohnen/collections/perspektive_wohnen_bestehend/items?f=html'},
  {'rel': 'items',
   'type': 'application/geo+json',
   'title': "Access the features in the collection 'Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen' as GeoJSON",
   'href': 'https://api.hamburg.de/datasets/v1/perspektive_wohnen/collections/perspektive_wohnen_bestehend/items?f

In [11]:
for link in links:
    print(get_items_endpoint(link))
    print()

TypeError: string indices must be integers

In [12]:
def request_dataset(dataset, verbose=False):
    """
    Takes in a Dataset object from the database and requests the dataset from the API.
    Returns the JSON response from the API or None if the request failed.
    """

    url = dataset.url
    items_link = get_items_endpoint(url)

    if items_link is not None:
        response = requests.get(items_link)
        if response.status_code == 200:
            response_json = response.json()

            if verbose: print(f'{response_json["numberReturned"]} items returned from {response_json["totalFeatures"]}')
    
    return None

In [21]:
x = get_api_collections('https://api.hamburg.de/datasets/v1/perspektive_wohnen/')
x

[{'title': 'Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen',
  'id': 'perspektive_wohnen_bestehend',
  'extent': {'spatial': {'bbox': [[9.770895066763483,
      53.47530752046935,
      10.139275162346298,
      53.67115976219237]],
    'crs': 'http://www.opengis.net/def/crs/OGC/1.3/CRS84'}},
  'itemType': 'feature',
  'crs': ['#/crs'],
  'storageCrs': 'http://www.opengis.net/def/crs/EPSG/0/25832',
  'links': [{'rel': 'items',
    'type': 'text/html',
    'title': "Access the features in the collection 'Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen' as HTML",
    'href': 'https://api.hamburg.de/datasets/v1/perspektive_wohnen/collections/perspektive_wohnen_bestehend/items?f=html'},
   {'rel': 'items',
    'type': 'application/geo+json',
    'title': "Access the features in the collection 'Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen' as GeoJSON",
    'href': 'https://api.hamburg.de/datasets/v1/perspektive_wohnen/collections/perspektive_wohnen

In [8]:
# connect to the database
engine, session = connect_db()

# force drop all tables
Base.metadata.drop_all(engine)

# create the tables
Base.metadata.create_all(engine)

In [4]:
collection_identifiers

['perspektive_wohnen_bestehend']

In [3]:
engine, session = connect_db()

# read all urls from urls.txt
with open('urls.txt', 'r') as urls_file:
    # strip the newline character from each line
    lines = [url.strip() for url in urls_file.readlines()]

for line in tqdm(lines):

    # split the line at the comma
    tokens = line.split(',')
    url = tokens[0]
    collection_identifiers = tokens[1:]

    # request the dataset from the API
    dataset_response = requests.get(url).json()

    dataset_title = dataset_response['title']
    dataset_description = dataset_response['description']

    dataset = Dataset(
        title=dataset_title,
        description=dataset_description,
        url=url,
        collection_identifiers=collection_identifiers
    )

    session.add(dataset)

    # create a new layer
    layer = Layer(
        name=dataset_title
    )

    session.add(layer)

    collections = get_api_collections(url)

    for collection in collections:

        collection_id = collection['id']

        if collection_id in collection_identifiers:

            # create a new style
            style = Style(
                name=collection['title'],
                area_color='#000000',
                opacity=0.5
            )

            session.add(style)

            # create a Collection database object
            items_link = get_items_endpoint(collection)

            db_collection = Collection(
                identifier=collection.get('id', 'collection_identifier'),
                title=collection.get('title', 'collection_title'),
                url=items_link,

                dataset=dataset,
                layer=layer,
                style=style
            )

            session.add(db_collection)

            # get all items from the items endpoint
            items_response = requests.get(items_link).json()
            features = items_response['features']

            for feature in features:

                # Convert GeoJSON geometry to a Shapely geometry
                shapely_geom = shape(feature['geometry'])

                # Use Shapely geometry with `geoalchemy2`
                geometry_type = shapely_geom.geom_type
                wkt_geometry = shapely_geom.wkt
                srid = 4326
                geometry_element = WKTElement(wkt_geometry, srid)
                
                # create a new Feature database object
                db_feature = Feature(
                    geometry=geometry_element,
                    geometry_type=feature['geometry']['type'],
                    properties=feature['properties'],

                    collection=db_collection
                )

                session.add(db_feature)

    session.commit()

  session.commit()
  0%|          | 0/10 [00:00<?, ?it/s]


IntegrityError: (psycopg2.errors.NotNullViolation) null value in column "id" of relation "collections" violates not-null constraint
DETAIL:  Failing row contains (null, perspektive_wohnen_bestehend, Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen, null, https://api.hamburg.de/datasets/v1/perspektive_wohnen/collection..., 3, 3, 3).

[SQL: INSERT INTO collections (identifier, title, url, dataset_id, layer_id, style_id) VALUES (%(identifier)s, %(title)s, %(url)s, %(dataset_id)s, %(layer_id)s, %(style_id)s)]
[parameters: {'identifier': 'perspektive_wohnen_bestehend', 'title': 'Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen', 'url': 'https://api.hamburg.de/datasets/v1/perspektive_wohnen/collections/perspektive_wohnen_bestehend/items?f=json', 'dataset_id': 3, 'layer_id': 3, 'style_id': 3}]
(Background on this error at: https://sqlalche.me/e/20/gkpj)