In [1]:
import json
import os

import requests
from geoalchemy2 import WKTElement
from shapely.geometry import shape
from tqdm import tqdm

from database import *

In [2]:
def get_api_collections(base_api, timeout=30):
    """
    Returns the collections listed by the collections endpoint of an API.

    The base URL is requested first. Every link with the rel 'data' found in
    its 'links' array is then requested in order, and the 'collections' entry
    of the first successful response is returned.
    For example:
    https://api.hamburg.de/datasets/v1/fahrradhaeuschen/
    returns the collections from:
    https://api.hamburg.de/datasets/v1/fahrradhaeuschen/collections

    Parameters:
        base_api: the base URL of the dataset API
        timeout: seconds to wait for each HTTP request before requests raises

    Returns:
        The value of the 'collections' key of the collections response, or
        None if the base request failed, no 'data' link exists or none of the
        'data' links could be requested successfully.
    """
    # the header we send with our requests
    # NOTE(review): 'Content-Type' describes a request body; 'Accept' is
    # probably what was intended here - confirm before changing
    headers = {'Content-Type': 'application/json'}

    base_response = requests.get(base_api, headers=headers, timeout=timeout)

    # without a successful base response there is nothing to follow
    if base_response.status_code != 200:
        return None

    # find the link(s) with the rel 'data'
    for base_link in base_response.json().get('links', []):
        if base_link.get('rel') != 'data':
            continue

        collections_response = requests.get(base_link['href'], headers=headers, timeout=timeout)

        # a failed request is not fatal: a later 'data' link may still work
        if collections_response.status_code == 200:
            return collections_response.json()['collections']

    return None

def get_items_endpoint(collection):
    """
    Returns the correct items endpoint from a collection response.

    `collection` is a mapping with a 'links' list. The href of the first link
    whose rel is 'items' and whose type is contained in ACCEPTED_JSON_TYPES
    is returned. Links without a 'rel' or 'type' entry are skipped instead of
    raising a KeyError. Returns None if no link matches.
    """

    # find the link with the rel 'items' and an accepted JSON media type
    # (presumably 'application/json' or 'application/geo+json' - see ACCEPTED_JSON_TYPES)
    for collection_link in collection.get('links', []):
        if collection_link.get('rel') != 'items':
            continue

        link_type = collection_link.get('type')
        if link_type is not None and link_type in ACCEPTED_JSON_TYPES:
            return collection_link['href']

    return None
        
def request_items(collection, verbose=False, timeout=60):
    """
    Takes in a Collection object from the database and requests the dataset items from the API.
    Returns the GeoJSON response from the API or None if the request failed.

    Parameters:
        collection: object with a `url` (items endpoint) and an `entries` attribute
        verbose: if True, print how many items were returned
        timeout: seconds to wait for the HTTP request before requests raises
    """

    url = collection.url
    entries = collection.entries

    # the limit parameter controls how many items are returned;
    # it is only sent when the number of entries is known, otherwise the
    # literal text 'None' would end up in the query string
    params = {}
    if entries is not None:
        params['limit'] = entries

    # requests merges `params` into the URL, whether or not it already
    # carries a query string
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        return None

    response_json = response.json()

    if verbose:
        # .get() keeps this diagnostic print from failing on responses
        # that lack the counters
        print(f'{response_json.get("numberReturned")} items returned from {response_json.get("totalFeatures")}')

    return response_json

def get_collection_properties(collection):
    """
    This function takes in a Collection object from the database and returns the set of all properties of the features.

    Returns None if the collection has no features (None or an empty list).
    Features whose `properties` are None or empty contribute no keys.
    """

    features = collection.features

    # covers both a missing (None) and an empty feature list
    if not features:
        return None

    # a set of all keys
    keys = set()

    for feature in features:
        if feature.properties:
            # add all keys to the set
            keys.update(feature.properties.keys())

    return keys

In [3]:
# just some links for testing
# (each dataset base URL is listed exactly once)
links = [
    'https://api.hamburg.de/datasets/v1/perspektive_wohnen',
    'https://api.hamburg.de/datasets/v1/schulen',
    'https://api.hamburg.de/datasets/v1/pflegeeinrichtungen',
    'https://api.hamburg.de/datasets/v1/krankenhaeuser',
    'https://api.hamburg.de/datasets/v1/notunterkuenfte',
    'https://api.hamburg.de/datasets/v1/waermekataster_stromverbrauch',
    'https://api.hamburg.de/datasets/v1/feuerwehrstandorte',
    'https://api.hamburg.de/datasets/v1/deichinformationen',
    'https://api.hamburg.de/datasets/v1/wasserrettungspunkte',
    'https://api.hamburg.de/datasets/v1/uesg',
    'https://api.hamburg.de/datasets/v1/hvv_einzugsbereiche'
]

In [6]:
collections = get_api_collections(links[0])

# one title line, one item-count line and a blank separator line per collection
for collection in collections:
    print(collection['title'], collection['itemCount'], '', sep='\n')

Bestehende Flüchtlingsunterkünfte mit der Perspektive Wohnen
15

Geplante Flüchtlingsunterkünfte mit der Perspektive Wohnen
0



In [5]:
# connect to the database
engine, session = connect_db()

try:
    # get a single collection from the database
    collection = session.query(Collection).first()
finally:
    # close the session even if the query raised
    session.close()

In [7]:
# display the items URL stored on the collection
# NOTE(review): expects `collection` to be a Collection row, not one of the raw API dicts - confirm run order
collection.url

'https://api.hamburg.de/datasets/v1/perspektive_wohnen/collections/perspektive_wohnen_bestehend/items?f=json'

In [3]:
# connect to the database
engine, session = connect_db()

# WARNING: destructive - this cell wipes the database schema managed by Base.
# force drop all tables (every table registered on Base.metadata, including its rows)
Base.metadata.drop_all(engine)

# create the tables again, empty, from the model definitions
Base.metadata.create_all(engine)

In [9]:
# roll back the session's current transaction, e.g. after a failed
# flush/commit, so the session can be used again
session.rollback()

In [14]:
engine, session = connect_db()

# open api_config.json as json; the context manager closes the file again
with open('api_config.json', 'r') as config_file:
    api_configs = json.load(config_file)
dataset_configs = api_configs['datasets']

# all geometries are stored with this SRID
# NOTE(review): assumes the API delivers WGS84 (EPSG:4326) coordinates - confirm
srid = 4326

# iterate over all dataset configs; each dataset is committed on its own
for dataset_config in tqdm(dataset_configs):

    url = dataset_config['url']
    # materialise the keys view into a plain list before handing it to the ORM
    collection_identifiers = list(dataset_config['collections'].keys())

    try:
        # request the dataset from the API and fail loudly on an HTTP error
        dataset_http_response = requests.get(url)
        dataset_http_response.raise_for_status()
        dataset_response = dataset_http_response.json()

        # takes the name from the api_config.json if it exists
        # otherwise use the title from the API response
        # (the API title is only looked up when it is actually needed)
        if 'name' in dataset_config:
            dataset_title = dataset_config['name']
        else:
            dataset_title = dataset_response['title']
        dataset_description = dataset_response['description']

        dataset = Dataset(
            title=dataset_title,
            description=dataset_description,
            url=url,
            collection_identifiers=collection_identifiers
        )

        session.add(dataset)

        # create a new layer
        layer = Layer(
            name=dataset_title
        )

        session.add(layer)

        # get all collections from the collections endpoint
        collections = get_api_collections(url)

        # get_api_collections signals failure with None
        if collections is None:
            raise RuntimeError(f'could not load the collections of {url}')

        for collection in collections:

            collection_id = collection['id']

            # only import the collections listed in the config
            if collection_id not in collection_identifiers:
                continue

            # create a new style
            style = Style(
                name=collection['title'],
                area_color='#000000',
                opacity=0.5
            )

            session.add(style)

            # create a Collection database object
            items_link = get_items_endpoint(collection)

            db_collection = Collection(
                identifier=collection_id,
                title=collection.get('title', 'collection_title'),
                url=items_link,

                dataset=dataset,
                layer=layer,
                style=style
            )

            session.add(db_collection)

            # get all items from the items endpoint (a single request per collection)
            # NOTE(review): no limit parameter is sent, so the API's default
            # page size applies - confirm that this returns every feature
            items_http_response = requests.get(items_link)
            items_http_response.raise_for_status()
            items_response = items_http_response.json()
            features = items_response['features']

            for feature in features:

                # skip features without a geometry
                if feature['geometry'] is None:
                    continue

                # Convert GeoJSON geometry to a Shapely geometry
                shapely_geom = shape(feature['geometry'])

                # Use the WKT form of the Shapely geometry with `geoalchemy2`
                geometry_element = WKTElement(shapely_geom.wkt, srid)

                # create a new Feature database object
                db_feature = Feature(
                    geometry=geometry_element,
                    geometry_type=feature['geometry']['type'],
                    properties=feature['properties'],

                    collection=db_collection
                )

                session.add(db_feature)

        session.commit()

    except Exception:
        # discard the partially built dataset so the session stays usable
        session.rollback()
        raise

100%|██████████| 10/10 [00:33<00:00,  3.37s/it]


In [19]:
# show the property names of the first feature of the first collection
# NOTE(review): needs `collections` to hold Collection rows (with `.features`), not the raw API dicts - confirm run order

collections[0].features[0].properties.keys()

dict_keys(['bezeichnung', 'bezirk', 'stadtteil', 'platzzahl', 'platzzahl_hinweis', 'bemerkung', 'bemerkung_link', 'inbetriebnahme'])

In [25]:
collections = session.query(Collection).all()

# start with an empty set of property names for every collection identifier
keys = {collection.identifier: set() for collection in collections}

for collection in collections:

    # `or []` skips collections whose feature list is None or empty
    for feature in collection.features or []:

        # merge the property names of this feature into the collection's set
        keys[collection.identifier].update(feature.properties.keys())

keys

{'perspektive_wohnen_bestehend': {'bemerkung',
  'bemerkung_link',
  'bezeichnung',
  'bezirk',
  'inbetriebnahme',
  'platzzahl',
  'platzzahl_hinweis',
  'stadtteil'},
 'nicht_staatliche_schulen': {'abschluss',
  'adresse_ort',
  'adresse_strasse_hausnr',
  'anzahl_schueler',
  'anzahl_schueler_gesamt',
  'bezirk',
  'fax',
  'ganztagsform',
  'is_rebbz',
  'kapitelbezeichnung',
  'kernzeitbetreuung',
  'lgv_standortk_erwachsenenbildung',
  'rebbz_homepage',
  'rechtsform',
  'schueleranzahl_schuljahr',
  'schul_email',
  'schul_homepage',
  'schul_id',
  'schul_telefonnr',
  'schulaufsicht',
  'schulform',
  'schulname',
  'schultyp',
  'sozialindex',
  'stadtteil',
  'standort_id',
  'zustaendiges_rebbz'},
 'staatliche_schulen': {'abschluss',
  'adresse_ort',
  'adresse_strasse_hausnr',
  'ansprechp_buero',
  'ansprechp_klasse_1',
  'ansprechp_klasse_5',
  'anzahl_schueler',
  'anzahl_schueler_gesamt',
  'auszeichnung',
  'bezirk',
  'bilingual',
  'fax',
  'ferienbetreuung_anteil'

In [None]:
collections = session.query(Collection).all()

# make sure the output folder exists before writing into it
os.makedirs('inspection', exist_ok=True)

for collection in tqdm(collections):

    # request the collection items
    items_response = request_items(collection)

    # request_items returns None for a failed request; report it instead of
    # writing a file that only contains `null`
    if items_response is None:
        tqdm.write(f'skipping {collection.identifier}: items request failed')
        continue

    # save as .json file under /inspection
    with open(f'inspection/{collection.identifier}.json', 'w') as json_file:
        json.dump(items_response, json_file)