In [1]:
import requests, json
from pprint import pprint

### List all datasets

In [2]:
def get_all_datasets():
    """Return the list of datasets published on the Porto open-data portal.

    Calls the portal's `package_list` action and returns the value stored
    under the ``"result"`` key of the JSON answer.

    Raises:
        requests.HTTPError: if the portal answers with an HTTP error status.
        requests.Timeout: if the portal does not answer within the timeout.
    """
    url = "https://opendata.urbanplatform.portodigital.pt/api/3/action/package_list"
    # Without a timeout `requests` waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Surface the HTTP status instead of an opaque KeyError / JSON error below.
    response.raise_for_status()
    return response.json()["result"]

In [3]:
# Fetch the dataset list once; later cells pick entries from it by index.
datasets = get_all_datasets()
# print(datasets)

### Get Information for custom dataset, by name
If `url_only` is true, yields the URL of each resource of the dataset; otherwise yields a `(dataset-resource, FORMAT, URL)` tuple for each resource.

In [4]:
def get_dataset_info(dataset, url_only=True):
    """Yield the resources attached to one dataset of the Porto open-data portal.

    Args:
        dataset: dataset identifier, sent as the ``id`` query parameter of the
            portal's `package_show` action.
        url_only: when true (default) yield only each resource's URL;
            otherwise yield a ``(name, format, url)`` tuple per resource.

    Yields:
        One item per entry of ``result["resources"]`` in the portal's answer.
        When the answer has a false ``"success"`` flag, its ``"error"`` payload
        is printed and nothing is yielded.
    """
    url = "https://opendata.urbanplatform.portodigital.pt/api/3/action/package_show"
    # Let `requests` build the query string so that identifiers containing
    # spaces, `&`, `#`, ... are URL-encoded instead of corrupting the request.
    response = requests.get(url, params={"id": dataset}, timeout=30)
    payload = response.json()
    if not payload["success"]:
        print(payload["error"])
        return
    for resource in payload["result"]["resources"]:
        if url_only:
            yield resource["url"]
        else:
            yield (resource["name"], resource["format"], resource["url"])

In [5]:
# Keep the first resource URL of the second listed dataset, then show the same
# dataset's resources as (name, format, URL) tuples.
# NOTE(review): `datasets[1]` depends on the order of the portal's listing —
# confirm it still points at the intended dataset.
alojamento = list(get_dataset_info(datasets[1]))[0]
print(alojamento)
print(list(get_dataset_info(datasets[1], False)))

https://servsig.cm-porto.pt/arcgis/rest/services/OpenData_APD/OpenData_APD/MapServer/35
[('Alojamento Local', 'Esri REST', 'https://servsig.cm-porto.pt/arcgis/rest/services/OpenData_APD/OpenData_APD/MapServer/35')]


In [6]:
# example with several resources
list(get_dataset_info("porto-meteorologia"))

['https://broker.fiware.urbanplatform.portodigital.pt/v2/entities?type=WeatherObserved',
 'https://broker.fiware.urbanplatform.portodigital.pt/v2/entities?type=WeatherForecast']

In [7]:
# example with a CSV dataset that answers "Access denied"
list(get_dataset_info("apdg-zonas-de-estacionamento-pago"))

{'message': 'Access denied: User  not authorized to read package d5d1106b-5db6-4434-b1b2-9083d9fb1f49', '__type': 'Authorization Error'}


[]

### Get data from url
 * Set the query parameters
 * Specify how to parse the result
 * Define a function that handles the request result and returns an iterator of data points

In [8]:
# Baseline query-string parameters for the `/query` requests issued by
# get_dataset_data, which starts from these values for every call.
DEFAULT_REQ_PARAMS = {
    "where": "1=1",                   # condition that is always true: no row filter
    "returnGeometry": "true",
    "orderByFields": "objectid ASC",
    "outSR": "4326",                  # presumably WGS84 lon/lat output — confirm
}

In [9]:
def remove_useless_from_dict(dic):
    """Return a new dict holding only the non-empty entries of ``dic``.

    An entry is dropped when its value is falsy (``None``, ``""``, ``0``,
    ``False``, empty containers, ...) or is exactly the one-space string
    ``" "``. Note that this also discards legitimate zero / ``False`` values.
    """
    kept = {}
    for key, value in dic.items():
        if not value or value == " ":
            continue
        kept[key] = value
    return kept
def parse_features_geojson(x):
    """Flatten one GeoJSON feature into a single dict of its non-empty fields.

    Args:
        x: feature mapping with a ``"properties"`` entry and a ``"geometry"``
            entry (a dict holding ``"coordinates"``, or ``None``).

    Returns:
        A new dict made of the feature's properties plus a ``"coordinates"``
        key, filtered through `remove_useless_from_dict`. ``x`` itself is
        left unmodified.
    """
    # Copy first: writing "coordinates" straight into x["properties"] would
    # modify the caller's feature as a side effect.
    features = dict(x["properties"] or {})
    # A feature may carry a null geometry; treat it as "no coordinates"
    # (the empty value is then filtered out below).
    geometry = x.get("geometry") or {}
    features["coordinates"] = geometry.get("coordinates")
    return remove_useless_from_dict(features)
def parse_features_json(x):
    """Return the non-empty entries of a feature's ``"attributes"`` mapping."""
    attributes = x["attributes"]
    return remove_useless_from_dict(attributes)

In [10]:
def get_dataset_data(dataset_url, req_params={"f":"json"}, f="geojson", fields="*", offset=0):
    """Query one batch of records from an ArcGIS REST layer.

    Args:
        dataset_url: layer URL; ``"/query"`` is appended to it.
        req_params: extra query parameters applied on top of
            DEFAULT_REQ_PARAMS. Its ``"f"`` entry is always overridden by the
            ``f`` argument. The default dict is shared between calls and is
            only read here, never modified.
        f: response format. ``"geojson"`` selects the GeoJSON parser, any
            other value the ``"attributes"``-based JSON parser.
        fields: value sent as ``outFields``.
        offset: value sent as ``resultOffset``.

    Returns:
        A lazy ``map`` iterator yielding one cleaned dict per entry of the
        answer's ``"features"`` list.
    """
    # Build a fresh dict: updating DEFAULT_REQ_PARAMS in place would leak this
    # call's parameters into every later call.
    params = {**DEFAULT_REQ_PARAMS, **req_params}
    # default format could be json, but this gives x, y and not lat, lon
    params["f"] = f
    params["outFields"] = fields
    params["resultOffset"] = offset
    data = requests.get(dataset_url + "/query", params=params, timeout=30).json()
    # Use the parser that matches the requested format; the "json" format
    # carries "attributes" rather than GeoJSON "properties".
    get_attributes = parse_features_geojson if f == "geojson" else parse_features_json
    return map(get_attributes, data["features"])

In [37]:
def get_dataset_data(dataset_url, req_params={"f":"json"}, f="geojson", fields="*", offset=0):
    """Fetch records from a dataset URL.

    URLs containing ``"fiware"`` (broker endpoints) are not parsed yet: the
    raw JSON answer is pretty-printed and ``None`` is returned. Any other URL
    is queried through ``<dataset_url>/query`` as an ArcGIS REST layer.

    Args:
        dataset_url: broker URL or layer URL.
        req_params: extra query parameters applied on top of
            DEFAULT_REQ_PARAMS. Its ``"f"`` entry is always overridden by the
            ``f`` argument. The default dict is shared between calls and is
            only read here, never modified.
        f: response format. ``"geojson"`` selects the GeoJSON parser, any
            other value the ``"attributes"``-based JSON parser.
        fields: value sent as ``outFields``.
        offset: when non-zero, sent as ``resultOffset``; left out of the
            request otherwise.

    Returns:
        ``None`` for broker URLs; otherwise a lazy ``map`` iterator yielding
        one cleaned dict per entry of the answer's ``"features"`` list.
    """
    if "fiware" in dataset_url:
        print("API for broker not parsed yet")
        # Request the URL that was passed in rather than a notebook-level
        # `url` variable, which may be undefined or point somewhere else.
        pprint(requests.get(dataset_url, timeout=30).json())
        return None

    # Build a fresh dict: updating DEFAULT_REQ_PARAMS in place would leak this
    # call's parameters into every later call.
    params = {**DEFAULT_REQ_PARAMS, **req_params}
    # default format could be json, but this gives x, y and not lat, lon
    params["f"] = f
    params["outFields"] = fields
    if offset:
        params["resultOffset"] = offset
    data = requests.get(dataset_url + "/query", params=params, timeout=30).json()
    # Use the parser that matches the requested format; the "json" format
    # carries "attributes" rather than GeoJSON "properties".
    get_attributes = parse_features_geojson if f == "geojson" else parse_features_json
    return map(get_attributes, data["features"])

In [38]:
# Broker URLs (containing "fiware") are not parsed by get_dataset_data yet:
# this call only pretty-prints the raw JSON answer.
url="https://broker.fiware.urbanplatform.portodigital.pt/v2/entities?type=PointOfInterest"
get_dataset_data(url)

API for broker not parsed yet
[{'accessibility': {'metadata': {}, 'type': 'Text', 'value': None},
  'accessibility_es': {'metadata': {}, 'type': 'Text', 'value': None},
  'accessibility_pt': {'metadata': {}, 'type': 'Text', 'value': None},
  'address': {'metadata': {},
              'type': 'StructuredValue',
              'value': {'addressLocality': 'Porto',
                        'addressRegion': 'RAMALDE',
                        'postalCode': '4100-321',
                        'streetAddress': 'Rua Manuel Pinto de Azevedo, 64'}},
  'category': {'metadata': {}, 'type': 'StructuredValue', 'value': ['123']},
  'closedOnHoliday': {'metadata': {}, 'type': 'Text', 'value': None},
  'contactPoint': {'metadata': {},
                   'type': 'StructuredValue',
                   'value': {'email': 'joana.lencastre@1000paladares.com.pt',
                             'faxNumber': '+351226168337',
                             'telephone': '+351226168335',
                             'url

                'type': 'StructuredValue',
                'value': ['Parejas', 'Jóvenes']},
  'target_pt': {'metadata': {},
                'type': 'StructuredValue',
                'value': ['Casais', 'Jovens']},
  'touristic': {'metadata': {}, 'type': 'Text', 'value': 'False'},
  'type': 'PointOfInterest',
  'wifi': {'metadata': {}, 'type': 'Text', 'value': None}},
 {'accessibility': {'metadata': {}, 'type': 'Text', 'value': None},
  'accessibility_es': {'metadata': {}, 'type': 'Text', 'value': None},
  'accessibility_pt': {'metadata': {}, 'type': 'Text', 'value': None},
  'address': {'metadata': {},
              'type': 'StructuredValue',
              'value': {'addressLocality': 'Porto',
                        'addressRegion': 'UNIÃO DAS FREGUESIAS DE CEDOFEITA, '
                                         'SANTO ILDEFONSO, SÉ, MIRAGAIA, SÃO '
                                         'NICOLAU E VITÓRIA',
                        'postalCode': '4049-009',
                        '

                                'type': 'StructuredValue',
                                'value': [{'description': 'Daily: 07:00-23:00',
                                           'description_es': 'Todos los días: '
                                                             '07:00-23:00',
                                           'description_pt': 'Todos os dias: '
                                                             '07:00-23:00'}]},
  'smokingArea': {'metadata': {}, 'type': 'Text', 'value': 'False'},
  'socialNetwork': {'metadata': {},
                    'type': 'StructuredValue',
                    'value': [{'designation': 'Facebook',
                               'url': 'https://www.facebook.com/1872riverhouse'},
                              {'designation': 'Tripadvisor',
                               'url': 'http://www.tripadvisor.com/Hotel_Review-g189180-d6593048-Reviews-1872_'}]},
  'tag': {'metadata': {},
          'type': 'StructuredValue',
          'value

 {'accessibility': {'metadata': {}, 'type': 'Text', 'value': None},
  'accessibility_es': {'metadata': {}, 'type': 'Text', 'value': None},
  'accessibility_pt': {'metadata': {}, 'type': 'Text', 'value': None},
  'address': {'metadata': {},
              'type': 'StructuredValue',
              'value': {'addressLocality': 'Porto',
                        'addressRegion': 'PARANHOS',
                        'postalCode': '4250-288',
                        'streetAddress': 'Rua de Monsanto, 594 Hab B11'}},
  'category': {'metadata': {}, 'type': 'StructuredValue', 'value': ['303']},
  'closedOnHoliday': {'metadata': {}, 'type': 'Text', 'value': 'False'},
  'contactPoint': {'metadata': {},
                   'type': 'StructuredValue',
                   'value': {'email': 'info@3ts.pt',
                             'faxNumber': None,
                             'telephone': '+351 964542474',
                             'url': '3ts.pt'}},
  'costDescription': {'metadata': {}, 'type': 'Te

                            'label_pt': None,
                            'rightsID': 'CC BY-NC-SA - Some Rights Reserved',
                            'rightsID_es': 'CC BY-NC-SA - Algunos Derechos '
                                           'Reservados',
                            'rightsID_pt': 'CC BY-NC-SA - Alguns Direitos '
                                           'Reservados',
                            'title': '6ONLY - GUEST HOUSE',
                            'title_es': '6ONLY - GUEST HOUSE',
                            'title_pt': '6ONLY - GUEST HOUSE',
                            'url': 'http://recursos.visitporto.travel/pois/354_2.jpg'},
                           {'authorID': '6Only - Guest House',
                            'label': None,
                            'label_es': None,
                            'label_pt': None,
                            'rightsID': 'CC BY-NC-SA - Some Rights Reserved',
                            'rightsID_es': 'CC BY-NC-SA - A

In [23]:
# Compare the result of a single query with the complete dataset.
# NOTE(review): `get_dataset_data_full` is not defined in any cell visible in
# this notebook — confirm where it comes from before running on a fresh kernel.
alojamentos = list(get_dataset_data(alojamento))
total = list(get_dataset_data_full(alojamento))
print("There are %d alojamentos" % len(alojamentos))
print("After all there were %s alojamentos" % len(total))
# print(alojamentos)

There are 1000 alojamentos
After all there were 6657 alojamentos


In [29]:
# Inspect one record of the full result, picked by position.
print(total[5000])

{'objectid': 5001, 'data_levan': -2209161600000, 'cod_topo': 'RALEG1', 'n_pol': '362', 'nome_aloj': 'Plano de Fuga', 'ano_reg': 2018, 'n_reg': '69821/AL', 'qual_tit': 'Comodatario', 'tit_tipo': 'Pessoa singular (empresário em nome individual)', 'pos_1951': 'S', 'morada': 'Rua da Alegria', 'modalidade': 'Estabelecimento de hospedagem', 'n_policia': '362', 'cod_postal': '4000-035', 'data_reg': 1524441600000, 'data_ab': 1525132800000, 'coordinates': [-8.603564962722654, 41.152752988738456]}


In [31]:
# Count the distinct "coordinates" values across all records (compared by
# their string form, since the raw values are lists and not hashable).
print(len({str(record["coordinates"]) for record in total}))

6542
