In [3]:
import requests, json
from pprint import pprint

### List all datasets

In [2]:
def get_all_datasets():
    """Return the dataset list published by the Porto open-data portal.

    Calls the portal's ``package_list`` API action and returns the ``result``
    entry of its JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    url = "https://opendata.urbanplatform.portodigital.pt/api/3/action/package_list"
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Surface the HTTP status instead of an opaque JSON/KeyError further down.
    response.raise_for_status()
    return response.json()["result"]

In [3]:
# Fetch the dataset list once; later cells index into it (e.g. datasets[1]).
datasets = get_all_datasets()
# print(datasets)

### Get Information for custom dataset, by name
If `url_only` is true, yields the URL of each resource in the dataset; otherwise yields one `(resource name, format, URL)` tuple per resource.

In [4]:
def get_dataset_info(dataset, url_only=True):
    """Yield information about every resource of the dataset ``dataset``.

    Args:
        dataset: dataset identifier, sent as the ``id`` parameter of the
            portal's ``package_show`` API action.
        url_only: when true, yield only each resource's URL; otherwise yield
            ``(name, format, url)`` tuples.

    Yields:
        One item per entry of the response's ``result.resources`` list.
        If the API reports ``success`` as false, the error is printed and
        nothing is yielded.
    """
    url = "https://opendata.urbanplatform.portodigital.pt/api/3/action/package_show"
    # Let requests build the query string so the id is properly URL-encoded.
    j = requests.get(url, params={"id": dataset}, timeout=30).json()
    if not j["success"]:
        print(j["error"])
        return
    for resource in j['result']['resources']:
        if url_only:
            yield resource["url"]
        else:
            yield (resource["name"], resource["format"], resource["url"])

In [5]:
# Keep the first resource URL of the dataset at index 1; later cells query it.
alojamento = list(get_dataset_info(datasets[1]))[0]
print(alojamento)
# Same dataset again, this time as (name, format, url) tuples.
print(list(get_dataset_info(datasets[1], False)))

https://servsig.cm-porto.pt/arcgis/rest/services/OpenData_APD/OpenData_APD/MapServer/35
[('Alojamento Local', 'Esri REST', 'https://servsig.cm-porto.pt/arcgis/rest/services/OpenData_APD/OpenData_APD/MapServer/35')]


In [6]:
# Example of a dataset with several resources
list(get_dataset_info("porto-meteorologia"))

['https://broker.fiware.urbanplatform.portodigital.pt/v2/entities?type=WeatherObserved',
 'https://broker.fiware.urbanplatform.portodigital.pt/v2/entities?type=WeatherForecast']

In [7]:
# Example of a CSV dataset for which the API answers "Access denied"
list(get_dataset_info("apdg-zonas-de-estacionamento-pago"))

{'message': 'Access denied: User  not authorized to read package d5d1106b-5db6-4434-b1b2-9083d9fb1f49', '__type': 'Authorization Error'}


[]

### Get data from url
 * Set the query params
 * Specify how to parse result
 * Define a function that handles the request result and returns a generator of data points

In [8]:
# Baseline query parameters that get_dataset_data starts from before it
# applies the per-call overrides (format, fields, offset).
DEFAULT_REQ_PARAMS = dict(
    where="1=1",
    returnGeometry="true",
    orderByFields="objectid ASC",
    outSR="4326",
)

In [9]:
def remove_useless_from_dict(dic):
    """Return a new dict without the entries whose value is falsy or a single space.

    Note that every falsy value is dropped, so ``0``, ``False`` and empty
    containers are removed along with ``None`` and ``""``.
    """
    return {k: v for k, v in dic.items() if v and v != " "}


def parse_features_geojson(x):
    """Flatten one GeoJSON feature into a single dict.

    The result holds the feature's ``properties`` plus a ``"coordinates"`` key
    taken from ``x["geometry"]["coordinates"]``, with useless values removed.
    The input feature is left untouched.
    """
    # Work on a copy: writing into x["properties"] directly would modify the
    # caller's feature as a side effect.
    features = dict(x["properties"])
    features["coordinates"] = x["geometry"]["coordinates"]
    return remove_useless_from_dict(features)


def parse_features_json(x):
    """Return the ``attributes`` of one JSON feature with useless values removed."""
    return remove_useless_from_dict(x["attributes"])

In [10]:
def get_dataset_data(dataset_url, req_params={"f":"json"}, f="geojson", fields="*", offset=0):
    """Query ``dataset_url + "/query"`` and return an iterator of parsed features.

    NOTE: a later cell redefines ``get_dataset_data``; once that cell has run,
    its definition shadows this one.

    Args:
        dataset_url: base URL of the resource; ``"/query"`` is appended to it.
        req_params: extra query parameters layered over DEFAULT_REQ_PARAMS.
            Its ``"f"`` entry is always overridden by ``f``. The default dict
            is only read, never modified.
        f: response format; ``"geojson"`` selects the GeoJSON parser, any other
            value selects the plain-JSON (``attributes``) parser.
        fields: value sent as ``outFields``.
        offset: value sent as ``resultOffset``.

    Returns:
        A lazy ``map`` over ``data["features"]`` yielding one cleaned dict per
        feature.
    """
    # Copy the defaults: updating DEFAULT_REQ_PARAMS in place would leak one
    # call's req_params into every later call.
    params = dict(DEFAULT_REQ_PARAMS)
    params.update(req_params)
    params["f"] = f
    params["outFields"] = fields
    params["resultOffset"] = offset
    data = requests.get(dataset_url + "/query", params=params).json()
    # default format could be json, but this gives x, y and not lat, lon
    get_attributes = parse_features_geojson if f == "geojson" else parse_features_json
    # Use the parser that matches the requested format (previously the GeoJSON
    # parser was applied unconditionally, which breaks for f != "geojson").
    return map(get_attributes, data["features"])

In [44]:
def get_dataset_data(dataset_url, req_params={"f":"json"}, f="geojson", fields="*", offset=0):
    """Return an iterator of data points fetched from ``dataset_url``.

    URLs containing ``"fiware"`` are requested as-is and every returned entity
    gets flat ``"lon"``/``"lat"`` keys. Any other URL is queried through
    ``dataset_url + "/query"`` and its features are parsed.

    Args:
        dataset_url: resource URL.
        req_params: extra query parameters layered over DEFAULT_REQ_PARAMS
            (ignored for FIWARE URLs). Its ``"f"`` entry is always overridden
            by ``f``. The default dict is only read, never modified.
        f: response format; ``"geojson"`` selects the GeoJSON parser, any other
            value selects the plain-JSON (``attributes``) parser.
        fields: value sent as ``outFields``.
        offset: value sent as ``resultOffset``.

    Returns:
        A lazy ``map`` yielding one dict per entity/feature.
    """
    if "fiware" in dataset_url:
        def get_att(x):
            # Expose the entity's location coordinates as flat "lon"/"lat" keys.
            coordinates = x["location"]["value"]["coordinates"]
            x.update({"lon": coordinates[0], "lat": coordinates[1]})
            return x
        return map(get_att, requests.get(dataset_url).json())
    else:
        # Copy the defaults: updating DEFAULT_REQ_PARAMS in place would leak
        # one call's req_params into every later call.
        params = dict(DEFAULT_REQ_PARAMS)
        params.update(req_params)
        params["f"] = f
        params["outFields"] = fields
        params["resultOffset"] = offset
        data = requests.get(dataset_url + "/query", params=params).json()
        # default format could be json, but this gives x, y and not lat, lon
        get_attributes = parse_features_geojson if f == "geojson" else parse_features_json
        # Use the parser that matches the requested format (previously the
        # GeoJSON parser was applied unconditionally).
        return map(get_attributes, data["features"])

In [45]:
# Broker URL that lists entities of type PointOfInterest.
url="https://broker.fiware.urbanplatform.portodigital.pt/v2/entities?type=PointOfInterest"
# Pull only the first item from the lazy iterator.
next(get_dataset_data(url))

{'id': 'urn:ngsi-ld:PointOfInterest:porto:portalturismo:31:1207',
 'type': 'PointOfInterest',
 'accessibility': {'type': 'Text', 'value': None, 'metadata': {}},
 'accessibility_es': {'type': 'Text', 'value': None, 'metadata': {}},
 'accessibility_pt': {'type': 'Text', 'value': None, 'metadata': {}},
 'address': {'type': 'StructuredValue',
  'value': {'addressLocality': 'Porto',
   'postalCode': '4100-321',
   'streetAddress': 'Rua Manuel Pinto de Azevedo, 64',
   'addressRegion': 'RAMALDE'},
  'metadata': {}},
 'category': {'type': 'StructuredValue', 'value': ['123'], 'metadata': {}},
 'closedOnHoliday': {'type': 'Text', 'value': None, 'metadata': {}},
 'contactPoint': {'type': 'StructuredValue',
  'value': {'url': None,
   'faxNumber': '+351226168337',
   'telephone': '+351226168335',
   'email': 'joana.lencastre@1000paladares.com.pt'},
  'metadata': {}},
 'costDescription': {'type': 'Text', 'value': 'Undefined', 'metadata': {}},
 'costDescription_es': {'type': 'Text',
  'value': 'No 

In [23]:
# A single query; the recorded output below shows it returned 1000 records.
alojamentos = list(get_dataset_data(alojamento))
# NOTE(review): get_dataset_data_full is not defined in any cell shown in this
# notebook excerpt - confirm where it comes from before a Restart & Run All.
total = list(get_dataset_data_full(alojamento))
print("There are %d alojamentos" % len(alojamentos))
print("After all there were %s alojamentos" % len(total))
# print(alojamentos)

There are 1000 alojamentos
After all there were 6657 alojamentos


In [29]:
# Inspect a single record of the full result (index 5000).
print(total[5000])

{'objectid': 5001, 'data_levan': -2209161600000, 'cod_topo': 'RALEG1', 'n_pol': '362', 'nome_aloj': 'Plano de Fuga', 'ano_reg': 2018, 'n_reg': '69821/AL', 'qual_tit': 'Comodatario', 'tit_tipo': 'Pessoa singular (empresário em nome individual)', 'pos_1951': 'S', 'morada': 'Rua da Alegria', 'modalidade': 'Estabelecimento de hospedagem', 'n_policia': '362', 'cod_postal': '4000-035', 'data_reg': 1524441600000, 'data_ab': 1525132800000, 'coordinates': [-8.603564962722654, 41.152752988738456]}


In [31]:
# Count distinct coordinate values; each one is stringified before being put in the set.
print(len(set(map(lambda x: str(x["coordinates"]), total))))

6542
