Skip to content

Commit

Permalink
Issue143 (#152)
Browse files Browse the repository at this point in the history
* retry with stamina, use wfs module in bc2pg module to get retries, log http error codes

* retry wcs if needed

* update requirements
  • Loading branch information
smnorris committed Nov 17, 2023
1 parent 9320e38 commit 62263c1
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 154 deletions.
34 changes: 13 additions & 21 deletions bcdata/bc2pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,18 @@

import geopandas as gpd
import numpy
import stamina
from geoalchemy2 import Geometry
from shapely.geometry.linestring import LineString
from shapely.geometry.multilinestring import MultiLineString
from shapely.geometry.multipoint import MultiPoint
from shapely.geometry.multipolygon import MultiPolygon
from shapely.geometry.point import Point
from shapely.geometry.polygon import Polygon
from tenacity import retry
from tenacity.stop import stop_after_delay
from tenacity.wait import wait_random_exponential

import bcdata
from bcdata.database import Database
from bcdata.wfs import BCWFS

log = logging.getLogger(__name__)

Expand All @@ -33,16 +32,6 @@
]


@retry(stop=stop_after_delay(120), wait=wait_random_exponential(multiplier=1, max=60))
def _download(url):
"""offload download requests to geopandas, using tenacity to handle unsuccessful requests"""
try:
data = gpd.read_file(url)
except Exception:
log.debug("WFS/network error")
return data


def bc2pg(
dataset,
db_url,
Expand All @@ -68,6 +57,9 @@ def bc2pg(
# connect to target db
db = Database(db_url)

# create wfs service interface instance
WFS = BCWFS()

# define requests
urls = bcdata.define_requests(
dataset,
Expand All @@ -90,7 +82,7 @@ def bc2pg(

# if not appending, define and create table
else:
# get info about the table from catalouge
# get info about the table from catalogue
table_definition = bcdata.get_table_definition(dataset)

if not table_definition["schema"]:
Expand All @@ -100,9 +92,9 @@ def bc2pg(

# if geometry type is not provided, determine type by making the first request
if not geometry_type:
log.info(urls[0])
df = _download(urls[0])
log.info(_download.retry.statistics) # log the retry stats
df = WFS.make_requests(
[urls[0]], as_gdf=True, crs="epsg:3005", lowercase=True
)
geometry_type = df.geom_type.unique()[0] # keep only the first type
if numpy.any(
df.has_z.unique()[0]
Expand Down Expand Up @@ -143,11 +135,11 @@ def bc2pg(
if not schema_only:
# loop through the requests
for n, url in enumerate(urls):
# if not downloaded above when checking geom type, dow
# if first url not downloaded above when checking geom type, do now
if df is None:
log.info(url)
df = _download(url)
log.info(_download.retry.statistics) # log the retry stats
df = WFS.make_requests(
[url], as_gdf=True, crs="epsg:3005", lowercase=True
)
# tidy the resulting dataframe
df = df.rename_geometry("geom")
# lowercasify
Expand Down
45 changes: 27 additions & 18 deletions bcdata/bcdc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
from urllib.parse import urlparse

import requests
from tenacity import retry
from tenacity.stop import stop_after_delay
from tenacity.wait import wait_random_exponential
import stamina

import bcdata

Expand All @@ -14,33 +12,44 @@
BCDC_API_URL = "https://catalogue.data.gov.bc.ca/api/3/action/"


@retry(stop=stop_after_delay(120), wait=wait_random_exponential(multiplier=1, max=60))
class ServiceException(Exception):
pass


@stamina.retry(on=requests.HTTPError, timeout=60)
def _package_show(package):
try:
r = requests.get(BCDC_API_URL + "package_show", params={"id": package})
except Exception:
log.error("BCDC API Error")
r = requests.get(BCDC_API_URL + "package_show", params={"id": package})
if r.status_code in [400, 404]:
log.error(f"HTTP error {r.status_code}")
log.error(f"Response headers: {r.headers}")
log.error(f"Response text: {r.text}")
raise ValueError(f"Dataset {package} not found in DataBC API list")
if r.status_code in [500, 502, 503, 504]: # presumed serivce error, retry
log.warning(f"HTTP error: {r.status_code}")
log.warning(f"Response headers: {r.headers}")
log.warning(f"Response text: {r.text}")
r.raise_for_status()
else:
log.debug(r.text)
return r


@retry(stop=stop_after_delay(120), wait=wait_random_exponential(multiplier=1, max=60))
@stamina.retry(on=requests.HTTPError, timeout=60)
def _table_definition(table_name):
try:
r = requests.get(BCDC_API_URL + "package_search", params={"q": table_name})
status_code = r.status_code
if status_code != 200:
raise ValueError(f"Error searching BC Data Catalogue API: {status_code}")
except Exception:
log.error("BCDC API Error")
r = requests.get(BCDC_API_URL + "package_search", params={"q": table_name})
if r.status_code != 200:
log.warning(r.headers)
if r.status_code in [400, 401, 404]:
raise ServiceException(r.text) # presumed request error
if r.status_code in [500, 502, 503, 504]: # presumed serivce error, retry
r.raise_for_status()
return r


def get_table_name(package):
"""Query DataBC API to find WFS table/layer name for given package"""
package = package.lower() # package names are lowercase
r = _package_show(package)
if r.status_code != 200:
raise ValueError("{d} is not present in DataBC API list".format(d=package))
result = r.json()["result"]
# Because the object_name in the result json is not a 100% reliable key
# for WFS requests, parse URL in WMS resource(s).
Expand Down
62 changes: 38 additions & 24 deletions bcdata/wcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import rasterio
import requests
import stamina

import bcdata

Expand All @@ -11,6 +12,10 @@
WCS_URL = "https://openmaps.gov.bc.ca/om/wcs"


class ServiceException(Exception):
pass


def align_bounds(bounds):
"""
Adjust input bounds to align with Hectares BC raster
Expand All @@ -21,6 +26,28 @@ def align_bounds(bounds):
return (ll[0], ll[1], ur[0], ur[1])


@stamina.retry(on=requests.HTTPError, timeout=60)
def make_request(payload):
r = requests.get(
WCS_URL,
params=payload,
headers={"User-Agent": "bcdata.py ({bcdata.__version__})"},
)
log.debug(r.url)
if r.status_code == 200:
return r
elif r.status_code in [400, 401, 404]:
log.error(f"HTTP error {r.status_code}")
log.error(f"Response headers: {r.headers}")
log.error(f"Response text: {r.text}")
raise ServiceException(r.text) # presumed request error
elif r.status_code in [500, 502, 503, 504]: # presumed serivce error, retry
log.warning(f"HTTP error: {r.status_code}, retrying")
log.warning(f"Response headers: {r.headers}")
log.warning(f"Response text: {r.text}")
r.raise_for_status()


def get_dem(
bounds,
out_file="dem.tif",
Expand Down Expand Up @@ -87,35 +114,22 @@ def get_dem(

# request data from WCS
log.debug(payload)
r = requests.get(
WCS_URL,
params=payload,
headers={"User-Agent": "bcdata.py ({bcdata.__version__})"},
)
log.debug(r.headers)
if r.status_code == 200:
if r.headers["Content-Type"] == "image/tiff":
with open(out_file, "wb") as file:
file.write(r.content)
elif r.headers["Content-Type"] == "application/vnd.ogc.se_xml;charset=UTF-8":
raise RuntimeError(
"WCS request {} failed with error {}".format(
r.url, str(r.content.decode("utf-8"))
)
)
else:
raise RuntimeError(
"WCS request {} failed, content type {}".format(
r.url, str(r.headers["Content-Type"])
)
r = make_request(payload)
if r.headers["Content-Type"] == "image/tiff":
with open(out_file, "wb") as file:
file.write(r.content)
elif r.headers["Content-Type"] == "application/vnd.ogc.se_xml;charset=UTF-8":
raise RuntimeError(
"WCS request {} failed with error {}".format(
r.url, str(r.content.decode("utf-8"))
)
)
else:
raise RuntimeError(
"WCS request {} failed with status code {}".format(
r.url, str(r.status_code)
"WCS request {} failed, content type {}".format(
r.url, str(r.headers["Content-Type"])
)
)

if as_rasterio:
return rasterio.open(out_file, "r")
else:
Expand Down

0 comments on commit 62263c1

Please sign in to comment.