diff --git a/poetry.lock b/poetry.lock index 66b0f133..2963d65d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -694,21 +694,21 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "progressbar2" -version = "4.2.0" +version = "4.3.2" description = "A Python Progressbar library to provide visual (yet text based) progress to long running operations." optional = false -python-versions = ">=3.7.0" +python-versions = ">=3.8" files = [ - {file = "progressbar2-4.2.0-py2.py3-none-any.whl", hash = "sha256:1a8e201211f99a85df55f720b3b6da7fb5c8cdef56792c4547205be2de5ea606"}, - {file = "progressbar2-4.2.0.tar.gz", hash = "sha256:1393922fcb64598944ad457569fbeb4b3ac189ef50b5adb9cef3284e87e394ce"}, + {file = "progressbar2-4.3.2-py3-none-any.whl", hash = "sha256:036fa3bd35ae27c92e73fce4fb18aa4ba5090a1880d880cf954ecb75ccd6f3fb"}, + {file = "progressbar2-4.3.2.tar.gz", hash = "sha256:c37e6e1b4e57ab43f95c3d0e8d90061bec140e4fed56b8343183db3aa1e19a52"}, ] [package.dependencies] -python-utils = ">=3.0.0" +python-utils = ">=3.8.1" [package.extras] -docs = ["sphinx (>=1.8.5)"] -tests = ["flake8 (>=3.7.7)", "freezegun (>=0.3.11)", "pytest (>=4.6.9)", "pytest-cov (>=2.6.1)", "pytest-mypy", "sphinx (>=1.8.5)"] +docs = ["sphinx (>=1.8.5)", "sphinx-autodoc-typehints (>=1.6.0)"] +tests = ["dill (>=0.3.6)", "flake8 (>=3.7.7)", "freezegun (>=0.3.11)", "pytest (>=4.6.9)", "pytest-cov (>=2.6.1)", "pytest-mypy", "sphinx (>=1.8.5)"] [[package]] name = "protego" diff --git a/public_law/parsers/usa/colorado/crs.py b/public_law/parsers/usa/colorado/crs.py index c30c03b6..6971f5ce 100644 --- a/public_law/parsers/usa/colorado/crs.py +++ b/public_law/parsers/usa/colorado/crs.py @@ -3,7 +3,7 @@ from scrapy.selector.unified import Selector from scrapy.http.response.xml import XmlResponse -from typing import Any +from typing import Any, Optional from public_law.text import NonemptyString, URL, titleize from public_law.items.crs import Article, Division, Title @@ -19,7 +19,7 @@ def parse_title_bang(dom: XmlResponse, logger: Any) -> Title: return title -def parse_title(dom: XmlResponse, logger: Any) -> Title | None: +def parse_title(dom: XmlResponse, logger: Any) -> Optional[Title]: match(dom.xpath("//TITLE-TEXT/text()").get()): case str(raw_name): name = NonemptyString(titleize(raw_name)) @@ -33,19 +33,23 @@ def parse_title(dom: XmlResponse, logger: Any) -> Title | None: case None: logger.warn(f"Could not the parse title number in {dom.url}") return None - - url_number = number.rjust(2, "0") - source_url = URL(f"https://leg.colorado.gov/sites/default/files/images/olls/crs2022-title-{url_number}.pdf") - - return Title( - name = name, - number = number, - source_url = URL(source_url), - children = _parse_divisions_or_articles(number, dom, logger) - ) + + match _parse_divisions_or_articles(number, dom, logger): + case None: + logger.warn(f"Could not parse divisions or articles in Title {number}") + return None + case children: + url_number = number.rjust(2, "0") + source_url = URL(f"https://leg.colorado.gov/sites/default/files/images/olls/crs2022-title-{url_number}.pdf") + return Title( + name = name, + number = number, + source_url = URL(source_url), + children = children + ) -def _parse_divisions_or_articles(title_number: NonemptyString, dom: Selector | XmlResponse, logger: Any) -> list[Division] | list[Article]: +def _parse_divisions_or_articles(title_number: NonemptyString, dom: Selector | XmlResponse, logger: Any) -> Optional[list[Division] | list[Article]]: division_nodes = dom.xpath("//T-DIV") article_nodes = dom.xpath("//TA-LIST") @@ -54,6 +58,9 @@ def _parse_divisions_or_articles(title_number: NonemptyString, dom: Selector | X elif len(article_nodes) > 0: func = parse_articles else: - raise Exception(f"Could not parse divisions or articles in Title {title_number}. Neither T-DIV nor TA-LIST nodes were found.") + msg = f"""Could not parse divisions or articles in Title {title_number}. + Neither T-DIV nor TA-LIST nodes were found.""" + logger.warn(msg) + return None return func(title_number, dom, logger)