scrapd · rgreinho · Jun 9, 2019 · Jun 7, 2019
diff --git a/scrapd/core/apd.py b/scrapd/core/apd.py
@@ -510,22 +510,17 @@ def parse_page_content(detail_page, notes_parsed=False):
     :rtype: dict
     """
     d = {}
-    searches = [
-        (Fields.LOCATION, re.compile(r'>Location:.*>\s{2,}(?:</strong>)?([^<]+)')),
-    ]
     normalized_detail_page = unicodedata.normalize("NFKD", detail_page)
-    for search in searches:
-        match = re.search(search[1], normalized_detail_page)
-        if match:
-            d[search[0]] = match.groups()[0]
 
     # Parse the `Case` field.
     d[Fields.CASE] = parse_case_field(normalized_detail_page)
     if not d.get(Fields.CASE):
         raise ValueError('A case number is mandatory.')
 
     # Parse the `Crashes` field.
-    d[Fields.CRASHES] = parse_crashes_field(normalized_detail_page)
+    crash_str = parse_crashes_field(normalized_detail_page)
+    if crash_str:
+        d[Fields.CRASHES] = crash_str
 
     # Parse the `Date` field.
     date_field_str = parse_date_field(normalized_detail_page)
@@ -538,7 +533,14 @@ def parse_page_content(detail_page, notes_parsed=False):
         d[Fields.DECEASED] = deceased_field_str
 
     # Parse the `Time` field.
-    d[Fields.TIME] = parse_time_field(normalized_detail_page)
+    time_str = parse_time_field(normalized_detail_page)
+    if time_str:
+        d[Fields.TIME] = time_str
+
+    # Parse the `Location` field.
+    location_str = parse_location_field(normalized_detail_page)
+    if location_str:
+        d[Fields.LOCATION] = location_str
 
     # Fill in Notes from Details page if not in twitter description.
     search_notes = re.compile(r'>Deceased:.*\s{2,}(.|\n)*?<\/p>(.|\n)*?<\/p>')
@@ -652,6 +654,27 @@ def parse_time_field(page):
     return match_pattern(page, time_pattern)
 
 
+def parse_location_field(page):
+    """
+    Extract the location information from the content of the fatality page.
+
+    :param page: the content of the fatality page
+    :type page: str
+    """
+    location_pattern = re.compile(
+        r'''
+        >Location:      # The name of the desired field.
+        .*              # Any run of characters (greedy)
+        >               # The '>' character
+        \s{2,}          # Any whitespace (at least 2)
+        (?:</strong>)   # Non-capturing closing strong tag (made optional by the '?' below)
+        ?([^<]+)        # '?' applies to the group above; then capture one or more characters except '<'.
+        ''',
+        re.VERBOSE,
+    )
+    return match_pattern(page, location_pattern)
+
+
 def match_pattern(text, pattern, group_number=0):
     """
     Match a pattern.

diff --git a/tests/core/test_apd.py b/tests/core/test_apd.py
@@ -674,3 +674,19 @@ def test_parse_deceased_field_00(input_, expected):
     """Ensure the deceased field gets parsed correctly."""
     actual = apd.parse_deceased_field(input_)
     assert actual == expected
+
+
+@pytest.mark.parametrize('input_,expected', (
+    (
+        '>Location:</span></strong>     West William Cannon Drive and Ridge Oak Road</p>',
+        'West William Cannon Drive and Ridge Oak Road',
+    ),
+    (
+        '>Location:</strong>     183 service road westbound and Payton Gin Rd.</p>',
+        '183 service road westbound and Payton Gin Rd.',
+    ),
+))
+def test_parse_location_field_00(input_, expected):
+    """Ensure the location field gets parsed correctly."""
+    actual = apd.parse_location_field(input_)
+    assert actual == expected