diff --git a/scrapely/extraction/regionextract.py b/scrapely/extraction/regionextract.py
index e1e6ffa..57ad751 100644
--- a/scrapely/extraction/regionextract.py
+++ b/scrapely/extraction/regionextract.py
@@ -409,7 +409,7 @@ def _doextract(self, page, region_elements, start_index, end_index, nested_regio
end_index, _, following_data = self._doextract(page, following_regions, start_index, end_index, **kwargs)
if end_index is not None:
pindex, sindex, extracted_data = self._doextract(page, [first_region], start_index, end_index - 1, nested_regions, ignored_regions, **kwargs)
- extracted_data += following_data
+ extracted_data += following_data
elif nested_regions:
_, _, nested_data = self._doextract(page, nested_regions, start_index, end_index, **kwargs)
extracted_data += nested_data
diff --git a/scrapely/tests/test_extraction.py b/scrapely/tests/test_extraction.py
index b5d4600..aedb7c9 100644
--- a/scrapely/tests/test_extraction.py
+++ b/scrapely/tests/test_extraction.py
@@ -936,6 +936,29 @@
"""
+ANNOTATED_PAGE31 = u"""
+
+
+
Product name
+
60.00
+
description
+
features
+

+
+
+
+"""  # annotated page of the "consecutive misses" test entry; paired there with EXTRACT_PAGE31 and SAMPLE_DESCRIPTOR1a
+
+EXTRACT_PAGE31 = u"""
+
+
+
Product name
+
60.00
+

+
+
+
+"""  # page of the "consecutive misses" test entry; unlike ANNOTATED_PAGE31 it shows no "description"/"features" text
DEFAULT_DESCRIPTOR = ItemDescriptor('test',
'item test, removes tags from description attribute',
@@ -950,6 +973,15 @@
]
)
+SAMPLE_DESCRIPTOR1a = ItemDescriptor('test', 'product test', [  # descriptor used by the ANNOTATED_PAGE31 / EXTRACT_PAGE31 test entry
+ A('name', "Product name"),
+ A('price', "Product price, including any discounts and tax or vat",
contains_any_numbers),  # NOTE(review): third argument is presumably the field's extractor -- confirm against A
+ A('image_urls', "URLs for one or more images", image_url),
+ A('description', "The full description of the product", html),  # declared here, yet absent from the expected output of the test entry using this descriptor
+ ]
+ )
+
SAMPLE_DESCRIPTOR2 = ItemDescriptor('test', 'item test', [
A('description', 'description field without tags', notags),
A('price', "Product price, including any discounts and tax or vat",
@@ -1227,6 +1259,13 @@
('avoid false positives on scripts', [ANNOTATED_PAGE30], EXTRACT_PAGE30d, SAMPLE_DESCRIPTOR3,
None
),
+ ('correctly extract regions that follows more than one consecutive misses', [ANNOTATED_PAGE31], EXTRACT_PAGE31, SAMPLE_DESCRIPTOR1a,
+ {  # expected result slot (the preceding entry holds None here); no 'description' key although the descriptor declares one
+ u'price': [u'60.00'],
+ u'name': [u'Product name'],
+ u'image_urls': [['http://example.com/image.jpg']]
+ }
+ ),  # trailing comma, as on the preceding entry, so a later appended tuple cannot parse as a call on this one
]
class TestIbl(TestCase):