diff --git a/scrapely/htmlpage.py b/scrapely/htmlpage.py
index b79788c..258e3cc 100644
--- a/scrapely/htmlpage.py
+++ b/scrapely/htmlpage.py
@@ -133,9 +133,11 @@ class HtmlPageParsedRegion(HtmlPageRegion):
fragments contained within this region
"""
def __new__(cls, htmlpage, start_index, end_index):
- text_start = htmlpage.parsed_body[start_index].start
- text_end = htmlpage.parsed_body[end_index or -1].end
- text = htmlpage.body[text_start:text_end]
+ text = htmlpage.body  # default: the whole body, passed through unchanged when it is empty
+ if text:  # only index parsed_body when the body is non-empty
+ text_start = htmlpage.parsed_body[start_index].start
+ text_end = htmlpage.parsed_body[end_index or -1].end  # NOTE(review): a falsy end_index (None, but also 0) selects the last fragment - confirm 0 is intended
+ text = htmlpage.body[text_start:text_end]  # slice from the start fragment's .start to the end fragment's .end
return HtmlPageRegion.__new__(cls, htmlpage, text)
def __init__(self, htmlpage, start_index, end_index):
diff --git a/scrapely/tests/test_htmlpage.py b/scrapely/tests/test_htmlpage.py
index c59ed7f..5d83dc1 100644
--- a/scrapely/tests/test_htmlpage.py
+++ b/scrapely/tests/test_htmlpage.py
@@ -5,7 +5,7 @@
from unittest import TestCase
from scrapely.tests import iter_samples
-from scrapely.htmlpage import parse_html, HtmlTag, HtmlDataFragment
+from scrapely.htmlpage import parse_html, HtmlTag, HtmlDataFragment, HtmlPage
from scrapely.tests.test_htmlpage_data import *
def _encode_element(el):
@@ -135,3 +135,6 @@ def test_malformed2(self):
parsed = [_decode_element(d) for d in PARSED9]
self._test_sample(PAGE9, parsed)
+ def test_empty_subregion(self):
+ htmlpage = HtmlPage(body=u"")  # page built from an empty body
+ self.assertEqual(htmlpage.subregion(), u"")  # subregion() with no arguments must compare equal to the empty string