diff --git a/scrapely/extraction/pageobjects.py b/scrapely/extraction/pageobjects.py
index b5f852f..87a41cc 100644
--- a/scrapely/extraction/pageobjects.py
+++ b/scrapely/extraction/pageobjects.py
@@ -96,6 +96,10 @@ def __init__(self, htmlpage, regions):
     def parsed_fragments(self):
         return chain(*(r.parsed_fragments for r in self.regions))
 
+    @property
+    def text_content(self):
+        return chain(*(r.text_content for r in self.regions))  # NOTE(review): chain() over unicode regions would yield single characters - confirm intended
+
 class Page(object):
     """Basic representation of a page. This consists of a reference to a
     dictionary of tokens and an array of raw token ids
diff --git a/scrapely/htmlpage.py b/scrapely/htmlpage.py
index b73e24b..e80b85a 100644
--- a/scrapely/htmlpage.py
+++ b/scrapely/htmlpage.py
@@ -75,6 +75,15 @@ def fragment_data(self, data_fragment):
         """portion of the body corresponding to the HtmlDataFragment"""
         return self.body[data_fragment.start:data_fragment.end]
 
+class TextPage(HtmlPage):
+    """An HtmlPage whose parsed body is a single text HtmlDataFragment. It
+    exposes plain text through the same interface as an html page while
+    avoiding unnecessary reparsing"""
+    def _set_body(self, text): 
+        self._body = text
+        self.parsed_body = [HtmlDataFragment(0, len(self._body), True)]  # one fragment spanning the whole text, flagged as text content
+    body = property(lambda x: x._body, _set_body, doc="raw text for the page")
+
 class HtmlPageRegion(unicode):
     """A Region of an HtmlPage that has been extracted
     """
@@ -87,7 +96,11 @@ def __init__(self, htmlpage, data):
         htmlpage is the original page and data is the raw html
         """
         self.htmlpage = htmlpage
-    
+ 
+    @property
+    def text_content(self):
+        return self  # the region itself is returned (HtmlPageRegion subclasses unicode)
+        
 class HtmlPageParsedRegion(HtmlPageRegion):
     """A region of an HtmlPage that has been extracted
 
@@ -111,20 +124,31 @@ def parsed_fragments(self):
         end = self.end_index + 1 if self.end_index is not None else None
         return self.htmlpage.parsed_body[self.start_index:end]
 
+    @property
+    def text_content(self):
+        """Space-joined text of this region's non-tag fragments flagged is_text_content, as a TextPage subregion"""
+        text_all = u" ".join(self.htmlpage.body[_element.start:_element.end] \
+                for _element in self.parsed_fragments if \
+                not isinstance(_element, HtmlTag) and _element.is_text_content)
+        return TextPage(self.htmlpage.url, self.htmlpage.headers, \
+                text_all, encoding=self.htmlpage.encoding).subregion()
+
+
 class HtmlTagType(object):
     OPEN_TAG = 1
     CLOSE_TAG = 2 
     UNPAIRED_TAG = 3
 
 class HtmlDataFragment(object):
-    __slots__ = ('start', 'end')
+    __slots__ = ('start', 'end', 'is_text_content')
     
-    def __init__(self, start, end):
+    def __init__(self, start, end, is_text_content=False):
         self.start = start
         self.end = end
+        self.is_text_content = is_text_content
         
     def __str__(self):
-        return "<HtmlDataFragment [%s:%s]>" % (self.start, self.end)
+        return "<HtmlDataFragment [%s:%s] is_text_content: %s>" % (self.start, self.end, self.is_text_content)
 
     def __repr__(self):
         return str(self)
@@ -171,7 +195,7 @@ def parse_html(text):
         end = match.end()
             
         if start > prev_end:
-            yield HtmlDataFragment(prev_end, start)
+            yield HtmlDataFragment(prev_end, start, True)
 
         if match.groups()[0] is not None: # comment
             yield HtmlDataFragment(start, end)
@@ -183,7 +207,7 @@ def parse_html(text):
         prev_end = end
     textlen = len(text)
     if prev_end < textlen:
-        yield HtmlDataFragment(prev_end, textlen)
+        yield HtmlDataFragment(prev_end, textlen, True)
 
 def _parse_script(match):
     """parse a <script>...</script> region matched by _HTML_REGEXP"""
diff --git a/scrapely/tests/samples/samples_htmlpage_0.json b/scrapely/tests/samples/samples_htmlpage_0.json
index 08826aa..a93cb0c 100644
--- a/scrapely/tests/samples/samples_htmlpage_0.json
+++ b/scrapely/tests/samples/samples_htmlpage_0.json
@@ -211,15 +211,18 @@
         },
         {       
                 "start": 1073,
-                "end": 1074
+                "end": 1074,
+                "is_text_content": false
         },
         {
                 "start": 1074, 
-                "end": 2052
+                "end": 2052,
+                "is_text_content": false
         },
         {
                 "start": 2052,
-                "end": 2053
+                "end": 2053,
+                "is_text_content": false
         },
         {
                 "attributes": {}, 
diff --git a/scrapely/tests/samples/samples_htmlpage_1.json b/scrapely/tests/samples/samples_htmlpage_1.json
index f94837c..8cc36e6 100644
--- a/scrapely/tests/samples/samples_htmlpage_1.json
+++ b/scrapely/tests/samples/samples_htmlpage_1.json
@@ -212,7 +212,8 @@
         },
         {
                 "start": 1956,
-                "end": 1979
+                "end": 1979,
+                "is_text_content": false
         },
         {
                 "start": 1979,
@@ -274,15 +275,18 @@
         }, 
         {
                 "start": 2282, 
-                "end": 2283
+                "end": 2283,
+                "is_text_content": false
         },
         {
                 "start": 2283,
-                "end": 2437
+                "end": 2437,
+                "is_text_content": false
         },
         {
                 "start": 2437,
-                "end": 2438
+                "end": 2438,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -306,7 +310,8 @@
         }, 
         {
                 "start": 2482, 
-                "end": 2702
+                "end": 2702,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -330,7 +335,8 @@
         }, 
         {
                 "start": 2743, 
-                "end": 2851
+                "end": 2851,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -345,7 +351,8 @@
         },
         {
                 "start": 2861,
-                "end": 2882
+                "end": 2882,
+                "is_text_content": false
         },
         {
                 "start": 2882,
@@ -986,7 +993,8 @@
         }, 
         {
                 "start": 5346, 
-                "end": 5537
+                "end": 5537,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -8851,7 +8859,8 @@
         }, 
         {
                 "start": 30410, 
-                "end": 30920
+                "end": 30920,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -9551,7 +9560,8 @@
         }, 
         {
                 "start": 33433, 
-                "end": 33454
+                "end": 33454,
+                "is_text_content": false
         }, 
         {
                 "attributes": {
@@ -9576,7 +9586,8 @@
         }, 
         {
                 "start": 33669, 
-                "end": 33689
+                "end": 33689,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -21129,7 +21140,8 @@
         }, 
         {
                 "start": 70112, 
-                "end": 70136
+                "end": 70136,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
diff --git a/scrapely/tests/samples/samples_htmlpage_2.json b/scrapely/tests/samples/samples_htmlpage_2.json
index c856f4a..c6f99af 100644
--- a/scrapely/tests/samples/samples_htmlpage_2.json
+++ b/scrapely/tests/samples/samples_htmlpage_2.json
@@ -244,7 +244,8 @@
         },
         {
                 "start": 2182,
-                "end": 2205
+                "end": 2205,
+                "is_text_content": false
         },
         {
                 "start": 2205,
@@ -306,15 +307,18 @@
         }, 
         {
                 "start": 2508, 
-                "end": 2509
+                "end": 2509,
+                "is_text_content": false
         },
         {
                 "start": 2509,
-                "end": 2663
+                "end": 2663,
+                "is_text_content": false
         },
         {
                 "start": 2663,
-                "end": 2664
+                "end": 2664,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -338,7 +342,8 @@
         }, 
         {
                 "start": 2708, 
-                "end": 2928
+                "end": 2928,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -362,7 +367,8 @@
         }, 
         {
                 "start": 2969, 
-                "end": 3077
+                "end": 3077,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -377,7 +383,8 @@
         },
         {
                 "start": 3087,
-                "end": 3108
+                "end": 3108,
+                "is_text_content": false
         },
         {
                 "start": 3108,
@@ -993,7 +1000,8 @@
         }, 
         {
                 "start": 5449, 
-                "end": 5640
+                "end": 5640,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -8828,7 +8836,8 @@
         }, 
         {
                 "start": 30096, 
-                "end": 30606
+                "end": 30606,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -9501,7 +9510,8 @@
         }, 
         {
                 "start": 32812, 
-                "end": 32833
+                "end": 32833,
+                "is_text_content": false
         }, 
         {
                 "attributes": {
@@ -9526,7 +9536,8 @@
         }, 
         {
                 "start": 33044, 
-                "end": 33064
+                "end": 33064,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
@@ -20956,7 +20967,8 @@
         }, 
         {
                 "start": 69651, 
-                "end": 69675
+                "end": 69675,
+                "is_text_content": false
         }, 
         {
                 "attributes": {}, 
diff --git a/scrapely/tests/test_extraction.py b/scrapely/tests/test_extraction.py
index 0b627eb..7af4dcf 100644
--- a/scrapely/tests/test_extraction.py
+++ b/scrapely/tests/test_extraction.py
@@ -915,6 +915,28 @@
 </table>
 """
 
+ANNOTATED_PAGE30 = u"""
+<div data-scrapy-annotate="{&quot;variant&quot;: 0, &quot;generated&quot;: false,
+ &quot;annotations&quot;: {&quot;content&quot;: &quot;phone&quot;}}"><span>029349293</span></div>
+"""
+
+EXTRACT_PAGE30a = u"""
+<div><span style="font-size:100%">Any text</span></div>
+"""
+
+EXTRACT_PAGE30b = u"""
+<div><span style="font-size:100%">029847272</span></div>
+"""
+
+EXTRACT_PAGE30c = u"""
+<div><span><!--item no. 100--></span></div>
+"""
+
+EXTRACT_PAGE30d = u"""
+<div><span><script>var myvar= 10;</script></span></div>
+"""
+
+
 DEFAULT_DESCRIPTOR = ItemDescriptor('test', 
         'item test, removes tags from description attribute',
         [A('description', 'description field without tags', notags)])
@@ -934,6 +956,9 @@
                 contains_any_numbers),
     ])
 
+SAMPLE_DESCRIPTOR3 = ItemDescriptor('test', 
+        'item test',
+        [A('phone', 'phone number', lambda x: contains_any_numbers(x.text_content))])
 
 # A list of (test name, [templates], page, extractors, expected_result)
 TEST_DATA = [
@@ -1178,6 +1203,19 @@
             }
 
     ),
+    ('avoid false positives by allowing to extract only from text content', [ANNOTATED_PAGE30], EXTRACT_PAGE30a, SAMPLE_DESCRIPTOR3,
+        {}
+    ),
+    ('only extract from text content', [ANNOTATED_PAGE30], EXTRACT_PAGE30b, SAMPLE_DESCRIPTOR3,
+        {u'phone': [u'029847272']}
+    ),
+    ('avoid false positives on comments', [ANNOTATED_PAGE30], EXTRACT_PAGE30c, SAMPLE_DESCRIPTOR3,
+        {}
+    ),
+    ('avoid false positives on scripts', [ANNOTATED_PAGE30], EXTRACT_PAGE30d, SAMPLE_DESCRIPTOR3,
+        {}
+    ),
+
 ]
 
 class TestIbl(TestCase):
diff --git a/scrapely/tests/test_htmlpage.py b/scrapely/tests/test_htmlpage.py
index fc06aaf..1d58280 100644
--- a/scrapely/tests/test_htmlpage.py
+++ b/scrapely/tests/test_htmlpage.py
@@ -19,7 +19,7 @@ def _encode_element(el):
         return {"tag": el.tag, "attributes": el.attributes,
             "start": el.start, "end": el.end, "tag_type": el.tag_type}
     if isinstance(el, HtmlDataFragment):
-        return {"start": el.start, "end": el.end}
+        return {"start": el.start, "end": el.end, "is_text_content": el.is_text_content}
     raise TypeError
 
 def _decode_element(dct):
@@ -30,7 +30,7 @@ def _decode_element(dct):
         return HtmlTag(dct["tag_type"], dct["tag"], \
             dct["attributes"], dct["start"], dct["end"])
     if "start" in dct:
-        return HtmlDataFragment(dct["start"], dct["end"])
+        return HtmlDataFragment(dct["start"], dct["end"], dct.get("is_text_content", True))
     return dct
 
 class TestParseHtml(TestCase):
@@ -64,6 +64,12 @@ def _test_sample(self, source, expected_parsed, samplecount=None):
                 self.assertEqual(element.tag, expected.tag)
                 self.assertEqual(element.attributes, expected.attributes)
                 self.assertEqual(element.tag_type, expected.tag_type)
+            if type(element) == HtmlDataFragment:
+                msg = "Got: %s Expected: %s in sample: %d [%d:%d] (%s)" % \
+                        (element.is_text_content, expected.is_text_content, samplecount, element.start, element.end, repr(element_text)) \
+                        if samplecount is not None else None
+                self.assertEqual(element.is_text_content, expected.is_text_content, msg)
+
         if expected_parsed:
             errstring = "Expected %s" % repr(expected_parsed)
             if samplecount is not None:
diff --git a/scrapely/tests/test_htmlpage_data.py b/scrapely/tests/test_htmlpage_data.py
index f54dc9f..72491bc 100644
--- a/scrapely/tests/test_htmlpage_data.py
+++ b/scrapely/tests/test_htmlpage_data.py
@@ -163,11 +163,11 @@
  {'attributes': {}, 'end': 34, 'start': 31, 'tag': u'p', 'tag_type': 1},
  {'end': 51, 'start': 34},
  {'attributes': {}, 'end': 55, 'start': 51, 'tag': u'p', 'tag_type': 2},
- {'end': 70, 'start': 55},
+ {'end': 70, 'start': 55, 'is_text_content': False},
  {'attributes': {u'type': u'text/javascript'}, 'end': 101, 'start': 70, 'tag': u'script', 'tag_type': 1},
- {'end': 104, 'start': 101},
- {'end': 118, 'start': 104},
- {'end': 124, 'start': 118},
+ {'end': 104, 'start': 101, 'is_text_content': False},
+ {'end': 118, 'start': 104, 'is_text_content': False},
+ {'end': 124, 'start': 118, 'is_text_content': False},
  {'attributes': {}, 'end': 133, 'start': 124, 'tag': u'script', 'tag_type': 2},
  {'attributes': {}, 'end': 140, 'start': 133, 'tag': u'body', 'tag_type': 2},
  {'attributes': {}, 'end': 147, 'start': 140, 'tag': u'html', 'tag_type': 2}
@@ -186,7 +186,7 @@
  {'attributes': {}, 'end': 33, 'start': 28, 'tag': u'h1', 'tag_type': 2},
  {'end': 38, 'start': 33},
  {'attributes': {u'type': u'text/javascript'}, 'end': 69, 'start': 38, 'tag': u'script', 'tag_type': 1},
- {'end': 130, 'start': 69},
+ {'end': 130, 'start': 69, 'is_text_content': False},
  {'attributes': {}, 'end': 139, 'start': 130, 'tag': u'script', 'tag_type': 2},
  {'end': 150, 'start': 139},
  {'attributes': {}, 'end': 157, 'start': 150, 'tag': u'body', 'tag_type': 2},
@@ -200,14 +200,14 @@
  {'attributes': {}, 'end': 6, 'start': 0, 'tag': u'html', 'tag_type': 1},
  {'attributes': {}, 'end': 12, 'start': 6, 'tag': u'body', 'tag_type': 1},
  {'attributes': {}, 'end': 20, 'start': 12, 'tag': u'script', 'tag_type': 1},
- {'end': 25, 'start': 20},
+ {'end': 25, 'start': 20, 'is_text_content': False},
  {'attributes': {}, 'end': 34, 'start': 25, 'tag': u'script', 'tag_type': 2},
  {'attributes': {}, 'end': 42, 'start': 34, 'tag': u'script', 'tag_type': 1},
- {'end': 45, 'start': 42},
+ {'end': 45, 'start': 42, 'is_text_content': False},
  {'attributes': {}, 'end': 54, 'start': 45, 'tag': u'script', 'tag_type': 2},
  {'attributes': {}, 'end': 61, 'start': 54, 'tag': u'body', 'tag_type': 2},
- {'end': 76, 'start': 61},
- {'end': 91, 'start': 76},
+ {'end': 76, 'start': 61, 'is_text_content': False},
+ {'end': 91, 'start': 76, 'is_text_content': False},
  {'attributes': {}, 'end': 98, 'start': 91, 'tag': u'html', 'tag_type': 2},
 ]
  
@@ -218,13 +218,13 @@
  {'attributes': {}, 'end': 6, 'start': 0, 'tag': u'html', 'tag_type': 1},
  {'attributes': {}, 'end': 12, 'start': 6, 'tag': u'body', 'tag_type': 1},
  {'attributes': {}, 'end': 20, 'start': 12, 'tag': u'script', 'tag_type': 1},
- {'end': 23, 'start': 20},
- {'end': 37, 'start': 23},
- {'end': 40, 'start': 37},
+ {'end': 23, 'start': 20, 'is_text_content': False},
+ {'end': 37, 'start': 23, 'is_text_content': False},
+ {'end': 40, 'start': 37, 'is_text_content': False},
  {'attributes': {}, 'end': 49, 'start': 40, 'tag': u'script', 'tag_type': 2},
  {'end': 52, 'start': 49},
  {'attributes': {}, 'end': 60, 'start': 52, 'tag': u'script', 'tag_type': 1},
- {'end': 63, 'start': 60},
+ {'end': 63, 'start': 60, 'is_text_content': False},
  {'attributes': {}, 'end': 72, 'start': 63, 'tag': u'script', 'tag_type': 2},
  {'end': 74, 'start': 72},
  {'attributes': {}, 'end': 81, 'start': 74, 'tag': u'body', 'tag_type': 2},
@@ -241,9 +241,9 @@
     {'end': 53, 'start': 15},
     {'attributes': {}, 'end': 57, 'start': 53, 'tag': u'p', 'tag_type': 2},
     {'attributes' : {}, 'end': 65, 'start': 57, 'tag': u'script', 'tag_type': 1},
-    {'end': 76, 'start': 65},
+    {'end': 76, 'start': 65, 'is_text_content': False},
     {'attributes' : {}, 'end': 85, 'start': 76, 'tag': u'script', 'tag_type': 2},
-    {'end': 99, 'start': 85},
+    {'end': 99, 'start': 85, 'is_text_content': False},
 ]
 
 PAGE8 = u"""<a href="/overview.asp?id=277"><img border="0" src="/img/5200814311.jpg" title=\'Vinyl Cornice\'</a></td><table width=\'5\'>"""