From 64a80c7b74360d1794a3945c203606ed8e848943 Mon Sep 17 00:00:00 2001 From: Andrei Varnavskiy Date: Wed, 20 Feb 2013 10:18:28 +0400 Subject: [PATCH] Header tags added in getFormattedText --- goose/parsers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goose/parsers.py b/goose/parsers.py index 3ccb9e5c..4ce2a8c6 100644 --- a/goose/parsers.py +++ b/goose/parsers.py @@ -165,7 +165,7 @@ def getText(self, node): @classmethod def getFormattedText(self, node): - pars = node.cssselect('p') + pars = node.cssselect('h1,h2,h3,h4,h5,p') for p in pars: if p.text is not None: p.text = u'\ufffc ' + p.text return Parser.getText(node)