From 61b933cbd7221029af73006322451ff53b84b573 Mon Sep 17 00:00:00 2001
From: chillaranand
Date: Mon, 24 Nov 2014 23:31:25 +0530
Subject: [PATCH] extract code, text as list

---
 nidaba/core/objects.py | 14 ++++++++------
 nidaba/core/parser.py  |  4 ----
 2 files changed, 8 insertions(+), 10 deletions(-)
 delete mode 100644 nidaba/core/parser.py

diff --git a/nidaba/core/objects.py b/nidaba/core/objects.py
index 8f9a740..3e936a9 100644
--- a/nidaba/core/objects.py
+++ b/nidaba/core/objects.py
@@ -1,6 +1,4 @@
-from pyparsing import makeHTMLTags, SkipTo
-
-from .parser import strip_tags
+from bs4 import BeautifulSoup
 
 
 class Base(object):
@@ -72,9 +70,13 @@ def __init__(self, data, answers=None, comments=None):
 
     @classmethod
     def _get_code(cls, html):
-        code_start, code_end = makeHTMLTags('code')
-        code = code_start + SkipTo(code_end).setResultsName('body') + code_end
-        return [token.body for token, start, end in code.scanString(html)]
+        return [i.get_text() for i in BeautifulSoup(html).find_all('code')]
+
+    @classmethod
+    def _get_text(cls, html):
+        soup = BeautifulSoup(html)
+        [s.extract() for s in soup('code')]
+        return soup.get_text()
 
 
 class User(object):
diff --git a/nidaba/core/parser.py b/nidaba/core/parser.py
deleted file mode 100644
index 576d948..0000000
--- a/nidaba/core/parser.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from bs4 import BeautifulSoup
-
-def strip_tags(html):
-    return BeautifulSoup(html).get_text()