diff --git a/app/blog_parser.py b/app/blog_parser.py index 75c198d..c991215 100644 --- a/app/blog_parser.py +++ b/app/blog_parser.py @@ -21,6 +21,7 @@ from pygments import highlight from pygments.lexers import get_lexer_by_name from pygments.formatters import html +import bleach class Blog_Parser(): @@ -29,25 +30,20 @@ class Blog_Parser(): tags = '' summary_text = '' content = '' - body = '' header = '' + re_input = r'^[\s\S]*?---([\s\S]*?)---([\s\S]*)$' + re_header = r'^[\s\S]*?title:\s*([\w\W]*?)\n[\s\S]*?(category|categories):\s*([\w\W]*?)\n[\s\S]*?tags:\s*[[]([\w\W]*?)[]][\s\S]*?$' + re_body = r'^([\s\S]*?)([\s\S]*?)$' - def __init__(self, input): - self.input = input - self.re_input = r'^[\s\S]*?---([\s\S]*?)---([\s\S]*)$' - self.re_header = r'^[\s\S]*?title:\s*([\w\W]*?)\n[\s\S]*?(category|categories):\s*([\w\W]*?)\n[\s\S]*?tags:\s*[[]([\w\W]*?)[]][\s\S]*?$' - self.re_body = r'^([\s\S]*?)([\s\S]*?)$' - self.parse() - - def parse(self): + def parse(self, input): # 先尝试将输入分成 header 和 body(也许包含ummary)两部分 - m = re.match(self.re_input, self.input) + m = re.match(self.re_input, input) try: self.header = m.group(1).strip() self.body = m.group(2).strip() except: - self.body = self.input + self.body = input raise ParsingError('wrong when parsing input') # 尝试从 header 中提取 title, category,tags m = re.match(self.re_header, self.header) @@ -90,7 +86,18 @@ def block_code(self, code, lang): def parse_markdown(markdown_text): + # html中允许的html标签 + allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', + 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'table', 'thead', 'tbody', 'tr', 'td', 'th', 'del', 'img', 'div'] + allowed_attrs = { + '*': ['class'], + 'a': ['href', 'rel'], + 'img': ['alt', 'src'], + } # renderer for code highlight renderer = HighlightRenderer() parser = mistune.Markdown(renderer=renderer) - return parser(markdown_text) + return bleach.clean( + parser(markdown_text), + tags=allowed_tags, attributes=allowed_attrs, strip=True) diff --git a/app/models.py b/app/models.py index 752ebbd..591f691 100644 --- a/app/models.py +++ b/app/models.py @@ -6,7 +6,6 @@ from flask import current_app, url_for from datetime import datetime from .blog_parser import parse_markdown -import bleach from .blog_parser import Blog_Parser from .exceptions import ParsingError @@ -161,31 +160,19 @@ class Blog(db.Model): author_id = db.Column(db.Integer, db.ForeignKey('users.id')) # 是否草稿 draft = db.Column(db.Boolean, default=False) - # 当body属性更新时,自动从body中解析出其余各项属性并更新(或新建) @staticmethod def on_changed_body(target, value, oldvalue, initiator): - blog_parser = Blog_Parser(value) - # html中允许的html标签 - allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', - 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', - 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'table', 'thead', 'tbody', 'tr', 'td', 'th', 'del', 'img', 'div'] - allowed_attrs = { - '*': ['class'], - 'a': ['href', 'rel'], - 'img': ['alt', 'src'], - } + blog_parser = Blog_Parser() # 利用parse函数从value(也就是被改变的body)值中解析出各项属性并赋值 try: - title, category_name, tag_names, summary_text, content = blog_parser.parse() + title, category_name, tag_names, summary_text, content = blog_parser.parse( + value) target.title = title target.summary_text = summary_text target.summary = parse_markdown(summary_text) - target.html = bleach.clean( - parse_markdown(content), - tags=allowed_tags, attributes=allowed_attrs, strip=True) - # target.html = parse_markdown(content) + target.html = parse_markdown(content) target.change_category(Category.generate_category( category_name, target.author_id)) target.change_tags(Tag.generate_tags([t.strip() for t in tag_names.strip( diff --git a/data-dev.sqlite b/data-dev.sqlite index 31ad576..f63e5ce 100644 Binary files a/data-dev.sqlite and b/data-dev.sqlite differ diff --git a/tests/test_blog_model.py b/tests/test_blog_model.py index 6c59523..8410c0c 100644 --- a/tests/test_blog_model.py +++ b/tests/test_blog_model.py @@ -42,24 +42,18 @@ def test_on_changed_body(self): def test_body_parsing_error(self): try: blog = Blog(body='wrong format') - db.session.add(blog) - db.session.commit() self.assertTrue(False) except ParsingError as e: - self.assertTrue(e.args[0] == 'wrong when parsing input') + self.assertTrue(str(e) == 'wrong when parsing input') try: blog = Blog( body='---wrong header---\n\n\n') - db.session.add(blog) - db.session.commit() self.assertTrue(False) except ParsingError as e: - self.assertTrue(e.args[0] == 'wrong when parsing header') + self.assertTrue(str(e) == 'wrong when parsing header') try: blog = Blog( body='---\ntitle: \ncategory: <category>\ntags: [tag1, tag2, tag3]\n---\n<Content>') - db.session.add(blog) - db.session.commit() self.assertTrue(True) except Exception as e: self.assertTrue(False)