Skip to content
This repository has been archived by the owner on Oct 5, 2021. It is now read-only.

Commit

Permalink
Deal with unexpected (unclosed) CDATA sections
Browse files Browse the repository at this point in the history
Some unexpected content was encountered that triggers this error:

xml.parsers.expat.ExpatError: unclosed CDATA section: line 183, column 0
  • Loading branch information
foxmask committed Dec 24, 2017
1 parent f39c6cf commit ae6ceb6
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions th_evernote/sanitize.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# coding: utf-8
import re
from tidylib import tidy_document
from xml.parsers.expat import ExpatError
from xml.dom.minidom import parseString


Expand All @@ -11,13 +12,16 @@ def sanitize(html):
document, errors = tidy_document(
html, options={"output-xhtml": 1, "force-output": 1})

parsed_dom = parseString(document)
document_element = parsed_dom.documentElement
remove_prohibited_elements(document_element)
remove_prohibited_attributes(document_element)
body = document_element.getElementsByTagName("body")[0]
body.tagName = "en-note"
return body.toxml()
try:
parsed_dom = parseString(document)
document_element = parsed_dom.documentElement
remove_prohibited_elements(document_element)
remove_prohibited_attributes(document_element)
body = document_element.getElementsByTagName("body")[0]
body.tagName = "en-note"
return body.toxml()
except ExpatError:
return ''


def remove_prohibited_elements(document_element):
Expand Down

0 comments on commit ae6ceb6

Please sign in to comment.