From 4018ef3d0d4fd8de9d578abf96060652456f799a Mon Sep 17 00:00:00 2001
From: nossbigg <chenggibson@gmail.com>
Date: Sat, 15 Jun 2019 16:09:43 +0800
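Subject: [PATCH] Standardize processElement to only emit Paragraph tuples

processElement now wraps the spacer produced for a `br` node in a
Paragraph (via the new createEmptyParagraph helper) instead of returning
the bare spacer element.

With that change, the isinstance(paragraph, Paragraph) guards in
hasCCCReferenceLine and validate_has_all_ccc_refs are removed, together
with the now-unused Paragraph import in validators.py.

Also bump JSON_STORE_VERSION from 0.0.1 to 0.0.2.
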
---
 src/common/config.py          |  2 +-
 src/parsers/contentsParser.py |  9 ++++-----
 src/validators/validators.py  | 10 +++-------
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/src/common/config.py b/src/common/config.py
index 3fef321..e5e305a 100644
--- a/src/common/config.py
+++ b/src/common/config.py
@@ -7,4 +7,4 @@
 ABBREVIATIONS_SAVE_PATH = os.path.join(DATA_SAVE_PATH, "abbreviations.html")
 
 JSON_STORE_PATH = os.path.join(DATA_SAVE_PATH, "ccc.json")
-JSON_STORE_VERSION = '0.0.1'
\ No newline at end of file
+JSON_STORE_VERSION = '0.0.2'
\ No newline at end of file
diff --git a/src/parsers/contentsParser.py b/src/parsers/contentsParser.py
index 32cdcbb..a0961c1 100644
--- a/src/parsers/contentsParser.py
+++ b/src/parsers/contentsParser.py
@@ -17,7 +17,7 @@ def extractStructuredContents(raw_nodes):
 
 def processElement(node):
     if node.name == 'br':
-        return [createSpacerElement()]
+        return [createEmptyParagraph()]
 
     if node.name != 'p':
         return []
@@ -83,11 +83,7 @@ def transformCCCReferenceLine(paragraph):
 
 
 def hasCCCReferenceLine(paragraph):
-    if not isinstance(paragraph, Paragraph):
-        return False
-
     first_element = paragraph.elements[0]
-
     if 'text' not in first_element:
         return False
 
@@ -146,6 +142,9 @@ def createParagraph(node, children):
 
     return Paragraph(children, attrs)
 
+def createEmptyParagraph():
+    return Paragraph([createSpacerElement()], {})
+
 
 def isIndentedParagraph(node):
     style = node.get('style')
diff --git a/src/validators/validators.py b/src/validators/validators.py
index 984423a..4def093 100644
--- a/src/validators/validators.py
+++ b/src/validators/validators.py
@@ -1,15 +1,11 @@
-from parsers.contentsParser import Paragraph
-
-
 def validate_has_all_ccc_refs(page_nodes_dict):
     ccc_refs = {}
 
     for page in page_nodes_dict.values():
         for paragraph in page.paragraphs:
-            if isinstance(paragraph, Paragraph):
-                for element in paragraph.elements:
-                    if element['type'] == 'ref-ccc':
-                        ccc_refs[element['ref_number']] = ''
+            for element in paragraph.elements:
+                if element['type'] == 'ref-ccc':
+                    ccc_refs[element['ref_number']] = ''
 
     expected_num_ccc_refs = 2865
     missing_refs = []