]+class="adoc-h2"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n=== \\4\n\n++++\n', root_string, flags=re.S) - root_string = re.sub(r'(
]+class="adoc-h3"[^>]*id=")([^"]+)("[^>]*>\s*)(.*?)(<\/p>)', '\n++++\n\n[[\\2]]\n==== \\4\n\n++++\n', root_string, flags=re.S)
- root_string = root_string + "\n++++\n"
- except Exception as e:
- exc_type, exc_obj, exc_tb = sys.exc_info()
- print("ERROR: ", e, exc_tb.tb_lineno)
- return root_string
-
def decrease_heading_levels(adoc):
    """Promote every AsciiDoc heading in *adoc* one level.

    Collapses each newline-anchored "==" to "=", which removes exactly one
    "=" from every heading line (e.g. "\n== Foo" -> "\n= Foo"). A heading on
    the very first line (no preceding newline) is left untouched.
    """
    try:
        adoc = adoc.replace("\n==", "\n=")
    except Exception as err:
        _, _, trace = sys.exc_info()
        print("ERROR: ", err, trace.tb_lineno)
    return adoc
-
def traverse_subitems(subitems, toc_list):
    """Walk a subitems tree depth-first, gathering every "html" entry.

    Mutates and returns *toc_list*; ordering is each item's own page first,
    then the pages of its nested subitems.
    """
    for entry in subitems:
        if "html" in entry:
            toc_list.append(entry["html"])
        nested = entry["subitems"] if "subitems" in entry else None
        if nested:
            toc_list = traverse_subitems(nested, toc_list)
    return toc_list
-
def parse_toc(h_json, toc_list):
    """Flatten the h_json structure into the list of TOC HTML pages.

    Top-level entries contribute their "filename" directly; entries without
    one are treated as group containers and their nested subitems are
    flattened via traverse_subitems. Returns (h_json, filled toc_list).
    """
    try:
        for entry in h_json:
            if "filename" in entry:
                toc_list.append(entry["filename"])
            elif "subitems" in entry:
                toc_list = traverse_subitems(entry["subitems"], toc_list)
    except Exception as err:
        _, _, trace = sys.exc_info()
        print("ERROR: ", err, trace.tb_lineno)
    return h_json, toc_list
-
def parse_header(header_path):
    """Parse \\defgroup blocks out of a Doxygen header file.

    Builds the top-level TOC structure: a hard-coded Introduction entry
    followed by one dict per top-level \\defgroup ('group_id', 'name',
    'description', 'html', 'subitems'), where nested \\defgroup names become
    subitems pointing at their generated group__*.adoc/html files.

    Returns the list of group dicts (Introduction always first).
    """
    h_json = [
        { 'group_id': 'index_doxygen', 'name': 'Introduction', 'description': 'An introduction to the Pico SDK', 'html': 'index_doxygen.html', 'subitems': [] }
    ]
    try:
        with open(header_path) as header_file:
            content = header_file.read()
        # each findall hit is a 10-tuple of capture groups; index 8 holds the
        # text between the first "\defgroup" and the closing "@}"
        blocks = re.findall(r"^(\s*)(\*|\/\*\*)(\s*)(\s)(\*)(\s)(\\)(defgroup)([^}]*)(\@\})", content, re.M)
        for block in blocks:
            items = block[8].split(r"\defgroup")
            group_id = None
            group_json = None
            for item in items:
                if group_id is None:  # must be the first item in the list
                    m = re.match(r"(\s*)(\S*)(\s*)([^*]*)(.*?)(@\{)", item, re.S)
                    group_id = m.group(2)
                    group_filename = "group_" + group_id + ".html"
                    group_filename = re.sub("_", "__", group_filename)
                    group_name = m.group(4)
                    # BUG FIX: these re.sub calls previously passed re.M as
                    # the positional *count* argument (count=8); it must be
                    # the flags keyword.
                    group_name = re.sub(r"\s*$", "", group_name, flags=re.M)
                    group_desc = m.group(5)
                    group_desc = re.sub(r"\n", "", group_desc, flags=re.M)
                    group_desc = re.sub(r"\*", "", group_desc, flags=re.M)
                    group_desc = re.sub(r"^\s", "", group_desc, flags=re.M)
                    group_json = { 'group_id': group_id, 'name': group_name, 'description': group_desc, 'html': group_filename, 'subitems': [] }
                    h_json.append(group_json)
                else:
                    # subsequent items are the nested \defgroup names
                    cleaned = item
                    cleaned = re.sub(r"\n*", "", cleaned, flags=re.M)
                    cleaned = re.sub(r"^\s*", "", cleaned, flags=re.M)
                    cleaned = re.sub(r"\s*\*\s*$", "", cleaned, flags=re.M)
                    val = cleaned.split(" ")[0]
                    # doxygen doubles underscores in generated filenames
                    filename = "group__" + re.sub("_", "__", val)
                    group_json['subitems'].append({ 'name': val, 'file': filename + ".adoc", 'html': filename + ".html", 'subitems': [] })
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("ERROR: ", e, exc_tb.tb_lineno)
    return h_json
-
def compile_json_mappings(json_dir, json_files):
    """Load the JSON transform-mapping files into a list of parsed objects.

    Files are read in sorted filename order; "table_memname.json" is
    deliberately excluded. Returns the list of decoded JSON documents.
    """
    try:
        compiled = []
        skip = ["table_memname.json"]
        for name in sorted(json_files):
            if name in skip:
                continue
            # read the json
            with open(os.path.join(json_dir, name)) as fh:
                compiled.append(json.load(fh))
    except Exception as err:
        _, _, trace = sys.exc_info()
        print("ERROR: ", err, trace.tb_lineno)
    return compiled
-
def compile_includes(my_adoc, output_path, subitems):
    """Inline the adoc file of every subitem (depth-first) into *my_adoc*.

    Each subitem's file is read and appended (separated by a blank line);
    once the subitem — and any of its own subitems — have been folded in,
    the standalone file is deleted from disk. Returns the combined text.
    """
    try:
        for entry in subitems:
            child_path = os.path.join(output_path, entry["file"])
            with open(child_path) as fh:
                my_adoc = my_adoc + "\n\n" + fh.read()
            if entry.get("subitems"):
                my_adoc = compile_includes(my_adoc, output_path, entry["subitems"])
            # content now lives in the parent, so drop the standalone file
            os.remove(child_path)
    except Exception as err:
        _, _, trace = sys.exc_info()
        print("ERROR: ", err, trace.tb_lineno)
    return my_adoc
-
def walk_json(item, group_adoc, output_path):
    """Append an include:: directive for *item* to the group adoc text.

    When the item has nested subitems, their generated adoc files are folded
    into this item's own file on disk (via compile_includes) so that the
    single include:: covers the whole subtree. Returns the extended text.
    """
    try:
        adoc_name = item["file"]
        group_adoc += "include::" + adoc_name + "[]\n\n"
        if item.get("subitems"):
            # compile includes into a single file
            target = os.path.join(output_path, adoc_name)
            with open(target) as fh:
                merged = fh.read()
            merged = compile_includes(merged, output_path, item["subitems"])
            # write the new file
            write_output(target, merged)
    except Exception as err:
        _, _, trace = sys.exc_info()
        print("ERROR: ", err, trace.tb_lineno)
    return group_adoc
-
def walk_nested_adoc(item, output_path, level):
    """Recursively deepen AsciiDoc heading levels for nested TOC items.

    Items nested at depth > 1 get every line-leading "=" replaced with
    *level* "="s, pushing their headings down so they nest correctly under
    their parent page. Returns *level* unchanged.

    BUG FIX: previously a missing adoc file (or an item without an "html"
    key) raised inside the broad try, which aborted the loop and left every
    subitem below it unadjusted — even though the original comment noted
    that "not all items in the json have an adoc path". Missing files/keys
    are now skipped without stopping the recursion.
    """
    try:
        # only adjust nested items
        if level > 1 and "html" in item:
            adoc_path = re.sub(".html$", ".adoc", item["html"])
            filepath = os.path.join(output_path, adoc_path)
            # not all items in the json have an adoc file on disk
            if os.path.isfile(filepath):
                with open(filepath) as f:
                    content = f.read()
                # prepend (level - 1) extra "="s to every heading line
                content = re.sub("^=", "=" * level, content, flags=re.M)
                write_output(filepath, content)
        if "subitems" in item:
            for subitem in item["subitems"]:
                walk_nested_adoc(subitem, output_path, level + 1)
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("ERROR: ", e, exc_tb.tb_lineno)
    return level
-
def find_toc_item(subitems, path, parent_tree):
    """Locate the TOC item whose "html" equals *path*, tracking its index trail.

    *parent_tree* accumulates the list indices leading to the match at each
    nesting depth; whenever a branch turns out not to contain the target,
    the trail is rolled back to what the caller passed in.

    Returns (matched-item-or-None, index trail).
    """
    try:
        val = None
        rollback = parent_tree.copy()
        for idx, candidate in enumerate(subitems):
            if val is not None:
                continue  # already found; remaining siblings are ignored
            parent_tree.append(idx)
            if "html" in candidate and candidate["html"] == path:
                val = candidate
            elif "subitems" in candidate:
                val, parent_tree = find_toc_item(candidate["subitems"], path, parent_tree)
            if val is None:
                parent_tree = rollback.copy()
    except Exception as err:
        _, _, trace = sys.exc_info()
        print("ERROR: ", err, trace.tb_lineno)
    return val, parent_tree
-
def check_toc_level(h_json, html_file, root):
    """Graft this page's Modules entries into the h_json TOC tree.

    Looks for a Doxygen "Modules" memberdecls table in the parsed page
    *root*; if present, walks the page's ancestor chain (the "ingroups"
    header links plus the page itself) to find — or create — the matching
    nesting level inside h_json, then appends any module pages that are not
    already listed at that level. Returns the (possibly extended) h_json.
    """
    try:
        # check for the Modules table
        tables = root.xpath(".//table[@class='memberdecls' and ./tr/td/h2[contains(text(),'Modules')]]")
        if len(tables) > 0:
            table = tables[0]
            # hrefs of every module row in the table
            modules = table.xpath(".//tr[contains(@class, 'memitem:')]//a")
            modules = [f.get("href") for f in modules]
            # also collect this file's parents
            header = root.find(".//div[@class='headertitle']")
            outer_parents = []
            if header is not None:
                h_parents = header.findall(".//div[@class='ingroups']/a")
                for h_item in h_parents:
                    outer_parents.append(h_item.get("href"))
            # the page itself acts as the innermost "parent"
            # NOTE(review): indentation reconstructed — presumed to run
            # regardless of whether a headertitle was found; confirm.
            outer_parents.append(html_file)

            # first check the outer parents to find our starting point
            # `level` is the subitems list currently being descended through
            level = h_json
            for ix, parent in enumerate(outer_parents):
                #for toc_item in level:
                val, parent_tree = find_toc_item(level, parent, [])
                if val is not None:
                    # follow the index trail down to the matched item's
                    # subitems list, creating missing lists along the way
                    for n in parent_tree:
                        level = level[n]
                        if "subitems" not in level:
                            level["subitems"] = []
                        level = level["subitems"]
                # create each toc level as needed
                elif ix > 0:
                    new_subitem = {'name': re.sub(".html", "", parent), 'file': re.sub(".html", ".adoc", parent), 'html': parent, 'subitems': []}
                    level.append(new_subitem)
                    level = new_subitem["subitems"]

            # then check all the modules; append any not already present
            for ix, module in enumerate(modules):
                found = False
                for toc_item in level:
                    if "html" in toc_item and toc_item["html"] == module:
                        found = True
                        break
                if found == False:
                    level.append({'name': re.sub(".html", "", module), 'file': re.sub(".html", ".adoc", module), 'html': module, 'subitems': []})
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("ERROR: ", e, exc_tb.tb_lineno)
    return h_json
-
def parse_individual_file(html_path, html_file, complete_json_mappings, updated_links, h_json):
    """Convert one Doxygen HTML page to AsciiDoc.

    Reads the page, normalises its markup so lxml can parse it, applies
    every JSON transform mapping, fixes links / headings / ids, and extends
    h_json with this page's TOC entries via check_toc_level.

    Returns (adoc text or None if the page had no contents div or the
    conversion failed, h_json).
    """
    adoc = None  # BUG FIX: ensure a defined return value if the try fails early
    try:
        # create the full path
        this_path = os.path.join(html_path, html_file)
        # read the input root
        with open(this_path) as h:
            html_content = h.read()
        # strip the XHTML doctype and make the markup parseable
        html_content = re.sub(r'<\!DOCTYPE html PUBLIC "-\/\/W3C\/\/DTD XHTML 1\.0 Transitional\/\/EN" "https:\/\/www\.w3\.org\/TR\/xhtml1\/DTD\/xhtml1-transitional\.dtd">', '', html_content)
        html_content = re.sub('rel="stylesheet">', 'rel="stylesheet"/>', html_content)
        html_content = re.sub('&display=swap"', '"', html_content)
        # NOTE(review): the patterns of two further substitutions were lost
        # when this file was mangled (they appear to have been literal HTML
        # tags that a conversion step stripped, leaving an unterminated raw
        # string). Restored as a no-op — TODO recover the original patterns
        # from upstream before relying on this cleanup step.
        html_content = re.sub(r'', '', html_content)
        html_content = re.sub(r"<\!-- HTML header for doxygen \S*?-->", '', html_content)
        html_content = re.sub(' xmlns="http://www.w3.org/1999/xhtml"', '', html_content)
        root = etree.HTML(html_content)

        # give everything an id
        root = add_ids(root, html_file)
        # first check to see if this should be in the toc list
        h_json = check_toc_level(h_json, html_file, root)
        # apply every transform mapping to the tree
        for mapping in complete_json_mappings:
            for item in mapping:
                root = transform_element(item, root)
        # fix links
        root, updated_links = fix_internal_links(root, html_file, updated_links)
        # cleanup
        root = merge_lists("ul", root)
        root = merge_lists("ol", root)
        root = wrap_list_items(root)
        # combine multi-para notes into one container
        root = merge_note_paras(root)
        # add some extra items to help with the adoc conversion
        root = prep_for_adoc(root)
        # fix some heading levels
        root = fix_heading_levels(root)
        root = fix_duplicate_ids(root, html_file)
        # cleanup
        root = strip_attribute("data-processed", root)
        # get the document title
        title_text = get_document_title(root, html_file)
        # get only the relevant content
        contents = root.find(".//div[@class='contents']")
        if contents is not None:
            # prep and write the adoc
            final_output = stringify(contents)
            adoc = make_adoc(final_output, title_text, html_file)
        else:
            adoc = None
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("ERROR: ", e, exc_tb.tb_lineno)
    return adoc, h_json
-
def handler(html_path, output_path, header_path, output_json):
    """Top-level driver: convert a directory of Doxygen HTML to AsciiDoc.

    Steps: parse the group header, load the JSON transform mappings, convert
    every HTML page, adjust nested heading levels, patch links, stitch the
    per-group adoc files together, and dump the final TOC structure to
    *output_json*.
    """
    try:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        json_dir = os.path.join(dir_path, "doxygen_json_mappings")
        html_dir = os.path.realpath(html_path)
        # get the file order and groupings
        h_json = parse_header(header_path)
        # read the json transform mappings:
        # get all the json files within a specified directory
        json_files = os.listdir(json_dir)
        # filter for just json files
        json_files = [f for f in json_files if re.search(".json", f) is not None]
        complete_json_mappings = compile_json_mappings(json_dir, json_files)
        # get a list of all the html files
        html_files = os.listdir(html_dir)
        html_files = [f for f in html_files if re.search(".html", f) is not None]
        # sort the files ascending
        html_files.sort()
        # process every html file
        updated_links = {}

        for html_file in html_files:
            this_output_path = os.path.join(output_path, html_file)
            # parse the file
            adoc, h_json = parse_individual_file(html_path, html_file, complete_json_mappings, updated_links, h_json)
            # write the final adoc file
            adoc_path = re.sub(".html$", ".adoc", this_output_path)
            if adoc is not None:
                write_output(adoc_path, adoc)
                print("Generated " + adoc_path)
            else:
                print("--------- SKIPPED " + adoc_path)

        toc_list = []
        toc_list = parse_toc(h_json, toc_list)

        # adjust nested adoc headings
        for item in h_json:
            # walk the tree and adjust as necessary
            walk_nested_adoc(item, output_path, 0)

        # fix any links that were updated from other files
        adoc_files = os.listdir(output_path)
        adoc_files = [f for f in adoc_files if re.search(".adoc", f) is not None]
        for adoc_file in adoc_files:
            this_path = os.path.join(output_path, adoc_file)
            with open(this_path) as h:
                content = h.read()
            # fix links
            content = fix_external_links(content, h_json)
            # fix heading levels for non-included pages
            src_html_file = re.sub(".adoc", ".html", adoc_file)
            if src_html_file not in toc_list:
                # BUG FIX: this previously ran on the stale `adoc` variable
                # left over from the conversion loop and discarded the
                # result, so the decreased heading levels were never
                # written back out.
                content = decrease_heading_levels(content)
            for link in updated_links:
                content = re.sub(link, updated_links[link], content)
            write_output(this_path, content)

        # make the group adoc files
        # include::micropython/what-board.adoc[]
        for item in h_json:
            group_adoc = "= " + item['name'] + "\n\n"
            group_adoc = group_adoc + item['description'] + "\n\n"
            for toc_item in item["subitems"]:
                group_adoc = walk_json(toc_item, group_adoc, output_path)
            group_output_path = os.path.join(output_path, item["group_id"] + ".adoc")
            write_output(group_output_path, group_adoc)
        # write the json structure file as well
        write_output(output_json, json.dumps(h_json, indent="\t"))
    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print("ERROR: ", e, exc_tb.tb_lineno)
    return
-
if __name__ == "__main__":
    # usage: <script> <html_dir> <output_dir> <sdk_header_path> <output_json_path>
    handler(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])