# Generate Table of Contents for a Jupyter Notebook

This notebook generates a table of contents for a specified notebook as hyperlinked Markdown, which can be pasted at the top of that notebook for easier navigation.

In [87]:
# Import needed libraries
import json
import os
import re
from itertools import chain

# Path separator of the current operating system ("/" or "\\").
sep = os.sep

In [88]:
# ACTION: specify Notebook
# Path of the .ipynb file whose markdown headings will be turned into a TOC.
# The value is passed unchanged to open() in a later cell.

inputNotebook = "generic-results-analysis.ipynb"

In [89]:
# Extract markdown from Notebook

# Notebook files are UTF-8 encoded JSON; state the encoding explicitly because
# the platform default used by open() is not guaranteed to be UTF-8.
with open(inputNotebook, encoding="utf-8") as f:
    json_dict = json.load(f)

# One entry per markdown cell; each entry is that cell's list of lines.
src_list = []

for cell in json_dict["cells"]:
    if cell["cell_type"] == "markdown":
        source = cell["source"]
        # The notebook format allows "source" to be either a list of lines or a
        # single multi-line string. Normalise to a list of lines so that the
        # flattening step below never splits a string into single characters.
        if isinstance(source, str):
            source = source.splitlines(keepends=True)
        src_list.append(source)

# basename() strips the directory part for both "/" and the native separator.
print("Loading {}, found {} MD cells."
      .format(os.path.basename(inputNotebook), len(src_list)))

# Flatten the per-cell line lists into one list of markdown lines.
markdownCells = list(chain.from_iterable(src_list))

Loading generic-results-analysis.ipynb, found 56 MD cells.


In [90]:
# Extract heading data

output = list()
is_code = False
for line in markdownCells:
    # A line opening with ``` starts or ends a fenced code block; lines that
    # begin with "#" inside a fence are code comments, not headings.
    if line.startswith("```"):
        is_code = not is_code
    # startswith() is safe on an empty string, whereas indexing line[0] would
    # raise IndexError. A line that starts with "#" can never equal "\n", so no
    # separate blank-line check is needed.
    if line.startswith("#") and not is_code:
        output.append(line)

print("Found {} markdown headers".format(len(output)))

markdownHeaders = output

Found 50 markdown headers


In [92]:
# Regex operations to create TOC with links and indentation

# Group 1: the leading run of "#" signs (the heading level).
# Group 2: the heading text after any whitespace that follows the "#" signs.
heading_pattern = re.compile(r"(#+)\s*(.*)")

toc_lines = []

for line in markdownHeaders:
    line = line.rstrip().replace("\t", "")
    match = heading_pattern.match(line)
    if match is None:
        # Not a heading line, so there is nothing to list.
        continue
    hashes, title = match.groups()
    if title == "":
        # A heading marker without any text has nothing to link to.
        continue
    # Indentation is the heading level minus one. Only the leading "#" signs
    # count, so a "#" inside the title (e.g. "Issue #3") does not add indent
    # and is kept as part of the title.
    indent = len(hashes) - 1
    # Generate a Markdown-compatible hyperlink with hyphens instead of spaces
    hyperlink = title.replace(" ", "-")
    # Put it all together
    toc_lines.append("\t" * indent + f"- [{title}]" + f"(#{hyperlink})")

# One TOC entry per line, each terminated by a newline.
tocText = "".join(entry + "\n" for entry in toc_lines)

In [93]:
# Output the TOC as Markdown text, ready to be copied into a markdown cell.
# Note: every heading found is listed, so you may want to delete the first few
# entries up to the TOC itself.

print(tocText)

- [Analysis of Quotation Data Generated from JSTOR Dataset](#Analysis-of-Quotation-Data-Generated-from-JSTOR-Dataset)
- [Table of contents](#Table-of-contents)
- [Terminology](#Terminology)
- [Initial setup](#Initial-setup)
- [Basic information about results](#Basic-information-about-results)
- [Information about historical range](#Information-about-historical-range)
- [Questions about distribution of quotations across source text](#Questions-about-distribution-of-quotations-across-source-text)
	- [How many items have quoted from each chunk of the text?](#How-many-items-have-quoted-from-each-chunk-of-the-text?)
	- [How many quotations were detected from each chunk of the text?](#How-many-quotations-were-detected-from-each-chunk-of-the-text?)
	- [How many quoted words were detected from each chunk of the text?](#How-many-quoted-words-were-detected-from-each-chunk-of-the-text?)
	- [How does the number of items quoting each chunk change over the decades?](#How-does-the-number-of-items-quo