diff --git a/leanpkg.toml b/leanpkg.toml index f257e89..798370e 100644 --- a/leanpkg.toml +++ b/leanpkg.toml @@ -5,4 +5,4 @@ lean_version = "leanprover-community/lean:3.26.0" path = "src" [dependencies] -mathlib = {git = "https://github.com/leanprover-community/mathlib", rev = "97f89af64791b5149678fae75ca75fc44912da42"} +mathlib = {git = "https://github.com/leanprover-community/mathlib", rev = "5b579a2c3c3706e518dae73630c9397a5f7d900f"} diff --git a/mistletoe_renderer.py b/mistletoe_renderer.py index 0dac23a..9ecad65 100644 --- a/mistletoe_renderer.py +++ b/mistletoe_renderer.py @@ -3,7 +3,7 @@ mistletoe to generate HTML from markdown. Extra features include: -- Library note links +- Linkifying raw URLs - Managing LaTeX so that MathJax will be able to process it in the browser - Syntax highlighting with Pygments """ @@ -16,29 +16,33 @@ from mathjax_editing import remove_math, replace_math - -class NoteLink(span_token.SpanToken): +class RawUrl(span_token.SpanToken): """ - Detect library note links + Detect raw URLs. """ parse_inner = False - pattern = re.compile(r'Note \[(.*)\]', re.I) + # regex to extract raw URLs from Markdown from: + # https://github.com/trentm/python-markdown2/wiki/link-patterns#converting-links-into-links-automatically + pattern = re.compile( + r'((([A-Za-z]{3,9}:(?:\/\/)?)' # scheme + r'(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(:\[0-9]+)?' # user@hostname:port + r'|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)' # www.|user@hostname + r'((?:\/[\+~%\/\.\w\-]*)?' # path + r'\??(?:[\-\+=&;%@\.\w]*)' # query parameters + r'#?(?:[\.\!\/\\\w\-]*))?)' # fragment + r'(?![^<]*?(?:<\/\w+>|\/?>))' # ignore anchor HTML tags + r'(?![^\(]*?\))' # ignore links in brackets (Markdown links and images) + ) def __init__(self, match): - self.body = match.group(0) - self.note = match.group(1) - + self.url = match.group(1) class CustomHTMLRenderer(HTMLRenderer): """ - Call the constructor with `site_root`. - The main rendering function is `render_md`. """ - - def __init__(self, site_root): - self.site_root = site_root - super().__init__(NoteLink) + def __init__(self): + super().__init__(RawUrl) def render_md(self, ds): """ @@ -105,8 +109,8 @@ def render_block_code(self, token): lexer = get_lexer('text') return highlight(code, lexer, self.formatter) - def render_note_link(self, token): + def render_raw_url(self, token): """ - Render library note links + Linkify raw URLs. """ - return f'{token.body}' + return f'{token.url}' diff --git a/print_docs.py b/print_docs.py index a20728d..0ad8709 100644 --- a/print_docs.py +++ b/print_docs.py @@ -3,7 +3,7 @@ Run using ./gen_docs unless debugging Example standalone usage for local testing (requires export.json): -$ python3 print_docs.py -r "_target/deps/mathlib" -w "/" +$ python3 print_docs.py -r "_target/deps/mathlib" -w "/" -l """ import json @@ -21,13 +21,15 @@ from urllib.parse import quote from functools import reduce import textwrap -from collections import defaultdict +from collections import Counter, defaultdict, namedtuple from pathlib import Path -from typing import NamedTuple, List +from typing import NamedTuple, List, Optional import sys from mistletoe_renderer import CustomHTMLRenderer - +import pybtex.database +from pybtex.style.labels.alpha import LabelStyle +from pylatexenc.latex2text import LatexNodes2Text import networkx as nx root = os.getcwd() @@ -79,6 +81,67 @@ # root directory of mathlib. local_lean_root = os.path.join(root, cl_args.r if cl_args.r else '_target/deps/mathlib') + '/' +bib = pybtex.database.parse_file(f'{local_lean_root}docs/references.bib') + +label_style = LabelStyle() +# cf. LabelStyle.format_labels in pybtex.style.labels.alpha: +# label_style.format_label generates a label from author(s) + year +# counted tracks the total number of times a label appears +counted = Counter() + +latexnodes2text = LatexNodes2Text() +def clean_tex(src: str) -> str: + return latexnodes2text.latex_to_text(src) + +for key, data in bib.entries.items(): + for author in data.persons['author']: + # turn LaTeX special characters to Unicode, + # since format_label does not correctly abbreviate names containing LaTeX + author.last_names = list(map(clean_tex, author.last_names)) + label = label_style.format_label(data) + counted.update([label]) + data.alpha_label = label +# count tracks the number of times a duplicate label has been finalized +count = Counter() + +for key, data in bib.entries.items(): + label = data.alpha_label + # Finalize duplicate labels by appending 'a', 'b', 'c', etc. + # Currently the ordering is determined by `docs/references.bib` + if counted[label] > 1: + data.alpha_label += chr(ord('a') + count[label]) + count.update([label]) + + url = None + if 'link' in data.fields: + url = data.fields['link'][5:-1] + elif 'url' in data.fields: + url = data.fields['url'] + elif 'eprint' in data.fields: + eprint = data.fields['eprint'] + if eprint.startswith('arXiv:'): + url = 'https://arxiv.org/abs/'+eprint[6:] + elif (('archivePrefix' in data.fields and data.fields['archivePrefix'] == 'arXiv') or + ('eprinttype' in data.fields and data.fields['eprinttype'] == 'arXiv')): + url = 'https://arxiv.org/abs/'+eprint + else: + url = eprint + # else: + # raise ValueError(f"Couldn't find a url for bib item {key}") + if url: + if url.startswith(r'\url'): + url = url[4:].strip('{}') + url = url.replace(r'\_', '_') + + if 'journal' in data.fields and data.fields['journal'] != 'CoRR': + journal = data.fields['journal'] + elif 'booktitle' in data.fields: + journal = data.fields['booktitle'] + else: + journal = None + data.fields['url'] = url + data.fields['journal'] = journal + data.backrefs = [] with open('leanpkg.toml') as f: parsed_toml = toml.loads(f.read()) @@ -155,7 +218,7 @@ def url(self): env.globals['lean_commit'] = lean_commit env.globals['site_root'] = site_root -markdown_renderer = CustomHTMLRenderer(site_root) +markdown_renderer = CustomHTMLRenderer() def convert_markdown(ds): return markdown_renderer.render_md(ds) @@ -294,16 +357,69 @@ def go(f): return go(['n', f]) -def linkify_markdown(string, loc_map): - def linkify_type(string): +# store number of backref anchors and notes in each file +num_backrefs = defaultdict(int) +num_notes = defaultdict(int) + +def linkify_markdown(string: str, loc_map) -> str: + def linkify_type(string: str): splitstr = re.split(r'([\s\[\]\(\)\{\}])', string) tks = map(lambda s: linkify(s, loc_map), splitstr) return "".join(tks) + def backref_title(filename: str): + parts = filename.split('/') + # drop .html + parts[-1] = parts[-1].split('.')[0] + return f'{current_project}: {".".join(parts)}' + + def note_backref(key: str) -> str: + num_notes[current_filename] += 1 + backref_id = f'noteref{num_notes[current_filename]}' + if current_project and current_project != 'test': + global_notes[key].backrefs.append( + (current_filename, backref_id, backref_title(current_filename)) + ) + return backref_id + def bib_backref(key: str) -> str: + num_backrefs[current_filename] += 1 + backref_id = f'backref{num_backrefs[current_filename]}' + if current_project and current_project != 'test': + bib.entries[key].backrefs.append( + (current_filename, backref_id, backref_title(current_filename)) + ) + return backref_id + + def linkify_note(body: str, note: str) -> str: + if note in global_notes: + return f'{body}' + return body + def linkify_named_ref(body: str, name: str, key: str) -> str: + if key in bib.entries: + alpha_label = bib.entries[key].alpha_label + return f'{name}' + return body + def linkify_standalone_ref(body: str, key: str) -> str: + if key in bib.entries: + alpha_label = bib.entries[key].alpha_label + return f'[{alpha_label}]' + return body + + # notes + string = re.compile(r'Note \[(.*)\]', re.I).sub( + lambda p: linkify_note(p.group(0), p.group(1)), string) + # inline declaration names string = re.sub(r'([^<]+)', - lambda p: '{}'.format(linkify_type(p.group(1))), string) + lambda p: f'{linkify_type(p.group(1))}', string) + # declaration names in highlighted Lean code snippets string = re.sub(r'([^<]+)', - lambda p: '{}'.format(linkify_type(p.group(1))), string) + lambda p: f'{linkify_type(p.group(1))}', string) + # references (don't match if there are illegal characters for a BibTeX key, + # cf. https://tex.stackexchange.com/a/408548) + string = re.sub(r'\[([^\]]+)\]\s*\[([^{ },~#%\\]+)\]', + lambda p: linkify_named_ref(p.group(0), p.group(1), p.group(2)), string) + string = re.sub(r'\[([^{ },~#%\\]+)\]', + lambda p: linkify_standalone_ref(p.group(0), p.group(1)), string) return string def plaintext_summary(markdown, max_chars = 200): @@ -423,25 +539,37 @@ def setup_jinja_globals(file_map, loc_map, instances): env.filters['convert_markdown'] = lambda x: linkify_markdown(convert_markdown(x), loc_map) # TODO: this is probably very broken env.filters['link_to_decl'] = lambda x: link_to_decl(x, loc_map) env.filters['plaintext_summary'] = lambda x: plaintext_summary(x) - + env.filters['tex'] = lambda x: clean_tex(x) + +# stores the full filename of the markdown being rendered +current_filename: Optional[str] = None +# stores the project of the file, e.g. "mathlib", "core", etc. +current_project: Optional[str] = None +global_notes = {} +GlobalNote = namedtuple('GlobalNote', ['md', 'backrefs']) def write_html_files(partition, loc_map, notes, mod_docs, instances, tactic_docs): + global current_filename, current_project + for note_name, note_markdown in notes: + global_notes[note_name] = GlobalNote(note_markdown, []) + with open_outfile('index.html') as out: + current_filename = 'index.html' + current_project = None out.write(env.get_template('index.j2').render( active_path='')) with open_outfile('404.html') as out: + current_filename = '404.html' + current_project = None out.write(env.get_template('404.j2').render( active_path='')) - with open_outfile('notes.html') as out: - out.write(env.get_template('notes.j2').render( - active_path='', - notes = sorted(notes, key = lambda n: n[0]))) - kinds = [('tactic', 'tactics'), ('command', 'commands'), ('hole_command', 'hole_commands'), ('attribute', 'attributes')] + current_project = 'docs' for (kind, filename) in kinds: entries = [e for e in tactic_docs if e['category'] == kind] with open_outfile(filename + '.html') as out: + current_filename = filename + '.html' out.write(env.get_template(filename + '.j2').render( active_path='', entries = sorted(entries, key = lambda n: n['name']), @@ -450,6 +578,8 @@ def write_html_files(partition, loc_map, notes, mod_docs, instances, tactic_docs for filename, decls in partition.items(): md = mod_docs.get(filename, []) with open_outfile(html_root + filename.url) as out: + current_project = filename.project + current_filename = filename.url out.write(env.get_template('module.j2').render( active_path = filename.url, filename = filename, @@ -457,17 +587,40 @@ def write_html_files(partition, loc_map, notes, mod_docs, instances, tactic_docs decl_names = sorted(d['name'] for d in decls), )) + current_project = 'extra' for (filename, displayname, source, _) in extra_doc_files: + current_filename = filename + '.html' write_pure_md_file(local_lean_root + source, filename + '.html', displayname) + current_project = 'test' for (filename, displayname, source) in test_doc_files: + current_filename = filename + '.html' write_pure_md_file(source, filename + '.html', displayname) + # generate notes.html and references.html last + # so that we can add backrefs + with open_outfile('notes.html') as out: + current_project = 'docs' + current_filename = 'notes.html' + out.write(env.get_template('notes.j2').render( + active_path='', + notes = sorted(global_notes.items(), key = lambda n: n[0]))) + + with open_outfile('references.html') as out: + current_project = 'docs' + current_filename = 'references.html' + out.write(env.get_template('references.j2').render( + active_path='', + entries = sorted(bib.entries.items(), key = lambda e: e[1].alpha_label))) + + current_project = None + current_filename = None + def write_site_map(partition): with open_outfile('sitemap.txt') as out: for filename in partition: out.write(site_root + filename.url + '\n') - for n in ['index', 'tactics', 'commands', 'hole_commands', 'notes']: + for n in ['index', 'tactics', 'commands', 'hole_commands', 'notes', 'references']: out.write(site_root + n + '.html\n') for (filename, _, _, _) in extra_doc_files: out.write(site_root + filename + '.html\n') @@ -505,8 +658,8 @@ def cp(a, b): cp('nav.js', path+'nav.js') cp('searchWorker.js', path+'searchWorker.js') -def copy_yaml_files(path): - for fn in ['100.yaml', 'undergrad.yaml', 'overview.yaml']: +def copy_yaml_bib_files(path): + for fn in ['100.yaml', 'undergrad.yaml', 'overview.yaml', 'references.bib']: shutil.copyfile(f'{local_lean_root}docs/{fn}', path+fn) def copy_static_files(path): @@ -551,7 +704,7 @@ def main(): write_html_files(file_map, loc_map, notes, mod_docs, instances, tactic_docs) write_redirects(loc_map, file_map) copy_css(html_root, use_symlinks=cl_args.l) - copy_yaml_files(html_root) + copy_yaml_bib_files(html_root) copy_static_files(html_root) write_export_db(mk_export_db(loc_map, file_map)) write_site_map(file_map) diff --git a/requirements.txt b/requirements.txt index 4d406d5..243ba67 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,8 @@ mathlibtools pygments >= 2.7.1 jinja2 networkx +pybtex>=0.22.2 +latexcodec>=2.0.0 +PyYAML>=5.3.1 +six>=1.15.0 +pylatexenc>=2.4 diff --git a/style.css b/style.css index f578c38..5743e2f 100644 --- a/style.css +++ b/style.css @@ -314,16 +314,35 @@ nav { } /* Make `#id` links appear below header. */ -.decl::before, h1[id]::before, h2[id]::before, h3[id]::before, - h4[id]::before, h5[id]::before, h6[id]::before, a[id]::before { - content: ""; - display: block; - height: var(--fragment-offset); - margin-top: calc(-1 * var(--fragment-offset)); - box-sizing: inherit; - visibility: hidden; +.decl, h1[id], h2[id], h3[id], h4[id], h5[id], h6[id] { + scroll-margin-top: var(--fragment-offset); +} +/* don't need as much vertical space for these +inline elements */ +a[id], li[id] { + scroll-margin-top: var(--header-height); +} + +/* HACK: Safari doesn't support scroll-margin-top for +fragment links (yet?) +https://caniuse.com/mdn-css_properties_scroll-margin-top +https://bugs.webkit.org/show_bug.cgi?id=189265 +*/ +@supports not (scroll-margin-top: var(--fragment-offset)) { + .decl::before, h1[id]::before, h2[id]::before, h3[id]::before, + h4[id]::before, h5[id]::before, h6[id]::before, + a[id]::before, li[id]::before { + content: ""; + display: block; + height: var(--fragment-offset); + margin-top: calc(-1 * var(--fragment-offset)); + box-sizing: inherit; + visibility: hidden; + width: 1px; + } } + /* hide # after markdown headings except on hover */ .markdown-heading:not(:hover) > .hover-link { visibility: hidden; diff --git a/templates/attributes.j2 b/templates/attributes.j2 index 345e218..bbdd59f 100644 --- a/templates/attributes.j2 +++ b/templates/attributes.j2 @@ -3,7 +3,7 @@ {% block title %}Attributes{% endblock %} {%- block metadesc -%} - {{ "Attributes are a tool for associating information with declarations. + {{ "Attributes are a tool for associating information with declarations. In the simplest case, an attribute is a tag that can be applied to a declaration. simp is a common example of this." | plaintext_summary }} {%- endblock -%} @@ -29,6 +29,6 @@ attribute [attr_name] decl_name_1 decl_name_2 decl_name 3 ``` The core API for creating and using attributes can be found in -[core.init.meta.attribute](core/init/meta/attribute.html). +[init.meta.attribute](init/meta/attribute.html). {% endfilter %} -{% endblock %} \ No newline at end of file +{% endblock %} diff --git a/templates/navbar.j2 b/templates/navbar.j2 index 7746530..7ad0d80 100644 --- a/templates/navbar.j2 +++ b/templates/navbar.j2 @@ -5,6 +5,7 @@ +

Additional documentation

{% for (filename, displayname, _, community_site_url) in extra_doc_files %} @@ -31,4 +32,4 @@ {{ item.name }} {%- endif %} -{%- endfor %} \ No newline at end of file +{%- endfor %} diff --git a/templates/notes.j2 b/templates/notes.j2 index a012b46..0ddb64b 100644 --- a/templates/notes.j2 +++ b/templates/notes.j2 @@ -14,10 +14,16 @@

Various implementation details are noted in the mathlib source, and referenced later on. We collect these notes here.

-{% for note_name, note_markdown in notes %} +{% for note_name, note_data in notes %}

{{note_name}}

- {{ note_markdown | convert_markdown }} + {{ note_data.md | convert_markdown }} + + {% if note_data.backrefs | length > 0 %} + referenced by: {% for filename, backref_id, title in note_data.backrefs %} + [{{ loop.index }}] + {% endfor %} + {% endif %}
{% endfor %} {% endblock %} diff --git a/templates/references.j2 b/templates/references.j2 new file mode 100644 index 0000000..77a80d8 --- /dev/null +++ b/templates/references.j2 @@ -0,0 +1,38 @@ +{% extends "base.j2" %} +{% block title %}references{% endblock %} + +{%- block metadesc -%} + {{ "Works referenced in mathlib doc strings are listed on this page." | plaintext_summary }} +{%- endblock -%} + +{% block content %} +
+ +

References

+ +

Works cited in mathlib are collected on this page. This is generated from the BibTeX file docs/references.bib (view on GitHub).

+ + +{% endblock %} + +{% block internal_nav %} +

References

+{% for _, entry in entries %} + +{% endfor %} + +
+{% endblock %}