Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

references page and links #117

Merged
merged 16 commits into from Feb 22, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion leanpkg.toml
Expand Up @@ -5,4 +5,4 @@ lean_version = "leanprover-community/lean:3.26.0"
path = "src"

[dependencies]
mathlib = {git = "https://github.com/leanprover-community/mathlib", rev = "97f89af64791b5149678fae75ca75fc44912da42"}
mathlib = {git = "https://github.com/leanprover-community/mathlib", rev = "5b579a2c3c3706e518dae73630c9397a5f7d900f"}
38 changes: 21 additions & 17 deletions mistletoe_renderer.py
Expand Up @@ -3,7 +3,7 @@
mistletoe to generate HTML from markdown.

Extra features include:
- Library note links
- Linkifying raw URLs
- Managing LaTeX so that MathJax will be able to process it in the browser
- Syntax highlighting with Pygments
"""
Expand All @@ -16,29 +16,33 @@

from mathjax_editing import remove_math, replace_math


class NoteLink(span_token.SpanToken):
class RawUrl(span_token.SpanToken):
"""
Detect library note links
Detect raw URLs.
"""
parse_inner = False
pattern = re.compile(r'Note \[(.*)\]', re.I)
# regex to extract raw URLs from Markdown from:
# https://github.com/trentm/python-markdown2/wiki/link-patterns#converting-links-into-links-automatically
pattern = re.compile(
r'((([A-Za-z]{3,9}:(?:\/\/)?)' # scheme
r'(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(:\[0-9]+)?' # user@hostname:port
r'|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)' # www.|user@hostname
r'((?:\/[\+~%\/\.\w\-]*)?' # path
r'\??(?:[\-\+=&;%@\.\w]*)' # query parameters
r'#?(?:[\.\!\/\\\w\-]*))?)' # fragment
r'(?![^<]*?(?:<\/\w+>|\/?>))' # ignore anchor HTML tags
r'(?![^\(]*?\))' # ignore links in brackets (Markdown links and images)
)

def __init__(self, match):
self.body = match.group(0)
self.note = match.group(1)

self.url = match.group(1)

class CustomHTMLRenderer(HTMLRenderer):
"""
Call the constructor with `site_root`.

The main rendering function is `render_md`.
"""

def __init__(self, site_root):
self.site_root = site_root
super().__init__(NoteLink)
def __init__(self):
super().__init__(RawUrl)

def render_md(self, ds):
"""
Expand Down Expand Up @@ -105,8 +109,8 @@ def render_block_code(self, token):
lexer = get_lexer('text')
return highlight(code, lexer, self.formatter)

def render_note_link(self, token):
def render_raw_url(self, token):
"""
Render library note links
Linkify raw URLs.
"""
return f'<a href="{self.site_root}notes.html#{token.note}">{token.body}</a>'
return f'<a href="{token.url}">{token.url}</a>'
188 changes: 170 additions & 18 deletions print_docs.py
Expand Up @@ -3,7 +3,7 @@
Run using ./gen_docs unless debugging

Example standalone usage for local testing (requires export.json):
$ python3 print_docs.py -r "_target/deps/mathlib" -w "/"
$ python3 print_docs.py -r "_target/deps/mathlib" -w "/" -l

"""
import json
Expand All @@ -21,13 +21,15 @@
from urllib.parse import quote
from functools import reduce
import textwrap
from collections import defaultdict
from collections import Counter, defaultdict
from pathlib import Path
from typing import NamedTuple, List
from typing import NamedTuple, List, Optional
import sys

from mistletoe_renderer import CustomHTMLRenderer

import pybtex.database
from pybtex.style.labels.alpha import LabelStyle
from pylatexenc.latex2text import LatexNodes2Text
import networkx as nx

root = os.getcwd()
Expand Down Expand Up @@ -79,6 +81,63 @@
# root directory of mathlib.
local_lean_root = os.path.join(root, cl_args.r if cl_args.r else '_target/deps/mathlib') + '/'

bib = pybtex.database.parse_file(f'{local_lean_root}docs/references.bib')

label_style = LabelStyle()
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using LabelStyle here is a bit of a hack since I couldn't figure out how to use pybtex's bibliography formatting classes here. (If someone can figure it out, feel free to fix this!)

# cf. LabelStyle.format_labels in pybtex.style.labels.alpha:
# first generate label from author(s) + year
# counted will be the total number of times the label appears
counted = Counter()

latexnodes2text = LatexNodes2Text()
def clean_tex(src: str) -> str:
return latexnodes2text.latex_to_text(src)

for key, data in bib.entries.items():
for author in data.persons['author']:
# turn LaTeX special characters to Unicode,
# since format_label does not correctly abbreviate names containing LaTeX
author.last_names = list(map(clean_tex, author.last_names))
label = label_style.format_label(data)
counted.update([label])
data.fields['alpha_label'] = label
# count will track number of times label has been finalized
count = Counter()

for key, data in bib.entries.items():
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ordering of the bib entries here is whatever it is in mathlib's docs/references.bib. This could potentially lead to strange ordering of the a, b, c that are appended to labels.

label = data.fields['alpha_label']
# If there are duplicates, fix label by appending 'a', 'b', etc.
if counted[label] > 1:
data.fields['alpha_label'] += chr(ord('a') + count[label])
count.update([label])

url = None
if 'link' in data.fields:
url = data.fields['link'][5:-1]
elif 'url' in data.fields:
url = data.fields['url']
elif 'eprint' in data.fields:
eprint = data.fields['eprint']
if eprint.startswith('arXiv:'):
url = 'https://arxiv.org/abs/'+eprint[6:]
else:
url = eprint
# else:
# raise ValueError(f"Couldn't find a url for bib item {key}")
if url:
if url.startswith(r'\url'):
url = url[4:].strip('{}')
url = url.replace(r'\_', '_')

if 'journal' in data.fields and data.fields['journal'] != 'CoRR':
journal = data.fields['journal']
elif 'booktitle' in data.fields:
journal = data.fields['booktitle']
else:
journal = None
data.fields['url'] = url
data.fields['journal'] = journal
data.fields['backrefs'] = []

with open('leanpkg.toml') as f:
parsed_toml = toml.loads(f.read())
Expand Down Expand Up @@ -155,7 +214,7 @@ def url(self):
env.globals['lean_commit'] = lean_commit
env.globals['site_root'] = site_root

markdown_renderer = CustomHTMLRenderer(site_root)
markdown_renderer = CustomHTMLRenderer()

def convert_markdown(ds):
return markdown_renderer.render_md(ds)
Expand Down Expand Up @@ -294,16 +353,70 @@ def go(f):

return go(['n', f])

def linkify_markdown(string, loc_map):
def linkify_type(string):
# store number of backref anchors and notes in each file
num_backrefs = defaultdict(int)
num_notes = defaultdict(int)
note_pattern = re.compile(r'Note \[(.*)\]', re.I)

def linkify_markdown(string: str, loc_map) -> str:
def linkify_type(string: str):
splitstr = re.split(r'([\s\[\]\(\)\{\}])', string)
tks = map(lambda s: linkify(s, loc_map), splitstr)
return "".join(tks)

def backref_title(filename: str):
parts = filename.split('/')
# drop .html
parts[-1] = parts[-1].split('.')[0]
return f'{current_project}: {".".join(parts)}'

def note_backref(key: str) -> str:
num_notes[current_filename] += 1
backref_id = f'noteref{num_notes[current_filename]}'
if current_project and current_project != 'test':
global_notes[key]['backrefs'].append(
(current_filename, backref_id, backref_title(current_filename))
)
return backref_id
def bib_backref(key: str) -> str:
num_backrefs[current_filename] += 1
backref_id = f'backref{num_backrefs[current_filename]}'
if current_project and current_project != 'test':
bib.entries[key].fields['backrefs'].append(
(current_filename, backref_id, backref_title(current_filename))
)
return backref_id

def linkify_note(body: str, note: str) -> str:
if note in global_notes:
return f'<a id="{note_backref(note)}" href="{site_root}notes.html#{note}">{body}</a>'
return body
def linkify_named_ref(body: str, name: str, key: str) -> str:
if key in bib.entries:
alpha_label = bib.entries[key].fields["alpha_label"]
return f'<a id="{bib_backref(key)}" href="{site_root}references.html#{alpha_label}">{name}</a>'
return body
def linkify_standalone_ref(body: str, key: str) -> str:
if key in bib.entries:
alpha_label = bib.entries[key].fields["alpha_label"]
return f'<a id="{bib_backref(key)}" href="{site_root}references.html#{alpha_label}">[{alpha_label}]</a>'
return body

# notes
string = re.sub(note_pattern,
lambda p: linkify_note(p.group(0), p.group(1)), string)
# inline declaration names
string = re.sub(r'<code>([^<]+)</code>',
lambda p: '<code>{}</code>'.format(linkify_type(p.group(1))), string)
lambda p: f'<code>{linkify_type(p.group(1))}</code>', string)
# declaration names in highlighted Lean code snippets
string = re.sub(r'<span class="n">([^<]+)</span>',
lambda p: '<span class="n">{}</span>'.format(linkify_type(p.group(1))), string)
lambda p: f'<span class="n">{linkify_type(p.group(1))}</span>', string)
# references (don't match if there are illegal characters for a BibTeX key,
# cf. https://tex.stackexchange.com/a/408548)
string = re.sub(r'\[([^\]]+)\]\s*\[([^{ },~#%\\]+)\]',
lambda p: linkify_named_ref(p.group(0), p.group(1), p.group(2)), string)
string = re.sub(r'\[([^{ },~#%\\]+)\]',
lambda p: linkify_standalone_ref(p.group(0), p.group(1)), string)
return string

def plaintext_summary(markdown, max_chars = 200):
Expand Down Expand Up @@ -423,25 +536,39 @@ def setup_jinja_globals(file_map, loc_map, instances):
env.filters['convert_markdown'] = lambda x: linkify_markdown(convert_markdown(x), loc_map) # TODO: this is probably very broken
env.filters['link_to_decl'] = lambda x: link_to_decl(x, loc_map)
env.filters['plaintext_summary'] = lambda x: plaintext_summary(x)
env.filters['tex'] = lambda x: clean_tex(x)

# stores the full filename of the markdown being rendered
current_filename: Optional[str] = None
# stores the project of the file, e.g. "mathlib", "core", etc.
current_project: Optional[str] = None
global_notes = {}
def write_html_files(partition, loc_map, notes, mod_docs, instances, tactic_docs):
global current_filename, current_project
for note_name, note_markdown in notes:
global_notes[note_name] = {
'md': note_markdown,
'backrefs': [],
}

with open_outfile('index.html') as out:
current_filename = 'index.html'
current_project = None
out.write(env.get_template('index.j2').render(
active_path=''))

with open_outfile('404.html') as out:
current_filename = '404.html'
current_project = None
out.write(env.get_template('404.j2').render(
active_path=''))

with open_outfile('notes.html') as out:
out.write(env.get_template('notes.j2').render(
active_path='',
notes = sorted(notes, key = lambda n: n[0])))

kinds = [('tactic', 'tactics'), ('command', 'commands'), ('hole_command', 'hole_commands'), ('attribute', 'attributes')]
current_project = 'docs'
for (kind, filename) in kinds:
entries = [e for e in tactic_docs if e['category'] == kind]
with open_outfile(filename + '.html') as out:
current_filename = filename + '.html'
out.write(env.get_template(filename + '.j2').render(
active_path='',
entries = sorted(entries, key = lambda n: n['name']),
Expand All @@ -450,24 +577,49 @@ def write_html_files(partition, loc_map, notes, mod_docs, instances, tactic_docs
for filename, decls in partition.items():
md = mod_docs.get(filename, [])
with open_outfile(html_root + filename.url) as out:
current_project = filename.project
current_filename = filename.url
out.write(env.get_template('module.j2').render(
active_path = filename.url,
filename = filename,
items = sorted(md + decls, key = lambda d: d['line']),
decl_names = sorted(d['name'] for d in decls),
))

current_project = 'extra'
for (filename, displayname, source, _) in extra_doc_files:
current_filename = filename + '.html'
write_pure_md_file(local_lean_root + source, filename + '.html', displayname)

current_project = 'test'
for (filename, displayname, source) in test_doc_files:
current_filename = filename + '.html'
write_pure_md_file(source, filename + '.html', displayname)

# generate notes.html and references.html last
# so that we can add backrefs
with open_outfile('notes.html') as out:
current_project = 'docs'
current_filename = 'notes.html'
out.write(env.get_template('notes.j2').render(
active_path='',
notes = sorted(global_notes.items(), key = lambda n: n[0])))

with open_outfile('references.html') as out:
current_project = 'docs'
current_filename = 'references.html'
out.write(env.get_template('references.j2').render(
active_path='',
entries = sorted(bib.entries.items(), key = lambda e: e[1].fields['alpha_label'])))

current_project = None
current_filename = None

def write_site_map(partition):
with open_outfile('sitemap.txt') as out:
for filename in partition:
out.write(site_root + filename.url + '\n')
for n in ['index', 'tactics', 'commands', 'hole_commands', 'notes']:
for n in ['index', 'tactics', 'commands', 'hole_commands', 'notes', 'references']:
out.write(site_root + n + '.html\n')
for (filename, _, _, _) in extra_doc_files:
out.write(site_root + filename + '.html\n')
Expand Down Expand Up @@ -505,8 +657,8 @@ def cp(a, b):
cp('nav.js', path+'nav.js')
cp('searchWorker.js', path+'searchWorker.js')

def copy_yaml_files(path):
for fn in ['100.yaml', 'undergrad.yaml', 'overview.yaml']:
def copy_yaml_bib_files(path):
for fn in ['100.yaml', 'undergrad.yaml', 'overview.yaml', 'references.bib']:
shutil.copyfile(f'{local_lean_root}docs/{fn}', path+fn)

def copy_static_files(path):
Expand Down Expand Up @@ -551,7 +703,7 @@ def main():
write_html_files(file_map, loc_map, notes, mod_docs, instances, tactic_docs)
write_redirects(loc_map, file_map)
copy_css(html_root, use_symlinks=cl_args.l)
copy_yaml_files(html_root)
copy_yaml_bib_files(html_root)
copy_static_files(html_root)
write_export_db(mk_export_db(loc_map, file_map))
write_site_map(file_map)
Expand Down
5 changes: 5 additions & 0 deletions requirements.txt
Expand Up @@ -4,3 +4,8 @@ mathlibtools
pygments >= 2.7.1
jinja2
networkx
pybtex>=0.22.2
latexcodec>=2.0.0
PyYAML>=5.3.1
six>=1.15.0
pylatexenc>=2.4