Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DOC: Implementing redirect system, and adding user_guide redirects #24715

Merged
merged 7 commits into from
Jan 23, 2019
74 changes: 73 additions & 1 deletion doc/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,19 @@
import sys
import os
import shutil
import csv
import subprocess
import argparse
import webbrowser
import docutils
import docutils.parsers.rst


# Directory that contains this script; the other paths are built from it.
DOC_PATH = os.path.dirname(os.path.abspath(__file__))
# `source` and `build` subdirectories of DOC_PATH.
SOURCE_PATH = os.path.join(DOC_PATH, 'source')
BUILD_PATH = os.path.join(DOC_PATH, 'build')
# NOTE(review): presumably subdirectories handled by the build/clean steps;
# their use is not visible in this hunk -- confirm against the rest of the file.
BUILD_DIRS = ['doctrees', 'html', 'latex', 'plots', '_static', '_templates']
# csv file with one `<old_path>,<new_path>` row per redirect.
REDIRECTS_FILE = os.path.join(DOC_PATH, 'redirects.csv')
# Prefix prepended to `<new_path>.html` to build each redirect target URL.
BASE_REDIRECT_URL = 'https://pandas.pydata.org/pandas-docs/stable/'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be hard-coded?
I think ideally it would use the same base URL as the version of the docs the redirect is coming from (we might move pages again later, so the target will not always be available on stable).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the current redirect system uses a relative link in the href, which seems to work OK.



class DocBuilder:
Expand Down Expand Up @@ -139,6 +143,72 @@ def _open_browser(self, single_doc_html):
single_doc_html)
webbrowser.open(url, new=2)

def _get_page_title(self, page):
    """
    Return the title of the rst document `page`.

    `page` is a path relative to SOURCE_PATH, without the ``.rst``
    extension. The file is parsed with docutils, and the text of the
    first title node of the first top-level section is returned.

    Raises StopIteration if the document has no top-level section, or
    if that section has no title.
    """
    rst_path = os.path.join(SOURCE_PATH, '{}.rst'.format(page))

    settings = docutils.frontend.OptionParser(
        components=(docutils.parsers.rst.Parser,)).get_default_values()
    document = docutils.utils.new_document('<doc>', settings)

    with open(rst_path) as rst_fd:
        rst_source = rst_fd.read()

    rst_parser = docutils.parsers.rst.Parser()
    # send the parser's reports to the null device, so parsing the rst
    # does not print any warning
    with open(os.devnull, 'a') as null_fd:
        document.reporter.stream = null_fd
        rst_parser.parse(rst_source, document)

    def first_child(parent, node_type):
        # first direct child of `parent` that is an instance of `node_type`
        return next(child for child in parent.children
                    if isinstance(child, node_type))

    top_section = first_child(document, docutils.nodes.section)
    return first_child(top_section, docutils.nodes.title).astext()

def _add_redirects(self):
    """
    Create in the build directory an html file with a redirect,
    for every row in REDIRECTS_FILE.

    Every row of the csv that is not empty and does not start with `#`
    is read as `<old_path>,<new_path>`. For each one, the file
    `<BUILD_PATH>/html/<old_path>.html` is written, containing a meta
    refresh and a link to `<BASE_REDIRECT_URL><new_path>.html`. The
    link text is the title of the new page when it can be extracted
    from its rst source, and 'this page' otherwise.

    The directory of the redirect page is created if it does not exist.
    """
    template = '''
<html>
<head>
<meta http-equiv="refresh" content="0;URL={url}"/>
</head>
<body>
<p>
The page has been moved to <a href="{url}">{title}</a>
</p>
</body>
</html>
'''
    with open(REDIRECTS_FILE) as mapping_fd:
        reader = csv.reader(mapping_fd)
        for row in reader:
            # skip blank lines and comment lines of the csv
            if not row or row[0].strip().startswith('#'):
                continue

            path = os.path.join(BUILD_PATH,
                                'html',
                                *row[0].split('/')) + '.html'

            # the old path can be nested in a directory that is not
            # generated by the build anymore
            redirect_dir = os.path.dirname(path)
            if not os.path.isdir(redirect_dir):
                os.makedirs(redirect_dir)

            try:
                title = self._get_page_title(row[1])
            except Exception:
                # the file can be an ipynb and not an rst, or docutils
                # may not be able to read the rst because it has some
                # sphinx specific stuff
                title = 'this page'

            # the csv with the mapping is opened once, before the loop;
            # this is a different file in every iteration (the old page
            # that redirects to the new one), so it is opened here
            with open(path, 'w') as redirects_fd:
                redirects_fd.write(template.format(
                    url='{}{}.html'.format(BASE_REDIRECT_URL, row[1]),
                    title=title))

def html(self):
"""
Build HTML documentation.
Expand All @@ -150,6 +220,8 @@ def html(self):

if self.single_doc_html is not None:
self._open_browser(self.single_doc_html)
else:
self._add_redirects()
return ret_code

def latex(self, force=False):
Expand Down
23 changes: 23 additions & 0 deletions doc/redirects.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# This file should contain all the redirects in the documentation
# in the format `<old_path>,<new_path>`

# user guide
advanced,user_guide/advanced
categorical,user_guide/categorical
computation,user_guide/computation
enhancingperf,user_guide/enhancingperf
gotchas,user_guide/gotchas
groupby,user_guide/groupby
indexing,user_guide/indexing
integer_na,user_guide/integer_na
io,user_guide/io
merging,user_guide/merging
missing_data,user_guide/missing_data
options,user_guide/options
reshaping,user_guide/reshaping
sparse,user_guide/sparse
style,user_guide/style
text,user_guide/text
timedeltas,user_guide/timedeltas
timeseries,user_guide/timeseries
visualization,user_guide/visualization