/
metadata.py
218 lines (191 loc) · 8.2 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
"""
Tools for manuscript metadata processing including thumbnail detection and processing.
"""
import functools
import logging
import pathlib
import subprocess
from typing import Optional
from urllib.parse import urljoin
def get_header_includes(variables: dict) -> str:
    """
    Render `header-includes-template.html` using information from `variables`.

    The template file is read from the directory that contains this module and
    rendered by `template_with_jinja2` with `variables` as its input.

    Returns the rendered text. If reading or rendering the template raises any
    exception, the exception is logged (with traceback) and an empty string
    is returned instead.
    """
    from .util import template_with_jinja2

    path = pathlib.Path(__file__).parent.joinpath("header-includes-template.html")
    try:
        # utf-8-sig tolerates a leading byte order mark in the template file
        template = path.read_text(encoding="utf-8-sig")
        return template_with_jinja2(template, variables)
    except Exception:
        # Deliberate best effort: a broken template should not abort the caller.
        logging.exception("Error generating header-includes.")
        return ""
def get_thumbnail_url(thumbnail=None):
    """
    Resolve a user-specified `thumbnail` (a path, a URL, or None) to a URL.

    - If `thumbnail` is an HTTP(S) URL, it is returned unmodified.
    - If `thumbnail` is empty or None, `_find_thumbnail_path` is used to look
      for a `thumbnail.png` in the git repository this function is executed
      within, and the outcome of that search is logged at debug level.
    - A local path, whether provided or detected, should be relative to the
      root directory of its git repository. It is handed to
      `_thumbnail_path_to_url` for conversion to a GitHub raw URL.

    Returns the URL, or None when no thumbnail URL could be determined.
    """
    from manubot.util import is_http_url

    # Explicit URLs need no processing at all.
    if thumbnail and is_http_url(thumbnail):
        logging.debug("provided thumbnail is a URL. Pass it through.")
        return thumbnail

    if not thumbnail:
        # Nothing was provided: fall back to searching the repository.
        thumbnail = _find_thumbnail_path()
        if thumbnail:
            outcome = f"Thumbnail detected at {thumbnail!r}"
        else:
            outcome = "No local thumbnail detected"
        logging.debug(
            "get_thumbnail_url: thumbnail location not explicitly provided. "
            + outcome
        )

    return _thumbnail_path_to_url(thumbnail)
def _find_thumbnail_path():
    """
    Search the enclosing git repository for a file named `thumbnail.png`.

    The repository root comes from `git_repository_root()`. When several
    matches exist, the one with the fewest parent directories wins, with ties
    broken by path ordering.

    Returns the chosen path as a POSIX-style string relative to the repository
    root, or `None` if the repository root is unknown or no file matches.
    """
    repo_root = git_repository_root()
    if not repo_root:
        return None
    candidates = [
        match.relative_to(repo_root) for match in repo_root.glob("**/thumbnail.png")
    ]
    if not candidates:
        return None
    # Prefer the shallowest file; the path itself makes the ordering total.
    shallowest = min(candidates, key=lambda rel: (len(rel.parents), rel))
    return shallowest.as_posix()
def _thumbnail_path_to_url(path):
    """
    Build an absolute GitHub raw URL for a local thumbnail `path` (string).

    The repository slug and commit are taken from the `repo_slug` and
    `triggering_commit` entries of `get_continuous_integration_parameters()`.

    Returns None when `path` is empty, or when the continuous integration
    parameters are unavailable or lack one of those entries.
    """
    if not path:
        return None
    from .ci import get_continuous_integration_parameters

    ci_info = get_continuous_integration_parameters()
    try:
        # TypeError covers ci_info not being subscriptable (e.g. None);
        # KeyError covers a missing entry.
        repo_slug = ci_info["repo_slug"]
        commit = ci_info["triggering_commit"]
        return f"https://github.com/{repo_slug}/raw/{commit}/{path}"
    except (TypeError, KeyError):
        return None
@functools.lru_cache()
def git_repository_root():
    """
    Return the repository root directory as a `pathlib.Path`,
    or `None` if it cannot be determined.

    Two `git rev-parse` queries are tried in order: first the superproject
    working tree, then the top-level directory. The first query that produces
    non-empty output decides the result. The result is cached, so git is only
    invoked on the first call.
    """
    for flag in ("--show-superproject-working-tree", "--show-toplevel"):
        command = ["git", "rev-parse", flag]
        try:
            stdout = subprocess.check_output(command, universal_newlines=True)
        except (subprocess.CalledProcessError, OSError):
            # git failed or could not be executed; try the next query
            continue
        root = stdout.rstrip("\r\n")
        if root:
            return pathlib.Path(root)
    return None
def get_manuscript_urls(html_url: Optional[str] = None) -> dict:
    """
    Return a dictionary with URLs for a manuscript.
    An example for a manuscript where all URLs get set, inferred from continuous integration environment variables, is:

    ```python
    {
        "html_url": "https://manubot.github.io/rootstock/",
        "pdf_url": "https://manubot.github.io/rootstock/manuscript.pdf",
        "html_url_versioned": "https://manubot.github.io/rootstock/v/7cf9071212ce33116ad09cf2237a370b180a3c35/",
        "pdf_url_versioned": "https://manubot.github.io/rootstock/v/7cf9071212ce33116ad09cf2237a370b180a3c35/manuscript.pdf",
    }
    ```

    Provide `html_url` to set a custom domain.
    If `html_url="https://git.dhimmel.com/bitcoin-whitepaper/"`,
    the return dictionary will be like:

    ```python
    {
        "html_url": "https://git.dhimmel.com/bitcoin-whitepaper/",
        "pdf_url": "https://git.dhimmel.com/bitcoin-whitepaper/manuscript.pdf",
        "html_url_versioned": "https://git.dhimmel.com/bitcoin-whitepaper/v/cb1f2c12eec8b56db9ef5f641ec805e2d449d319/",
        "pdf_url_versioned": "https://git.dhimmel.com/bitcoin-whitepaper/v/cb1f2c12eec8b56db9ef5f641ec805e2d449d319/manuscript.pdf",
    }
    ```

    Note the trailing `/` in `html_url`, which is required for proper functioning.

    When continuous integration parameters are unavailable, the versioned URLs
    are omitted, and the dictionary is empty if `html_url` was not provided
    either.

    When all URLs are set, `html_url` is probed with an HTTP HEAD request.
    The probe is diagnostic only: an unreachable URL, a non-success status,
    or redirects are logged but never change the returned dictionary.
    """
    import requests

    from .ci import get_continuous_integration_parameters

    urls = {}
    ci_params = get_continuous_integration_parameters()
    if html_url is None:
        if not ci_params:
            return urls
        html_url = "https://{repo_owner}.github.io/{repo_name}/".format(**ci_params)
    urls["html_url"] = html_url
    urls["pdf_url"] = urljoin(html_url, "manuscript.pdf")
    if not ci_params:
        return urls
    urls["html_url_versioned"] = urljoin(html_url, "v/{commit}/".format(**ci_params))
    urls["pdf_url_versioned"] = urljoin(urls["html_url_versioned"], "manuscript.pdf")

    # Requests never times out unless told to; bound the probe so that a
    # stalled server cannot hang the caller, and treat network errors as a
    # warning because this check only exists to produce log messages.
    try:
        response = requests.head(html_url, allow_redirects=True, timeout=10)
    except requests.exceptions.RequestException as error:
        logging.warning(
            "html_url is not web accessible. "
            f"Request to {html_url} failed with {error!r}. "
            "Ignore this warning if the manuscript has not yet been deployed for the first time. "
        )
        return urls
    if not response.ok:
        logging.warning(
            "html_url is not web accessible. "
            f"{html_url} returned status code {response.status_code}. "
            "Ignore this warning if the manuscript has not yet been deployed for the first time. "
        )
    if response.history:
        logging.info(
            "html_url includes redirects. In order of oldest to most recent:\n"
            + "\n".join(x.url for x in response.history + [response])
        )
    return urls
def get_software_versions() -> dict:
    """
    Return a dictionary of versions for software components:

    - manubot_version: the version of the manubot python package,
      read from `manubot.__version__`.
    - rootstock_commit: the version of the rootstock repository, as a commit
      hash, included in the manuscript repository. This is the value returned
      by `get_rootstock_commit()`, which is None when detection fails.
    """
    from manubot import __version__ as manubot_version

    versions = {"manubot_version": manubot_version}
    versions["rootstock_commit"] = get_rootstock_commit()
    return versions
def get_rootstock_commit() -> Optional[str]:
    """
    Return the most recent commit in common between the git repository
    this function is run within (usually a Manubot manuscript repository)
    and the `master` branch of the `rootstock` remote.

    Returns the commit hash as a string, or None if any git command fails
    or git cannot be executed at all (a warning is logged in both cases).

    WARNING: This function may modify the git repository it is executed within:

    - if the repository has not set the `rootstock` remote, it is set to
      point to the default Rootstock repository of <https://github.com/manubot/rootstock>.
    - fetches the latest commits in the `master` branch of the `rootstock` remote
    """
    from manubot.util import shlex_join

    rootstock_remote = "https://github.com/manubot/rootstock.git"
    try:
        # add rootstock remote if remote is not already set;
        # a nonzero exit code here is expected when the remote already exists
        args = ["git", "remote", "add", "rootstock", rootstock_remote]
        process = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if process.returncode == 0:
            logging.info(
                "get_rootstock_commit added a `rootstock` remote to the git repository."
            )
        # find most recent common ancestor commit
        args = ["git", "fetch", "rootstock", "master"]
        subprocess.check_output(args, stderr=subprocess.PIPE, universal_newlines=True)
        args = ["git", "merge-base", "master", "rootstock/master"]
        output = subprocess.check_output(
            args, stderr=subprocess.PIPE, universal_newlines=True
        )
    except subprocess.CalledProcessError as error:
        logging.warning(
            f"get_rootstock_commit: {shlex_join(error.cmd)!r} returned exit code {error.returncode} "
            f"with the following stdout:\n{error.stdout}\n"
            f"And the following stderr:\n{error.stderr}"
        )
        return None
    except OSError as error:
        # e.g. the git executable is missing; report failure as None instead
        # of raising, matching the handling in git_repository_root
        logging.warning(
            f"get_rootstock_commit: unable to execute {shlex_join(args)!r}: {error}"
        )
        return None
    rootstock_commit = output.strip()
    return rootstock_commit