-
Notifications
You must be signed in to change notification settings - Fork 384
/
header.py
275 lines (226 loc) · 8.62 KB
/
header.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
"""Parse header of text notebooks
"""
import re
import nbformat
import yaml
from nbformat.v4.nbbase import new_raw_cell
from yaml.representer import SafeRepresenter
from .languages import _SCRIPT_EXTENSIONS, comment_lines
from .metadata_filter import _DEFAULT_NOTEBOOK_METADATA, filter_metadata
from .pep8 import pep8_lines_between_cells
from .version import __version__
# Let the YAML safe dumper serialize nbformat.NotebookNode objects
# with the same representer it uses for plain dictionaries
SafeRepresenter.add_representer(nbformat.NotebookNode, SafeRepresenter.represent_dict)

# A YAML header delimiter: three dashes, optionally followed by whitespace
_HEADER_RE = re.compile(r"^---\s*$")
# An empty or whitespace-only line
_BLANK_RE = re.compile(r"^\s*$")
# The non-indented "jupyter:" key that opens the notebook metadata section
_JUPYTER_RE = re.compile(r"^jupyter\s*:\s*$")
# A line that starts with a whitespace character (i.e. an indented line)
_LEFTSPACE_RE = re.compile(r"^\s")
# Encoding declaration; it is appended to the comment marker of the script
_UTF8_HEADER = " -*- coding: utf-8 -*-"

# Value returned by insert_or_test_version_number().
# Change this to False in tests
INSERT_AND_CHECK_VERSION_NUMBER = True
def insert_or_test_version_number():
    """Tell whether the format name and version number should be written to
    (and checked in) text representations.

    This simply reports the module-level INSERT_AND_CHECK_VERSION_NUMBER
    flag, which is meant to be switched off in tests."""
    return INSERT_AND_CHECK_VERSION_NUMBER
def uncomment_line(line, prefix, suffix=""):
    """Remove the comment prefix (and suffix, if any) from a line.

    The prefix is removed together with the single space that follows it
    when there is one. Symmetrically, the suffix is removed together with
    the single space that precedes it, so that "(* key: value *)" becomes
    "key: value" rather than "key: value ".

    :param line: the line to uncomment
    :param prefix: the comment prefix, e.g. "#" (nothing is removed if empty)
    :param suffix: the comment suffix, e.g. "*)" (nothing is removed if empty)
    :return: the line without its comment markers; the line is returned
        unchanged where it does not start/end with the prefix/suffix
    """
    if prefix:
        if line.startswith(prefix + " "):
            line = line[len(prefix) + 1 :]
        elif line.startswith(prefix):
            line = line[len(prefix) :]

    if suffix:
        # Preferred form first: the space that separates the text from the
        # suffix goes away with the suffix. The suffix followed by a trailing
        # space, and the bare suffix, are still accepted as before.
        for ending in (" " + suffix, suffix + " ", suffix):
            if line.endswith(ending):
                line = line[: -len(ending)]
                break

    return line
def encoding_and_executable(notebook, metadata, ext):
    """Build the shebang and encoding lines that go at the very top of a
    script, if the target extension has a comment marker.

    The "executable" and "encoding" entries, when used, are removed from
    the "jupytext" section of the metadata. When no encoding is recorded,
    a utf-8 declaration is emitted as soon as one cell has non-ascii source.
    """
    top_lines = []
    comment = _SCRIPT_EXTENSIONS.get(ext, {}).get("comment")
    jupytext_metadata = metadata.get("jupytext", {})

    # Extensions without a comment marker get neither line
    if comment is None:
        return top_lines

    if "executable" in jupytext_metadata:
        top_lines.append("#!" + jupytext_metadata.pop("executable"))

    if "encoding" in jupytext_metadata:
        # The encoding line was recorded verbatim: emit it as is
        top_lines.append(jupytext_metadata.pop("encoding"))
        return top_lines

    for cell in notebook.cells:
        try:
            cell.source.encode("ascii")
        except (UnicodeEncodeError, UnicodeDecodeError):
            # One non-ascii cell is enough to require the declaration
            top_lines.append(comment + _UTF8_HEADER)
            break

    return top_lines
def insert_jupytext_info_and_filter_metadata(metadata, fmt, text_format):
    """Add the Jupytext text representation information to the notebook
    metadata (in place), then return the metadata filtered with either the
    filter found in the format or the default notebook metadata filter"""
    if insert_or_test_version_number():
        text_representation = {
            "extension": fmt["extension"],
            "format_name": text_format.format_name,
            "format_version": text_format.current_version_number,
            "jupytext_version": __version__,
        }
        metadata.setdefault("jupytext", {})["text_representation"] = text_representation

    # An empty jupytext section is not worth keeping
    jupytext_section_is_empty = "jupytext" in metadata and not metadata["jupytext"]
    if jupytext_section_is_empty:
        del metadata["jupytext"]

    return filter_metadata(
        metadata, fmt.get("notebook_metadata_filter"), _DEFAULT_NOTEBOOK_METADATA
    )
def metadata_and_cell_to_header(notebook, metadata, text_format, fmt):
    """
    Build the text header of a notebook.

    Returns a pair made of the commented header lines and the value of the
    "lines_to_next_cell" metadata of the header cell (None if there is none).
    When the first cell of the notebook is a raw cell delimited by "---"
    lines, its content goes to the header and the cell is removed from the
    notebook.
    """
    header_lines = []
    lines_to_next_cell = None
    root_level_metadata = {}

    if not fmt.get("root_level_metadata_as_raw_cell", True):
        # The root level metadata was stored in the jupytext section
        jupytext_section = metadata.get("jupytext", {})
        root_level_metadata = jupytext_section.pop("root_level_metadata", {})
    elif notebook.cells:
        first_cell = notebook.cells[0]
        if first_cell.cell_type == "raw":
            raw_lines = first_cell.source.strip("\n\t ").splitlines()
            is_yaml_block = (
                len(raw_lines) >= 2
                and _HEADER_RE.match(raw_lines[0])
                and _HEADER_RE.match(raw_lines[-1])
            )
            if is_yaml_block:
                # Keep what is between the two delimiters, drop the cell
                header_lines = raw_lines[1:-1]
                lines_to_next_cell = first_cell.metadata.get("lines_to_next_cell")
                notebook.cells = notebook.cells[1:]

    filtered_metadata = insert_jupytext_info_and_filter_metadata(
        metadata, fmt, text_format
    )
    if filtered_metadata:
        root_level_metadata["jupyter"] = filtered_metadata

    if root_level_metadata:
        dumped = yaml.safe_dump(root_level_metadata, default_flow_style=False)
        header_lines.extend(dumped.splitlines())

    if header_lines:
        header_lines = ["---"] + header_lines + ["---"]

        hide_in_html_comment = (
            fmt.get("hide_notebook_metadata", False)
            and text_format.format_name == "markdown"
        )
        if hide_in_html_comment:
            header_lines = ["<!--", ""] + header_lines + ["", "-->"]

    commented_header = comment_lines(
        header_lines, text_format.header_prefix, text_format.header_suffix
    )
    return commented_header, lines_to_next_cell
def recursive_update(target, update):
    """Update recursively a (nested) dictionary with the content of another.
    Inspired from https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth

    For each key of `update`:
    - a None value removes the key from `target` (nothing happens if the
      key is not there);
    - a dict value is merged recursively into the dict found in `target`
      under that key (a missing or non-dict value there is replaced with
      the merged dict);
    - any other value overwrites the one in `target`.

    :param target: the dictionary to update, modified in place
    :param update: the dictionary with the new values
    :return: the `target` dictionary
    """
    for key, value in update.items():
        if value is None:
            # Removing a key that is already absent is not an error
            target.pop(key, None)
        elif isinstance(value, dict):
            existing = target.get(key)
            if not isinstance(existing, dict):
                # Nothing to merge with (missing key, None, scalar...)
                existing = {}
            target[key] = recursive_update(existing, value)
        else:
            target[key] = value
    return target
def header_to_metadata_and_cell(
    lines, header_prefix, ext=None, root_level_metadata_as_raw_cell=True
):
    """
    Parse the header found at the top of a text notebook.

    The header is a block of lines, each starting with `header_prefix`,
    delimited by two "---" lines. Inside that block, the "jupyter:" section
    holds the notebook metadata, and the other lines are the root level
    metadata. The block may be preceded by a shebang line and/or an encoding
    declaration, and, when there is no comment prefix, it may be enclosed in
    an HTML comment.

    :param lines: the lines of the text notebook
    :param header_prefix: the comment prefix of header lines (may be empty)
    :param ext: file extension, passed to pep8_lines_between_cells
    :param root_level_metadata_as_raw_cell: if True, the root level metadata
        is returned as a raw cell; otherwise it is parsed as YAML and stored
        under metadata["jupytext"]["root_level_metadata"]
    :return: a tuple made of
        - the notebook metadata,
        - the lines of the jupyter section (an empty list if the header has
          none), or False if no complete header was found,
        - the raw cell with the root level metadata, or None,
        - the position of the next line to read in the text
    :raises ValueError: if the text declares an encoding other than utf-8
    """

    header = []
    jupyter = []
    in_jupyter = False
    in_html_div = False

    start = 0
    started = False
    ended = False
    metadata = {}
    # Keep i defined even when there is no line to iterate on
    i = -1

    # With the "#'" header prefix, plain comments still start with "#"
    comment = "#" if header_prefix == "#'" else header_prefix

    encoding_re = re.compile(
        r"^[ \t\f]*{}.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)".format(re.escape(comment))
    )

    # OCAML comments are closed with a suffix. This only depends on the
    # prefix, so it is computed once rather than on every line.
    header_suffix = "*)" if header_prefix == "(*" else ""

    for i, line in enumerate(lines):
        # A shebang is only accepted on the very first line
        if i == 0 and line.startswith("#!"):
            metadata.setdefault("jupytext", {})["executable"] = line[2:]
            start = i + 1
            continue

        # The encoding is looked for on the first line, or on the second one
        # if the first line was not already an encoding declaration
        if i == 0 or (i == 1 and not encoding_re.match(lines[0])):
            encoding = encoding_re.match(line)
            if encoding:
                if encoding.group(1) != "utf-8":
                    raise ValueError("Encodings other than utf-8 are not supported")
                metadata.setdefault("jupytext", {})["encoding"] = line
                start = i + 1
                continue

        if not line.startswith(header_prefix):
            break

        # Without a comment prefix, the header may sit in an HTML comment
        if not comment:
            if line.strip().startswith("<!--"):
                in_html_div = True
                continue

        if in_html_div:
            if ended:
                # The header is closed: stop at the end of the HTML comment
                if "-->" in line:
                    break
            if not started and not line.strip():
                # Blank lines between "<!--" and the opening "---"
                continue

        line = uncomment_line(line, header_prefix, header_suffix)

        if _HEADER_RE.match(line):
            if not started:
                started = True
                continue
            ended = True
            if in_html_div:
                # Keep reading until the closing "-->"
                continue
            break

        # The jupyter section starts at the "jupyter:" key and ends at the
        # next non-empty line that is not indented
        if _JUPYTER_RE.match(line):
            in_jupyter = True
        elif line and not _LEFTSPACE_RE.match(line):
            in_jupyter = False

        if in_jupyter:
            jupyter.append(line)
        else:
            header.append(line)

    if ended:
        if jupyter:
            extra_metadata = metadata
            # An empty "jupyter:" section is loaded as None by YAML. Fall back
            # to an empty dict so that the shebang/encoding collected above
            # can be merged in, and so that the metadata is never None.
            metadata = yaml.safe_load("\n".join(jupyter))["jupyter"] or {}
            recursive_update(metadata, extra_metadata)

        # A blank line right after the header belongs to the header
        lines_to_next_cell = 1
        if len(lines) > i + 1:
            line = uncomment_line(lines[i + 1], header_prefix)
            if not _BLANK_RE.match(line):
                lines_to_next_cell = 0
            else:
                i = i + 1
        else:
            lines_to_next_cell = 0

        if header:
            if root_level_metadata_as_raw_cell:
                # The number of blank lines is only recorded when it differs
                # from what pep8_lines_between_cells returns
                cell = new_raw_cell(
                    source="\n".join(["---"] + header + ["---"]),
                    metadata={}
                    if lines_to_next_cell
                    == pep8_lines_between_cells(["---"], lines[i + 1 :], ext)
                    else {"lines_to_next_cell": lines_to_next_cell},
                )
            else:
                cell = None
                root_level_metadata = yaml.safe_load("\n".join(header))
                metadata.setdefault("jupytext", {})[
                    "root_level_metadata"
                ] = root_level_metadata
        else:
            cell = None

        return metadata, jupyter, cell, i + 1

    # No complete header: only the shebang/encoding lines are consumed
    return metadata, False, None, start