/
contentsmanager.py
554 lines (479 loc) · 22.1 KB
/
contentsmanager.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
"""ContentsManager that allows to open Rmd, py, R and ipynb files as notebooks
"""
import itertools
import os
from collections import namedtuple
from datetime import datetime, timedelta
import nbformat
from tornado.web import HTTPError
# import notebook.transutils before notebook.services.contents.filemanager #75
try:
import notebook.transutils # noqa
except ImportError:
pass
import jupytext
from .config import (
JUPYTEXT_CONFIG_FILES,
JupytextConfiguration,
JupytextConfigurationError,
find_global_jupytext_configuration_file,
load_jupytext_configuration_file,
preferred_format,
prepare_notebook_for_save,
validate_jupytext_configuration_file,
)
from .formats import (
long_form_multiple_formats,
short_form_multiple_formats,
short_form_one_format,
)
from .kernels import set_kernelspec_from_language
from .paired_paths import (
InconsistentPath,
base_path,
find_base_path_and_format,
full_path,
paired_paths,
)
from .pairs import latest_inputs_and_outputs, read_pair, write_pair
def build_jupytext_contents_manager_class(base_contents_manager_class):
"""Derives a TextFileContentsManager class from the given base class"""
class JupytextContentsManager(base_contents_manager_class, JupytextConfiguration):
"""
A FileContentsManager Class that reads and stores notebooks to classical
Jupyter notebooks (.ipynb), R Markdown notebooks (.Rmd), Julia (.jl),
Python (.py) or R scripts (.R)
"""
def __init__(self, *args, **kwargs):
    """Set up the Jupytext state, then initialise the base contents manager."""
    # Configuration cache, useful when notebooks are listed in a given directory.
    # Note: the namedtuple *class* itself is stored here (it is not instantiated
    # in this method).
    self.cached_config = namedtuple(
        "cached_config", ["path", "timestamp", "config_file", "config"]
    )
    # Mapping: notebook path => (fmt, formats), where fmt is the current
    # format of that path and formats the paired formats.
    self.paired_notebooks = {}
    # Keep a handle on the parent implementation for later delegation
    parent = super(JupytextContentsManager, self)
    self.super = parent
    parent.__init__(*args, **kwargs)
def all_nb_extensions(self):
    """Return the extensions classified as notebooks, each with a leading dot"""
    dotted = []
    for candidate in self.notebook_extensions.split(","):
        if not candidate.startswith("."):
            candidate = "." + candidate
        dotted.append(candidate)
    return dotted
def drop_paired_notebook(self, path):
    """Forget the pairing of this notebook and of its paired representations"""
    entry = self.paired_notebooks.pop(path, None)
    if entry is None:
        # Not registered as a paired notebook: nothing to do
        return

    fmt, formats = entry
    for sibling_path, _ in paired_paths(path, fmt, formats):
        # The current path was popped above, so only the other
        # representations can still be registered
        if sibling_path in self.paired_notebooks:
            self.drop_paired_notebook(sibling_path)
def update_paired_notebooks(self, path, formats):
    """Register (or refresh) the pairing information for the given notebook"""
    if not formats:
        # No formats: the notebook is no longer paired
        self.drop_paired_notebook(path)
        return

    formats = long_form_multiple_formats(formats)
    _, current_fmt = find_base_path_and_format(path, formats)
    pair = paired_paths(path, current_fmt, formats)

    # Forget whatever was previously recorded for each representation
    for member_path, _ in pair:
        self.drop_paired_notebook(member_path)

    # A single format that carries at most the 'extension' key is not recorded
    only_one_format = len(formats) == 1
    if only_one_format and set(formats[0]).issubset({"extension"}):
        return

    short_formats = short_form_multiple_formats(formats)
    self.paired_notebooks.update(
        (member_path, (short_form_one_format(member_fmt), short_formats))
        for member_path, member_fmt in pair
    )
def create_prefix_dir(self, path, fmt):
    """Make sure the directory that will contain `path` exists (prefixed formats only)"""
    if "prefix" not in fmt or "/" not in path:
        return

    folder = self.get_parent_dir(path)
    if self.dir_exists(folder):
        return

    # Create the missing ancestors first, then the directory itself
    self.create_prefix_dir(folder, fmt)
    self.log.info("Creating directory %s", folder)
    self.super.save({"type": "directory"}, folder)
def save(self, model, path=""):
    """Save the file model and return the model with no content.

    Models whose type is not "notebook" are handed over unchanged to the
    base contents manager. For notebooks, the paired formats are computed
    with `prepare_notebook_for_save`, the list of paired notebooks is
    updated, and each representation is written through `write_pair`.

    An HTTPError raised while saving is propagated as is (like `get` and
    `rename_file` do), so that its status code is preserved. Any other
    exception is logged and turned into an HTTPError with status 500.
    """
    if model["type"] != "notebook":
        return self.super.save(model, path)

    path = path.strip("/")
    nbk = model["content"]
    try:
        config = self.get_config(path)
        jupytext_formats = prepare_notebook_for_save(nbk, config, path)
        self.update_paired_notebooks(path, jupytext_formats)

        def save_one_file(path, fmt):
            """Write one representation of the notebook at the given path"""
            if "format_name" in fmt and fmt["extension"] not in [
                ".md",
                ".markdown",
                ".Rmd",
            ]:
                self.log.info(
                    "Saving %s in format %s:%s",
                    os.path.basename(path),
                    fmt["extension"][1:],
                    fmt["format_name"],
                )
            else:
                self.log.info("Saving %s", os.path.basename(path))

            self.create_prefix_dir(path, fmt)
            if fmt["extension"] == ".ipynb":
                # The ipynb representation is saved by the base contents manager
                return self.super.save(model, path)

            if (
                model["content"]["metadata"]
                .get("jupytext", {})
                .get("notebook_metadata_filter")
                == "-all"
            ):
                self.log.warning(
                    "Stripping metadata from {} as 'Include Metadata' is off "
                    "(toggle 'Include Metadata' in the Jupytext Menu or Commands if desired)".format(
                        path
                    )
                )

            # Text representations are saved as plain text files
            text_model = dict(
                type="file",
                format="text",
                content=jupytext.writes(
                    nbformat.from_dict(model["content"]), fmt=fmt
                ),
            )

            return self.super.save(text_model, path)

        return write_pair(path, jupytext_formats, save_one_file)

    except HTTPError:
        # Keep the original status code and message instead of
        # re-wrapping the error as a generic 500
        raise
    except Exception as e:
        self.log.error(
            u"Error while saving file: %s %s", path, e, exc_info=True
        )
        raise HTTPError(
            500, u"Unexpected error while saving file: %s %s" % (path, e)
        )
def get(
    self,
    path,
    content=True,
    type=None,
    format=None,
    load_alternative_format=True,
):
    """Take a path for an entity and return its model.

    Paths that are not handled as notebooks (missing file, directory, a
    `type` other than "notebook", or - when no type is given - an extension
    that is not in `all_nb_extensions()`) are delegated to the base
    contents manager.

    Otherwise the file is read as a notebook: .ipynb files through the base
    contents manager, other extensions as text parsed with `jupytext.reads`
    when `content` is requested.

    - If `load_alternative_format` is false, the model is returned right
      after that first read.
    - If `content` is false, only `last_modified` is adjusted to the most
      recent timestamp among the paired representations that exist.
    - Otherwise the inputs and outputs of the paired files are combined
      with `read_pair`.

    Raises HTTPError 400 when the file holding the outputs is more recent
    than the file holding the inputs by more than
    `config.outdated_text_notebook_margin` seconds, and HTTPError 500 when
    a file cannot be read.
    """
    path = path.strip("/")
    ext = os.path.splitext(path)[1]

    # Not a notebook?
    if (
        not self.file_exists(path)
        or self.dir_exists(path)
        or (type != "notebook" if type else ext not in self.all_nb_extensions())
    ):
        return self.super.get(path, content, type, format)

    # The configuration cache is only used when no content is requested
    config = self.get_config(path, use_cache=content is False)
    fmt = preferred_format(ext, config.preferred_jupytext_formats_read)
    if ext == ".ipynb":
        model = self.super.get(path, content, type="notebook", format=format)
    else:
        # Text notebooks are read as plain files, then exposed as notebooks
        model = self.super.get(path, content, type="file", format=format)
        model["type"] = "notebook"
        config.set_default_format_options(fmt, read=True)
        if content:
            # We may need to update these keys, inherited from text files formats
            # Cf. https://github.com/mwouts/jupytext/issues/659
            model["format"] = "json"
            model["mimetype"] = None
            try:
                model["content"] = jupytext.reads(model["content"], fmt=fmt)
            except Exception as err:
                self.log.error(
                    u"Error while reading file: %s %s", path, err, exc_info=True
                )
                raise HTTPError(500, str(err))

    if not load_alternative_format:
        return model

    if not content:
        # Modification time of a paired notebook, in this context - Jupyter is checking timestamp
        # before saving - is the most recent among all representations #118
        if path not in self.paired_notebooks:
            return model

        fmt, formats = self.paired_notebooks.get(path)
        for alt_path, _ in paired_paths(path, fmt, formats):
            if alt_path != path and self.exists(alt_path):
                alt_model = self.super.get(alt_path, content=False)
                if alt_model["last_modified"] > model["last_modified"]:
                    model["last_modified"] = alt_model["last_modified"]

        return model

    # We will now read a second file if this is a paired notebooks.
    nbk = model["content"]
    # Formats from the notebook metadata take precedence over the configured defaults
    formats = nbk.metadata.get("jupytext", {}).get(
        "formats"
    ) or config.default_formats(path)
    formats = long_form_multiple_formats(
        formats, nbk.metadata, auto_ext_requires_language_info=False
    )

    # Compute paired notebooks from formats
    alt_paths = [(path, fmt)]
    if formats:
        try:
            _, fmt = find_base_path_and_format(path, formats)
            alt_paths = paired_paths(path, fmt, formats)
            self.update_paired_notebooks(path, formats)
        except InconsistentPath as err:
            # The error is logged only; alt_paths keeps the value it had
            # before the failing statement
            self.log.error(
                u"Unable to read paired notebook: %s %s",
                path,
                err,
                exc_info=True,
            )
    else:
        # No formats found: fall back on a pairing recorded earlier, if any
        if path in self.paired_notebooks:
            fmt, formats = self.paired_notebooks.get(path)
            alt_paths = paired_paths(path, fmt, formats)
            formats = long_form_multiple_formats(formats)

    if len(alt_paths) > 1 and ext == ".ipynb":
        # Apply default options (like saving and reloading would do)
        jupytext_metadata = model["content"]["metadata"].get("jupytext", {})
        config.set_default_format_options(jupytext_metadata, read=True)
        if jupytext_metadata:
            model["content"]["metadata"]["jupytext"] = jupytext_metadata

    def get_timestamp(alt_path):
        """Return the last modification time of alt_path, or None if it does not exist"""
        if not self.exists(alt_path):
            return None
        if alt_path == path:
            # Already known from the model read above
            return model["last_modified"]
        return self.super.get(alt_path, content=False)["last_modified"]

    def read_one_file(alt_path, alt_fmt):
        """Return the notebook stored at alt_path (the current one is not read twice)"""
        if alt_path == path:
            return model["content"]
        if alt_path.endswith(".ipynb"):
            self.log.info(u"Reading OUTPUTS from {}".format(alt_path))
            return self.super.get(
                alt_path, content=True, type="notebook", format=format
            )["content"]

        self.log.info(u"Reading SOURCE from {}".format(alt_path))
        config.set_default_format_options(alt_fmt, read=True)
        text = self.super.get(
            alt_path, content=True, type="file", format=format
        )["content"]
        return jupytext.reads(text, fmt=alt_fmt)

    inputs, outputs = latest_inputs_and_outputs(
        path, fmt, formats, get_timestamp, contents_manager_mode=True
    )

    # Before we combine the two files, we make sure we're not overwriting ipynb cells
    # with an outdated text file
    try:
        if (
            outputs.timestamp
            and outputs.timestamp
            > inputs.timestamp
            + timedelta(seconds=config.outdated_text_notebook_margin)
        ):
            raise HTTPError(
                400,
                """{out} (last modified {out_last})
seems more recent than {src} (last modified {src_last})
Please either:
- open {src} in a text editor, make sure it is up to date, and save it,
- or delete {src} if not up to date,
- or increase check margin by adding, say,
outdated_text_notebook_margin = 5 # default is 1 (second)
to your jupytext.toml file
""".format(
                    src=inputs.path,
                    src_last=inputs.timestamp,
                    out=outputs.path,
                    out_last=outputs.timestamp,
                ),
            )
    except OverflowError:
        # NOTE(review): presumably raised by the timestamp + margin addition
        # when the margin is very large; the check is then skipped
        pass

    try:
        model["content"] = read_pair(inputs, outputs, read_one_file)
    except HTTPError:
        # HTTP errors are passed on unchanged
        raise
    except Exception as err:
        self.log.error(
            u"Error while reading file: %s %s", path, err, exc_info=True
        )
        raise HTTPError(500, str(err))

    if not outputs.timestamp:
        # No timestamp for the outputs: set the kernelspec from the notebook language
        set_kernelspec_from_language(model["content"])

    # Trust code cells when they have no output
    for cell in model["content"].cells:
        if (
            cell.cell_type == "code"
            and not cell.outputs
            and cell.metadata.get("trusted") is False
        ):
            cell.metadata["trusted"] = True

    return model
def new_untitled(self, path="", type="", ext=""):
    """Create a new untitled file, notebook or directory in path

    The base implementation is only used for non-notebook entities, since it
    ignores the 'ext' argument when type=="notebook".
    See https://github.com/mwouts/jupytext/issues/443
    """
    wants_notebook = type == "notebook" or ext == ".ipynb"
    if not wants_notebook:
        return self.super.new_untitled(path, type, ext)

    # 'ext' may be followed by a format name, as in ".py:percent"
    ext, _, format_name = (ext or ".ipynb").partition(":")

    path = path.strip("/")
    if not self.dir_exists(path):
        raise HTTPError(404, "No such directory: %s" % path)

    name = self.increment_notebook_filename(self.untitled_notebook + ext, path)
    path = u"{0}/{1}".format(path, name)

    model = {"type": "notebook"}
    if format_name:
        # Record the requested format in the metadata of the new notebook
        jupytext_metadata = {"formats": ext + ":" + format_name}
        model["format"] = "json"
        model["content"] = nbformat.v4.nbbase.new_notebook(
            metadata={"jupytext": jupytext_metadata}
        )

    return self.new(model, path)
def increment_notebook_filename(self, filename, path=""):
    """Increment a notebook filename until it is unique, regardless of extension

    The candidate name is considered taken as soon as a file with the same
    base name and any of the notebook extensions exists in `path`. The
    extensions are taken from `all_nb_extensions()`, so that extensions
    configured without a leading dot are checked as ".ext" rather than
    being appended directly to the base name.

    Returns the first free name, i.e. `filename` itself or `filename` with
    an integer (1, 2, ...) inserted before its suffix.
    """
    # Extract the full suffix from the filename (e.g. .tar.gz)
    path = path.strip("/")
    basename, dot, ext = filename.partition(".")
    ext = dot + ext

    # The list of extensions does not change while we iterate
    nb_extensions = self.all_nb_extensions()

    for i in itertools.count():
        # No number is inserted for the first candidate
        basename_i = basename + (str(i) if i else "")
        name = basename_i + ext
        if not any(
            self.exists(u"{}/{}{}".format(path, basename_i, nb_ext))
            for nb_ext in nb_extensions
        ):
            break

    return name
def trust_notebook(self, path):
    """Trust the notebook at path, or its paired .ipynb representation(s)"""
    paired_text_notebook = (
        not path.endswith(".ipynb") and path in self.paired_notebooks
    )
    if not paired_text_notebook:
        self.super.trust_notebook(path)
        return

    # For a paired text notebook, trust is applied to the .ipynb file(s) of the pair
    fmt, formats = self.paired_notebooks[path]
    for member_path, member_fmt in paired_paths(path, fmt, formats):
        if member_fmt["extension"] != ".ipynb":
            continue
        self.super.trust_notebook(member_path)
def rename_file(self, old_path, new_path):
    """Rename the current notebook, as well as its alternative representations

    When the notebook is not known to be paired, it is first opened so that
    its pairing information (if any) gets registered. That step is best
    effort: a failure is logged at debug level and the file is then renamed
    by the base contents manager, like any unpaired file.

    For a paired notebook, every existing representation is renamed and the
    list of paired notebooks is updated. An HTTPError with status 500 is
    raised when the base path cannot be derived from `new_path`.
    """
    if old_path not in self.paired_notebooks:
        try:
            # we do not know yet if this is a paired notebook (#190)
            # -> to get this information we open the notebook
            self.get(old_path, content=True)
        except Exception as err:
            # Do not fail the rename because of this, but leave a trace
            self.log.debug(
                u"Could not open %s to look for paired notebooks: %s",
                old_path,
                err,
            )

    if old_path not in self.paired_notebooks:
        self.super.rename_file(old_path, new_path)
        return

    fmt, formats = self.paired_notebooks.get(old_path)
    old_alt_paths = paired_paths(old_path, fmt, formats)

    # Is the new file name consistent with suffix?
    try:
        new_base = base_path(new_path, fmt)
    except HTTPError:
        raise
    except Exception as err:
        self.log.error(
            u"Error while renaming file from %s to %s: %s",
            old_path,
            new_path,
            err,
            exc_info=True,
        )
        raise HTTPError(500, str(err))

    # Rename each representation that actually exists
    for old_alt_path, alt_fmt in old_alt_paths:
        new_alt_path = full_path(new_base, alt_fmt)
        if self.exists(old_alt_path):
            self.super.rename_file(old_alt_path, new_alt_path)

    # The pairing now applies to the new paths
    self.drop_paired_notebook(old_path)
    self.update_paired_notebooks(new_path, formats)
def get_parent_dir(self, path):
    """Return the parent directory of path ("" when there is none)"""
    head, slash, _ = path.rpartition("/")
    if slash:
        return head

    # jupyter-fs: paths that contain ':' are only split on it when this
    # manager has a '_managers' attribute
    if ":" not in path or not hasattr(self, "_managers"):
        return ""
    if path.endswith(":"):
        return ""
    return path.rpartition(":")[0] + ":"
def get_config_file(self, directory):
    """Return the path of the closest jupytext configuration file, or None"""
    candidates = (
        directory + "/" + config_name for config_name in JUPYTEXT_CONFIG_FILES
    )
    found = next(
        (candidate for candidate in candidates if self.file_exists(candidate)),
        None,
    )
    if found is not None:
        return found

    # An empty directory name means there is no parent left to search
    if not directory:
        return None

    # Otherwise, look in the parent directory
    return self.get_config_file(self.get_parent_dir(directory))
def load_config_file(self, config_file, is_os_path=False):
    """Load and validate the given configuration file (None when there is no file)"""
    if config_file is None:
        return None

    self.log.info("Loading Jupytext configuration file at %s", config_file)

    # A .py configuration file is always loaded through its OS path
    if not is_os_path and config_file.endswith(".py"):
        config_file = self._get_os_path(config_file)
        is_os_path = True

    if not is_os_path:
        # Read the file content through the base contents manager
        text = self.super.get(config_file, content=True, type="file")["content"]
        settings = load_jupytext_configuration_file(config_file, text)
    else:
        settings = load_jupytext_configuration_file(config_file)

    return validate_jupytext_configuration_file(config_file, settings)
def get_config(self, path, use_cache=False):
    """Return the Jupytext configuration that applies to the given path

    The configuration is the one loaded from the configuration file found
    for the parent directory of `path`, or from the global configuration
    file. When no configuration is available, the contents manager itself
    is returned.
    """
    cache = self.cached_config
    parent_dir = self.get_parent_dir(path)

    # When listing the notebooks for the tree view, we use a one-second
    # cache for the configuration file
    cache_is_valid = (
        use_cache
        and parent_dir == cache.path
        and not (cache.timestamp + timedelta(seconds=1) < datetime.now())
    )

    if not cache_is_valid:
        try:
            config_file = self.get_config_file(parent_dir)
            if not config_file:
                # No local configuration file: fall back on the global one
                config_file = find_global_jupytext_configuration_file()
                loaded = self.load_config_file(config_file, True)
            else:
                loaded = self.load_config_file(config_file)
            cache.config = loaded
            cache.config_file = config_file
            cache.path = parent_dir
            cache.timestamp = datetime.now()
        except JupytextConfigurationError as err:
            self.log.error(
                u"Error while reading config file: %s %s",
                config_file,
                err,
                exc_info=True,
            )
            raise HTTPError(500, "{}".format(err))

    if cache.config is None:
        # No configuration file: this object provides the configuration
        return self

    self.log.debug(
        "Configuration file for %s is %s",
        path,
        cache.config_file,
    )
    return cache.config
return JupytextContentsManager
# Build the TextFileContentsManager class on top of the contents manager
# provided by the notebook package
try:
    # Use the LargeFileManager as the base class when it can be imported
    from notebook.services.contents.largefilemanager import LargeFileManager

    TextFileContentsManager = build_jupytext_contents_manager_class(LargeFileManager)
except ImportError:
    # Older versions of notebook do not have the LargeFileManager #217
    from notebook.services.contents.filemanager import FileContentsManager

    TextFileContentsManager = build_jupytext_contents_manager_class(FileContentsManager)