Skip to content

Commit

Permalink
Merge pull request #92 from casperdcl/master
Browse files Browse the repository at this point in the history
Custom blacklist metadata fields
  • Loading branch information
kynan committed Mar 26, 2019
2 parents 7aaed05 + 4edbb8e commit 7fdb961
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 87 deletions.
1 change: 1 addition & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
Copyright (c) 2015 Min RK, Florian Rathgeber, Michael McNeil Forbes
2019 Casper da Costa-Luis

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
Expand Down
14 changes: 14 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,20 @@ This is the same metadata used by the `init_cell nbextension`__.

__ https://github.com/ipython-contrib/jupyter_contrib_nbextensions/tree/master/src/jupyter_contrib_nbextensions/nbextensions/init_cell

Stripping metadata
++++++++++++++++++

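Additional metadata fields to strip can be configured via `git config (--global) filter.nbstripout.extrakeys`, a whitespace-separated list of keys of the form `metadata.<field>` (notebook level) or `cell.metadata.<field>` (cell level).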
An example would be:

git config --global filter.nbstripout.extrakeys '
metadata.celltoolbar metadata.kernelspec.display_name
metadata.kernelspec.name metadata.language_info.codemirror_mode.version
metadata.language_info.pygments_lexer metadata.language_info.version
metadata.toc metadata.notify_time metadata.varInspector
cell.metadata.heading_collapsed cell.metadata.hidden
cell.metadata.code_folding cell.metadata.tags cell.metadata.init_cell'

Manual filter installation
==========================

Expand Down
4 changes: 4 additions & 0 deletions nbstripout/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from ._nbstripout import install, uninstall, status, main
from ._utils import pop_recursive, strip_output
__all__ = ["install", "uninstall", "status", "main",
"pop_recursive", "strip_output"]
113 changes: 31 additions & 82 deletions nbstripout.py → nbstripout/_nbstripout.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,11 @@
*.ipynb diff=ipynb
"""

from __future__ import print_function
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import io
import sys

input_stream = None
if sys.version_info < (3, 0):
import codecs
# Use UTF8 reader/writer for stdin/stdout
# http://stackoverflow.com/a/1169209
if sys.stdin:
input_stream = codecs.getreader('utf8')(sys.stdin)
output_stream = codecs.getwriter('utf8')(sys.stdout)
else:
# Wrap input/output stream in UTF-8 encoded text wrapper
# https://stackoverflow.com/a/16549381
if sys.stdin:
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

__version__ = '0.3.3'

from nbstripout._utils import strip_output
try:
# Jupyter >= 4
from nbformat import read, write, NO_CONVERT
Expand All @@ -131,67 +113,8 @@ def read(f, as_version):
def write(nb, f):
return current.write(nb, f, 'json')


def _cells(nb):
"""Yield all cells in an nbformat-insensitive manner"""
if nb.nbformat < 4:
for ws in nb.worksheets:
for cell in ws.cells:
yield cell
else:
for cell in nb.cells:
yield cell


def strip_output(nb, keep_output, keep_count):
    """
    Strip the outputs, execution count/prompt number and miscellaneous
    metadata from a notebook object, unless specified to keep either the
    outputs or counts.

    `nb` is modified in place and also returned.
    `keep_output`: when true, cell outputs are kept. Cells whose metadata
    sets `init_cell` or `keep_output` always keep their outputs.
    `keep_count`: when true, prompt numbers / execution counts are kept.
    """

    nb.metadata.pop('signature', None)
    nb.metadata.pop('widgets', None)

    for cell in _cells(nb):

        keep_output_this_cell = keep_output

        # Keep the output for these cells, but strip count and metadata
        if cell.metadata.get('init_cell') or cell.metadata.get('keep_output'):
            keep_output_this_cell = True

        # Remove the outputs, unless directed otherwise
        if 'outputs' in cell:

            # Default behavior strips outputs. With all outputs stripped,
            # there are no counts to keep and keep_count is ignored.
            if not keep_output_this_cell:
                cell['outputs'] = []

            # If keep_output_this_cell, but not keep_count, strip the counts
            # from the output.
            if keep_output_this_cell and not keep_count:
                for output in cell['outputs']:
                    if 'execution_count' in output:
                        output['execution_count'] = None

            # If keep_output_this_cell and keep_count, do nothing.

        # Remove the prompt_number/execution_count, unless directed otherwise
        if 'prompt_number' in cell and not keep_count:
            cell['prompt_number'] = None
        if 'execution_count' in cell and not keep_count:
            cell['execution_count'] = None

        # Always remove this metadata. The fields are popped outright, so
        # there is no need to reset `collapsed`/`scrolled` to False first.
        if 'metadata' in cell:
            for field in ['collapsed', 'scrolled', 'ExecuteTime']:
                cell.metadata.pop(field, None)
    return nb
__all__ = ["install", "uninstall", "status", "main"]
__version__ = '0.3.3'


def install(attrfile=None):
Expand Down Expand Up @@ -273,6 +196,10 @@ def status(verbose=False):
diff = check_output(['git', 'config', 'diff.ipynb.textconv']).strip()
attributes = check_output(['git', 'check-attr', 'filter', '--', '*.ipynb']).strip()
diff_attributes = check_output(['git', 'check-attr', 'diff', '--', '*.ipynb']).strip()
try:
extra_keys = check_output(['git', 'config', 'filter.nbstripout.extrakeys']).strip()
except CalledProcessError:
extra_keys = ''
if attributes.endswith(b'unspecified'):
if verbose:
print('nbstripout is not installed in repository', git_dir)
Expand All @@ -284,6 +211,7 @@ def status(verbose=False):
print(' smudge =', smudge)
print(' required =', required)
print(' diff=', diff)
print(' extrakeys=', extra_keys)
print('\nAttributes:\n ', attributes)
print('\nDiff Attributes:\n ', diff_attributes)
return 0
Expand All @@ -294,6 +222,7 @@ def status(verbose=False):


def main():
from subprocess import check_output, CalledProcessError
parser = ArgumentParser(epilog=__doc__, formatter_class=RawDescriptionHelpFormatter)
task = parser.add_mutually_exclusive_group()
task.add_argument('--install', action='store_true',
Expand Down Expand Up @@ -336,13 +265,33 @@ def main():
print(__version__)
sys.exit(0)

try:
extra_keys = check_output(['git', 'config', 'filter.nbstripout.extrakeys']).strip()
except CalledProcessError:
extra_keys = ''

input_stream = None
if sys.version_info < (3, 0):
import codecs
# Use UTF8 reader/writer for stdin/stdout
# http://stackoverflow.com/a/1169209
if sys.stdin:
input_stream = codecs.getreader('utf8')(sys.stdin)
output_stream = codecs.getwriter('utf8')(sys.stdout)
else:
# Wrap input/output stream in UTF-8 encoded text wrapper
# https://stackoverflow.com/a/16549381
if sys.stdin:
input_stream = io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')
output_stream = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')

for filename in args.files:
if not (args.force or filename.endswith('.ipynb')):
continue
try:
with io.open(filename, 'r', encoding='utf8') as f:
nb = read(f, as_version=NO_CONVERT)
nb = strip_output(nb, args.keep_output, args.keep_count)
nb = strip_output(nb, args.keep_output, args.keep_count, extra_keys)
if args.textconv:
write(nb, output_stream)
output_stream.flush()
Expand All @@ -360,7 +309,7 @@ def main():
if not args.files and input_stream:
try:
nb = strip_output(read(input_stream, as_version=NO_CONVERT),
args.keep_output, args.keep_count)
args.keep_output, args.keep_count, extra_keys)
write(nb, output_stream)
output_stream.flush()
except NotJSONError:
Expand Down
104 changes: 104 additions & 0 deletions nbstripout/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import sys

__all__ = ["pop_recursive", "strip_output"]


def pop_recursive(d, key, default=None):
    """dict.pop(key) where `key` is a `.`-delimited list of nested keys.

    Walks `d` along every component of `key` except the last, then pops
    the last component from the mapping reached. Returns `default` when
    any component is missing or when a non-mapping value (e.g. a list or
    a scalar) is met on the way.

    >>> d = {'a': {'b': 1, 'c': 2}}
    >>> pop_recursive(d, 'a.c')
    2
    >>> d
    {'a': {'b': 1}}
    >>> pop_recursive({'a': [1, 2]}, 'a.b', 'missing')
    'missing'
    """
    nested = key.split('.')
    current = d
    for k in nested[:-1]:
        if not hasattr(current, 'get'):
            return default
        # A missing intermediate key yields a throwaway dict, so the final
        # pop below simply returns `default`.
        current = current.get(k, {})
    # Lists and sets also expose `pop`, but not with a (key, default)
    # signature; require a mapping-like object to avoid a TypeError.
    if not (hasattr(current, 'get') and hasattr(current, 'pop')):
        return default
    return current.pop(nested[-1], default)


def _cells(nb):
"""Yield all cells in an nbformat-insensitive manner"""
if nb.nbformat < 4:
for ws in nb.worksheets:
for cell in ws.cells:
yield cell
else:
for cell in nb.cells:
yield cell


def strip_output(nb, keep_output, keep_count, extra_keys=''):
    """
    Strip the outputs, execution count/prompt number and miscellaneous
    metadata from a notebook object, unless specified to keep either the
    outputs or counts.

    `nb` is modified in place and also returned.
    `keep_output`: when true, cell outputs are kept. Cells whose metadata
    sets `init_cell` or `keep_output` always keep their outputs.
    `keep_count`: when true, prompt numbers / execution counts are kept.
    `extra_keys`: whitespace-separated string (or bytes) naming additional
    metadata fields to remove, e.g.
    'metadata.foo cell.metadata.bar metadata.baz'. Keys must start with
    `metadata.` (notebook level) or `cell.metadata.` (cell level); any
    other key is reported on stderr and ignored.
    """
    # `extra_keys` may arrive as bytes (e.g. raw subprocess output).
    if hasattr(extra_keys, 'decode'):
        extra_keys = extra_keys.decode()
    extra_keys = extra_keys.split()
    keys = {'metadata': [], 'cell': {'metadata': []}}
    for key in extra_keys:
        if key.startswith('metadata.'):
            keys['metadata'].append(key[len('metadata.'):])
        elif key.startswith('cell.metadata.'):
            keys['cell']['metadata'].append(key[len('cell.metadata.'):])
        else:
            # Terminate the line so consecutive warnings stay readable.
            sys.stderr.write('ignoring extra key `%s`\n' % key)

    nb.metadata.pop('signature', None)
    nb.metadata.pop('widgets', None)
    for field in keys['metadata']:
        pop_recursive(nb.metadata, field)

    for cell in _cells(nb):
        keep_output_this_cell = keep_output

        # Keep the output for these cells, but strip count and metadata
        if cell.metadata.get('init_cell') or cell.metadata.get('keep_output'):
            keep_output_this_cell = True

        # Remove the outputs, unless directed otherwise
        if 'outputs' in cell:

            # Default behavior strips outputs. With all outputs stripped,
            # there are no counts to keep and keep_count is ignored.
            if not keep_output_this_cell:
                cell['outputs'] = []

            # If keep_output_this_cell, but not keep_count, strip the counts
            # from the output.
            if keep_output_this_cell and not keep_count:
                for output in cell['outputs']:
                    if 'execution_count' in output:
                        output['execution_count'] = None

            # If keep_output_this_cell and keep_count, do nothing.

        # Remove the prompt_number/execution_count, unless directed otherwise
        if 'prompt_number' in cell and not keep_count:
            cell['prompt_number'] = None
        if 'execution_count' in cell and not keep_count:
            cell['execution_count'] = None

        # Always remove this metadata. The fields are popped outright, so
        # there is no need to reset `collapsed`/`scrolled` to False first.
        if 'metadata' in cell:
            for field in ['collapsed', 'scrolled', 'ExecuteTime']:
                cell.metadata.pop(field, None)
        # User-configured cell-level fields (currently only `metadata`).
        for (extra, fields) in keys['cell'].items():
            if extra in cell:
                for field in fields:
                    pop_recursive(getattr(cell, extra), field)
    return nb
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from setuptools import setup
from setuptools import setup, find_packages

with open('README.rst') as f:
long_description = f.read()
Expand Down Expand Up @@ -32,10 +32,11 @@

description='Strips outputs from Jupyter and IPython notebooks',
long_description=long_description,
py_modules=['nbstripout'],
packages=find_packages(),
provides=['nbstripout'],
entry_points={
'console_scripts': [
'nbstripout = nbstripout:main'
'nbstripout = nbstripout._nbstripout:main'
],
},

Expand Down
30 changes: 28 additions & 2 deletions tests/test-git.t
Original file line number Diff line number Diff line change
@@ -1,17 +1,43 @@
$ git init foobar
Initialized empty Git repository in .* (re)
$ cd foobar
$ git config --local filter.nbstripout.extrakeys ' '
$ echo -n "*.txt text" >> .git/info/attributes
$ ${NBSTRIPOUT_EXE:-nbstripout} --is-installed
[1]
$ ${NBSTRIPOUT_EXE:-nbstripout} --install
$ ${NBSTRIPOUT_EXE:-nbstripout} --is-installed
$ git diff --no-index --no-ext-diff --unified=0 --exit-code -a --no-prefix ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_output.ipynb
$ git diff --no-index ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_different.ipynb
$ git diff --no-index ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_different_extrakeys.ipynb
(diff --git.*) (re)
(index .*) (re)
(--- .*test_diff.ipynb) (re)
(\+\+\+ .*test_diff_different.ipynb) (re)
(\+\+\+ .*test_diff_different_extrakeys.ipynb) (re)
@@ -6,15 +6,14 @@
"metadata": {},
"outputs": [],
"source": [
- "print(\"aou\")"
+ "print(\"aou now it is different\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
- "language": "python",
- "name": "python2"
+ "language": "python"
},
"language_info": {
"codemirror_mode": {
[1]
$ git config --local filter.nbstripout.extrakeys 'cell.metadata.collapsed metadata.kernelspec.name'
$ git diff --no-index ${TESTDIR}/test_diff.ipynb ${TESTDIR}/test_diff_different_extrakeys.ipynb
(diff --git.*) (re)
(index .*) (re)
(--- .*test_diff.ipynb) (re)
(\+\+\+ .*test_diff_different_extrakeys.ipynb) (re)
@@ -6,7 +6,7 @@
"metadata": {},
"outputs": [],
Expand Down
Loading

0 comments on commit 7fdb961

Please sign in to comment.