Skip to content

Commit

Permalink
Merge pull request #40 from hartym/0.2
Browse files Browse the repository at this point in the history
Filesystem as a service and service configuration for directories/files (#37, #38).
  • Loading branch information
hartym authored Apr 28, 2017
2 parents 1493686 + ee9d5d6 commit 9a3fa98
Show file tree
Hide file tree
Showing 41 changed files with 479 additions and 318 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# This file has been auto-generated.
# All changes will be lost, see Projectfile.
#
# Updated at 2017-04-25 23:05:05.062813
# Updated at 2017-04-28 06:33:29.712011

PYTHON ?= $(shell which python)
PYTHON_BASENAME ?= $(shell basename $(PYTHON))
Expand Down
12 changes: 6 additions & 6 deletions Projectfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ enable_features = {
}

install_requires = [
'colorama >=0.3,<0.4',
'psutil >=5.2,<5.3',
'requests >=2.13,<2.14',
'stevedore >=1.19,<1.20',
'colorama ==0.3.9',
'fs ==2.0.3',
'psutil ==5.2.2',
'requests ==2.13.0',
'stevedore ==1.21.0',
]

extras_require = {
Expand All @@ -33,8 +34,7 @@ extras_require = {
'ipywidgets >=6.0.0.beta5'
],
'dev': [
'coverage >=4.3,<4.4',
'mock >=2.0,<2.1',
'coverage >=4,<5',
'pylint >=1,<2',
'pytest >=3,<4',
'pytest-cov >=2,<3',
Expand Down
2 changes: 1 addition & 1 deletion bin/run_all_examples.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#! /bin/bash

__PATH__=$(cd $(dirname "$0")/..; pwd)
EXAMPLES=$(cd $__PATH__; find bonobo/examples -name \*.py -not -name __init__.py)
EXAMPLES=$(cd $__PATH__; find bonobo/examples -name \*.py -not -name _\*)

for example in $EXAMPLES; do
echo "===== $example ====="
Expand Down
109 changes: 3 additions & 106 deletions bonobo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,113 +7,10 @@
"""Bonobo data-processing toolkit main module."""

import sys
import warnings

assert (sys.version_info >= (3, 5)), 'Python 3.5+ is required to use Bonobo.'
from bonobo._api import *
from bonobo._api import __all__

from ._version import __version__
from .basics import __all__ as __all_basics__
from .config import __all__ as __all_config__
from .execution import __all__ as __all_execution__
from .io import __all__ as __all_io__
from .strategies import __all__ as __all_strategies__

# Public API of the package: every name exported by the submodules, plus the names listed here.
__all__ = __all_basics__ + __all_config__ + __all_execution__ + __all_io__ + __all_strategies__ + [
    'Bag',
    # The trailing comma matters: without it, implicit string concatenation with the next
    # literal would export the single bogus name 'ErrorBagGraph'.
    'ErrorBag',
    'Graph',
    'Token',
    '__version__',
    'create_strategy',
    'get_examples_path',
    'run',
]

from .basics import *
from .config import *
from .execution import *
from .io import *
from .strategies import *
from .structs.bags import *
from .structs.graphs import *
from .structs.tokens import *

DEFAULT_STRATEGY = 'threadpool'

STRATEGIES = {
'naive': NaiveStrategy,
'processpool': ProcessPoolExecutorStrategy,
'threadpool': ThreadPoolExecutorStrategy,
}


def get_examples_path(*pathsegments):
    """
    Build the path of a resource located under the ``examples`` directory that sits next to this module.

    :param pathsegments: path components appended, in order, below the ``examples`` directory.
    :return: the resulting path, as a ``str``.
    """
    from os.path import dirname
    from pathlib import Path

    examples_root = Path(dirname(__file__), 'examples')
    return str(examples_root.joinpath(*pathsegments))


def create_strategy(name=None):
    """
    Return a strategy instance matching ``name``.

    :param name: a Strategy instance (returned untouched), a key of STRATEGIES, or None to use DEFAULT_STRATEGY.
    :return: Strategy
    :raises RuntimeError: when ``name`` is not a key of STRATEGIES.
    """
    import logging
    from bonobo.strategies.base import Strategy

    # Already a strategy: nothing to build.
    if isinstance(name, Strategy):
        return name

    strategy_name = DEFAULT_STRATEGY if name is None else name
    logging.debug('Creating strategy {}...'.format(strategy_name))

    # Only the lookup is guarded, so a KeyError raised by the factory itself is not masked.
    try:
        strategy_factory = STRATEGIES[strategy_name]
    except KeyError as lookup_error:
        choices = ', '.join(sorted(STRATEGIES))
        raise RuntimeError(
            'Invalid strategy {}. Available choices: {}.'.format(repr(strategy_name), choices)
        ) from lookup_error

    return strategy_factory()


def _is_interactive_console():
    """Tell whether the current standard output reports being a tty."""
    from sys import stdout
    return stdout.isatty()


def _is_jupyter_notebook():
    """
    Tell whether the code runs in a shell whose class is named ``ZMQInteractiveShell``.

    ``get_ipython`` is not defined in this module; when the name does not exist at all, the
    resulting NameError is taken to mean "not a notebook".
    """
    try:
        shell_class_name = get_ipython().__class__.__name__
    except NameError:
        return False
    return shell_class_name == 'ZMQInteractiveShell'


def run(graph, *chain, strategy=None, plugins=None, services=None):
    """
    Execute ``graph`` using the selected strategy.

    :param graph: the Graph to execute (or, deprecated, the first element of a chain).
    :param chain: deprecated. When given, a Graph is built from ``graph`` followed by these elements.
    :param strategy: passed to ``create_strategy()`` to obtain the strategy that executes the graph.
    :param plugins: optional iterable of plugins. The output plugin matching each detected environment
        (interactive console, Jupyter notebook) is appended when not already present.
    :param services: passed through unchanged to ``strategy.execute()``.
    :return: whatever ``strategy.execute()`` returns.
    """
    if chain:
        # Imported locally: the module removes its global ``warnings`` name once loaded (``del warnings``),
        # so relying on the module-level import would raise NameError when this branch runs.
        import warnings
        warnings.warn('DEPRECATED. You should pass a Graph instance instead of a chain.')
        from bonobo import Graph
        graph = Graph(graph, *chain)

    strategy = create_strategy(strategy)

    # Honour caller-supplied plugins (they used to be discarded) and work on a copy so the
    # caller's own list is never mutated.
    plugins = list(plugins) if plugins is not None else []

    if _is_interactive_console():
        from bonobo.ext.console import ConsoleOutputPlugin
        if ConsoleOutputPlugin not in plugins:
            plugins.append(ConsoleOutputPlugin)

    if _is_jupyter_notebook():
        from bonobo.ext.jupyter import JupyterOutputPlugin
        if JupyterOutputPlugin not in plugins:
            plugins.append(JupyterOutputPlugin)

    return strategy.execute(graph, plugins=plugins, services=services)


__all__ = __all__
del sys
del warnings
80 changes: 80 additions & 0 deletions bonobo/_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from bonobo._version import __version__

__all__ = [
'__version__',
]

from bonobo.structs import Bag, Graph

__all__ += ['Bag', 'Graph']

# Filesystem. This is a shortcut to the excellent filesystem2 library, made available here for convenience.
from fs import open_fs as _open_fs


def open_fs(url, *args, **kwargs):
    """
    Open a filesystem through ``fs.open_fs``, converting ``url`` to ``str`` first.

    :param url: filesystem URL or path; any object is accepted, it is passed through ``str()``.
    :param args: extra positional arguments, forwarded unchanged.
    :param kwargs: extra keyword arguments, forwarded unchanged.
    :return: whatever ``fs.open_fs`` returns.
    """
    return _open_fs(str(url), *args, **kwargs)


__all__ += ['open_fs']

# Basic transformations.
from bonobo.basics import *
from bonobo.basics import __all__ as _all_basics

__all__ += _all_basics

# Execution strategies.
from bonobo.strategies import create_strategy

__all__ += ['create_strategy']


# Extractors and loaders based on stdlib.
from bonobo.io import *
from bonobo.io import __all__ as _all_io

__all__ += _all_io


# XXX This may belong in the bonobo.examples package.
def get_examples_path(*pathsegments):
    """
    Build the path of a resource located under the ``examples`` directory that sits next to this module.

    :param pathsegments: path components appended, in order, below the ``examples`` directory.
    :return: the resulting path, as a ``str``.
    """
    from os.path import dirname
    from pathlib import Path

    examples_root = Path(dirname(__file__), 'examples')
    return str(examples_root.joinpath(*pathsegments))


__all__.append(get_examples_path.__name__)


def _is_interactive_console():
    """Tell whether the current standard output reports being a tty."""
    from sys import stdout
    return stdout.isatty()


def _is_jupyter_notebook():
    """
    Tell whether the code runs in a shell whose class is named ``ZMQInteractiveShell``.

    ``get_ipython`` is not defined in this module; when the name does not exist at all, the
    resulting NameError is taken to mean "not a notebook".
    """
    try:
        shell_class_name = get_ipython().__class__.__name__
    except NameError:
        return False
    return shell_class_name == 'ZMQInteractiveShell'


# @api
def run(graph, *chain, strategy=None, plugins=None, services=None):
    """
    Execute ``graph`` using the selected strategy.

    :param graph: the Graph to execute (or, deprecated, the first element of a chain).
    :param chain: deprecated. When given, a Graph is built from ``graph`` followed by these elements.
    :param strategy: passed to ``create_strategy()`` to obtain the strategy that executes the graph.
    :param plugins: optional iterable of plugins. The output plugin matching each detected environment
        (interactive console, Jupyter notebook) is appended when not already present.
    :param services: passed through unchanged to ``strategy.execute()``.
    :return: whatever ``strategy.execute()`` returns.
    """
    if chain:
        # Imported locally: no module-level ``warnings`` import is visible in this module, so the
        # deprecated code path would otherwise fail with NameError instead of warning.
        import warnings
        warnings.warn('DEPRECATED. You should pass a Graph instance instead of a chain.')
        from bonobo import Graph
        graph = Graph(graph, *chain)

    strategy = create_strategy(strategy)

    # Honour caller-supplied plugins (they used to be discarded) and work on a copy so the
    # caller's own list is never mutated.
    plugins = list(plugins) if plugins is not None else []

    if _is_interactive_console():
        from bonobo.ext.console import ConsoleOutputPlugin
        if ConsoleOutputPlugin not in plugins:
            plugins.append(ConsoleOutputPlugin)

    if _is_jupyter_notebook():
        from bonobo.ext.jupyter import JupyterOutputPlugin
        if JupyterOutputPlugin not in plugins:
            plugins.append(JupyterOutputPlugin)

    return strategy.execute(graph, plugins=plugins, services=services)


__all__.append(run.__name__)
1 change: 1 addition & 0 deletions bonobo/basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
'noop',
]


def identity(x):
    """Return ``x`` unchanged."""
    return x

Expand Down
30 changes: 28 additions & 2 deletions bonobo/commands/run.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,33 @@
import argparse

import os

import bonobo

# Name of the optional file, located next to a job file, that declares default services.
DEFAULT_SERVICES_FILENAME = '_services.py'
# Name of the callable, defined in that file, that returns the services dictionary.
DEFAULT_SERVICES_ATTR = 'get_services'


def get_default_services(filename, services=None):
    """
    Build the services dictionary for the job file located at ``filename``.

    When a DEFAULT_SERVICES_FILENAME file exists in the same directory, it is executed and the dictionary
    returned by its DEFAULT_SERVICES_ATTR callable provides the defaults; entries of ``services`` take
    precedence over those defaults. Without such a file, ``services`` (or an empty dict) is returned.

    :param filename: path of the job file; only its directory is used.
    :param services: optional dict of services overriding the defaults.
    :return: dict of services.
    :raises KeyError: when the services file does not define the DEFAULT_SERVICES_ATTR callable.
    """
    services_filename = os.path.join(os.path.dirname(filename), DEFAULT_SERVICES_FILENAME)
    if not os.path.exists(services_filename):
        return services or {}

    # Read bytes so compile() applies Python's own source-encoding rules (UTF-8 unless the file
    # declares otherwise) instead of whatever the locale's default text encoding happens to be.
    with open(services_filename, 'rb') as file:
        code = compile(file.read(), services_filename, 'exec')

    context = {
        '__name__': '__bonobo__',
        '__file__': services_filename,
    }

    # NOTE(security): this runs arbitrary code found next to the job file; only point it at
    # directories whose content is trusted. Errors raised by that code propagate unchanged.
    exec(code, context)

    return {
        **context[DEFAULT_SERVICES_ATTR](),
        **(services or {}),
    }


def execute(file, quiet=False):
with file:
Expand Down Expand Up @@ -32,8 +59,7 @@ def execute(file, quiet=False):

# todo if console and not quiet, then add the console plugin
# todo when better console plugin, add it if console and just disable display

return bonobo.run(graph)
return bonobo.run(graph, plugins=[], services=get_default_services(file.name, context.get(DEFAULT_SERVICES_ATTR)() if DEFAULT_SERVICES_ATTR in context else None))


def register(parser):
Expand Down
3 changes: 2 additions & 1 deletion bonobo/config/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from bonobo.config.configurables import Configurable
from bonobo.config.options import Option
from bonobo.config.services import Container, Service
from bonobo.config.processors import ContextProcessor
from bonobo.config.services import Container, Service

__all__ = [
'Configurable',
'Container',
'ContextProcessor',
'Option',
'Service',
]
9 changes: 9 additions & 0 deletions bonobo/examples/datasets/_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from os.path import dirname

import bonobo


def get_services():
    """
    Provide the default services declared by this file.

    :return: dict with a single ``fs`` entry, the result of ``bonobo.open_fs()`` called on the
        directory containing this file.
    """
    here = dirname(__file__)
    return dict(fs=bonobo.open_fs(here))
10 changes: 4 additions & 6 deletions bonobo/examples/datasets/coffeeshops.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
from os.path import dirname, realpath, join

import bonobo
from bonobo.commands.run import get_default_services
from bonobo.ext.opendatasoft import OpenDataSoftAPI

OUTPUT_FILENAME = realpath(join(dirname(__file__), 'coffeeshops.txt'))
filename = 'coffeeshops.txt'

graph = bonobo.Graph(
OpenDataSoftAPI(dataset='liste-des-cafes-a-un-euro', netloc='opendata.paris.fr'),
lambda row: '{nom_du_cafe}, {adresse}, {arrondissement} Paris, France'.format(**row),
bonobo.FileWriter(path=OUTPUT_FILENAME),
bonobo.FileWriter(path=filename),
)

if __name__ == '__main__':
bonobo.run(graph)
print('Import done, read {} for results.'.format(OUTPUT_FILENAME))
bonobo.run(graph, services=get_default_services(__file__))
20 changes: 8 additions & 12 deletions bonobo/examples/datasets/fablabs.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import json
import os

from bonobo import JsonWriter, Graph, get_examples_path
from bonobo.basics import Tee
from colorama import Fore, Style

import bonobo
from bonobo.commands.run import get_default_services
from bonobo.ext.opendatasoft import OpenDataSoftAPI

from colorama import Fore, Style
try:
import pycountry
except ImportError as exc:
Expand All @@ -15,8 +15,6 @@
API_NETLOC = 'datanova.laposte.fr'
ROWS = 100

__path__ = os.path.dirname(__file__)


def _getlink(x):
return x.get('url', None)
Expand Down Expand Up @@ -55,15 +53,13 @@ def display(row):
print(' - {}source{}: {source}'.format(Fore.BLUE, Style.RESET_ALL, source='datanova/' + API_DATASET))


graph = Graph(
graph = bonobo.Graph(
OpenDataSoftAPI(dataset=API_DATASET, netloc=API_NETLOC, timezone='Europe/Paris'),
normalize,
filter_france,
Tee(display),
JsonWriter(path=get_examples_path('datasets/fablabs.txt')),
bonobo.Tee(display),
bonobo.JsonWriter(path='fablabs.txt'),
)

if __name__ == '__main__':
from bonobo import run

run(graph)
bonobo.run(graph, services=get_default_services(__file__))
13 changes: 13 additions & 0 deletions bonobo/examples/files/_fixme_json_handlers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import bonobo
from bonobo.commands.run import get_default_services

# XXX Does not work anymore because of the filesystem service, which can't read HTTP.
url = 'https://data.toulouse-metropole.fr/explore/dataset/theatres-et-salles-de-spectacles/download?format=json&timezone=Europe/Berlin&use_labels_for_header=true'

graph = bonobo.Graph(
bonobo.JsonReader(path=url),
print
)

if __name__ == '__main__':
bonobo.run(graph, services=get_default_services(__file__))
Loading

0 comments on commit 9a3fa98

Please sign in to comment.