Skip to content

Commit

Permalink
Merge pull request #94 from bartvm/settings
Browse files Browse the repository at this point in the history
Module-level configuration
  • Loading branch information
bartvm committed Jan 14, 2015
2 parents 72d8d6b + 7c61a1d commit 3a0c945
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 5 deletions.
2 changes: 2 additions & 0 deletions blocks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,4 @@
"""The blocks library for parameterized Theano ops."""
from blocks.config_parser import config # flake8: noqa

__version__ = '0.1a1' # PEP 440 compliant
116 changes: 116 additions & 0 deletions blocks/config_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
"""Module level configuration.
Blocks allows module-wide configuration values to be set using a YAML_
configuration file and `environment variables`_. Environment variables
override the configuration file, which in turn overrides the defaults.
The configuration is read from ``~/.blocksrc`` if it exists. A custom
configuration file can be used by setting the ``BLOCKS_CONFIG`` environment
variable. A configuration file is of the form:
.. code-block:: yaml
data_path: /home/user/datasets
This value can be overridden using an environment variable:
.. code-block:: bash
BLOCKS_DATA_PATH=/home/users/other_datasets python
If a setting is not configured and does not provide a default, a
:class:`ConfigurationError` is raised when it is accessed.
Configuration values can be accessed as attributes of ``blocks.config``.
>>> from blocks import config
>>> print(config.data_path) # doctest: +SKIP
~/datasets
The following configurations are supported:
.. option:: data_path
The path where dataset files are stored. Can also be set using the
environment variable ``BLOCKS_DATA_PATH``.
.. _YAML: http://yaml.org/
.. _environment variables:
https://en.wikipedia.org/wiki/Environment_variable
"""
import logging
import os

import yaml

logger = logging.getLogger(__name__)

# Sentinel meaning "no default was supplied". A dedicated object is used so
# that ``None`` remains usable as a real default value.
NOT_SET = object()


class ConfigurationError(Exception):
    """Raised for an unknown setting, or one that is unset with no default."""


class Configuration(object):
    """Registry of module-wide settings, exposed as attributes.

    Each setting is declared with :meth:`add_config`. Reading it as an
    attribute resolves the value from, in order of precedence: its
    environment variable (if one was declared), the YAML configuration
    file, and finally its default.

    Attributes
    ----------
    yaml_settings : dict
        The values loaded from the YAML configuration file; empty when
        the file does not exist or contains no document.
    config : dict
        Maps each registered key to a dict holding its ``'default'``,
        ``'env_var'`` and ``'type'``.

    """
    def __init__(self):
        """Load the YAML file named by ``BLOCKS_CONFIG`` or ``~/.blocksrc``."""
        if 'BLOCKS_CONFIG' in os.environ:
            yaml_file = os.environ['BLOCKS_CONFIG']
        else:
            yaml_file = os.path.expanduser('~/.blocksrc')
        if os.path.isfile(yaml_file):
            with open(yaml_file) as f:
                # An empty YAML file loads as None; normalise it to an empty
                # mapping so that membership tests in __getattr__ work.
                self.yaml_settings = yaml.safe_load(f) or {}
        else:
            self.yaml_settings = {}
        self.config = {}

    def __getattr__(self, key):
        """Resolve the setting `key` and return it, converted by its type.

        Only invoked when normal attribute lookup fails, i.e. for names
        that are not real instance attributes.

        Raises
        ------
        ConfigurationError
            If `key` was never registered, or if it has no value in the
            environment or the YAML file and no default was given.
        AttributeError
            If the instance has not been initialised yet.

        """
        # If one of the instance's own attributes ends up here, __init__ has
        # not run (e.g. the object is being copied or unpickled). Reading
        # ``self.config`` below would then re-enter this method forever.
        if key in ('config', 'yaml_settings'):
            raise AttributeError(key)
        if key not in self.config:
            raise ConfigurationError("Unknown configuration: {}".format(key))
        setting = self.config[key]
        env_var = setting['env_var']
        # ``env_var`` is None for settings that can only come from the YAML
        # file; os.environ rejects non-string keys, so test for that first.
        if env_var is not None and env_var in os.environ:
            value = os.environ[env_var]
        elif key in self.yaml_settings:
            value = self.yaml_settings[key]
        else:
            value = setting['default']
        if value is NOT_SET:
            raise ConfigurationError("Configuration not set and no default "
                                     "provided: {}.".format(key))
        # Check the converter registered for this setting, not the builtin
        # ``type``, which is never None.
        converter = setting['type']
        if converter is None:
            return value
        return converter(value)

    def add_config(self, key, type, default=NOT_SET, env_var=None):
        """Add a configuration setting.

        Parameters
        ----------
        key : str
            The name of the configuration setting. This must be a valid
            Python attribute name i.e. alphanumeric with underscores.
        type : function or None
            A function such as ``float``, ``int`` or ``str`` which takes
            the configuration value and returns an object of the correct
            type. Note that the values retrieved from environment
            variables are always strings, while those retrieved from the
            YAML file might already be parsed. Hence, the function provided
            here must accept both types of input. If ``None``, the value
            is returned without conversion.
        default : object, optional
            The default configuration to return if not set. By default none
            is set and an error is raised instead.
        env_var : str, optional
            The environment variable name that holds this configuration
            value. If not given, this configuration can only be set in the
            YAML configuration file.

        """
        self.config[key] = {'default': default,
                            'env_var': env_var,
                            'type': type}

# Module-level configuration instance; constructing it reads the YAML
# configuration file (if one exists) at import time.
config = Configuration()
# Register ``data_path`` with no default: accessing it raises
# ConfigurationError unless BLOCKS_DATA_PATH or the YAML file provides it.
config.add_config('data_path', env_var='BLOCKS_DATA_PATH', type=str)
4 changes: 2 additions & 2 deletions blocks/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ def reset(self, state):
Notes
-----
The default implementation closes the state and opens a new one. A
more efficient implementation (e.g. using ``file.seek(0) instead of
closing and re-opening the file) can override the default one in
more efficient implementation (e.g. using ``file.seek(0)`` instead
of closing and re-opening the file) can override the default one in
derived classes.
"""
Expand Down
7 changes: 4 additions & 3 deletions blocks/datasets/mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy
import theano

from blocks import config
from blocks.datasets import Dataset
from blocks.datasets.schemes import SequentialScheme

Expand Down Expand Up @@ -44,7 +45,6 @@ class MNIST(Dataset):
sources = ('features', 'targets')

def __init__(self, which_set, start=None, stop=None, **kwargs):
data_path = '/Users/bartvanmerrienboer/data/'
if which_set == 'train':
data = 'train-images-idx3-ubyte'
labels = 'train-labels-idx1-ubyte'
Expand All @@ -54,10 +54,11 @@ def __init__(self, which_set, start=None, stop=None, **kwargs):
else:
raise ValueError("MNIST only has a train and test set")
X = read_mnist_images(
os.path.join(data_path, data), theano.config.floatX)[start:stop]
os.path.join(config.data_path, data),
theano.config.floatX)[start:stop]
X = X.reshape((X.shape[0], numpy.prod(X.shape[1:])))
y = read_mnist_labels(
os.path.join(data_path, labels))[start:stop, numpy.newaxis]
os.path.join(config.data_path, labels))[start:stop, numpy.newaxis]
self.X, self.y = X, y
self.num_examples = len(X)
self.default_scheme = SequentialScheme(self.num_examples, 1)
Expand Down
4 changes: 4 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Configuration
=============

.. automodule:: blocks.config_parser
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Table of contents
.. toctree::

getting_started
configuration
blocks
initialization
datasets
Expand Down

1 comment on commit 3a0c945

@rizar
Copy link
Contributor

@rizar rizar commented on 3a0c945 Jan 14, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool, it is nice to have configuration!

Please sign in to comment.