Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add script and methods to save YAML parameter tree on disk #1023

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion openfisca_core/parameters/__init__.py
Expand Up @@ -33,7 +33,7 @@
)

from .at_instant_like import AtInstantLike # noqa: F401
from .helpers import contains_nan, load_parameter_file # noqa: F401
from .helpers import contains_nan, load_parameter_file, save_parameters_to_dir # noqa: F401
from .parameter_at_instant import ParameterAtInstant # noqa: F401
from .parameter_node_at_instant import ParameterNodeAtInstant # noqa: F401
from .vectorial_parameter_node_at_instant import VectorialParameterNodeAtInstant # noqa: F401
Expand Down
15 changes: 15 additions & 0 deletions openfisca_core/parameters/config.py
Expand Up @@ -2,6 +2,7 @@
import os
import yaml
import typing
from collections import OrderedDict
from openfisca_core.warnings import LibYAMLWarning


Expand Down Expand Up @@ -42,3 +43,17 @@ def dict_no_duplicate_constructor(loader, node, deep = False):


yaml.add_constructor(yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, dict_no_duplicate_constructor, Loader = Loader)


def represent_ordereddict(dumper, data):
    """Represent an ordered mapping as a plain YAML mapping node.

    Every key and value is converted with ``dumper.represent_data`` and the
    resulting node pairs are kept in the iteration order of ``data``.
    """
    node_pairs = [
        (dumper.represent_data(key), dumper.represent_data(item))
        for key, item in data.items()
    ]
    return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', node_pairs)

# Dump OrderedDict instances through represent_ordereddict, i.e. as plain YAML mappings.
# No Dumper argument is given, so PyYAML's default applies.
yaml.add_representer(OrderedDict, represent_ordereddict)
30 changes: 30 additions & 0 deletions openfisca_core/parameters/helpers.py
@@ -1,7 +1,9 @@
import os
import re
import traceback

import numpy
import yaml

from openfisca_core import parameters, periods
from openfisca_core.errors import ParameterParsingError
Expand Down Expand Up @@ -29,6 +31,31 @@ def load_parameter_file(file_path, name = ''):
return _parse_child(name, data, file_path)


def save_parameters_to_dir(node, dir_path):
    """Write a parameter tree to ``dir_path`` as YAML files.

    A ``Parameter`` or ``ParameterScale`` is written to a file named after the
    last dotted segment of its ``name``. Any other node is written to
    ``index.yaml`` and its children are saved recursively: parameters and
    scales next to ``index.yaml``, other children in a sub-directory named
    after their key in ``node.children`` (created when missing).

    No file is written for a node whose ``to_yaml()`` result is empty.

    :param node: Node to save; it must provide ``to_yaml()``.
    :param dir_path: Existing directory to write into (``str`` or ``pathlib.Path``).
    """
    from pathlib import Path

    # Accept plain strings as well as Path objects; the "/" operator below needs a Path.
    dir_path = Path(dir_path)
    leaf_types = (parameters.Parameter, parameters.ParameterScale)

    def dump_node(file_basename: str):
        node_yaml = node.to_yaml()
        if not node_yaml:
            return
        node_text = yaml.dump(node_yaml, allow_unicode=True, default_flow_style=False, sort_keys=False)
        # Hack: remove quotes from dict keys containing dates.
        # Cf https://github.com/fpagnoux/baremes-ipp-parser/blob/master/bareme_ipp_parsers/commons.py
        node_text = re.sub(r"'(\d{4}-\d{2}-\d{2})':", r"\1:", node_text)
        file_path = dir_path / "{}.yaml".format(file_basename)
        # allow_unicode=True leaves non-ASCII characters unescaped, so the file
        # encoding must not depend on the platform's locale.
        file_path.write_text(node_text, encoding="utf-8")

    if isinstance(node, leaf_types):
        dump_node(file_basename=node.name.split(".")[-1])
        return

    dump_node(file_basename="index")
    for name, sub_node in node.children.items():
        if isinstance(sub_node, leaf_types):
            save_parameters_to_dir(node=sub_node, dir_path=dir_path)
        else:
            sub_dir = dir_path / name
            sub_dir.mkdir(exist_ok=True)
            save_parameters_to_dir(node=sub_node, dir_path=sub_dir)


def _compose_name(path, child_name = None, item_name = None):
if not path:
return child_name
Expand Down Expand Up @@ -97,3 +124,6 @@ def _validate_parameter(parameter, data, data_type = None, allowed_keys = None):
.format(key, parameter.name, list(allowed_keys)),
parameter.file_path
)

def _without_none_values(d):
return {k: v for k, v in d.items() if v is not None}
16 changes: 16 additions & 0 deletions openfisca_core/parameters/parameter.py
@@ -1,6 +1,7 @@
import copy
import os
import typing
from collections import OrderedDict

from openfisca_core import commons, periods
from openfisca_core.errors import ParameterParsingError
Expand Down Expand Up @@ -169,6 +170,21 @@ def update(self, period = None, start = None, stop = None, value = None):
def get_descendants(self):
    """Return an empty iterator."""
    # NOTE(review): presumably because this node has no children to walk — confirm.
    return iter(())

def to_yaml(self):
    """Build the mapping describing this Parameter, ready for YAML serialization.

    The mapping may hold "description", "documentation", "metadata" and
    "values"; entries whose value is None are left out, and a falsy
    (e.g. empty) ``metadata`` counts as None.
    """
    raw_fields = dict(
        description=self.description,
        documentation=self.documentation,
        metadata=self.metadata or None,
        values=self.values_as_yaml(),
    )
    return helpers._without_none_values(raw_fields)

def values_as_yaml(self):
    """Return the values as an ordered ``{instant_str: {"value": value}}`` mapping.

    Entries follow the order of ``self.values_list``.
    """
    values_by_instant = OrderedDict()
    for entry in self.values_list:
        instant_str = entry.instant_str
        values_by_instant[instant_str] = {"value": entry.value}
    return values_by_instant

def _get_at_instant(self, instant):
for value_at_instant in self.values_list:
if value_at_instant.instant_str <= instant:
Expand Down
8 changes: 8 additions & 0 deletions openfisca_core/parameters/parameter_node.py
Expand Up @@ -151,5 +151,13 @@ def clone(self):

return clone

def to_yaml(self):
    """Build the mapping describing this ParameterNode, ready for YAML serialization.

    Only the node's own "description", "documentation" and "metadata" are
    included; entries whose value is None are left out, and a falsy
    (e.g. empty) ``metadata`` counts as None.
    """
    node_fields = dict(
        description=self.description,
        documentation=self.documentation,
        metadata=self.metadata or None,
    )
    return helpers._without_none_values(node_fields)

def _get_at_instant(self, instant):
    """Return a ParameterNodeAtInstant built from this node's name, the node itself and ``instant``."""
    return ParameterNodeAtInstant(self.name, self, instant)
8 changes: 8 additions & 0 deletions openfisca_core/parameters/parameter_scale.py
Expand Up @@ -73,6 +73,14 @@ def clone(self):

return clone

def to_yaml(self):
    """Build the mapping describing this scale, ready for YAML serialization.

    "brackets" holds the ``to_yaml()`` result of every bracket, in order.
    "description" and "metadata" are left out when None, and a falsy
    (e.g. empty) ``metadata`` counts as None.
    """
    serialized_brackets = [bracket.to_yaml() for bracket in self.brackets]
    scale_fields = dict(
        brackets=serialized_brackets,
        description=self.description,
        metadata=self.metadata or None,
    )
    return helpers._without_none_values(scale_fields)

def _get_at_instant(self, instant):
brackets = [bracket.get_at_instant(instant) for bracket in self.brackets]

Expand Down
13 changes: 12 additions & 1 deletion openfisca_core/parameters/parameter_scale_bracket.py
@@ -1,4 +1,6 @@
from openfisca_core.parameters import ParameterNode
from collections import OrderedDict

from openfisca_core.parameters import helpers, ParameterNode


class ParameterScaleBracket(ParameterNode):
Expand All @@ -7,3 +9,12 @@ class ParameterScaleBracket(ParameterNode):
"""

_allowed_keys = set(['amount', 'threshold', 'rate', 'average_rate', 'base'])

def to_yaml(self):
    """Build the mapping describing this bracket, ready for YAML serialization.

    For every name in ``_allowed_keys`` that is set on the bracket (not
    missing and not None), the attribute's ``values_as_yaml()`` result is
    stored under that name. The result is an OrderedDict sorted by key.
    """
    values_by_key = {}
    for key in self._allowed_keys:
        child = getattr(self, key, None)
        if child is None:
            continue
        values_by_key[key] = child.values_as_yaml()
    filtered = helpers._without_none_values(values_by_key)
    return OrderedDict(sorted(filtered.items()))
52 changes: 52 additions & 0 deletions openfisca_core/scripts/normalize_yaml_parameters.py
@@ -0,0 +1,52 @@
#! /usr/bin/env python


"""Normalize a YAML parameter tree, loading it from a directory and re-writing it to another one.

In particular, this ensures that each YAML file contains exactly one parameter.
"""


import argparse
import logging
from pathlib import Path
import sys

from openfisca_core.parameters import load_parameter_file, Parameter, ParameterNode, save_parameters_to_dir


logger = logging.getLogger(__name__)


def check_path_length(base_dir, max_path_length):
    """Log an error for each ``*.yaml`` file under ``base_dir`` whose relative path is too long.

    The length measured is that of the path relative to ``base_dir``, as a
    string; it is reported when strictly greater than ``max_path_length``.
    """
    relative_paths = (
        str(yaml_path.relative_to(base_dir))
        for yaml_path in base_dir.rglob("*.yaml")
    )
    for relative_path in relative_paths:
        length = len(relative_path)
        if length <= max_path_length:
            continue
        logger.error("%r length is %d but max length is %d", relative_path, length, max_path_length)


def main():
    """Load the parameter tree found in ``source_dir`` and re-write it into ``target_dir``.

    ``target_dir`` is created, with any missing parent directories, when it
    does not exist. With ``--verbose`` the logging level is DEBUG instead of
    WARNING. When ``--max-path-length`` is given, the ``*.yaml`` paths found
    in ``target_dir`` after writing are checked and the too-long ones are
    logged as errors.
    """
    parser = argparse.ArgumentParser(description = __doc__)
    parser.add_argument('--max-path-length', type = int, default = None,
        help = "log error if path is longer than specified value")
    parser.add_argument('-v', '--verbose', action = 'store_true', default = False, help = "increase output verbosity")
    parser.add_argument('source_dir', type = Path, help = "directory with parameters to read")
    parser.add_argument('target_dir', type = Path, help = "directory where parameters are written")
    args = parser.parse_args()

    if not args.source_dir.is_dir():
        parser.error("Invalid source_dir")
    # Also create missing parents; still fails if target_dir exists but is not a directory.
    args.target_dir.mkdir(parents = True, exist_ok = True)

    # Honor --verbose: without it the level stays at logging's default (WARNING).
    logging.basicConfig(level = logging.DEBUG if args.verbose else logging.WARNING)

    parameters = load_parameter_file(args.source_dir)
    save_parameters_to_dir(parameters, args.target_dir)

    if args.max_path_length is not None:
        check_path_length(base_dir = args.target_dir, max_path_length = args.max_path_length)

# Run main() only when this file is executed as a script; its return value is passed to sys.exit().
if __name__ == "__main__":
    sys.exit(main())