From c366aca8a44c446d0191e7c10afb95f58e08431d Mon Sep 17 00:00:00 2001 From: Ilja Heitlager Date: Tue, 2 May 2017 19:55:26 +0200 Subject: [PATCH] #35 json (#42) * fixig make dev_env * rename null to nullable * adding key and required fields * #35 adding json_schema export * base properties done * #41 strict flag added * some documenation and adding a model set method * completing model.update * first attempt to singer emitter * altering escape/replacement order * updating changelog * pylint shitsu * fix issues --- CHANGELOG.md | 7 +- Makefile | 2 + README.rst | 2 +- docs/concepts/scan-emit.txt | 2 +- docs/example.txt | 2 +- docs/ref/emitter.txt | 20 +---- docs/ref/fields.txt | 30 +++++-- docs/ref/meta.txt | 114 ++++--------------------- docs/ref/model.txt | 4 +- docs/tutorial/tutorial0.txt | 2 +- docs/tutorial/tutorial2.txt | 2 +- docs/tutorial/tutorial3.txt | 4 +- src/data_migrator/emitters/__init__.py | 32 ++++++- src/data_migrator/emitters/base.py | 12 ++- src/data_migrator/emitters/csv.py | 1 - src/data_migrator/emitters/mysql.py | 5 +- src/data_migrator/emitters/singer.py | 42 +++++++++ src/data_migrator/models/__init__.py | 2 +- src/data_migrator/models/base.py | 66 +++++++++++--- src/data_migrator/models/fields.py | 89 +++++++++++++------ src/data_migrator/models/options.py | 65 ++++++++++++++ src/data_migrator/transform.py | 2 + tests/test_contrib.py | 22 ++--- tests/test_defaults.py | 23 ++++- tests/test_fields.py | 60 +++++++++---- tests/test_model.py | 55 +++++++++--- tests/test_options.py | 1 + 27 files changed, 443 insertions(+), 225 deletions(-) create mode 100644 src/data_migrator/emitters/singer.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e5cf5943..9c3df1f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,10 +4,15 @@ This is a simple data-migration package for python lovers. 
It is declarative lan The format is based on [Keep a Changelog](http://keepachangelog.com/) ## [0.5.1] - 2017-04-24 +### Changed +- pep 263 make shebang virtualenv aware #36 +- edit schema's more inline with json_schema #35 +- simplify semver to final/dev #37 + ### Added - test metadata in circle-ci #25 - pyup.ip added #31 -- simplified semver release to dev/final #37 +- add strict flag #41 ## [0.5.0] - 2017-04-21 diff --git a/Makefile b/Makefile index 41e112b0..3082cb11 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,8 @@ dev_requirements: @pip install -r py.requirements/build.txt dev_env: + @pip install -r py.requirements/docs.txt + @pip install -r py.requirements/build.txt @pip install -r py.requirements/environment.txt tox: diff --git a/README.rst b/README.rst index 0affbec2..435ee475 100644 --- a/README.rst +++ b/README.rst @@ -49,7 +49,7 @@ fast, readable and extendable class Result(models.Model): id = models.IntField(pos=0) # keep id uuid = models.UUIDField() # generate new uuid4 field - a = models.StringField(pos=1, default='NO_NULL', max_length=5, null='NULL', replacement=lambda x:x.upper()) + a = models.StringField(pos=1, default='NO_NULL', max_length=5, nullable='NULL', replacement=lambda x:x.upper()) b = models.StringField(pos=2, name='my_b') if __name__ == "__main__": diff --git a/docs/concepts/scan-emit.txt b/docs/concepts/scan-emit.txt index 415e18c0..b517e668 100644 --- a/docs/concepts/scan-emit.txt +++ b/docs/concepts/scan-emit.txt @@ -12,7 +12,7 @@ transformer will read stdin and send every CSV row to the model for scanning. Ou the fields define a scan loop: #. **select** the specified column from the row. -#. **null** test if not allowed and replace by default. +#. **nullable** test if not allowed and replace by None. #. **validate** the input (if validator is provided). #. **parse** the input (if parser is provided). #. **store** as native python value (aka NULL=>None). 
diff --git a/docs/example.txt b/docs/example.txt index 90b03527..57d568a5 100644 --- a/docs/example.txt +++ b/docs/example.txt @@ -53,7 +53,7 @@ is helpful. That is why we came up with *data-migrator*. One could simply replac id = models.IntField(pos=0) # keep id uuid = models.UUIDField() # generate new uuid4 field # replace NULLs and trim - a = models.StringField(pos=1, default='NO_NULL', max_length=5, null='NULL', replacement=lambda x:x.upper()) + a = models.StringField(pos=1, default='NO_NULL', max_length=5, nullable='NULL', replacement=lambda x:x.upper()) # parse this field b = models.StringField(pos=2, parse=parse_b, name='my_b') diff --git a/docs/ref/emitter.txt b/docs/ref/emitter.txt index d16f478c..456c1f30 100644 --- a/docs/ref/emitter.txt +++ b/docs/ref/emitter.txt @@ -4,25 +4,7 @@ Emitter class reference .. currentmodule:: data_migrator.emitters -This document covers features of the :class:`~data_migrator.emitter.BaseEmitter` class. Currently the -system has two emitters: ``CSVEmitter`` and ``MySQLEmitter`` implemented, of which the last is the -default emitter. An emitter provides the export format for the scanned and cleaned datasets. It also -provides preambles in the output files, for example to clean the target table before loading it. - -The basic structure for emitting is a combination between ``manager`` and ``emitter``: - -.. code-block:: python - - e = Emitter(manager=Model.objects) - print e.preamble(header=[..my header lines to add..]) - for l in Model.objects.all(): - print e.emit(l) # emit is returning a list of strings! - - -.. note:: - - At this moment *data-migrator* does not an actively take part in schema migrations of any - sort. It is purely about cleaning and transforming data (yet!). +.. automodule:: data_migrator.emitters MySQLEmitter diff --git a/docs/ref/fields.txt b/docs/ref/fields.txt index a882ff0f..5757a5a0 100644 --- a/docs/ref/fields.txt +++ b/docs/ref/fields.txt @@ -7,15 +7,15 @@ Model field reference .. 
currentmodule: data_migrator.models -This document contains all API references of BaseClass :class:`~.fields.BaseField` +This document contains all API references of BaseClass :class:`~.BaseField` including the `field options`_ and `field types`_ data_migrator offers. .. note:: - Technically, these models are defined in :mod:`data_migrator.models.fields`, but - for convenience they're imported into :mod:`data_migrator.models`, the standard - convention is to use ``from data_migrator import models`` and refer to fields as - ``models.Field`` + Technically, these models are defined in :mod:`data_migrator.models.fields` + , but for convenience they're imported into :mod:`data_migrator.models`, + the standard convention is to use ``from data_migrator import models`` and + refer to fields as ``models.Field`` Field options ============= @@ -53,10 +53,17 @@ value. StringField only has empty string as empty value. With this field it can changed to some other standard value. Consider a Country field as string and setting it to the home country by default. -``null`` --------- +``key`` +------- + +.. attribute:: Field.key + +If set, this indicates the field is a key field for identification of the object. + +``nullable`` +------------ -.. attribute:: Field.null +.. attribute:: Field.nullable If set it will match the source column value and consider this a ``None`` value. By default this attribute is set to ``None``. Note that for none Null fields ``None`` @@ -71,6 +78,13 @@ If set, this is a pre-emit replacement function. This could be used to insert dy replacement lookup select queries, adding more indirection into the data generation. Value could be either function or a string. +``required`` +--------------- + +.. attribute:: Field.required + +If set, this indicates the field is required to be set. 
+ ``parse`` --------- diff --git a/docs/ref/meta.txt b/docs/ref/meta.txt index 647d6ce8..bd8335a3 100644 --- a/docs/ref/meta.txt +++ b/docs/ref/meta.txt @@ -4,113 +4,31 @@ Meta class reference .. currentmodule:: data_migrator.models -This document covers features of the :class:`~data_migrator.models.Meta` class. The meta class -defines model specific settings. +This document covers features of the *Meta* class. +The meta class defines model specific settings and is used as an inner class +in the model: -.. note:: - - Technically, Meta is just a container and forwarded to :class:`~.Options` - -Field options -============= - -The following arguments are available to all field types. All are optional. +.. code-block:: python -``drop_if_none`` ----------------- + from data_migrator import models -.. attribute:: Meta.drop_if_none + class SampleModel(models.Model): + a = models.IntField(pos=1) -Is a list of field names as defined. If set *data-migrator* will check if fields are not None -and drop if one of the columns is. + class Meta: + drop_if_none = True -Any field listed in this attribute is checked after scanning and just before save-ing. +Every model can have its own meta class to define model specific options. .. note:: - Note that only NullXXXFields actually can be ``None`` after scanning and parsing. Non - Null fields are set to their default value. - - -``drop_non_unique`` -------------------- - -.. attribute:: Meta.drop_non_unique - -If ``True``, *data-migrator* will drop values if the column uniqueness check fails -(after parsing). Default is ``False``. - -Any field can be defined as a unique column. Any field set so, is checked after -scanning and just before save-ing. - -``emitter`` ------------ - -.. attribute:: Meta.emitter - -If set, *data-migrator* will use this emitter instead of the default emitter. - -``fail_non_unique`` -------------------- - -.. 
attribute:: Meta.fail_non_unique - -If ``True``, *data-migrator* will fail as a whole if the column uniqueness check fails -(after parsing). Default is ``False``. - -Any field can be defined as a unique column. Any field set so, is checked after -scanning and just before save-ing. - -``fail_non_validated`` ----------------------- - -.. attribute:: Meta.fail_non_validated - -If ``True``, *data-migrator* will fail as a whole if the column validation check fails -(after parsing). Default is ``False``. - -Any field can have its own validator, this is a rough method to prevent bad data from -being transformed and loaded. - -``file_name`` -------------- - -.. attribute:: Meta.file_name - -If set, *data-migrator* will use this as file_name for the emitter instead of the default -filename based on table_name. - - -``table_name`` --------------- - -.. attribute:: Meta.table_name - -If set, *data-migrator* will use this as table_name for the emitter instead of the default -tablename based on model_name. - - -``prefix`` ----------- - -.. attribute:: Meta.prefix - -If set, *data-migrator* will use this list of statements as a preamble in the generation of -the output statements. By default an emitter uses this to clear the old records. - - -``remark`` ----------- - -.. attribute:: Meta.remark + Technically, Meta is just a container and forwarded to :class:`~.Options` -If set, *data-migrator* will use this as the remark attribute in the Model, default='remark'. -Use this for example if you have a ``remark`` field in your model and need to free the keyword. -``manager`` ------------ +.. autoclass:: data_migrator.models.options.Options + :members: -.. attribute:: Meta.manager +.. note:: -If set, *data-migrator* will use this as the manager for this model. This is useful if -the ``transform`` method needs to be overridden. + Note that only NullXXXFields actually can be ``None`` after scanning and + parsing. Non Null fields are set to their default value. 
diff --git a/docs/ref/model.txt b/docs/ref/model.txt index c8615aa1..bc0a57d1 100644 --- a/docs/ref/model.txt +++ b/docs/ref/model.txt @@ -5,9 +5,9 @@ Model class reference .. module:: data_migrator.models.base :synopsis: Base for model definitions -.. currentmodule:: data_migrator.models +.. currentmodule:: data_migrator.models.base -This document covers features of the :class:`~data_migrator.models.Model` class. +This document covers features of the :class:`~Model` class. Model diff --git a/docs/tutorial/tutorial0.txt b/docs/tutorial/tutorial0.txt index 97fd0866..d7c78b65 100644 --- a/docs/tutorial/tutorial0.txt +++ b/docs/tutorial/tutorial0.txt @@ -70,7 +70,7 @@ build a simple transformer: id = models.IntField(pos=0) # keep id uuid = models.UUIDField() # generate new uuid4 field # replace NULLs and trim - a = models.StringField(pos=1, default='NO_NULL', max_length=5, null='NULL', replace=lambda x:x.upper()) + a = models.StringField(pos=1, default='NO_NULL', max_length=5, nullable='NULL', replace=lambda x:x.upper()) # parse this field b = models.StringField(pos=2, parse=parse_b, name='my_b') diff --git a/docs/tutorial/tutorial2.txt b/docs/tutorial/tutorial2.txt index b1efc10c..20bd2830 100644 --- a/docs/tutorial/tutorial2.txt +++ b/docs/tutorial/tutorial2.txt @@ -18,7 +18,7 @@ can be set on a model basis in the Meta block class Result(models.Model): id = models.IntField(pos=0) # keep id a = models.StringField(pos=1) - b = models.StringField(pos=2, null=None) + b = models.StringField(pos=2, nullable=None) class Meta: drop_if_none = ['b'] diff --git a/docs/tutorial/tutorial3.txt b/docs/tutorial/tutorial3.txt index 71935e8a..1494ee8c 100644 --- a/docs/tutorial/tutorial3.txt +++ b/docs/tutorial/tutorial3.txt @@ -29,7 +29,7 @@ many, this is easily achieved by adding a dedicated manager: class Result(models.Model): id = models.IntField(pos=0) # keep id a = models.StringField(pos=1) - b = models.StringField(pos=2, null=None) + b = models.StringField(pos=2, 
nullable=None) class Meta: manager = ResultManager @@ -68,7 +68,7 @@ example permissions or links to models) class Result(models.Model): id = models.IntField(pos=0) # keep id a = models.StringField(pos=1) - b = models.StringField(pos=2, null=None) + b = models.StringField(pos=2, nullable=None) if __name__ == "__main__": diff --git a/src/data_migrator/emitters/__init__.py b/src/data_migrator/emitters/__init__.py index de5df095..e12a0670 100644 --- a/src/data_migrator/emitters/__init__.py +++ b/src/data_migrator/emitters/__init__.py @@ -2,13 +2,37 @@ # -*- coding: UTF-8 -*- """Emitters are used to export models to output format. -This module contains all classes for emitters: base and actuals +This module contains all classes for emitters: base and actuals. Currently +the system has two emitters: :class:`~.CSVEmitter` and :class:`~.MySQLEmitter` +implemented, of which the last is the default emitter. An emitter provides the +export format for the scanned and cleaned datasets. It also provides preambles +and postambles in the output files, for example to clean the target table +before loading it. -* :class:`BaseEmitter` -* :class:`MySQLEmitter` -* :class:`CSVEmitter` +The following classes are defined in this module: + +* :class:`~.BaseEmitter` +* :class:`~.MySQLEmitter` +* :class:`~.CSVEmitter` + +The basic structure for emitting is a combination between +:class:`~.BaseManager` and :class:`~.BaseEmitter`: + +.. code-block:: python + + e = Emitter(manager=Model.objects) + print e.preamble(header=[..my header lines to add..]) + for l in Model.objects.all(): + print e.emit(l) # emit is returning a list of strings! + +.. note:: + + At this moment *data-migrator* does not actively take part in schema + migrations of any sort. It is purely about cleaning and transforming + data (yet!). 
""" from .mysql import MySQLEmitter # noqa from .csv import CSVEmitter # noqa +from .singer import SingerEmitter # noqa diff --git a/src/data_migrator/emitters/base.py b/src/data_migrator/emitters/base.py index b6ae77aa..0ed3e271 100644 --- a/src/data_migrator/emitters/base.py +++ b/src/data_migrator/emitters/base.py @@ -36,8 +36,8 @@ def emit(self, o): def filename(self): '''generate filename for this emitter. - generates a filename bases on :attr:`BaseEmitter.extension` and either - :attr:`~.Meta.file_name` or :attr:`~.Meta.table_name` + generates a filename bases on :attr:`~.BaseEmitter.extension` and + either :attr:`~.Options.file_name` or :attr:`~.Options.table_name` Returns: str: filename @@ -58,3 +58,11 @@ def preamble(self, headers): list: preamble lines ''' raise NotImplementedError + + def postamble(self): + '''generate a postamble for the file to emit. + + Returns: + list: postamble lines + ''' + return [] diff --git a/src/data_migrator/emitters/csv.py b/src/data_migrator/emitters/csv.py index 094d31f8..19682b50 100644 --- a/src/data_migrator/emitters/csv.py +++ b/src/data_migrator/emitters/csv.py @@ -14,7 +14,6 @@ class CSVEmitter(BaseEmitter): Attributes: base_template: base template to output the object extension (str): file extension for output file of this emitter - ''' extension = '.csv' base_template = '''%s''' diff --git a/src/data_migrator/emitters/mysql.py b/src/data_migrator/emitters/mysql.py index a9afc0dc..3a571505 100644 --- a/src/data_migrator/emitters/mysql.py +++ b/src/data_migrator/emitters/mysql.py @@ -15,7 +15,7 @@ class MySQLEmitter(BaseEmitter): base_template: base template to output the object extension (str): file extension for output file of this emitter ''' - base_template = '''INSERT INTO `%s` (%s) VALUES (%s);''' + base_template = '''INSERT %sINTO `%s` (%s) VALUES (%s);''' extension = '.sql' def __init__(self, *args, **kwargs): @@ -58,6 +58,7 @@ def _prepare(self): c = [f.name for k, f in self.meta.fields.items() if not 
isinstance(f, HiddenField)] columns = ", ".join(["`" + x + "`" for x in c]) replacements = ", ".join(["%(" + x + ")s" for x in c]) - template = self.base_template % (self.meta.table_name, columns, replacements) + _ignore = 'IGNORE ' if self.meta.drop_non_unique else '' + template = self.base_template % (_ignore, self.meta.table_name, columns, replacements) log.debug('emit template: %s', template) self._template = template diff --git a/src/data_migrator/emitters/singer.py b/src/data_migrator/emitters/singer.py new file mode 100644 index 00000000..c070f26f --- /dev/null +++ b/src/data_migrator/emitters/singer.py @@ -0,0 +1,42 @@ +#!/usr/bin/python +# -*- coding: UTF-8 -*- +import json + +from data_migrator.emitters.base import BaseEmitter +from data_migrator.utils import default_logger + +log = default_logger() + + +class SingerEmitter(BaseEmitter): + '''Singer.IO emitter to output transformations into singer format + + Attributes: + extension (str): file extension for output file of this emitter + ''' + extension = '.sng' + + def __init__(self, *args, **kwargs): + super(SingerEmitter, self).__init__(*args, **kwargs) + + def emit(self, l): + '''Output the result set of an object a singer.io record''' + res = [] + if hasattr(l, self.meta.remark): + res.append("# %s" % getattr(l, self.meta.remark)) + _record = {'type': "RECORD", "stream": self.meta.table_name, "record": l.emit()} + res.append(json.dumps(_record)) + return res + + def preamble(self, headers): + '''Singer has a schema as preamble''' + # before we spit out the data + _schema = self.model_class.json_schema() + _schema = {'type': "SCHEMA", "stream": self.meta.table_name, "schema": _schema, "key_properties": _schema.get('required', [])} + return [json.dumps(_schema)] + + def postamble(self): + '''Singer has a state as postamble''' + _state = self.model_class.json_schema() + _state = {'type': "STATE", "stream": self.meta.table_name, "value": []} + return [json.dumps(_state)] diff --git 
a/src/data_migrator/models/__init__.py b/src/data_migrator/models/__init__.py index 6a68bf3a..0f5736c7 100644 --- a/src/data_migrator/models/__init__.py +++ b/src/data_migrator/models/__init__.py @@ -10,7 +10,7 @@ """ from .base import Model # noqa -from .manager import SimpleManager # noqa +from .manager import BaseManager, SimpleManager # noqa from .fields import (IntField, NullIntField, StringField, NullField, # noqa NullStringField, BooleanField, UUIDField, # noqa JSONField, MappingField, HiddenField) # noqa diff --git a/src/data_migrator/models/base.py b/src/data_migrator/models/base.py index 9315c9fb..1a0eb89e 100644 --- a/src/data_migrator/models/base.py +++ b/src/data_migrator/models/base.py @@ -27,7 +27,7 @@ def __new__(mcs, name, bases, attrs): module = attrs.pop('__module__') new_class = super_new(mcs, name, bases, {'__module__': module}) - # Chek if we have a meta class + # Check if we have a meta class attr_meta = attrs.pop('Meta', None) if not attr_meta: meta = getattr(new_class, 'Meta', None) @@ -40,7 +40,8 @@ def __new__(mcs, name, bases, attrs): if isinstance(d, BaseField): fields[n] = d setattr(d, 'name', getattr(d, 'name') or n) - # fields[d.name]=d + + # now prepare the meta class/options setattr(new_class, '_meta', Options(new_class, meta, fields=fields)) # instantiate the manager @@ -52,14 +53,14 @@ def __new__(mcs, name, bases, attrs): class Model(with_metaclass(ModelBase)): - """Model is foundation for every transformation + """Model is foundation for every transformation. - Each non-abstract :class:`~data_migrator.models.Model` class must have a - :class:`~data_migrator.models.Manager` instance added to it. - Data-migrator ensures that in your model class you have at least a - standard ``SimpleManager`` specified. If you add your own - :class:`~data_migrator.models.Manager` instance attribute, the default one - does not appear. + Each non-abstract :class:`~.Model` class must have a + :class:`~.BaseManager` instance added to it. 
*data-migrator* ensures that + in your model class you have at least a standard :class:`~.SimpleManager` + specified, on case you do add your own specialization of + :class:`~.BaseManager` through the Meta class :attr:`~.Options.manager` + attribute. Attributes: objects: reference to manager @@ -78,8 +79,10 @@ def __init__(self, **kwargs): elif k in f: setattr(self, k, _fields[k]._value(v)) f.remove(k) - else: + elif _meta.strict: raise DataException("trying to set unknown field %s" % k) + else: + setattr(self, k, v) # add missing fields, put in None values (to be replaced by default # at emit) for k in f: @@ -113,6 +116,25 @@ def emit(self, escaper=None): res[f.name] = f.emit(self.__dict__[k], escaper) return res + def update(self, **kwargs): + '''Update method for chaining operations. + + Returns: + self, so that methods can be chained + + Raises: + :class:`~.DataException`: raised if trying to set non defined field + and strict model. + ''' + _meta = self.__class__._meta + _fields = self.__class__._meta.fields.keys() + for k, v in kwargs.items(): + if k in _fields or not _meta.strict: + setattr(self, k, v) + else: + raise DataException("trying to set unknown field %s" % k) + return self + def save(self): '''Save this object and add it to the list. @@ -122,6 +144,30 @@ def save(self): self.objects.save(self) return self + @classmethod + def json_schema(cls): + '''generate the json schema representation of this model. + + Returns: + dict with python representation of json schema. 
+ ''' + _fields = [f for f in cls._meta.fields.values() + if not isinstance(f, HiddenField)] + _required = [x.name for x in _fields if x.required] + _key = [x.name for x in _fields if x.key] + _required = list(set(_required + _key)) + _res = {} + for f in _fields: + _res.update(f.json_schema()) + _res = {'properties': _res, 'type': 'object'} + if _required: + _res['required'] = _required + if cls._meta.strict: + _res['additionalProperties'] = False + elif cls._meta.strict is not None: + _res['additionalProperties'] = True + return _res + def __repr__(self): try: u = str(self) diff --git a/src/data_migrator/models/fields.py b/src/data_migrator/models/fields.py index e00003c6..e088ebf8 100644 --- a/src/data_migrator/models/fields.py +++ b/src/data_migrator/models/fields.py @@ -22,22 +22,26 @@ class BaseField(object): '''Base column definition for the transformation DSL ''' creation_order = 0 + schema_type = 'object' def __init__(self, - pos=-1, name="", - default=None, null="NULL", - replacement=None, parse=None, validate=None, - max_length=None, unique=False, - validate_output=None): + pos=-1, name="", + default=None, nullable="NULL", + key=False, required=False, + replacement=None, parse=None, validate=None, + max_length=None, unique=False, + validate_output=None): # default value if null self.default = default if default is not None else getattr(self.__class__, 'default', default) + # key indicated key field + self.key = key # fixed position in the row to read self.max_length = max_length # name of this field (will be set in Model class construction) self.name = name # input string that defines null -> None - self.null = null + self.nullable = nullable # some function to apply to value self.parse = parse or getattr(self.__class__, 'parse', None) self.pos = int(pos) @@ -45,6 +49,9 @@ def __init__(self, if isstr(replacement): replacement = partial(_replace, replacement) self.replace = getattr(self.__class__, 'replace', replacement) + # required indicates must be 
filled in + self.required = required + # unique indicates a unique field self.unique = unique # some function to apply to value self.validate = validate or getattr(self.__class__, 'validate', None) @@ -55,12 +62,24 @@ def __init__(self, BaseField.creation_order += 1 def scan(self, row): - '''scan row and harvest distinct value''' + '''scan row and harvest distinct value. + + Takes a row of data and parses the required fields out of this. + + Args: + row (list): array of source data + + Returns: + parsed and process value. + Raises: + :class:`~.ValidationException`: raised if explicit validation + fails. + ''' # see if we want to read a column in the row v = None if self.pos >= 0: # do null check if enabled - if self.null is not None and row[self.pos] == self.null: + if self.nullable is not None and row[self.pos] == self.nullable: return v v = row[self.pos] if self.validate and not self.validate(v): @@ -74,21 +93,32 @@ def scan(self, row): return self._value(v) def emit(self, v, escaper=None): + '''helper function to export this field''' if self.max_length and isstr(v): v = v[:self.max_length] v = v or self.default if self.validate_output and not self.validate_output(v): raise ValidationException("not able to validate %s=%s" % (self.name, v)) # allow external function (e.g. 
SQL escape) - if escaper: - v = escaper(v) # check if we have a replacement string to take into account if self.replace: v = self.replace(v) + elif escaper: + v = escaper(v) return v - def _value(self, value): - return value + def json_schema(self): + '''generate json_schema representation of this field''' + t = self.schema_type + if 'Null' in self.__class__.__name__: + t = [t, "null"] + t = {'type': t} + if self.key: + t['key'] = True + return {self.name: t} + + def _value(self, v): # pylint: disable=R0201 + return v class HiddenField(BaseField): @@ -103,9 +133,10 @@ class HiddenField(BaseField): class IntField(BaseField): '''Basic integer field handler''' default = 0 + schema_type = 'integer' - def _value(self, value): - return int(value) if isstr(value) else value + def _value(self, v): + return int(v) if isstr(v) else v class NullIntField(BaseField): @@ -114,16 +145,19 @@ class NullIntField(BaseField): a field that accepts the column to be integer and can also be None, which is not the same as 0 (zero). ''' - def _value(self, value): - return int(value) if isstr(value) else value + schema_type = 'integer' + + def _value(self, v): + return int(v) if isstr(v) else v class StringField(BaseField): '''String field handler, a field that accepts the column to be string.''' default = "" + schema_type = 'string' - def _value(self, value): - return value.strip() if isinstance(value, str) else value + def _value(self, v): + return v.strip() if isinstance(v, str) else v class NullStringField(BaseField): @@ -132,8 +166,10 @@ class NullStringField(BaseField): a field that accepts the column to be string and can also be None, which is not the same as empty string (""). ''' - def _value(self, value): - return value.strip() if isinstance(value, str) else value + schema_type = 'string' + + def _value(self, v): + return v.strip() if isinstance(v, str) else v class BooleanField(BaseField): @@ -143,10 +179,11 @@ class BooleanField(BaseField): into ``True`` or ``False`` otherwise. 
''' default = False + schema_type = 'boolean' - def _value(self, value): + def _value(self, v): try: - return value.lower()[0] in ['y', 't', '1'] + return v.lower()[0] in ['y', 't', '1'] except (AttributeError, IndexError): return False @@ -164,7 +201,9 @@ def _value(self, v): class NullField(DefaultField): '''NULL returning field by generating None''' - pass + def json_schema(self): + '''generate json_schema representation of this field''' + return {self.name: {'type': 'null'}} class UUIDField(BaseField): @@ -172,6 +211,8 @@ class UUIDField(BaseField): a field that generates a ``str(uuid.uuid4())`` ''' + schema_type = 'string' + def __init__(self, *args, **kwargs): kwargs['default'] = None super(UUIDField, self).__init__(*args, **kwargs) @@ -228,7 +269,7 @@ def emit(self, v, escaper=None): raise DataException("%s - %s not in map" % (self.name, v)) else: v = self.data_map.get(v, self.default if self.default is not None - else v) + else v) if self.as_json: v = json.dumps(v) return super(MappingField, self).emit(v, escaper) diff --git a/src/data_migrator/models/options.py b/src/data_migrator/models/options.py index f872e041..270f61ad 100644 --- a/src/data_migrator/models/options.py +++ b/src/data_migrator/models/options.py @@ -5,6 +5,7 @@ from data_migrator.exceptions import DefinitionException +# list of extendable options for the Meta class _options = { 'drop_if_none': [], 'drop_non_unique': False, @@ -13,6 +14,7 @@ 'fail_not_validated': False, 'file_name': None, 'prefix': None, + 'strict': None, 'remark': 'remark' } @@ -23,6 +25,69 @@ class _EmptyMeta: class Options(object): def __init__(self, cls, meta, fields): + """Options is the Model Meta data container + + The Options class is the true meta data container and parser for a + :class:`~.Model`. It contains all flag and fields references for model + handling. Use these flags in the Meta sub class of a :class:`~.Model`. 
+ + Args: + cls: the Model this Options object is referring to + meta: the reference to a Meta class + fields (list): list of all field definitions + + Attributes: + drop_if_none (list): names of the columns to check for None, Is a + list of field names as defined. If set *data-migrator* will + check if fields are not None and drop if one of the columns is. + drop_non_unique (boolean): If ``True``, *data-migrator* will drop + values if the column uniqueness check fails (after parsing). + Default is ``False``. + + Any field can be defined as a unique column. Any field set so, + is checked after scanning and just before save-ing. + emitter (:class:`~.BaseEmitter`): If set, *data-migrator* will use + this emitter instead of the default emitter. + fail_non_unique (boolean): If ``True``, *data-migrator* will fail + as a whole if the column uniqueness check fails (after + parsing). Default is ``False``. + + Any field can be defined as a unique column. Any field set so, + is checked after scanning and just before save-ing. + fail_non_validated (boolean): If ``True``, *data-migrator* will + fail as a whole if the column validation check fails (after + parsing). Default is ``False``. + + Any field can have its own validator, this is a rough method to + prevent bad data from being transformed and loaded. + file_name (string): If set, *data-migrator* will use this as + file_name for the emitter instead of the default filename based + on model_name. + table_name (string): If set, *data-migrator* will use this as + table_name for the emitter instead of the default table_name + based on model_name. + prefix (string): If set, *data-migrator* will use this list of + statements as a preamble in the generation of the output + statements. + + By default an emitter uses this to clear the old state. + remark (string): If set, *data-migrator* will use this as the + remark attribute in the Model, default='remark'. 
Use this for + example if you have a ``remark`` field in your model and need + to free the keyword. + strict (boolean): If ``True``, *data-migrator* will be strict on + the model and does not allow values outside of the definitions. + Default is ``None``. + manager (:class:`~.BaseManager`): If set, *data-migrator* will use + this as the manager for this model. + + This is useful if the ``transform`` method needs to be + overridden. + + Raises: + :class:`~.DefinitionException`: raised if any of the defintions is + not to spec. + """ self.cls = cls self.meta = meta or _EmptyMeta self.model_name = cls.__name__ diff --git a/src/data_migrator/transform.py b/src/data_migrator/transform.py index 06ecbdce..c79dc017 100644 --- a/src/data_migrator/transform.py +++ b/src/data_migrator/transform.py @@ -131,6 +131,8 @@ def _write_output(self): except AssertionError as err: raise ValidationException("object: %d, %s" % (lineno, err)) lineno += 1 + for l in _emitter.postamble(): + f.write(l + '\n') if f != sys.stdout: f.close() self.log.info( diff --git a/tests/test_contrib.py b/tests/test_contrib.py index 70337e0a..13742fcd 100644 --- a/tests/test_contrib.py +++ b/tests/test_contrib.py @@ -12,11 +12,11 @@ class TestDutch(unittest.TestCase): def test_phone(self): '''test phone cleaner''' l = [ - ('00 31 6 - 20 20 20 20','+31620202020'), - ('06 20 20 20 20','+31620202020'), - ('020 -123 345 6','+31201233456'), - ('+440.203.020.23','+44020302023'), - ('+440 ada 203.020 // 23','+44020302023'), + ('00 31 6 - 20 20 20 20', '+31620202020'), + ('06 20 20 20 20', '+31620202020'), + ('020 -123 345 6', '+31201233456'), + ('+440.203.020.23', '+44020302023'), + ('+440 ada 203.020 // 23', '+44020302023'), ] for i, o in l: self.assertEqual(o, clean_phone(i)) @@ -62,12 +62,12 @@ def test_reader_no_header_reverse(self): def test_reader_fail(self): o = [ - ('bla', 'value', u'key,value\nhello,world\nhappy,camper\n',',',False,DefinitionException), - ('key', 'bla', 
u'key,value\nhello,world\nhappy,camper\n',',',False,DefinitionException), - (0, 'value', u'key,value\nhello,world\nhappy,camper\n',',',False,DefinitionException), - ('key', 0, u'key,value\nhello,world\nhappy,camper\n',',',False,DefinitionException), - ('key', 'value', u'key,value\nhello,world\nhello,camper\n',',',False,DefinitionException), - ('key', 'value', u'key,value\nhello,world\nhello,camper\n',',',True,NonUniqueDataException), + ('bla', 'value', u'key,value\nhello,world\nhappy,camper\n', ',', False, DefinitionException), + ('key', 'bla', u'key,value\nhello,world\nhappy,camper\n', ',', False, DefinitionException), + (0, 'value', u'key,value\nhello,world\nhappy,camper\n', ',', False, DefinitionException), + ('key', 0, u'key,value\nhello,world\nhappy,camper\n', ',', False, DefinitionException), + ('key', 'value', u'key,value\nhello,world\nhello,camper\n', ',', False, DefinitionException), + ('key', 'value', u'key,value\nhello,world\nhello,camper\n', ',', True, NonUniqueDataException), ] for k,v,f,d,u,exc in o: f = StringIO(f) diff --git a/tests/test_defaults.py b/tests/test_defaults.py index 6d98a0fd..a5c78a64 100644 --- a/tests/test_defaults.py +++ b/tests/test_defaults.py @@ -8,7 +8,7 @@ class DefaultModel(models.Model): a = models.HiddenField(pos=0) - b = models.IntField(pos=1) + b = models.IntField(pos=1, key=True) c = models.NullIntField(pos=2) d = models.StringField(pos=3) e = models.NullStringField(pos=4) @@ -18,10 +18,9 @@ class DefaultModel(models.Model): i = models.JSONField(pos=8) j = models.MappingField(pos=8, data_map={"M":"Male", "F": "Female"}) -class TestModel(unittest.TestCase): +class TestDefaultModel(unittest.TestCase): def test_basic(self): - '''Model Testing''' m = DefaultModel() self.assertIsNone(m.a) self.assertEqual(m.b, 0) @@ -35,7 +34,6 @@ def test_basic(self): self.assertEqual(m.j, None) def test_init(self): - '''Model Testing''' m = DefaultModel(a="bla", b=2, c=None, d="d", e=None, f=True, g="bla", h="uuid", i=["json"], j="M") 
self.assertEqual(m.a, "bla") self.assertEqual(m.b, 2) @@ -47,3 +45,20 @@ def test_init(self): six.assertRegex(self, m.h, u'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}') self.assertEqual(m.i, ["json"]) self.assertEqual(m.j, "M") + + def test_json_fields(self): + m = DefaultModel._meta + f = m.fields + self.assertEqual(f['a'].json_schema(), {'a': {'type': 'object'}}) + self.assertEqual(f['b'].json_schema(), {'b': {'type': 'integer', 'key': True}}) + self.assertEqual(f['c'].json_schema(), {'c': {'type': ['integer', 'null']}}) + self.assertEqual(f['d'].json_schema(), {'d': {'type': 'string'}}) + self.assertEqual(f['e'].json_schema(), {'e': {'type': ['string', 'null']}}) + self.assertEqual(f['f'].json_schema(), {'f': {'type': 'boolean'}}) + self.assertEqual(f['g'].json_schema(), {'g': {'type': 'null'}}) + self.assertEqual(f['h'].json_schema(), {'h': {'type': 'string'}}) + self.assertEqual(f['i'].json_schema(), {'i': {'type': 'object'}}) + self.assertEqual(f['j'].json_schema(), {'j': {'type': 'object'}}) + + def test_json_schema(self): + self.assertEqual(DefaultModel.json_schema(), {'properties': {'b': {'type': 'integer', 'key': True}, 'c': {'type': ['integer', 'null']}, 'd': {'type': 'string'}, 'e': {'type': ['string', 'null']}, 'f': {'type': 'boolean'}, 'g': {'type': 'null'}, 'h': {'type': 'string'}, 'i': {'type': 'object'}, 'j': {'type': 'object'}}, 'type': 'object', 'required': ['b']}) diff --git a/tests/test_fields.py b/tests/test_fields.py index 3234dbe5..1efea274 100644 --- a/tests/test_fields.py +++ b/tests/test_fields.py @@ -11,36 +11,59 @@ class TestFields(unittest.TestCase): def test_basics(self): '''get the basics of parsing and emitting''' - f = models.IntField(pos=0) + f = models.IntField(pos=0, name='f') self.assertEqual(f.default, 0) - self.assertEqual(f.scan(row=["10","20"]), 10) + self.assertFalse(f.key) + self.assertFalse(f.required) + self.assertEqual(f.scan(row=["10", "20"]), 10) self.assertEqual(f.emit(10), 10) + 
self.assertEqual(f.json_schema(), {'f': {'type': 'integer'}}) def test_replacement_string(self): '''replacement facility''' - f = models.StringField(replacement='hello {}') + f = models.StringField(replacement='hello {}', name='f') self.assertEqual(f.emit("world"), "hello world") + self.assertEqual(f.json_schema(), {'f': {'type': 'string'}}) + + def test_replacement_escape_string(self): + '''replacement precedence''' + f = models.StringField(replacement='hello {}', name='f') + self.assertEqual(f.emit("world", escaper=lambda x: "xx%sxx" % x), + "hello world") + self.assertEqual(f.json_schema(), {'f': {'type': 'string'}}) def test_functions(self): '''check the functions for parsing and emitting''' - f1 = lambda x:abs(int(x)) - f2 = lambda x:"number = %s"%x + f1 = lambda x: abs(int(x)) + f2 = lambda x: "number = %s"%x f = models.IntField(pos=0, parse=f1, replacement=f2) - self.assertEqual(f.scan(row=["-10","20"]), 10) + self.assertEqual(f.scan(row=["-10", "20"]), 10) self.assertEqual(f.emit(10), 'number = 10') - self.assertEqual(f.emit(10, escaper=lambda x:"xx%sxx" % x), 'number = xx10xx') + self.assertEqual(f.emit(10, escaper=lambda x: "xx%sxx" % x), + 'number = 10') + f3 = models.IntField(pos=0, parse=f1, replacement="number = {}") + self.assertEqual(f3.scan(row=["-10", "20"]), 10) + self.assertEqual(f3.emit(10), 'number = 10') + self.assertEqual(f3.emit(10, escaper=lambda x: "xx%sxx" % x), + 'number = 10') def test_default_null(self): '''null handling''' - f = models.IntField(pos=0, null="NULL", default=10) - self.assertEqual(f.scan(row=["NULL","20"]), None) + f = models.IntField(pos=0, nullable="NULL", default=10) + self.assertEqual(f.scan(row=["NULL", "20"]), None) self.assertEqual(f.default, 10) self.assertEqual(f.emit(None), 10) + def test_set_fields(self): + '''null handling''' + f = models.IntField(pos=0, key=True, required=True) + self.assertTrue(f.key) + self.assertTrue(f.required) + def test_exception_int(self): '''exception generation''' f = 
models.IntField(pos=0) - self.assertRaises(ValueError, f.scan, row=["BLA","20"]) + self.assertRaises(ValueError, f.scan, row=["BLA", "20"]) def test_string_length(self): '''build in string trimming''' @@ -49,37 +72,39 @@ def test_string_length(self): def test_null_string(self): '''dedicated null string fields''' - f = models.NullStringField(pos=0) + f = models.NullStringField(pos=0, name='f') r = f.scan(row=["NULL"]) self.assertIsNone(r) self.assertEqual(f.emit(r, escaper=sql_escape), "NULL") + self.assertEqual(f.json_schema(), {'f': {'type': ['string', 'null']}}) def test_null_int(self): '''dedicated null string fields''' - f = models.NullIntField(pos=0) + f = models.NullIntField(pos=0, name='f') r = f.scan(row=["NULL"]) self.assertIsNone(r) self.assertEqual(f.emit(r, escaper=sql_escape), "NULL") + self.assertEqual(f.json_schema(), {'f': {'type': ['integer', 'null']}}) def test_parse_value(self): '''add a parse function for a field''' f = models.IntField(pos=0, parse=lambda x: int(x) * 2) - self.assertEqual(f.scan(row=["10","20"]), 20) + self.assertEqual(f.scan(row=["10", "20"]), 20) def test_parse_row(self): '''add a parse function for a field''' f = models.IntField(parse=lambda x: int(x[1]) * 2) - self.assertEqual(f.scan(row=["10","20"]), 40) + self.assertEqual(f.scan(row=["10", "20"]), 40) def test_validation(self): '''validation exception generation''' f = models.IntField(pos=0, validate=lambda x: int(x) < 100) - self.assertRaises(ValidationException, f.scan, row=["200","20"]) + self.assertRaises(ValidationException, f.scan, row=["200", "20"]) def test_mapping_field(self): '''basic mapping field''' f = models.MappingField(pos=0, default="bad", data_map={"10": "hello", "200": "world"}) - self.assertEqual(f.scan(row=["200","20"]), "200") + self.assertEqual(f.scan(row=["200", "20"]), "200") self.assertEqual(f.emit("10"), "hello") self.assertEqual(f.emit("200"), "world") self.assertEqual(f.emit("mis"), "bad") @@ -103,9 +128,10 @@ def 
test_mapping_field_strict(self): def test_uuid_field(self): '''uuid field''' - f = models.UUIDField() + f = models.UUIDField(name='f') self.assertIsNone(f.default) self.assertEqual(f.emit("some value"), "some value") + self.assertEqual(f.json_schema(), {'f': {'type': 'string'}}) def test_uuid_field_default(self): '''uuid field, trying to set default''' diff --git a/tests/test_model.py b/tests/test_model.py index 299f1384..32de2d65 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -6,11 +6,11 @@ from data_migrator.models import Model, StringField, NullField, UUIDField from data_migrator.exceptions import DataException -class TrailModel(Model): - a = StringField(pos=0) +class TrialModel(Model): + a = StringField(pos=0, key=True) b = StringField(pos=1) empty = NullField() - uuid = UUIDField() + uuid = UUIDField(required=True) class Meta: table_name = "new_name" @@ -26,59 +26,86 @@ class BasicModel(Model): def test_default_init(self): '''model default initialization''' - o = TrailModel() + o = TrialModel() self.assertEqual(o.a, '') self.assertEqual(o.b, '') self.assertIsNone(o.empty) + self.assertIsNone(o._meta.strict) self.assertIsNotNone(o.uuid) self.assertNotEqual(o.uuid, '') - self.assertEqual(str(o), 'TrailModel object') + self.assertEqual(str(o), 'TrialModel object') def test_init(self): '''model initialization''' d = {"a": "hello", "b": "World"} - o1 = TrailModel(a="hello", b="world", empty="somevalue", uuid='bla') - o2 = TrailModel(**d) + o1 = TrialModel(a="hello", b="world", empty="somevalue", uuid='bla') + o2 = TrialModel(**d) self.assertEqual(o1.a, "hello") self.assertEqual(o1.empty, None) self.assertNotEqual(o1.uuid, 'bla') self.assertIsNotNone(o1.uuid) self.assertEqual(o2.a, "hello") - self.assertEqual(o2._meta.model_name, "TrailModel") + self.assertEqual(o2._meta.model_name, "TrialModel") self.assertEqual(o2._meta.table_name, "new_name") def test_fail_extra_fields(self): '''don't except extra fields''' d = {"a":"hello", "b":"World", 
"c":"fail"} - self.assertRaises(DataException, TrailModel, a="hello", b="world", c="fail") - self.assertRaises(DataException, TrailModel, **d) + self.assertTrue(TrialModel(**d)) + t, TrialModel._meta.strict = TrialModel._meta.strict, True + self.assertRaises(DataException, TrialModel, a="hello", b="world", c="fail") + self.assertRaises(DataException, TrialModel, **d) + TrialModel._meta.strict = t def test_remark(self): '''can set an addtional remark on this object''' remark = "additional remark" d = {"a": "hello", "b": "World", "description": remark} - o1 = TrailModel(**d) + o1 = TrialModel(**d) self.assertEqual(o1.description, remark) def test_scan_row(self): '''can set object based on scan''' row = ['hello', 'world'] - o1 = TrailModel.objects.scan_row(row) + o1 = TrialModel.objects.scan_row(row) self.assertEqual(o1[0].a, 'hello') + def test_set(self): + '''set values in a chain''' + row = ['hello', 'world'] + o1 = TrialModel().scan(row).update(a='hallo') + self.assertEqual(o1.a, 'hallo') + + def test_set_fail(self): + '''set values in a chain''' + row = ['hello', 'world'] + o1 = TrialModel().scan(row).update(a='hallo') + t, TrialModel._meta.strict = TrialModel._meta.strict, True + self.assertRaises(DataException, o1.update, d='hallo') + TrialModel._meta.strict = t + def test_default_emit(self): '''default values are returned on emit''' - o = TrailModel() + o = TrialModel() e = o.emit() self.assertEqual(e['a'], '') self.assertEqual(e['b'], '') def test_init_emit(self): '''(default) values are returned on emit''' - o = TrailModel(a="hello", b=None) + o = TrialModel(a="hello", b=None) e = o.emit() self.assertEqual(e['a'], 'hello') self.assertEqual(e['b'], '') + def test_json_schema(self): + d = {"a": "hello", "b": "World", "c": "fail"} + m = TrialModel(**d) + self.assertTrue(m) + t, TrialModel._meta.strict = TrialModel._meta.strict, True + # print(TrialModel.json_schema()) + # print(m.json_schema()) + TrialModel._meta.strict = t + if __name__ == '__main__': 
unittest.main() diff --git a/tests/test_options.py b/tests/test_options.py index 6cb49d85..581ed4ef 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -30,6 +30,7 @@ class MetaA: self.assertFalse(o.drop_non_unique) self.assertFalse(o.fail_non_unique) self.assertEqual(o.file_name, None) + self.assertEqual(o.strict, None) self.assertTrue(str(o).startswith('Options:'))