Skip to content

Commit

Permalink
Added new command 'tdda detect' for CSV/Feather files.
Browse files Browse the repository at this point in the history
Usage is:
    tdda detect foo.csv foo.tdda output.csv

which is equivalent to:
    tdda verify foo.csv foo.tdda --detect output.csv

No new functionality, it's just slightly easier syntax on the command line
if you're doing detection rather than verification.

It takes all the same optional flags as verify does.
  • Loading branch information
simonbrownsb committed Apr 25, 2018
1 parent 95eb446 commit 8cd396d
Show file tree
Hide file tree
Showing 15 changed files with 544 additions and 98 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from setuptools import setup, find_packages

# version import is from tdda subdirectory here, not from some other install.
from tdda.version import version as __version__

def read(fname):
Expand Down
13 changes: 13 additions & 0 deletions tdda/constraints/db/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
:py:func:`verify_db_table`:
Verify (check) a single database table, against a set of previously
discovered constraints.
:py:func:`detect_db_table`:
Detect failures from verifying a single database table against a set of
previously discovered constraints (not implemented yet for databases).
"""
from __future__ import division
from __future__ import print_function
Expand Down Expand Up @@ -310,6 +314,15 @@ def verify_db_table(dbtype, db, tablename, constraints_path, epsilon=None,
VerificationClass=DatabaseVerification, **kwargs)


def detect_db_table(dbtype, db, tablename, constraints_path, epsilon=None,
                    type_checking='strict', testing=False, **kwargs):
    """
    Detect failures from verification of constraints on a database table.

    Detection is not implemented (yet) for databases, so every call
    raises. All parameters are accepted but unused.

    :raises NotImplementedError: always.
    """
    # NotImplementedError is the built-in exception for this purpose; the
    # previously used name NotImplementedException does not exist in Python
    # and would have surfaced as a NameError instead.
    raise NotImplementedError('Detection is not implemented (yet) '
                              'for databases.')


def discover_db_table(dbtype, db, tablename, inc_rex=False):
"""
Automatically discover potentially useful constraints that characterize
Expand Down
93 changes: 93 additions & 0 deletions tdda/constraints/db/detect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# -*- coding: utf-8 -*-

"""
Support for database constraint detection from the command-line tool
"""

from __future__ import division
from __future__ import print_function

# Extra usage text handed to database_arg_parser() when the command-line
# parser for database detection is built.
USAGE = '''
Parameters:
* table is one of:
- a database table name
- a schema-qualified table name of the form schema.table
- a database table name qualified by database type, of the
form dbtype:table or dbtype:schema.table
* constraints.tdda is a JSON .tdda file containing constraints.
* detection output file is not implemented yet.
'''

import argparse
import os
import sys

from tdda import __version__
from tdda.constraints.flags import detect_parser, detect_flags
from tdda.constraints.db.constraints import detect_db_table
from tdda.constraints.db.drivers import (database_connection, parse_table_name,
database_arg_parser,
database_arg_flags)


def detect_database_table_from_file(table, constraints_path,
                                    conn=None, dbtype=None, db=None,
                                    host=None, port=None, user=None,
                                    password=None, **kwargs):
    """
    Run detection on a database table against the constraints stored in
    the given .tdda file, and print whatever the detection returns.

    The table name and database type are first normalised through
    ``parse_table_name``; a connection is then obtained from
    ``database_connection`` using the supplied connection settings, and
    any extra keyword arguments are forwarded to ``detect_db_table``.

    Not implemented for databases.
    """
    table, dbtype = parse_table_name(table, dbtype)
    connection_settings = dict(table=table, conn=conn, dbtype=dbtype, db=db,
                               host=host, port=port,
                               user=user, password=password)
    db = database_connection(**connection_settings)
    outcome = detect_db_table(dbtype, db, table, constraints_path, **kwargs)
    print(outcome)


def get_detect_params(args):
    """
    Parse the command-line arguments for database detection.

    Builds the parser through ``database_arg_parser`` (using the
    detection parser factory and the module's ``USAGE`` text), adds the
    three positional arguments, and lets ``database_arg_flags`` process
    *args* into a parameter dictionary.

    :param args: list of command-line argument strings (without the
                 program name).
    :returns: dictionary of parameters, including ``table``,
              ``constraints_path`` and ``detect_outpath`` (each ``None``
              if the corresponding positional argument is empty).
    """
    # This module imports only detect_parser/detect_flags; the verify_*
    # names used previously are not in scope here and raised NameError.
    parser = database_arg_parser(detect_parser, USAGE)
    parser.add_argument('table', nargs=1, help='database table name')
    parser.add_argument('constraints', nargs=1,
                        help='constraints file to verify against')
    parser.add_argument('detect_outpath', nargs=1,
                        help='file to write detection results to')
    params = {}
    flags = database_arg_flags(detect_flags, parser, args, params)
    # nargs=1 positionals arrive as one-element lists; unwrap them.
    params['table'] = flags.table[0] if flags.table else None
    params['constraints_path'] = (flags.constraints[0] if flags.constraints
                                  else None)
    params['detect_outpath'] = (flags.detect_outpath[0] if flags.detect_outpath
                                else None)
    return params


class DatabaseDetector:
    """
    Command-line driver for constraint detection on a database table.

    Holds the argument list and verbosity flag, and runs detection on
    demand via :py:meth:`detect`.
    """
    def __init__(self, argv, verbose=False):
        self.verbose = verbose
        self.argv = argv

    def detect(self):
        """
        Parse the stored arguments (skipping the first element) and run
        detection on the database table they describe.
        """
        cli_args = self.argv[1:]
        detect_database_table_from_file(**get_detect_params(cli_args))


def main(argv):
    """
    Entry point for database detection from the command line.

    If the first argument after the program name is ``-v`` or
    ``--version``, print the version and exit with status 0; otherwise
    run detection using the full argument list.
    """
    version_requested = len(argv) > 1 and argv[1] in ('-v', '--version')
    if version_requested:
        print(__version__)
        sys.exit(0)
    DatabaseDetector(argv).detect()


# When executed directly as a script, run detection with the process's
# command-line arguments.
if __name__ == '__main__':
    main(sys.argv)

4 changes: 4 additions & 0 deletions tdda/constraints/db/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from tdda.constraints.db.drivers import applicable
from tdda.constraints.db.discover import DatabaseDiscoverer
from tdda.constraints.db.verify import DatabaseVerifier
from tdda.constraints.db.detect import DatabaseDetector


class TDDADatabaseExtension(ExtensionBase):
Expand Down Expand Up @@ -41,3 +42,6 @@ def discover(self):
def verify(self):
    """
    Run database constraint verification with this extension's
    arguments and verbosity, returning the verifier's result.
    """
    verifier = DatabaseVerifier(self.argv, verbose=self.verbose)
    return verifier.verify()

def detect(self):
    """
    Run database constraint detection with this extension's arguments
    and verbosity, returning the detector's result.
    """
    detector = DatabaseDetector(self.argv, verbose=self.verbose)
    return detector.detect()

17 changes: 4 additions & 13 deletions tdda/constraints/db/testdbconstraints.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# -*- coding: utf-8 -*-

"""
Test Suite
Test Suite for database constraints.
The tests only use SQLite, since that can be used in a contained way, without
depending on having set up any database server.
"""

from __future__ import division
Expand Down Expand Up @@ -146,18 +149,6 @@ def test_sqlite_discover_elements_rex(self):
TestSQLiteDatabaseConstraintDiscoverers.set_default_data_location(TESTDATA_DIR)


@unittest.skip('test environment not set up for PostgreSQL yet')
class TestPostgreSQLDatabaseConstraintVerifiers(unittest.TestCase):
def test_postgresql(self):
pass


@unittest.skip('test environment not set up for MySQL yet')
class TestMySQLDatabaseConstraintVerifiers(unittest.TestCase):
def test_mysql(self):
pass


if __name__ == '__main__':
ReferenceTestCase.main()

8 changes: 2 additions & 6 deletions tdda/constraints/db/verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@

"""
Support for database constraint verification from the command-line tool
Verify CSV files, or Pandas or R DataFrames saved as feather files,
against a constraints from .tdda JSON file.
constraints file.
"""

from __future__ import division
Expand Down Expand Up @@ -55,7 +51,7 @@ def verify_database_table_from_file(table, constraints_path,
print(verify_db_table(dbtype, db, table, constraints_path, **kwargs))


def get_params(args):
def get_verify_params(args):
parser = database_arg_parser(verify_parser, USAGE)
parser.add_argument('table', nargs=1, help='database table name')
parser.add_argument('constraints', nargs=1,
Expand All @@ -74,7 +70,7 @@ def __init__(self, argv, verbose=False):
self.verbose = verbose

def verify(self):
params = get_params(self.argv[1:])
params = get_verify_params(self.argv[1:])
verify_database_table_from_file(**params)


Expand Down
15 changes: 14 additions & 1 deletion tdda/constraints/examples/files_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from tdda.constraints.flags import discover_parser, discover_flags
from tdda.constraints.flags import verify_parser, verify_flags
from tdda.constraints.flags import detect_parser, detect_flags
from tdda.constraints.extension import ExtensionBase

from tdda.constraints.base import (
Expand Down Expand Up @@ -79,19 +80,31 @@ def discover(self):
f.write(results)
else:
print(results)
return results

def verify(self):
    """
    Build the verification argument parser, add the positional
    directory and constraints arguments, and hand the parser to
    :py:meth:`verify_or_detect`.
    """
    cli_parser = verify_parser()
    positionals = (
        ('directory', 'directory path'),
        ('constraints', 'constraints file to verify against'),
    )
    for arg_name, help_text in positionals:
        cli_parser.add_argument(arg_name, nargs=1, help=help_text)
    return self.verify_or_detect(cli_parser)

def detect(self):
    """
    Build the detection argument parser, add the positional directory
    and constraints arguments, and hand the parser to
    :py:meth:`verify_or_detect`.
    """
    cli_parser = detect_parser()
    positionals = (
        ('directory', 'directory path'),
        ('constraints', 'constraints file to verify against'),
    )
    for arg_name, help_text in positionals:
        cli_parser.add_argument(arg_name, nargs=1, help=help_text)
    return self.verify_or_detect(cli_parser)

def verify_or_detect(self, parser):
    """
    Shared implementation behind :py:meth:`verify` and :py:meth:`detect`.

    Processes ``self.argv[1:]`` with the supplied *parser* (through
    ``verify_flags``), then runs ``verify_directory_from_file`` on the
    directory and constraints file named on the command line, with
    ``type_checking`` forced to ``'strict'``.

    Returns the value returned by ``verify_directory_from_file``.
    """
    params = {}
    # NOTE(review): verify_flags is used even when the parser was built by
    # detect_parser() -- confirm detect_flags is not required for detection.
    flags = verify_flags(parser, self.argv[1:], params)
    # nargs=1 positionals arrive as one-element lists; unwrap them.
    params['path'] = flags.directory[0] if flags.directory else None
    params['constraints_path'] = (flags.constraints[0] if flags.constraints
                                  else None)
    params['type_checking'] = 'strict'
    # NOTE(review): the directory is verified twice below (once for the
    # print, once for the returned results). The print looks like a
    # leftover from before results were returned -- confirm and keep one.
    print(verify_directory_from_file(**params))
    results = verify_directory_from_file(**params)
    return results


def discover_directory(path, constraints_path=None, **kwargs):
Expand Down
110 changes: 102 additions & 8 deletions tdda/constraints/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,78 @@
An example of a simple extension is included with the set of standard
examples. See :ref:`examples`.
Extension Overview
------------------
An extension should provide:
- an implementation (subclass) of :py:class:`ExtensionBase`, to
provide a command-line interface, extending the ``tdda`` command
to support a particular type of input data.
- an implementation (subclass) of :py:class:`BaseConstraintCalculator`,
to provide methods for computing individual constraint results.
- an implementation (subclass) of :py:class:`BaseConstraintDetector`,
to provide methods for generating detection results.
A typical implementation looks like::
from tdda.constraints.flags import discover_parser, discover_flags
from tdda.constraints.flags import verify_parser, verify_flags
from tdda.constraints.flags import detect_parser, detect_flags
from tdda.constraints.extension import ExtensionBase
from tdda.constraints.base import DatasetConstraints
from tdda.constraints.baseconstraints import (BaseConstraintCalculator,
BaseConstraintVerifier,
BaseConstraintDiscoverer)
from tdda.rexpy import rexpy
class MyExtension(ExtensionBase):
def applicable(self):
...
def help(self, stream=sys.stdout):
print('...', file=stream)
def spec(self):
return '...'
def discover(self):
parser = discover_parser()
parser.add_argument(...)
params = {}
flags = discover_flags(parser, self.argv[1:], params)
data = ... get data source from flags ...
discoverer = MyConstraintDiscoverer(data, **params)
constraints = discoverer.discover()
results = constraints.to_json()
... write constraints JSON to output file
return results
def verify(self):
parser = verify_parser()
parser.add_argument(...)
params = {}
flags = verify_flags(parser, self.argv[1:], params)
data = ... get data source from flags ...
verifier = MyConstraintVerifier(data, **params)
constraints = DatasetConstraints(loadpath=...)
results = verifier.verify(constraints)
return results
def detect(self):
parser = detect_parser()
parser.add_argument(...)
params = {}
flags = detect_flags(parser, self.argv[1:], params)
data = ... get data source from flags ...
detector = MyConstraintDetector(data, **params)
constraints = DatasetConstraints(loadpath=...)
results = detector.detect(constraints)
return results
Extension API
-------------
"""
Expand All @@ -47,6 +119,11 @@ class ExtensionBase:
:py:meth:`verify` methods.
"""
def __init__(self, argv, verbose=False):
    """
    A subclass of :py:class:`ExtensionBase` should call its superclass
    :py:meth:`__init__` initialisation method with a list of argument
    strings (such as ``sys.argv``).

    The *argv* list and the *verbose* flag are stored on the instance
    as ``self.argv`` and ``self.verbose``.
    """
    self.argv = argv
    self.verbose = verbose

Expand Down Expand Up @@ -85,10 +162,10 @@ def discover(self):
The :py:meth:`discover` method should implement constraint
discovery.
It should allow whatever other optional or mandatory flags or
parameters are required to specify the data from which constraints
are to be discovered, and the name of the file to which the
constraints are to be written.
It should use the ``self.argv`` variable to get whatever other
optional or mandatory flags or parameters are required to specify
the data from which constraints are to be discovered, and the name
of the file to which the constraints are to be written.
"""
pass

Expand All @@ -101,9 +178,26 @@ def verify(self):
the command line, and verify these constraints on the data
specified.
It should allow whatever other optional or mandatory flags or
parameters are required to specify the data on which the constraints
are to be verified.
It should use the ``self.argv`` variable to get whatever other
optional or mandatory flags or parameters are required to specify
the data on which the constraints are to be verified.
"""
pass

def detect(self):
    """
    Hook for constraint detection; subclasses should override it.

    An implementation should load constraints from the ``.tdda`` file
    named on the command line, verify them against the specified data,
    and produce detection output.

    Everything else it needs should be taken from ``self.argv``: the
    optional or mandatory flags or parameters that identify the data to
    be checked, the destination for the detection output, and any
    detection-specific flags.

    This base implementation does nothing.
    """
    pass

Expand Down Expand Up @@ -257,7 +351,7 @@ def calc_rex_constraint(self, colname, constraint, detect=False):

class BaseConstraintDetector:
"""
The :py:mod:`BaseConstraintCalculator` class defines a default or dummy
The :py:mod:`BaseConstraintDetector` class defines a default or dummy
implementation of all of the methods that are required in order
to implement constraint detection via a subclass of the base
:py:mod:`BaseConstraintVerifier` class.
Expand Down
Loading

0 comments on commit 8cd396d

Please sign in to comment.