Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-36216: Add simple schema description classes #17

Merged
merged 6 commits into from
Oct 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:

# We have two cores so we can speed up the testing with xdist
- name: Install xdist, openfiles and flake8 for pytest
run: pip install pytest-xdist pytest-openfiles pytest-flake8 pytest-cov
run: pip install "flake8<5" pytest-xdist pytest-openfiles pytest-flake8 pytest-cov

- name: Build and install
run: pip install -v .
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ describing the fundamentally relational objects that make up a database.

Felis is influenced by work on CSVW, which uses JSON-LD to describe CSV
files. CSVW is oriented a bit more towards publishing data to the web,
and that doesn't quite capture the use case of desribing tables,
and that doesn't quite capture the use case of describing tables,
especially those which haven't been created yet. Still, for services
which may return CSV files, a translation to CSVW will be
straightforward.
Expand Down Expand Up @@ -626,7 +626,7 @@ references.
## DBMS Extensions

DBMS Extension Annotations may be used to override defaults or provide a
way to describe non-standard paramters for creating objects in a
way to describe non-standard parameters for creating objects in a
database or file.

[The SQLAlchemy documentation on
Expand All @@ -639,7 +639,7 @@ user (Oracle), or file (SQLite) has already been created. Tools SHOULD
take into account the name of the schema defined in a felis description,
but parameters for creating the schema object are beyond the scope of a
felis description, because those parameters will likely be
instance-dependent and may contaian secrets, as in the case of Oracle.
instance-dependent and may contain secrets, as in the case of Oracle.

### MySQL

Expand Down Expand Up @@ -723,7 +723,7 @@ this can probably be automated with a proper vocabulary for Felis.

#### afw.table

A few of the metadata values for tables and columns are storeable on in
A few of the metadata values for tables and columns are storable in
the properties of a schema (table) or field.

#### YAML/JSON
Expand Down
3 changes: 3 additions & 0 deletions python/felis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from . import types
from .check import *
from .version import *
from .visitor import *

DEFAULT_CONTEXT = {
"@vocab": "http://lsst.org/felis/",
Expand Down
295 changes: 295 additions & 0 deletions python/felis/check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,295 @@
# This file is part of felis.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (https://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = ["CheckingVisitor", "FelisValidator"]

import logging
from collections.abc import Iterable, Mapping, MutableSet
from typing import Any

from .types import FelisType
from .visitor import Visitor

_Mapping = Mapping[str, Any]

logger = logging.getLogger("felis")


class FelisValidator:
    """Validate individual objects of a Felis structure.

    The class implements consistency checks for the kinds of objects
    (mappings) that can appear in a Felis structure: schemas, tables,
    columns, constraints and indexes. Every check method that handles an
    object with an ``@id`` field also records that ID and logs a warning
    when the same ID is seen more than once, hence each check method should
    only be called once for a given object.
    """

    # Values accepted for the ``@type`` field of a constraint object. Kept
    # as a tuple so that membership is decided by equality, exactly as with
    # a list, even if the value being tested is unhashable.
    _CONSTRAINT_TYPES = ("ForeignKey", "Check", "Unique")

    def __init__(self) -> None:
        # IDs of all objects checked so far, used for duplicate detection.
        self._ids: MutableSet[str] = set()

    def check_schema(self, schema_obj: _Mapping) -> None:
        """Validate contents of Felis schema object.

        Parameters
        ----------
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(schema_obj)
        self._check_visited(_id)

    def check_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        """Validate contents of Felis table object.

        Parameters
        ----------
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a table.
        schema_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent schema.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(table_obj)
        self._assert_name(table_obj)
        self._check_visited(_id)

    def check_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis column object.

        Parameters
        ----------
        column_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a column.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(column_obj)
        self._assert_name(column_obj)
        datatype_name = self._assert_datatype(column_obj)
        length = column_obj.get("length")
        felis_type = FelisType.felis_type(datatype_name)
        if not length and (felis_type.is_sized or felis_type.is_timestamp):
            # This is not a warning, because it's usually fine
            logger.info(f"No length defined for {_id} for type {datatype_name}")
        self._check_visited(_id)

    def check_primary_key(self, primary_key_obj: str | Iterable[str], table: _Mapping) -> None:
        """Validate contents of Felis primary key object.

        No checks are currently performed; the method exists so that the
        validator offers a check for every kind of visited object.

        Parameters
        ----------
        primary_key_obj : `str` or `Iterable` [ `str` ]
            Felis object representing a primary key.
        table : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """

    def check_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis constraint object.

        Parameters
        ----------
        constraint_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing a constraint.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(constraint_obj)
        constraint_type = constraint_obj.get("@type")
        if not constraint_type:
            raise ValueError(f"Constraint has no @type: {_id}")
        if constraint_type not in self._CONSTRAINT_TYPES:
            raise ValueError(f"Not a valid constraint type: {constraint_type}")
        self._check_visited(_id)

    def check_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        """Validate contents of Felis index object.

        Parameters
        ----------
        index_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing an index.
        table_obj : `Mapping` [ `str`, `Any` ]
            Felis object (mapping) representing parent table.

        Raises
        ------
        ValueError
            Raised if validation fails.
        """
        _id = self._assert_id(index_obj)
        self._assert_name(index_obj)
        # An index is rejected when it defines both columns and expressions.
        if "columns" in index_obj and "expressions" in index_obj:
            raise ValueError(f"Defining columns and expressions is not valid for index {_id}")
        self._check_visited(_id)

    def _assert_id(self, obj: _Mapping) -> str:
        """Verify that an object has a non-empty ``@id`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``@id`` field is missing or empty.

        Returns
        -------
        id : `str`
            The value of ``@id`` field.
        """
        _id: str = obj.get("@id", "")
        if not _id:
            # Mention the object name, when there is one, to help the user
            # locate the offending object.
            name = obj.get("name", "")
            maybe_string = f"(check object with name: {name})" if name else ""
            raise ValueError(f"No @id defined for object {maybe_string}")
        return _id

    def _assert_name(self, obj: _Mapping) -> None:
        """Verify that an object has a ``name`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``name`` field is missing.
        """
        if "name" not in obj:
            _id = obj.get("@id")
            # This helper is shared by tables, columns and indexes, so the
            # message must not claim that the object is a table.
            raise ValueError(f"No name for object {_id}")

    def _assert_datatype(self, obj: _Mapping) -> str:
        """Verify that an object has a valid ``datatype`` field.

        Parameters
        ----------
        obj : `Mapping` [ `str`, `Any` ]
            Felis object.

        Raises
        ------
        ValueError
            Raised if ``datatype`` field is missing or invalid.

        Returns
        -------
        datatype : `str`
            The value of ``datatype`` field.
        """
        datatype_name: str = obj.get("datatype", "")
        # The ID is only used in error messages; a missing ``@id`` must not
        # surface as an undocumented KeyError.
        _id = obj.get("@id")
        if not datatype_name:
            raise ValueError(f"No datatype defined for id {_id}")
        try:
            FelisType.felis_type(datatype_name)
        except TypeError:
            raise ValueError(f"Incorrect Type Name for id {_id}: {datatype_name}") from None
        return datatype_name

    def _check_visited(self, _id: str) -> None:
        """Record an object ID, logging a warning if it was already seen.

        Parameters
        ----------
        _id : `str`
            Felis object ID.
        """
        if _id in self._ids:
            logger.warning(f"Duplication of @id {_id}")
        self._ids.add(_id)


class CheckingVisitor(Visitor[None, None, None, None, None, None]):
    """Visitor that walks a Felis structure and validates every object.

    Each ``visit_*`` method hands its object to a `FelisValidator`, which
    raises an exception when a check fails.
    """

    def __init__(self) -> None:
        super().__init__()
        # A single validator is shared by all visit methods.
        self.checker = FelisValidator()

    def visit_schema(self, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_schema(schema_obj)
        tables = schema_obj["tables"]
        for table in tables:
            self.visit_table(table, schema_obj)

    def visit_table(self, table_obj: _Mapping, schema_obj: _Mapping) -> None:
        # Docstring is inherited.
        self.checker.check_table(table_obj, schema_obj)

        # Columns are mandatory; the remaining members are optional.
        for column in table_obj["columns"]:
            self.visit_column(column, table_obj)

        primary_key = table_obj.get("primaryKey", [])
        self.visit_primary_key(primary_key, table_obj)

        for constraint in table_obj.get("constraints", []):
            self.visit_constraint(constraint, table_obj)

        for index in table_obj.get("indexes", []):
            self.visit_index(index, table_obj)

    def visit_column(self, column_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_column(column_obj, table_obj)

    def visit_primary_key(self, primary_key_obj: str | Iterable[str], table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_primary_key(primary_key_obj, table_obj)

    def visit_constraint(self, constraint_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_constraint(constraint_obj, table_obj)

    def visit_index(self, index_obj: _Mapping, table_obj: _Mapping) -> None:
        # Docstring is inherited.
        validator = self.checker
        validator.check_index(index_obj, table_obj)
7 changes: 4 additions & 3 deletions python/felis/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
from sqlalchemy import create_engine

from . import DEFAULT_CONTEXT, DEFAULT_FRAME, __version__
from .model import Visitor, VisitorBase
from .check import CheckingVisitor
from .sql import SQLVisitor
from .tap import Tap11Base, TapLoadingVisitor, init_tables
from .utils import ReorderingVisitor

Expand All @@ -55,7 +56,7 @@ def create_all(engine_url: str, schema_name: str, dry_run: bool, file: io.TextIO
"""Create schema objects from the Felis FILE."""

schema_obj = yaml.load(file, Loader=yaml.SafeLoader)
visitor = Visitor(schema_name=schema_name)
visitor = SQLVisitor(schema_name=schema_name)
schema = visitor.visit_schema(schema_obj)

metadata = schema.metadata
Expand Down Expand Up @@ -220,7 +221,7 @@ def basic_check(file: io.TextIOBase) -> None:
schema_obj["@type"] = "felis:Schema"
# Force Context and Schema Type
schema_obj["@context"] = DEFAULT_CONTEXT
check_visitor = VisitorBase()
check_visitor = CheckingVisitor()
check_visitor.visit_schema(schema_obj)


Expand Down