Skip to content

Commit

Permalink
Merge pull request #43 from sdss/registry
Browse files Browse the repository at this point in the history
adds a database registry
  • Loading branch information
havok2063 committed Oct 28, 2020
2 parents 828ebc1 + 487e44f commit b5c42b1
Show file tree
Hide file tree
Showing 4 changed files with 265 additions and 0 deletions.
4 changes: 4 additions & 0 deletions docs/sphinx/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,8 @@ Utils
:members:
:show-inheritance:

.. automodule:: sdssdb.utils.registry
   :members:
   :show-inheritance:

.. autofunction:: sdssdb.utils.schemadisplay.create_schema_graph
197 changes: 197 additions & 0 deletions python/sdssdb/utils/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
# !/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filename: registry.py
# Project: utils
# Author: Brian Cherinka
# Created: Tuesday, 20th October 2020 1:44:45 pm
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Tuesday, 20th October 2020 1:44:45 pm
# Modified By: Brian Cherinka


from __future__ import print_function, division, absolute_import
import pathlib
from typing import Union, Type
try:
from astropy.table import Table, Column
except ImportError:
Table = None
Column = None

# names exported by ``from sdssdb.utils.registry import *``
__all__ = ['list_databases', 'display_table']

# module-level registry cache; stays None until update_db_registry() has run
db_registry = None


def update_db_registry() -> dict:
    """ Build, or return the cached, registry of sdssdb databases

    Uses pathlib to walk the directory two levels above this file.  Every
    directory whose immediate parent is named "peewee" or "sqlalchemy" is
    recorded as a database of that ORM, and each file directly inside it
    that matches the glob ``[a-z]*.py`` is recorded as one of its schema.
    Assumes a given structure of "orm/database/schema.py".

    If the module-level ``db_registry`` is already populated it is returned
    unchanged and the filesystem is not scanned again.

    Returns
    -------
    dict
        A dictionary of the form
        ``{orm: {database: {'schema': [schema, ...]}}}`` with the two keys
        "peewee" and "sqlalchemy"
    """
    # if global dict already populated, return the cache
    if db_registry is not None:
        return db_registry

    sdssdb_path = pathlib.Path(__file__).parent.parent
    registry = {'peewee': {}, 'sqlalchemy': {}}
    for directory in sdssdb_path.rglob('./'):
        # The ORM is the name of the parent directory.  Keying on it directly
        # (rather than testing for a substring anywhere in the full path)
        # keeps a database from being filed under the wrong ORM when some
        # unrelated path component happens to contain "peewee".
        orm = directory.parent.stem

        # reject anything that is not a directory, paths ending in '_' and
        # directories whose parent is not peewee or sqlalchemy
        if (orm not in registry or not directory.is_dir()
                or directory.as_posix().endswith('_')):
            continue

        # look for schema.py files; sorted so the listing does not depend on
        # the order in which the filesystem returns directory entries
        schema = sorted(s.stem for s in directory.glob('[a-z]*.py'))
        registry[orm][directory.stem] = {'schema': schema}
    return registry


# populate the module-level registry once, when this module is imported
db_registry = update_db_registry()


def list_databases(orm: Union[str, None] = None,
                   with_schema: bool = False) -> Union[dict, list]:
    """ Return a list of sdssdb databases

    Returns a list of available databases in sdssdb.  When no orm is
    specified, returns a dict of orm:database key:values.  If with_schema is
    specified, also returns a list of schema for each database.

    Parameters
    ----------
    orm : str, optional
        The type of ORM to select on, by default None.  Accepts "peewee" or
        "sqlalchemy" and the short aliases "pw" and "sqla".  None or an empty
        string selects every ORM.
    with_schema : bool, optional
        If True, also includes the schemas for each database, by default False

    Returns
    -------
    Union[dict, list]
        A list of databases for a given ORM or a dict of database:schema values
        or a dict of orm:database values.  With ``with_schema=True`` the
        returned dict is the module-level registry (or one of its
        sub-dictionaries) itself, not a copy.

    Raises
    ------
    TypeError
        when input orm is not a string
    ValueError
        when input orm is not either peewee or sqlalchemy
    """
    # anything other than None must be a real string (subclasses included);
    # falsy non-strings such as 0 or [] are rejected as well
    if orm is not None and not isinstance(orm, str):
        raise TypeError(f'Input {orm} must be a string.')

    if orm and orm not in ['peewee', 'pw', 'sqla', 'sqlalchemy']:
        raise ValueError(f"ORM {orm} can only be 'peewee', 'pw', 'sqla', or 'sqlalchemy'")

    # no ORM selected: report on every ORM in the registry
    if not orm:
        if with_schema:
            return db_registry
        return {'peewee': list(db_registry['peewee'].keys()),
                'sqlalchemy': list(db_registry['sqlalchemy'].keys())}

    # expand the short aliases once, then look up the selected ORM
    aliases = {'pw': 'peewee', 'sqla': 'sqlalchemy'}
    databases = db_registry[aliases.get(orm, orm)]
    return databases if with_schema else list(databases.keys())


def _mask_column(column: Type[Column], idx: list, fill: str = '') -> None:
    """ Mask every element of a column except those at the given indices

    Flags the positions in ``idx`` on the column's mask, then inverts the
    mask, so that the flagged positions end up unmasked and all other
    positions end up masked.  Finally sets the column fill value.  The
    column is modified in place.

    Parameters
    ----------
    column : `~astropy.table.Column`
        an Astropy table Column to mask
    idx : numpy array
        The array indices of the column elements to keep visible
    fill : str, optional
        The column mask fill value, by default ''
    """
    # flag the rows to keep ...
    flags = column.mask
    flags[idx] = True
    # ... then flip the mask so that everything else is hidden instead
    column.mask = ~flags
    column.fill_value = fill


def display_table(pprint: bool = None, mask_dups: bool = False,
                  fill: str = '', **kwargs) -> Type[Table]:
    """ Show the sdssdb database registry as an Astropy Table

    Flattens the module-level registry into one row per
    (orm, database, schema) combination and builds a masked table with the
    columns "orm", "db", and "schema", sorted on those three columns.

    Parameters
    ----------
    pprint : bool, optional
        If True, pretty print the table and return nothing, by default None
    mask_dups : bool, optional
        If True, masks duplicate orm and db entries, by default False
    fill : str, optional
        The column mask fill value, by default ''
    kwargs :
        extra kwargs passed to Table.pprint

    Returns
    -------
    `~astropy.table.Table`
        an Astropy Table of sdssdb databases, or None when ``pprint`` is set

    Raises
    ------
    ImportError
        when astropy is not installed
    """
    if not Table:
        raise ImportError('No Table found. Astropy is not installed.')

    # one row per schema, tagged with its orm and database
    rows = [{'orm': orm, 'db': db, 'schema': schema}
            for orm, databases in db_registry.items()
            for db, info in databases.items()
            for schema in info['schema']]

    # create a masked table, then group and sort it
    table = Table(rows, names=['orm', 'db', 'schema'], masked=True)
    table = table.group_by(['orm', 'db'])
    table.sort(['orm', 'db', 'schema'])

    # keep only the first occurrence of each orm / db value visible
    if mask_dups:
        by_orm = table.group_by('orm')
        by_db = table.group_by(['orm', 'db'])

        # the last entry of groups.indices is dropped before masking
        for name, grouped in (('orm', by_orm), ('db', by_db)):
            _mask_column(by_orm[name], grouped.groups.indices[:-1], fill=fill)

        table = by_orm.filled()

    if not pprint:
        return table

    # pretty print the table instead of returning it
    table.pprint(**kwargs)
    return None


1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ dev =
pytest-postgresql>=2.2.1
factory_boy>=2.12.0
pytest-factoryboy>=2.0.3
astropy>=4.0.0
docs =
Sphinx>=1.8.0
sphinx_bootstrap_theme>=0.4.12
Expand Down
63 changes: 63 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# !/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Filename: test_utils.py
# Project: tests
# Author: Brian Cherinka
# Created: Wednesday, 21st October 2020 1:51:18 pm
# License: BSD 3-clause "New" or "Revised" License
# Copyright (c) 2020 Brian Cherinka
# Last Modified: Wednesday, 21st October 2020 1:51:18 pm
# Modified By: Brian Cherinka


from __future__ import print_function, division, absolute_import

import pytest
from sdssdb.utils.registry import list_databases, display_table


class TestListDbs(object):
    """ Tests for ``list_databases`` against the installed sdssdb package """

    def test_databases_noorm(self):
        # without an orm the result is a dict keyed by orm name
        listing = list_databases()
        assert type(listing) is dict
        for orm in ('peewee', 'sqlalchemy'):
            assert orm in listing

    @pytest.mark.parametrize('orm', ['peewee', 'sqlalchemy'])
    def test_databases_orm(self, orm):
        # with an orm the result is a flat list of database names
        names = list_databases(orm)
        assert type(names) is list
        assert 'operationsdb' in names

        expected = ['sdss5db'] if orm == 'peewee' else ['mangadb', 'archive']
        for name in expected:
            assert name in names

    def test_database_schema(self):
        # with_schema adds a 'schema' list under every database
        registry = list_databases('peewee', with_schema=True)
        assert type(registry) is dict

        sdss5db = registry['sdss5db']
        assert 'schema' in sdss5db
        assert 'targetdb' in sdss5db['schema']
        assert 'mangadb' in registry['operationsdb']['schema']

    def test_nosubdirs(self):
        # sub-directories of a database must not show up anywhere
        registry = list_databases('sqla', with_schema=True)
        assert 'operationsdb' in registry
        for unwanted in ('operationsdb.tools', 'tools'):
            assert unwanted not in registry
        assert 'tools' not in registry['operationsdb']['schema']


class TestDisplay(object):
    """ Tests for ``display_table`` with and without duplicate masking """

    @pytest.mark.parametrize('mask', [True, False], ids=['mask', 'nomask'])
    def test_table(self, mask):
        table = display_table(mask_dups=mask)
        assert table.colnames == ['orm', 'db', 'schema']

        # masked duplicates are filled with '', which may add an empty entry
        orms = set(table['orm'])
        both = {'peewee', 'sqlalchemy'}
        assert orms == both or orms == {''} | both

        # every orm cell is non-empty exactly when nothing was masked
        assert all(table['orm']) is not mask

0 comments on commit b5c42b1

Please sign in to comment.