Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-43097: Replication-related improvements #48

Merged
merged 8 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/lsst.dax.apdb/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ Python API reference
:no-main-docstr:
:no-inheritance-diagram:

.. automodapi:: lsst.dax.apdb.schema_model
:no-main-docstr:
:no-inheritance-diagram:


Command line tools
==================
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ classifiers = [
keywords = ["lsst"]
dependencies = [
"astropy",
"felis",
"lsst-felis",
"lsst-pex-config",
"lsst-sphgeom",
"lsst-utils",
Expand Down
6 changes: 4 additions & 2 deletions python/lsst/dax/apdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from . import schema_model
from .apdb import *
from .apdbCassandra import *
from .apdbMetadata import *
from .apdbReplica import *
from .apdbSchema import ApdbTables
from .apdbSql import *
from .cassandra import ApdbCassandra, ApdbCassandraConfig
from .sql import ApdbSql, ApdbSqlConfig
from .version import *
from .versionTuple import *
161 changes: 8 additions & 153 deletions python/lsst/dax/apdb/apdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,23 @@

from __future__ import annotations

__all__ = ["ApdbConfig", "Apdb", "ApdbInsertId", "ApdbTableData"]
__all__ = ["ApdbConfig", "Apdb"]

import os
from abc import ABC, abstractmethod
from collections.abc import Iterable, Mapping
from dataclasses import dataclass
from typing import TYPE_CHECKING, cast
from uuid import UUID, uuid4

import astropy.time
import pandas
from felis.simple import Table
from lsst.pex.config import Config, ConfigurableField, Field
from lsst.resources import ResourcePath, ResourcePathExpression
from lsst.sphgeom import Region

from .apdbIndex import ApdbIndex
from .apdbSchema import ApdbTables
from .factory import make_apdb
from .schema_model import Table

if TYPE_CHECKING:
from .apdbMetadata import ApdbMetadata
Expand Down Expand Up @@ -72,59 +70,16 @@ class ApdbConfig(Config):
)
use_insert_id = Field[bool](
doc=(
"If True, make and fill additional tables used for getHistory methods. "
"If True, make and fill additional tables used for replication. "
"Databases created with earlier versions of APDB may not have these tables, "
"and corresponding methods will not work for them."
),
default=False,
)


class ApdbTableData(ABC):
"""Abstract class for representing table data."""

@abstractmethod
def column_names(self) -> list[str]:
"""Return ordered sequence of column names in the table.

Returns
-------
names : `list` [`str`]
Column names.
"""
raise NotImplementedError()

@abstractmethod
def rows(self) -> Iterable[tuple]:
"""Return table rows, each row is a tuple of values.

Returns
-------
rows : `iterable` [`tuple`]
Iterable of tuples.
"""
raise NotImplementedError()


@dataclass(frozen=True)
class ApdbInsertId:
"""Class used to identify single insert operation.

Instances of this class are used to identify the units of transfer from
APDB to PPDB. Usually single `ApdbInsertId` corresponds to a single call to
`store` method.
"""

id: UUID
insert_time: astropy.time.Time
"""Time of this insert, usually corresponds to visit time
(`astropy.time.Time`).
"""

@classmethod
def new_insert_id(cls, insert_time: astropy.time.Time) -> ApdbInsertId:
"""Generate new unique insert identifier."""
return ApdbInsertId(id=uuid4(), insert_time=insert_time)
replica_chunk_seconds = Field[int](
default=600,
doc="Time extent for replica chunks, new chunks are created every specified number of seconds.",
)


class Apdb(ABC):
Expand Down Expand Up @@ -215,7 +170,7 @@ def tableDef(self, table: ApdbTables) -> Table | None:

Returns
-------
tableSchema : `felis.simple.Table` or `None`
tableSchema : `.schema_model.Table` or `None`
Table schema description, `None` is returned if table is not
defined by this implementation.
"""
Expand Down Expand Up @@ -338,106 +293,6 @@ def containsVisitDetector(self, visit: int, detector: int) -> bool:
"""
raise NotImplementedError()

@abstractmethod
def getInsertIds(self) -> list[ApdbInsertId] | None:
"""Return collection of insert identifiers known to the database.

Returns
-------
ids : `list` [`ApdbInsertId`] or `None`
List of identifiers, they may be time-ordered if database supports
ordering. `None` is returned if database is not configured to store
insert identifiers.
"""
raise NotImplementedError()

@abstractmethod
def deleteInsertIds(self, ids: Iterable[ApdbInsertId]) -> None:
"""Remove insert identifiers from the database.

Parameters
----------
ids : `iterable` [`ApdbInsertId`]
Insert identifiers, can include items returned from `getInsertIds`.

Notes
-----
This method causes Apdb to forget about specified identifiers. If there
are any auxiliary data associated with the identifiers, it is also
removed from database (but data in regular tables is not removed).
This method should be called after successful transfer of data from
APDB to PPDB to free space used by history.
"""
raise NotImplementedError()

@abstractmethod
def getDiaObjectsHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
"""Return catalog of DiaObject instances from a given time period
including the history of each DiaObject.

Parameters
----------
ids : `iterable` [`ApdbInsertId`]
Insert identifiers, can include items returned from `getInsertIds`.

Returns
-------
data : `ApdbTableData`
Catalog containing DiaObject records. In addition to all regular
columns it will contain ``insert_id`` column.

Notes
-----
This part of API may not be very stable and can change before the
implementation finalizes.
"""
raise NotImplementedError()

@abstractmethod
def getDiaSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
"""Return catalog of DiaSource instances from a given time period.

Parameters
----------
ids : `iterable` [`ApdbInsertId`]
Insert identifiers, can include items returned from `getInsertIds`.

Returns
-------
data : `ApdbTableData`
Catalog containing DiaSource records. In addition to all regular
columns it will contain ``insert_id`` column.

Notes
-----
This part of API may not be very stable and can change before the
implementation finalizes.
"""
raise NotImplementedError()

@abstractmethod
def getDiaForcedSourcesHistory(self, ids: Iterable[ApdbInsertId]) -> ApdbTableData:
"""Return catalog of DiaForcedSource instances from a given time
period.

Parameters
----------
ids : `iterable` [`ApdbInsertId`]
Insert identifiers, can include items returned from `getInsertIds`.

Returns
-------
data : `ApdbTableData`
Catalog containing DiaForcedSource records. In addition to all
regular columns it will contain ``insert_id`` column.

Notes
-----
This part of API may not be very stable and can change before the
implementation finalizes.
"""
raise NotImplementedError()

@abstractmethod
def getSSObjects(self) -> pandas.DataFrame:
"""Return catalog of SSObject instances.
Expand Down