From be476bd6ce855dc0e1f58c6421496335a1eb7c57 Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Sun, 21 Aug 2022 20:44:11 -0400 Subject: [PATCH 1/9] Add benchmark of integrated probability Add benchmark of integrated probability within the union of a set of fields. --- .../tests/benchmarks/test_benchmarks.py | 157 ++++++++++-------- 1 file changed, 88 insertions(+), 69 deletions(-) diff --git a/healpix_alchemy/tests/benchmarks/test_benchmarks.py b/healpix_alchemy/tests/benchmarks/test_benchmarks.py index 142f0ef..03a8e42 100644 --- a/healpix_alchemy/tests/benchmarks/test_benchmarks.py +++ b/healpix_alchemy/tests/benchmarks/test_benchmarks.py @@ -27,81 +27,100 @@ def _func(query, expected): return _func -def test_union_area(bench_and_check, random_fields): - """Find the area of the union of N fields.""" - # Assemble query - subquery = sa.select( +# def test_union_area(bench_and_check, random_fields): +# """Find the area of the union of N fields.""" +# # Assemble query +# subquery = sa.select( +# func.union(FieldTile.hpx).label('hpx') +# ).subquery() +# query = sa.select( +# sa.func.sum(subquery.columns.hpx.area) +# ) + +# # Expected result +# union = reduce(lambda a, b: a.union(b), random_fields) +# result = union.sky_fraction * 4 * np.pi +# expected = ((result,),) + +# # Run benchmark, check result +# bench_and_check(query, expected) + + +# def test_crossmatch_galaxies_and_fields(bench_and_check, +# random_fields, random_galaxies): +# """Cross match N galaxies with M fields.""" +# # Assemble query +# count = sa.func.count(Galaxy.id) +# query = sa.select( +# count +# ).filter( +# FieldTile.hpx.contains(Galaxy.hpx) +# ).group_by( +# FieldTile.id +# ).order_by( +# count.desc() +# ).limit( +# 5 +# ) + +# # Expected result +# points = random_galaxies +# fields = random_fields +# result = np.sum( +# [moc.contains(points.ra, points.dec) for moc in fields], +# axis=1) +# expected = np.flipud(np.sort(result))[:5].reshape(-1, 1) + +# # Run benchmark, check result +# bench_and_check(query, expected) + + +# def test_fields_in_90pct_credible_region(bench, random_fields, random_sky_map): +# """Find which of N fields overlap the 90% credible region.""" +# # Assemble query +# cum_prob = sa.func.sum( +# SkymapTile.probdensity * SkymapTile.hpx.area +# ).over( +# order_by=SkymapTile.probdensity.desc() +# ).label( +# 'cum_prob' +# ) +# subquery1 = sa.select( +# SkymapTile.probdensity, +# cum_prob +# ).filter( +# SkymapTile.id == 1 +# ).subquery() +# min_probdensity = sa.select( +# sa.func.min(subquery1.columns.probdensity) +# ).filter( +# subquery1.columns.cum_prob <= 0.9 +# ).scalar_subquery() +# query = sa.select( +# sa.func.count(FieldTile.id.distinct()) +# ).filter( +# SkymapTile.hpx.overlaps(FieldTile.hpx), +# SkymapTile.probdensity >= min_probdensity +# ) + +# # Run benchmark +# bench(query) + + +def test_integrated_probability(bench, random_fields, random_sky_map): + """Find integrated probability within union of fields.""" + union = sa.select( func.union(FieldTile.hpx).label('hpx') ).subquery() - query = sa.select( - sa.func.sum(subquery.columns.hpx.area) - ) - - # Expected result - union = reduce(lambda a, b: a.union(b), random_fields) - result = union.sky_fraction * 4 * np.pi - expected = ((result,),) - # Run benchmark, check result - bench_and_check(query, expected) - - -def test_crossmatch_galaxies_and_fields(bench_and_check, - random_fields, random_galaxies): - """Cross match N galaxies with M fields.""" - # Assemble query - count = sa.func.count(Galaxy.id) - query = sa.select( - count - ).filter( - FieldTile.hpx.contains(Galaxy.hpx) - ).group_by( - FieldTile.id - ).order_by( - count.desc() - ).limit( - 5 - ) + area = (union.columns.hpx * SkymapTile.hpx).area + prob = sa.func.sum(SkymapTile.probdensity * area) - # Expected result - points = random_galaxies - fields = random_fields - result = np.sum( - [moc.contains(points.ra, points.dec) for moc in fields], - axis=1) - expected = np.flipud(np.sort(result))[:5].reshape(-1, 1) - - # Run benchmark, check result - bench_and_check(query, expected) - - -def test_fields_in_90pct_credible_region(bench, random_fields, random_sky_map): - """Find which of N fields overlap the 90% credible region.""" - # Assemble query - cum_prob = sa.func.sum( - SkymapTile.probdensity * SkymapTile.hpx.area - ).over( - order_by=SkymapTile.probdensity.desc() - ).label( - 'cum_prob' - ) - subquery1 = sa.select( - SkymapTile.probdensity, - cum_prob - ).filter( - SkymapTile.id == 1 - ).subquery() - min_probdensity = sa.select( - sa.func.min(subquery1.columns.probdensity) - ).filter( - subquery1.columns.cum_prob <= 0.9 - ).scalar_subquery() query = sa.select( - sa.func.count(FieldTile.id.distinct()) + prob ).filter( - SkymapTile.hpx.overlaps(FieldTile.hpx), - SkymapTile.probdensity >= min_probdensity + SkymapTile.id == 1, + union.columns.hpx.overlaps(SkymapTile.hpx) ) - # Run benchmark bench(query) From 3e70c0d703f572a2309df120b6462129166d99a6 Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Wed, 9 Nov 2022 14:29:29 -0500 Subject: [PATCH 2/9] WIP --- healpix_alchemy/tests/benchmarks/data.py | 53 ++++++++++++------------ 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/healpix_alchemy/tests/benchmarks/data.py b/healpix_alchemy/tests/benchmarks/data.py index bc846c7..a6386c2 100644 --- a/healpix_alchemy/tests/benchmarks/data.py +++ b/healpix_alchemy/tests/benchmarks/data.py @@ -103,32 +103,33 @@ def get_random_fields(n, cursor): def get_random_sky_map(n, cursor): rng = np.random.default_rng(RANDOM_SKY_MAP_SEED) - # Make a randomly subdivided sky map - npix = HPX.npix - tiles = np.arange(0, npix + 1, 4 ** LEVEL).tolist() - while len(tiles) < n: - i = rng.integers(len(tiles)) - lo = 0 if i == 0 else tiles[i - 1] - hi = tiles[i] - diff = (hi - lo) // 4 - if diff == 0: - continue - tiles.insert(i, hi - diff) - tiles.insert(i, hi - 2 * diff) - tiles.insert(i, hi - 3 * diff) - - probdensity = rng.uniform(0, 1, size=len(tiles) - 1) - probdensity /= np.sum(np.diff(tiles) * probdensity) * PIXEL_AREA - - f = io.StringIO('1') - cursor.copy_from(f, Skymap.__tablename__) - - f = io.StringIO( - '\n'.join( - f'1\t[{lo},{hi})\t{p}' - for lo, hi, p in zip(tiles[:-1], tiles[1:], probdensity) + for skymap_id in range(1, 0, -1): + # Make a randomly subdivided sky map + npix = HPX.npix + tiles = np.arange(0, npix + 1, 4 ** LEVEL).tolist() + while len(tiles) < n: + i = rng.integers(len(tiles)) + lo = 0 if i == 0 else tiles[i - 1] + hi = tiles[i] + diff = (hi - lo) // 4 + if diff == 0: + continue + tiles.insert(i, hi - diff) + tiles.insert(i, hi - 2 * diff) + tiles.insert(i, hi - 3 * diff) + + probdensity = rng.uniform(0, 1, size=len(tiles) - 1) + probdensity /= np.sum(np.diff(tiles) * probdensity) * PIXEL_AREA + + f = io.StringIO(f'{skymap_id}') + cursor.copy_from(f, Skymap.__tablename__) + + f = io.StringIO( + '\n'.join( + f'{skymap_id}\t[{lo},{hi})\t{p}' + for lo, hi, p in zip(tiles[:-1], tiles[1:], probdensity) + ) ) - ) - cursor.copy_from(f, SkymapTile.__tablename__) + cursor.copy_from(f, SkymapTile.__tablename__) return tiles, probdensity From 4df556bcf373d0e9ed0bd6075807741b80b2d3ef Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Thu, 10 Nov 2022 13:20:45 -0500 Subject: [PATCH 3/9] Regions --- healpix_alchemy/tests/benchmarks/data.py | 40 +++++++++++------- .../tests/benchmarks/test_benchmarks.py | 1 + poetry.lock | 42 ++++++++++++++++++- pyproject.toml | 1 + 4 files changed, 68 insertions(+), 16 deletions(-) diff --git a/healpix_alchemy/tests/benchmarks/data.py b/healpix_alchemy/tests/benchmarks/data.py index a6386c2..a61fc72 100644 --- a/healpix_alchemy/tests/benchmarks/data.py +++ b/healpix_alchemy/tests/benchmarks/data.py @@ -6,12 +6,14 @@ """ import io +import functools from astropy.coordinates import SkyCoord, uniform_spherical_random_surface from astropy import units as u from mocpy import MOC import numpy as np import pytest +from regions import Regions from ...constants import HPX, LEVEL, PIXEL_AREA from ...types import Tile @@ -25,19 +27,18 @@ def get_ztf_footprint_corners(): - """Return the corner offsets of the ZTF footprint. - - Notes - ----- - This polygon is smaller than the spatial extent of the true ZTF field of - view, but has approximately the same area because the real ZTF field of - view has chip gaps. + """Return the corner offsets of the ZTF footprint.""" + url = '/Users/lpsinger/src/skyportal/data/ZTF_Region.reg' + regions = Regions.read(url, cache=True) + + vertices = SkyCoord( + [region.vertices.ra for region in regions], + [region.vertices.dec for region in regions] + ).transform_to( + SkyCoord(0 * u.deg, 0 * u.deg).skyoffset_frame() + ) - For the real ZTF footprint, use the region file - https://github.com/skyportal/skyportal/blob/main/data/ZTF_Region.reg. - """ - x = 6.86 / 2 - return [-x, +x, +x, -x] * u.deg, [-x, -x, +x, +x] * u.deg + return vertices.lon, vertices.lat def get_footprints_grid(lon, lat, offsets): @@ -62,7 +63,7 @@ def get_footprints_grid(lon, lat, offsets): """ lon = np.repeat(lon[np.newaxis, :], len(offsets), axis=0) lat = np.repeat(lat[np.newaxis, :], len(offsets), axis=0) - result = SkyCoord(lon, lat, frame=offsets[:, np.newaxis].skyoffset_frame()) + result = SkyCoord(lon, lat, frame=offsets.reshape((-1,) + (1,) * (lon.ndim - 1)).skyoffset_frame()) return result.icrs @@ -82,10 +83,19 @@ def get_random_galaxies(n, cursor): return points +def union_moc_func(a, b): + return a.union(b) + + +def get_union_moc(footprints): + mocs = (MOC.from_polygon_skycoord(footprint) for footprint in footprints) + return functools.reduce(union_moc_func, mocs) + + def get_random_fields(n, cursor): centers = SkyCoord(get_random_points(n, RANDOM_FIELDS_SEED)) footprints = get_footprints_grid(*get_ztf_footprint_corners(), centers) - mocs = [MOC.from_polygon_skycoord(footprint) for footprint in footprints] + mocs = [get_union_moc(footprint) for footprint in footprints] f = io.StringIO('\n'.join(f'{i}' for i in range(len(mocs)))) cursor.copy_from(f, Field.__tablename__) @@ -103,7 +113,7 @@ def get_random_fields(n, cursor): def get_random_sky_map(n, cursor): rng = np.random.default_rng(RANDOM_SKY_MAP_SEED) - for skymap_id in range(1, 0, -1): + for skymap_id in range(10, 0, -1): # Make a randomly subdivided sky map npix = HPX.npix tiles = np.arange(0, npix + 1, 4 ** LEVEL).tolist() diff --git a/healpix_alchemy/tests/benchmarks/test_benchmarks.py b/healpix_alchemy/tests/benchmarks/test_benchmarks.py index 03a8e42..0bf9746 100644 --- a/healpix_alchemy/tests/benchmarks/test_benchmarks.py +++ b/healpix_alchemy/tests/benchmarks/test_benchmarks.py @@ -1,4 +1,5 @@ from functools import reduce +import time import numpy as np import sqlalchemy as sa diff --git a/poetry.lock b/poetry.lock index 99a418a..4a423b9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -668,6 +668,23 @@ category = "main" optional = false python-versions = ">=3.6" +[[package]] +name = "regions" +version = "0.7" +description = "Astropy coordinated package for region handling" +category = "dev" +optional = false +python-versions = ">=3.8" + +[package.dependencies] +astropy = ">=5.0" +numpy = ">=1.18" + +[package.extras] +all = ["matplotlib (>=3.1)", "shapely"] +docs = ["matplotlib (>=3.1)", "shapely", "sphinx-astropy"] +test = ["pytest-arraydiff", "pytest-astropy"] + [[package]] name = "requests" version = "2.28.1" @@ -829,7 +846,7 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [metadata] lock-version = "1.1" python-versions = "^3.8" -content-hash = "625f07f698023e02da75e678bb3d4996e46f075309a3fabb46afcce6840723d0" +content-hash = "2e5188bf874d16f90b32b20525dd61954e6b43ba5c61d15163baba50c00197dc" [metadata.files] astropy = [ @@ -1754,6 +1771,29 @@ pyyaml = [ {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"}, {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"}, ] +regions = [ + {file = "regions-0.7-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a89453a27992c517554d810931af965d997967344d133b660906947b50024a7a"}, + {file = "regions-0.7-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04ebb9653c150eeb47c3326bbe39f49312bedae6fbd2bdf1edf330aa4ff698dd"}, + {file = "regions-0.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ccb927a09b26928ee68f49b1755f430969e4563d80a7d0d633b920e99f40349c"}, + {file = "regions-0.7-cp310-cp310-win32.whl", hash = "sha256:df4e52f3e69caae1aa1b993347ad4e12f8ba3a5eb0e93e27efd242ef39f417ac"}, + {file = "regions-0.7-cp310-cp310-win_amd64.whl", hash = "sha256:906cbfaa191f58e55fd3792ee898ebde1cf59c9a86da6777f683422bc56747d2"}, + {file = "regions-0.7-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:908554c9badd08d24567d2e6b78702b2f195ec67ce380c9a698c07e631f7451e"}, + {file = "regions-0.7-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8efa73b3ccd71e1098fb4661c317a7d1dddc7721c497ae7dad6a39c6a3135dcf"}, + {file = "regions-0.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:daf87920dfef19fb421d0e3293f28384e59afd425e9c5e8e6bdcd6c99eba3c2e"}, + {file = "regions-0.7-cp311-cp311-win32.whl", hash = "sha256:b5a54ada42114b345edbf43dc63df9995ca04f1b496187e9c9680827c57af8d1"}, + {file = "regions-0.7-cp311-cp311-win_amd64.whl", hash = "sha256:5c5668f9c9dc88d3627f7772ea15df3df4987802a2bc3247a9472ebcc3d1621c"}, + {file = "regions-0.7-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f77071fe57e8092fc9c615d00cd75545bc90f126de638aacb1eac6c386658844"}, + {file = "regions-0.7-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5f3bb69b75ff5b26ac606cf5e2aa0e8e90ed75b6b8c21c22230c9da1c52f5bae"}, + {file = "regions-0.7-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e089a7282433fa7927dec2a6610bced214d581cc47d237e516e7d7e1de822005"}, + {file = "regions-0.7-cp38-cp38-win32.whl", hash = "sha256:39cad6568ccd951e79070b2e4197b1e2aa0b9736ebb4d8ef4c6a742957400997"}, + {file = "regions-0.7-cp38-cp38-win_amd64.whl", hash = "sha256:f16951f95a4103f5ae85c437443d952e9ad5898bde58eb59f00630c92459303a"}, + {file = "regions-0.7-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f727270fb81b56de447f952f07e5fa98c1329a40b3f7e0c072d93aede5fb9c7e"}, + {file = "regions-0.7-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1fee607752e15e712f088c03a43e22b46779273316ebc97b442b2da91a81cf60"}, + {file = "regions-0.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5a98b1fef5273883816561ea258ae16517eca33ec11858d0a3da0ad5ac65458"}, + {file = "regions-0.7-cp39-cp39-win32.whl", hash = "sha256:aaff5e1199d282c0781e9e63093214006d92d4e22e27332fb13168f6d69141a4"}, + {file = "regions-0.7-cp39-cp39-win_amd64.whl", hash = "sha256:6cf77c098732e619a6f188fb4a34e04537dfbd574480efbdff47fdf3fd4f771e"}, + {file = "regions-0.7.tar.gz", hash = "sha256:75387ef2dfcca46c7716803a81c68dba3669005e01ddf6d0bf254edb677a9a23"}, +] requests = [ {file = "requests-2.28.1-py3-none-any.whl", hash = "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349"}, {file = "requests-2.28.1.tar.gz", hash = "sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983"}, diff --git a/pyproject.toml b/pyproject.toml index 9a9e0fa..87172be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ pytest = "*" pytest-benchmark = "*" pytest-doctestplus = "*" pytest-postgresql = "<4.0.0" # pytest-postgresql 4.0.0 uses psycopg3, but sqlalchemy does not yet support it +regions = ">=0.6" [tool.pytest.ini_options] addopts = "--doctest-glob *.md" From 2b84f84b699b6f69e409ff16f66240cb90dd2c80 Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Tue, 22 Nov 2022 21:35:47 -0500 Subject: [PATCH 4/9] WIP --- healpix_alchemy/tests/benchmarks/conftest.py | 7 ++++- healpix_alchemy/tests/benchmarks/data.py | 28 +++++++++++++++++++ .../tests/benchmarks/test_benchmarks.py | 5 +++- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/healpix_alchemy/tests/benchmarks/conftest.py b/healpix_alchemy/tests/benchmarks/conftest.py index 8d683a6..bab6063 100644 --- a/healpix_alchemy/tests/benchmarks/conftest.py +++ b/healpix_alchemy/tests/benchmarks/conftest.py @@ -33,11 +33,16 @@ def random_galaxies(cursor, tables): return data.get_random_galaxies(40_000, cursor) -@pytest.fixture(params=np.geomspace(1, 10_000, 10, dtype=int).tolist()) +@pytest.fixture(params=[2000]) def random_fields(cursor, tables, request): return data.get_random_fields(request.param, cursor) +@pytest.fixture() +def ztf_fields(cursor, tables): + return data.get_ztf_fields(cursor) + + @pytest.fixture def random_sky_map(cursor, tables): return data.get_random_sky_map(20_000, cursor) diff --git a/healpix_alchemy/tests/benchmarks/data.py b/healpix_alchemy/tests/benchmarks/data.py index a61fc72..9f3761e 100644 --- a/healpix_alchemy/tests/benchmarks/data.py +++ b/healpix_alchemy/tests/benchmarks/data.py @@ -10,6 +10,7 @@ from astropy.coordinates import SkyCoord, uniform_spherical_random_surface from astropy import units as u +from astropy.table import Table from mocpy import MOC import numpy as np import pytest @@ -73,6 +74,13 @@ def get_random_points(n, seed): return uniform_spherical_random_surface(n) +def get_ztf_field_data(): + url = 'https://raw.githubusercontent.com/ZwickyTransientFacility/ztf_information/master/field_grid/ZTF_Fields.txt' + data = Table.read(url, cache=True, format='ascii', comment='%') + data.rename_columns(['col1', 'col2', 'col3'], ['id', 'ra', 'dec']) + return data + + def get_random_galaxies(n, cursor): points = SkyCoord(get_random_points(n, RANDOM_GALAXIES_SEED)) hpx = HPX.skycoord_to_healpix(points) @@ -111,6 +119,26 @@ def get_random_fields(n, cursor): return mocs +def get_ztf_fields(cursor): + field_data = get_ztf_field_data() + centers = SkyCoord(field_data['ra'] * u.deg, field_data['dec'] * u.deg) + footprints = get_footprints_grid(*get_ztf_footprint_corners(), centers) + mocs = [get_union_moc(footprint) for footprint in footprints] + + f = io.StringIO('\n'.join(f'{i}' for i in field_data['id'])) + cursor.copy_from(f, Field.__tablename__) + + f = io.StringIO( + '\n'.join( + f'{i}\t{hpx}' + for i, moc in zip(field_data['id'], mocs) for hpx in Tile.tiles_from(moc) + ) + ) + cursor.copy_from(f, FieldTile.__tablename__) + + return mocs + + def get_random_sky_map(n, cursor): rng = np.random.default_rng(RANDOM_SKY_MAP_SEED) for skymap_id in range(10, 0, -1): diff --git a/healpix_alchemy/tests/benchmarks/test_benchmarks.py b/healpix_alchemy/tests/benchmarks/test_benchmarks.py index 0bf9746..95605a9 100644 --- a/healpix_alchemy/tests/benchmarks/test_benchmarks.py +++ b/healpix_alchemy/tests/benchmarks/test_benchmarks.py @@ -108,10 +108,12 @@ def _func(query, expected): # bench(query) -def test_integrated_probability(bench, random_fields, random_sky_map): +def test_integrated_probability(bench, ztf_fields, random_sky_map): """Find integrated probability within union of fields.""" union = sa.select( func.union(FieldTile.hpx).label('hpx') + ).filter( + FieldTile.id.between(100, 120) ).subquery() area = (union.columns.hpx * SkymapTile.hpx).area @@ -124,4 +126,5 @@ def test_integrated_probability(bench, random_fields, random_sky_map): union.columns.hpx.overlaps(SkymapTile.hpx) ) + # Run benchmark bench(query) From 63ab9467abf46785c9a86fb0b37208394efc4b81 Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Fri, 27 Jan 2023 12:17:25 -0500 Subject: [PATCH 5/9] WIP --- healpix_alchemy/tests/benchmarks/conftest.py | 7 ++++--- healpix_alchemy/tests/benchmarks/data.py | 15 +++++++++++---- healpix_alchemy/tests/benchmarks/models.py | 2 +- .../tests/benchmarks/test_benchmarks.py | 11 +++++++---- healpix_alchemy/types.py | 2 +- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/healpix_alchemy/tests/benchmarks/conftest.py b/healpix_alchemy/tests/benchmarks/conftest.py index bab6063..4c4cb1a 100644 --- a/healpix_alchemy/tests/benchmarks/conftest.py +++ b/healpix_alchemy/tests/benchmarks/conftest.py @@ -43,6 +43,7 @@ def ztf_fields(cursor, tables): return data.get_ztf_fields(cursor) -@pytest.fixture -def random_sky_map(cursor, tables): - return data.get_random_sky_map(20_000, cursor) +@pytest.fixture(params=np.geomspace(1, 1000, 10, dtype=int).tolist()) +# @pytest.fixture(params=[50]) +def random_sky_map(cursor, tables, request): + return data.get_random_sky_map(20_000, request.param, cursor) diff --git a/healpix_alchemy/tests/benchmarks/data.py b/healpix_alchemy/tests/benchmarks/data.py index 9f3761e..c9c1bf0 100644 --- a/healpix_alchemy/tests/benchmarks/data.py +++ b/healpix_alchemy/tests/benchmarks/data.py @@ -119,11 +119,17 @@ def get_random_fields(n, cursor): return mocs -def get_ztf_fields(cursor): +@functools.cache +def get_ztf_mocs(): field_data = get_ztf_field_data() centers = SkyCoord(field_data['ra'] * u.deg, field_data['dec'] * u.deg) footprints = get_footprints_grid(*get_ztf_footprint_corners(), centers) mocs = [get_union_moc(footprint) for footprint in footprints] + return field_data, mocs + + +def get_ztf_fields(cursor): + field_data, mocs = get_ztf_mocs() f = io.StringIO('\n'.join(f'{i}' for i in field_data['id'])) cursor.copy_from(f, Field.__tablename__) @@ -139,9 +145,9 @@ def get_ztf_fields(cursor): return mocs -def get_random_sky_map(n, cursor): +def get_random_sky_map(n, nmaps, cursor): rng = np.random.default_rng(RANDOM_SKY_MAP_SEED) - for skymap_id in range(10, 0, -1): + for skymap_id in range(nmaps, 0, -1): # Make a randomly subdivided sky map npix = HPX.npix tiles = np.arange(0, npix + 1, 4 ** LEVEL).tolist() @@ -170,4 +176,5 @@ def get_random_sky_map(n, cursor): ) cursor.copy_from(f, SkymapTile.__tablename__) - return tiles, probdensity + # return tiles, probdensity + return nmaps diff --git a/healpix_alchemy/tests/benchmarks/models.py b/healpix_alchemy/tests/benchmarks/models.py index 188f8ee..24c163c 100644 --- a/healpix_alchemy/tests/benchmarks/models.py +++ b/healpix_alchemy/tests/benchmarks/models.py @@ -23,7 +23,7 @@ class Field(Base): class FieldTile(Base): - id = sa.Column(sa.ForeignKey(Field.id), primary_key=True, index=True) + id = sa.Column(sa.ForeignKey(Field.id), primary_key=True) hpx = sa.Column(Tile, primary_key=True, index=True) diff --git a/healpix_alchemy/tests/benchmarks/test_benchmarks.py b/healpix_alchemy/tests/benchmarks/test_benchmarks.py index 95605a9..3585c11 100644 --- a/healpix_alchemy/tests/benchmarks/test_benchmarks.py +++ b/healpix_alchemy/tests/benchmarks/test_benchmarks.py @@ -1,5 +1,5 @@ from functools import reduce -import time +from .explain import explain import numpy as np import sqlalchemy as sa @@ -108,7 +108,7 @@ def _func(query, expected): # bench(query) -def test_integrated_probability(bench, ztf_fields, random_sky_map): +def test_integrated_probability(bench, ztf_fields, random_sky_map, session): """Find integrated probability within union of fields.""" union = sa.select( func.union(FieldTile.hpx).label('hpx') @@ -126,5 +126,8 @@ def test_integrated_probability(bench, ztf_fields, random_sky_map): union.columns.hpx.overlaps(SkymapTile.hpx) ) - # Run benchmark - bench(query) + session.execute('ANALYZE') + session.execute("LOAD 'auto_explain'") + session.execute("SET auto_explain.log_min_duration=0") + session.execute(query).all() + session.execute("SET auto_explain.log_min_duration='1000s'") diff --git a/healpix_alchemy/types.py b/healpix_alchemy/types.py index da80aed..5a65496 100644 --- a/healpix_alchemy/types.py +++ b/healpix_alchemy/types.py @@ -93,4 +93,4 @@ def _create_indices(index, parent): isinstance(index.expressions[0], sa.Column) and isinstance(index.expressions[0].type, Tile) ): - index.dialect_options['postgresql']['using'] = 'spgist' + index.dialect_options['postgresql']['using'] = 'gist' From 722178fe349510e497c0da9ef9feb7e67e49572e Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Fri, 27 Jan 2023 12:17:49 -0500 Subject: [PATCH 6/9] WIP --- healpix_alchemy/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/healpix_alchemy/types.py b/healpix_alchemy/types.py index 5a65496..da80aed 100644 --- a/healpix_alchemy/types.py +++ b/healpix_alchemy/types.py @@ -93,4 +93,4 @@ def _create_indices(index, parent): isinstance(index.expressions[0], sa.Column) and isinstance(index.expressions[0].type, Tile) ): - index.dialect_options['postgresql']['using'] = 'gist' + index.dialect_options['postgresql']['using'] = 'spgist' From ad26de33d440ce079734e6c7843a10fa088f9729 Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Wed, 1 Feb 2023 15:30:47 -0500 Subject: [PATCH 7/9] WIP --- healpix_alchemy/tests/benchmarks/test_benchmarks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/healpix_alchemy/tests/benchmarks/test_benchmarks.py b/healpix_alchemy/tests/benchmarks/test_benchmarks.py index 3585c11..9f60b33 100644 --- a/healpix_alchemy/tests/benchmarks/test_benchmarks.py +++ b/healpix_alchemy/tests/benchmarks/test_benchmarks.py @@ -129,5 +129,6 @@ def test_integrated_probability(bench, ztf_fields, random_sky_map, session): session.execute('ANALYZE') session.execute("LOAD 'auto_explain'") session.execute("SET auto_explain.log_min_duration=0") + session.execute("SET auto_explain.log_analyze=true") session.execute(query).all() session.execute("SET auto_explain.log_min_duration='1000s'") From f9abdadb762caec260add23de1bbc414d69b6ede Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Wed, 1 Feb 2023 15:31:46 -0500 Subject: [PATCH 8/9] Add temporary table --- healpix_alchemy/__init__.py | 3 +- healpix_alchemy/_temporary_table.py | 43 +++++++++++++++++++ .../tests/benchmarks/test_benchmarks.py | 2 +- 3 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 healpix_alchemy/_temporary_table.py diff --git a/healpix_alchemy/__init__.py b/healpix_alchemy/__init__.py index 5a627e0..b545eb3 100644 --- a/healpix_alchemy/__init__.py +++ b/healpix_alchemy/__init__.py @@ -1,4 +1,5 @@ +from ._temporary_table import temporary_table from .types import Point, Tile from . import func # noqa: F401 -__all__ = ('Point', 'Tile') +__all__ = ('temporary_table', 'Point', 'Tile') diff --git a/healpix_alchemy/_temporary_table.py b/healpix_alchemy/_temporary_table.py new file mode 100644 index 0000000..c71d937 --- /dev/null +++ b/healpix_alchemy/_temporary_table.py @@ -0,0 +1,43 @@ +"""SQLAlchemy temporary tables. + +Inspired by https://github.com/sqlalchemy/sqlalchemy/wiki/Views +""" +from sqlalchemy.ext.compiler import compiles +from sqlalchemy.schema import DDLElement +from sqlalchemy.sql import TableClause + +from uuid import uuid4 + +__all__ = ('temporary_table',) + + +class CreateTemporaryTable(DDLElement): + + inherit_cache = False + + def __init__(self, selectable, name): + self.selectable = selectable + self.name = name + + +@compiles(CreateTemporaryTable) +def _create_temporary_table(element, compiler, **_): + selectable = compiler.sql_compiler.process( + element.selectable, literal_binds=True) + return f'CREATE TEMPORARY TABLE {element.name} AS {selectable}' + + +class temporary_table(TableClause): + + inherit_cache = False + + def __init__(self, selectable, name=None): + if name is None: + name = 'temp_' + str(uuid4()).replace('-', '_') + super().__init__(name) + self._selectable = selectable + self._columns._populate_separate_keys( + col._make_proxy(self) for col in selectable.selected_columns) + + def create(self, bind): + return bind.execute(CreateTemporaryTable(self._selectable, self.name)) diff --git a/healpix_alchemy/tests/benchmarks/test_benchmarks.py b/healpix_alchemy/tests/benchmarks/test_benchmarks.py index 9f60b33..b62ea2d 100644 --- a/healpix_alchemy/tests/benchmarks/test_benchmarks.py +++ b/healpix_alchemy/tests/benchmarks/test_benchmarks.py @@ -5,7 +5,7 @@ import sqlalchemy as sa import pytest -from ... import func +from ... import temporary_table, func from .models import Galaxy, FieldTile, SkymapTile From 21cec740d3ced9377b0384ab064f8ddd3bf9a703 Mon Sep 17 00:00:00 2001 From: Leo Singer Date: Tue, 7 Feb 2023 15:22:18 -0500 Subject: [PATCH 9/9] WIP: try adding custom function with hardcoded planner stats --- healpix_alchemy/func.py | 2 +- healpix_alchemy/types.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/healpix_alchemy/func.py b/healpix_alchemy/func.py index 35b2e9f..2e49735 100644 --- a/healpix_alchemy/func.py +++ b/healpix_alchemy/func.py @@ -5,4 +5,4 @@ def union(tiles): - return _func.unnest(_func.range_agg(tiles), type_=_Tile) + return _func.multirange_unnest(_func.range_agg(tiles), type_=_Tile) diff --git a/healpix_alchemy/types.py b/healpix_alchemy/types.py index da80aed..e65b50c 100644 --- a/healpix_alchemy/types.py +++ b/healpix_alchemy/types.py @@ -94,3 +94,26 @@ def _create_indices(index, parent): isinstance(index.expressions[0].type, Tile) ): index.dialect_options['postgresql']['using'] = 'spgist' + + +@sa.event.listens_for(sa.Table, 'after_create') +def _after_create_table(target, connection, **kwargs): + """Declare our own version of unnest() with a hardcoded planner row count + estimate that is more appropriate for sky maps. + + FIXME: Remove this when PostgreSQL gets better planner support for + multiranges. + + See https://github.com/skyportal/healpix-alchemy/pull/55. + """ + connection.execute(sa.text( + 'CREATE OR REPLACE FUNCTION multirange_unnest(arg anymultirange)' + ' RETURNS SETOF anyrange' + ' AS $$ select unnest($1); $$' + ' LANGUAGE SQL' + ' IMMUTABLE' + ' PARALLEL SAFE' + ' STRICT' + ' COST 1' + ' ROWS 2000' + ))