diff --git a/python/misc_pymod.cc b/python/misc_pymod.cc index 243287f7..52a7ef85 100644 --- a/python/misc_pymod.cc +++ b/python/misc_pymod.cc @@ -797,7 +797,6 @@ py::array Py_get_deflected_angles2(const py::array &theta_, if (calc_rotation) MR_assert(e_r.x > 0, "for the poles fix alpha=0 case in calc_rotation first "); double dphi = 2*pi/nphi(iring); - double sin_aoa, twohav_aod, cos_a; for (size_t iphi=0; iphi0); } + +size_t get_nalm(size_t spin, SHT_mode mode) + { return (spin==0) ? 1 : ((mode==STANDARD) ? 2 : 1); } + py::array Py_GL_weights(size_t nlat, size_t nlon) { auto res = make_Pyarr({nlat}); @@ -185,8 +199,9 @@ size_t min_mapdim(const cmav &nphi, const cmav &ringstart, template py::array Py2_alm2leg(const py::array &alm_, size_t spin, size_t lmax, const py::object &mval_, const py::object &mstart_, ptrdiff_t lstride, const py::array &theta_, size_t nthreads, - py::object &leg__) + py::object &leg__, const string &mode_) { + auto mode = get_mode(mode_); auto alm = to_cmav,2>(alm_); auto theta = to_cmav(theta_); vmav mval, mstart; @@ -194,91 +209,64 @@ template py::array Py2_alm2leg(const py::array &alm_, size_t spin, MR_assert(alm.shape(1)>=min_almdim(lmax, mval, mstart, lstride), "bad a_lm array size"); auto leg_ = get_optional_Pyarr>(leg__, - {alm.shape(0),theta.shape(0),mval.shape(0)}); + {get_nmaps(spin,mode),theta.shape(0),mval.shape(0)}); auto leg = to_vmav,3>(leg_); { py::gil_scoped_release release; - alm2leg(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads, ALM2MAP); + alm2leg(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads, mode); } return leg_; } py::array Py_alm2leg(const py::array &alm, size_t lmax, const py::array &theta, size_t spin, const py::object &mval, const py::object &mstart, - ptrdiff_t lstride, size_t nthreads, py::object &leg) + ptrdiff_t lstride, size_t nthreads, py::object &leg, const string &mode) { if (isPyarr>(alm)) return Py2_alm2leg(alm, spin, lmax, mval, mstart, lstride, theta, - nthreads, leg); + nthreads, leg, mode); if (isPyarr>(alm)) return Py2_alm2leg(alm, spin, lmax, mval, mstart, lstride, theta, - nthreads, leg); + nthreads, leg, mode); MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); } -template py::array Py2_alm2leg_deriv1(const py::array &alm_, - size_t lmax, const py::object &mval_, const py::object &mstart_, - ptrdiff_t lstride, const py::array &theta_, size_t nthreads, - py::object &leg__) - { - auto alm = to_cmav,2>(alm_); - auto theta = to_cmav(theta_); - vmav mval, mstart; - getmstuff(lmax, mval_, mstart_, mval, mstart); - MR_assert(alm.shape(0)==1, "need exactly 1 a_lm component"); - MR_assert(alm.shape(1)>=min_almdim(lmax, mval, mstart, lstride), - "bad a_lm array size"); - auto leg_ = get_optional_Pyarr>(leg__, - {2,theta.shape(0),mval.shape(0)}); - auto leg = to_vmav,3>(leg_); - { - py::gil_scoped_release release; - alm2leg(alm, leg, 0, lmax, mval, mstart, lstride, theta, nthreads, - ALM2MAP_DERIV1); - } - return leg_; - } py::array Py_alm2leg_deriv1(const py::array &alm, size_t lmax, const py::array &theta, const py::object &mval, const py::object &mstart, ptrdiff_t lstride, size_t nthreads, py::object &leg) { - if (isPyarr>(alm)) - return Py2_alm2leg_deriv1(alm, lmax, mval, mstart, lstride, theta, - nthreads, leg); - if (isPyarr>(alm)) - return Py2_alm2leg_deriv1(alm, lmax, mval, mstart, lstride, theta, - nthreads, leg); - MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); + return Py_alm2leg(alm, lmax, theta, 1, mval, mstart, lstride, nthreads, leg, 
"DERIV1"); } template py::array Py2_leg2alm(const py::array &leg_, const py::array &theta_, size_t spin, size_t lmax, const py::object &mval_, const py::object &mstart_, ptrdiff_t lstride, size_t nthreads, - py::object &alm__) + py::object &alm__,const string &mode_) { + auto mode = get_mode(mode_); auto leg = to_cmav,3>(leg_); auto theta = to_cmav(theta_); MR_assert(leg.shape(1)==theta.shape(0), "bad leg array size"); vmav mval, mstart; getmstuff(lmax, mval_, mstart_, mval, mstart); auto alm_ = get_optional_Pyarr_minshape>(alm__, - {leg.shape(0),min_almdim(lmax, mval, mstart, lstride)}); + {get_nalm(spin,mode),min_almdim(lmax, mval, mstart, lstride)}); auto alm = to_vmav,2>(alm_); MR_assert(alm.shape(0)==leg.shape(0), "bad number of components in a_lm array"); { py::gil_scoped_release release; - leg2alm(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads); + leg2alm(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads, mode); } return alm_; } py::array Py_leg2alm(const py::array &leg, size_t lmax, const py::array &theta, size_t spin, const py::object &mval, const py::object &mstart, - ptrdiff_t lstride, size_t nthreads, py::object &alm) + ptrdiff_t lstride, size_t nthreads, py::object &alm, const string &mode) { if (isPyarr>(leg)) return Py2_leg2alm(leg, theta, spin, lmax, mval, mstart, lstride, - nthreads, alm); + nthreads, alm, mode); if (isPyarr>(leg)) return Py2_leg2alm(leg, theta, spin, lmax, mval, mstart, lstride, - nthreads, alm); + nthreads, alm, mode); MR_fail("type matching failed: 'leg' has neither type 'c8' nor 'c16'"); } template py::array Py2_map2leg(const py::array &map_, @@ -352,8 +340,10 @@ template py::array Py2_synthesis(const py::array &alm_, const py::array &theta_, const py::array &nphi_, const py::array &phi0_, const py::array &ringstart_, - ptrdiff_t pixstride, size_t nthreads, const py::object &mmax_) + ptrdiff_t pixstride, size_t nthreads, const py::object &mmax_, + const string &mode_) { + auto mode = get_mode(mode_); auto mstart = get_mstart(lmax, mmax_, mstart_); auto theta = to_cmav(theta_); auto phi0 = to_cmav(phi0_); @@ -364,10 +354,10 @@ template py::array Py2_synthesis(const py::array &alm_, vector mapshp(alm_.ndim()); for(size_t i=0; i(map__, mapshp); auto map = to_vmav_with_optional_leading_dimensions(map_); MR_assert(map.shape(0)==alm.shape(0), "bad number of components in map array"); - MR_assert(map.shape(1)==alm.shape(1), "bad number of components in map array"); size_t nthreads_outer=1; if (alm.shape(0)>nthreads) // parallelize over entire transforms { nthreads_outer=nthreads; nthreads=1; } @@ -381,7 +371,7 @@ template py::array Py2_synthesis(const py::array &alm_, auto subalm = subarray<2>(alm, {{itrans},{},{}}); auto submap = subarray<2>(map, {{itrans},{},{}}); synthesis(subalm, submap, spin, lmax, mstart, lstride, theta, nphi, - phi0, ringstart, pixstride, nthreads, ALM2MAP); + phi0, ringstart, pixstride, nthreads, mode); } }); } @@ -392,71 +382,24 @@ py::array Py_synthesis(const py::array &alm, const py::array &theta, const py::array &nphi, const py::array &phi0, const py::array &ringstart, size_t spin, ptrdiff_t lstride, ptrdiff_t pixstride, size_t nthreads, py::object &map, - const py::object &mmax_) + const py::object &mmax_, const string &mode) { if (isPyarr>(alm)) return Py2_synthesis(alm, map, spin, lmax, mstart, lstride, theta, - nphi, phi0, ringstart, pixstride, nthreads, mmax_); + nphi, phi0, ringstart, pixstride, nthreads, mmax_, mode); else if (isPyarr>(alm)) return Py2_synthesis(alm, map, spin, lmax, mstart, 
lstride, theta, - nphi, phi0, ringstart, pixstride, nthreads, mmax_); + nphi, phi0, ringstart, pixstride, nthreads, mmax_, mode); MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); } -template py::array Py2_synthesis_deriv1(const py::array &alm_, - py::object &map__, size_t lmax, - const py::object &mstart_, ptrdiff_t lstride, - const py::array &theta_, - const py::array &nphi_, - const py::array &phi0_, const py::array &ringstart_, - ptrdiff_t pixstride, size_t nthreads, const py::object &mmax_) - { - auto mstart = get_mstart(lmax, mmax_, mstart_); - auto theta = to_cmav(theta_); - auto phi0 = to_cmav(phi0_); - auto nphi = to_cmav(nphi_); - auto ringstart = to_cmav(ringstart_); - MR_assert((alm_.ndim()>=2)&&(alm_.ndim()<=3), "alm must be a 2D or 3D array"); - auto alm = to_cmav_with_optional_leading_dimensions,3>(alm_); - vector mapshp(alm_.ndim()); - for(size_t i=0; i(map__, mapshp); - auto map = to_vmav_with_optional_leading_dimensions(map_); - MR_assert(map.shape(0)==alm.shape(0), "bad number of components in map array"); - size_t nthreads_outer=1; - if (alm.shape(0)>nthreads) // parallelize over entire transforms - { nthreads_outer=nthreads; nthreads=1; } - { - py::gil_scoped_release release; - execDynamic(alm.shape(0), nthreads_outer, 1, [&](Scheduler &sched) - { - while (auto rng=sched.getNext()) - for(auto itrans=rng.lo; itrans(alm, {{itrans},{},{}}); - auto submap = subarray<2>(map, {{itrans},{},{}}); - synthesis(subalm, submap, 1, lmax, mstart, lstride, theta, nphi, phi0, - ringstart, pixstride, nthreads, ALM2MAP_DERIV1); - } - }); - } - return map_; - } py::array Py_synthesis_deriv1(const py::array &alm, const py::array &theta, size_t lmax, const py::object &mstart, const py::array &nphi, const py::array &phi0, const py::array &ringstart, ptrdiff_t lstride, ptrdiff_t pixstride, size_t nthreads, py::object &map, const py::object &mmax_) { - if (isPyarr>(alm)) - return Py2_synthesis_deriv1(alm, map, lmax, mstart, lstride, theta, - nphi, phi0, ringstart, pixstride, nthreads, mmax_); - else if (isPyarr>(alm)) - return Py2_synthesis_deriv1(alm, map, lmax, mstart, lstride, theta, - nphi, phi0, ringstart, pixstride, nthreads, mmax_); - MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); + return Py_synthesis(alm, theta, lmax, mstart, nphi, phi0, ringstart, 1, lstride, + pixstride, nthreads, map, mmax_, "DERIV1"); } @@ -501,92 +444,71 @@ template py::array_t> check_build_alm template py::array Py2_synthesis_2d(const py::array &alm_, size_t spin, size_t lmax, const string &geometry, const py::object & ntheta, - const py::object &nphi, size_t mmax, size_t nthreads, py::object &map__) + const py::object &nphi, size_t mmax, size_t nthreads, py::object &map__, const string &mode_) { + auto mode = get_mode(mode_); auto alm = to_cmav,2>(alm_); - auto map_ = check_build_map(map__, alm.shape(0), ntheta, nphi); + auto map_ = check_build_map(map__, get_nmaps(spin,mode), ntheta, nphi); auto map = to_vmav(map_); - MR_assert(map.shape(0)==alm.shape(0), "bad number of components in map array"); { py::gil_scoped_release release; - synthesis_2d(alm, map, spin, lmax, mmax, geometry, nthreads); + synthesis_2d(alm, map, spin, lmax, mmax, geometry, nthreads, mode); } return map_; } py::array Py_synthesis_2d(const py::array &alm, size_t spin, size_t lmax, const string &geometry, const py::object &ntheta, const py::object &nphi, - const py::object &mmax_, size_t nthreads, py::object &map) + const py::object &mmax_, size_t nthreads, py::object &map, const string &mode) 
{ size_t mmax = mmax_.is_none() ? lmax : mmax_.cast(); if (isPyarr>(alm)) return Py2_synthesis_2d(alm, spin, lmax, geometry, ntheta, nphi, - mmax, nthreads, map); + mmax, nthreads, map, mode); else if (isPyarr>(alm)) return Py2_synthesis_2d(alm, spin, lmax, geometry, ntheta, nphi, - mmax, nthreads, map); + mmax, nthreads, map, mode); MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); } template py::array Py2_adjoint_synthesis_2d( const py::array &map_, size_t spin, size_t lmax, const string &geometry, - size_t mmax, size_t nthreads, py::object &alm__) + size_t mmax, size_t nthreads, py::object &alm__, const string &mode_) { + auto mode = get_mode(mode_); auto map = to_cmav(map_); - auto alm_ = check_build_alm(alm__, map.shape(0), lmax, mmax); + auto alm_ = check_build_alm(alm__, get_nalm(spin,mode), lmax, mmax); auto alm = to_vmav,2>(alm_); - MR_assert(map.shape(0)==alm.shape(0), "bad number of components in map array"); { py::gil_scoped_release release; - adjoint_synthesis_2d(alm, map, spin, lmax, mmax, geometry, nthreads); + adjoint_synthesis_2d(alm, map, spin, lmax, mmax, geometry, nthreads, mode); } return alm_; } py::array Py_adjoint_synthesis_2d( const py::array &map, size_t spin, size_t lmax, const string &geometry, - const py::object &mmax_, size_t nthreads, py::object &alm) + const py::object &mmax_, size_t nthreads, py::object &alm, const string &mode) { size_t mmax = mmax_.is_none() ? lmax : mmax_.cast(); if (isPyarr(map)) return Py2_adjoint_synthesis_2d(map, spin, lmax, geometry, mmax, - nthreads, alm); + nthreads, alm,mode); else if (isPyarr(map)) return Py2_adjoint_synthesis_2d(map, spin, lmax, geometry, mmax, - nthreads, alm); + nthreads, alm, mode); MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); } -template py::array Py2_synthesis_2d_deriv1(const py::array &alm_, - size_t lmax, const string &geometry, const py::object & ntheta, - const py::object &nphi, size_t mmax, size_t nthreads, py::object &map__) - { - auto alm = to_cmav,2>(alm_); - auto map_ = check_build_map(map__, 2, ntheta, nphi); - auto map = to_vmav(map_); - MR_assert((map.shape(0)==2) && (alm.shape(0)==1), - "incorrect number of components"); - { - py::gil_scoped_release release; - synthesis_2d(alm, map, 1, lmax, mmax, geometry, nthreads, ALM2MAP_DERIV1); - } - return map_; - } py::array Py_synthesis_2d_deriv1(const py::array &alm, size_t lmax, const string &geometry, const py::object &ntheta, const py::object &nphi, const py::object &mmax_, size_t nthreads, py::object &map) { - size_t mmax = mmax_.is_none() ? 
lmax : mmax_.cast(); - if (isPyarr>(alm)) - return Py2_synthesis_2d_deriv1(alm, lmax, geometry, ntheta, nphi, - mmax, nthreads, map); - else if (isPyarr>(alm)) - return Py2_synthesis_2d_deriv1(alm, lmax, geometry, ntheta, nphi, - mmax, nthreads, map); - MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); + return Py_synthesis_2d(alm, 1, lmax, geometry, ntheta, nphi, mmax_, nthreads, map, "DERIV1"); } template py::array Py2_adjoint_synthesis(py::object &alm__, size_t lmax, const py::object &mstart_, ptrdiff_t lstride, const py::array &map_, const py::array &theta_, const py::array &phi0_, const py::array &nphi_, const py::array &ringstart_, size_t spin, - ptrdiff_t pixstride, size_t nthreads, const py::object &mmax_) + ptrdiff_t pixstride, size_t nthreads, const py::object &mmax_, const string &mode_) { + auto mode = get_mode(mode_); auto mstart = get_mstart(lmax, mmax_, mstart_); auto theta = to_cmav(theta_); auto phi0 = to_cmav(phi0_); @@ -600,10 +522,10 @@ template py::array Py2_adjoint_synthesis(py::object &alm__, vector almshp(map_.ndim()); for(size_t i=0; i>(alm__, almshp); auto alm = to_vmav_with_optional_leading_dimensions,3>(alm_); MR_assert(map.shape(0)==alm.shape(0), "bad number of components in alm array"); - MR_assert(map.shape(1)==alm.shape(1), "bad number of components in alm array"); size_t nthreads_outer=1; if (map.shape(0)>nthreads) // parallelize over entire transforms { nthreads_outer=nthreads; nthreads=1; } @@ -617,7 +539,7 @@ template py::array Py2_adjoint_synthesis(py::object &alm__, auto submap = subarray<2>(map, {{itrans},{},{}}); auto subalm = subarray<2>(alm, {{itrans},{},{}}); adjoint_synthesis(subalm, submap, spin, lmax, mstart, lstride, theta, - nphi, phi0, ringstart, pixstride, nthreads); + nphi, phi0, ringstart, pixstride, nthreads, mode); } }); } @@ -630,14 +552,15 @@ py::array Py_adjoint_synthesis(const py::array &map, const py::array &theta, const py::array &phi0, const py::array &ringstart, size_t spin, ptrdiff_t lstride, ptrdiff_t pixstride, size_t nthreads, - py::object &alm, const py::object &mmax_) + py::object &alm, const py::object &mmax_, + const string &mode) { if (isPyarr(map)) return Py2_adjoint_synthesis(alm, lmax, mstart, lstride, map, theta, - phi0, nphi, ringstart, spin, pixstride, nthreads, mmax_); + phi0, nphi, ringstart, spin, pixstride, nthreads, mmax_, mode); else if (isPyarr(map)) return Py2_adjoint_synthesis(alm, lmax, mstart, lstride, map, theta, - phi0, nphi, ringstart, spin, pixstride, nthreads, mmax_); + phi0, nphi, ringstart, spin, pixstride, nthreads, mmax_, mode); MR_fail("type matching failed: 'alm' has neither type 'c8' nor 'c16'"); } template py::object Py2_pseudo_analysis(py::object &alm__, @@ -781,32 +704,32 @@ py::array Py_adjoint_analysis_2d(const py::array &alm, size_t spin, size_t lmax, template py::array Py2_synthesis_general(const py::array &alm_, size_t spin, size_t lmax, const py::array &loc_, double epsilon, size_t mmax, - size_t nthreads, py::object &map__, double sigma_min, double sigma_max) + size_t nthreads, py::object &map__, double sigma_min, double sigma_max, const string &mode_) { + auto mode = get_mode(mode_); auto alm = to_cmav,2>(alm_); auto loc = to_cmav(loc_); MR_assert(loc.shape(1)==2, "last dimension of loc must have size 2"); - size_t ncomp = (spin==0) ? 
1 : 2; - MR_assert(alm.shape(0)==ncomp, "number of components mismatch in alm"); - auto map_ = get_optional_Pyarr(map__, {alm.shape(0), loc.shape(0)}); + MR_assert(alm.shape(0)==get_nalm(spin,mode), "number of components mismatch in alm"); + auto map_ = get_optional_Pyarr(map__, {get_nmaps(spin,mode), loc.shape(0)}); auto map = to_vmav(map_); { py::gil_scoped_release release; - synthesis_general(alm, map, spin, lmax, mmax, loc, epsilon, sigma_min, sigma_max, nthreads); + synthesis_general(alm, map, spin, lmax, mmax, loc, epsilon, sigma_min, sigma_max, nthreads, mode); } return map_; } py::array Py_synthesis_general(const py::array &alm, size_t spin, size_t lmax, const py::array &loc, double epsilon, const py::object &mmax_, - size_t nthreads, py::object &map, double sigma_min, double sigma_max) + size_t nthreads, py::object &map, double sigma_min, double sigma_max, const string &mode) { size_t mmax = mmax_.is_none() ? lmax : mmax_.cast(); if (isPyarr(loc)) { if (isPyarr>(alm)) - return Py2_synthesis_general(alm, spin, lmax, loc, epsilon, mmax, nthreads, map, sigma_min, sigma_max); + return Py2_synthesis_general(alm, spin, lmax, loc, epsilon, mmax, nthreads, map, sigma_min, sigma_max, mode); else if (isPyarr>(alm)) - return Py2_synthesis_general(alm, spin, lmax, loc, epsilon, mmax, nthreads, map, sigma_min, sigma_max); + return Py2_synthesis_general(alm, spin, lmax, loc, epsilon, mmax, nthreads, map, sigma_min, sigma_max, mode); } #if 0 else if (isPyarr(loc)) @@ -823,33 +746,33 @@ py::array Py_synthesis_general(const py::array &alm, size_t spin, size_t lmax, template py::array Py2_adjoint_synthesis_general(const py::array &map_, size_t spin, size_t lmax, const py::array &loc_, double epsilon, size_t mmax, - size_t nthreads, py::object &alm__, double sigma_min, double sigma_max) + size_t nthreads, py::object &alm__, double sigma_min, double sigma_max, const string &mode_) { + auto mode = get_mode(mode_); auto map = to_cmav(map_); auto loc = to_cmav(loc_); MR_assert(loc.shape(1)==2, "last dimension of loc must have size 2"); - size_t ncomp = (spin==0) ? 1 : 2; - MR_assert(map.shape(0)==ncomp, "number of components mismatch in map"); - auto alm_ = check_build_alm(alm__, map.shape(0), lmax, mmax); + MR_assert(map.shape(0)==get_nmaps(spin,mode), "number of components mismatch in map"); + auto alm_ = check_build_alm(alm__, get_nalm(spin,mode), lmax, mmax); auto alm = to_vmav,2>(alm_); { py::gil_scoped_release release; - adjoint_synthesis_general(alm, map, spin, lmax, mmax, loc, epsilon, sigma_min, sigma_max, nthreads); + adjoint_synthesis_general(alm, map, spin, lmax, mmax, loc, epsilon, sigma_min, sigma_max, nthreads, mode); } return alm_; } py::array Py_adjoint_synthesis_general(const py::array &map, size_t spin, size_t lmax, const py::array &loc, double epsilon, const py::object &mmax_, - size_t nthreads, py::object &alm, double sigma_min, double sigma_max) + size_t nthreads, py::object &alm, double sigma_min, double sigma_max, const string &mode) { size_t mmax = mmax_.is_none() ? 
lmax : mmax_.cast(); if (isPyarr(loc)) { if (isPyarr(map)) - return Py2_adjoint_synthesis_general(map, spin, lmax, loc, epsilon, mmax, nthreads, alm, sigma_min, sigma_max); + return Py2_adjoint_synthesis_general(map, spin, lmax, loc, epsilon, mmax, nthreads, alm, sigma_min, sigma_max, mode); else if (isPyarr(map)) - return Py2_adjoint_synthesis_general(map, spin, lmax, loc, epsilon, mmax, nthreads, alm, sigma_min, sigma_max); + return Py2_adjoint_synthesis_general(map, spin, lmax, loc, epsilon, mmax, nthreads, alm, sigma_min, sigma_max, mode); } #if 0 else if (isPyarr(loc)) @@ -1019,13 +942,13 @@ template class Py_sharpjob ringstart(rs) = size_t(base.Npix() - startpix - ringpix); } vmav mr(map.data(), {1, size_t(npix_)}, {0, map.stride(0)}); - synthesis(ar, mr, 0, lmax_, mstart, 1, theta, nphi, phi0, ringstart, 1, nthreads); + synthesis(ar, mr, 0, lmax_, mstart, 1, theta, nphi, phi0, ringstart, 1, nthreads, STANDARD); } else { vmav mr(map.data(), {1, size_t(ntheta_), size_t(nphi_)}, {0, ptrdiff_t(map.stride(0)*nphi_), map.stride(0)}); - synthesis_2d(ar, mr, 0, lmax_, mmax_, geom, nthreads); + synthesis_2d(ar, mr, 0, lmax_, mmax_, geom, nthreads, STANDARD); } return map_; } @@ -1059,13 +982,13 @@ template class Py_sharpjob ringstart(rs) = size_t(base.Npix() - startpix - ringpix); } cmav mr(map.data(), {1, npix_}, {0, map.stride(0)}); - adjoint_synthesis(ar, mr, 0, lmax_, mstart, 1, theta, nphi, phi0, ringstart, 1, nthreads); + adjoint_synthesis(ar, mr, 0, lmax_, mstart, 1, theta, nphi, phi0, ringstart, 1, nthreads,STANDARD); } else { cmav mr(map.data(), {1, size_t(ntheta_), size_t(nphi_)}, {0, ptrdiff_t(map.stride(0)*nphi_), map.stride(0)}); - adjoint_synthesis_2d(ar, mr, 0, lmax_, mmax_, geom, nthreads); + adjoint_synthesis_2d(ar, mr, 0, lmax_, mmax_, geom, nthreads, STANDARD); } return alm_; } @@ -1111,13 +1034,13 @@ template class Py_sharpjob ringstart(r) = size_t(startpix); ringstart(rs) = size_t(base.Npix() - startpix - ringpix); } - synthesis(alm, map, spin, lmax_, mstart, 1, theta, nphi, phi0, ringstart, 1, nthreads); + synthesis(alm, map, spin, lmax_, mstart, 1, theta, nphi, phi0, ringstart, 1, nthreads, STANDARD); } else { vmav mr(map.data(), {2, ntheta_, nphi_}, {map.stride(0), ptrdiff_t(map.stride(1)*nphi_), map.stride(1)}); - synthesis_2d(alm, mr, spin, lmax_, mmax_, geom, nthreads); + synthesis_2d(alm, mr, spin, lmax_, mmax_, geom, nthreads, STANDARD); } return map_; } @@ -1185,17 +1108,15 @@ dependent on theta and m. Parameters ---------- -alm: numpy.ndarray((ncomp, x), dtype=numpy.complex64 or numpy.complex128) +alm: numpy.ndarray((nalm, x), dtype=numpy.complex64 or numpy.complex128) the set of spherical harmonic coefficients. - ncomp must be 1 if spin is 0, else 2. The second dimension must be large enough to accommodate all entries, which are stored according to the parameters `lmax`, 'mval`, `mstart`, and `lstride`. 
-leg: None or numpy.ndarray((ncomp, ntheta, nm), same dtype as `alm`) +leg: None or numpy.ndarray((nleg, ntheta, nm), same dtype as `alm`) output array containing the Legendre coefficients if `None`, a new suitable array is allocated spin: int >= 0 the spin to use for the transform - if spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l moment of the transform (inclusive) mval: numpy.ndarray((nm,), dtype = numpy.uint64) @@ -1212,11 +1133,22 @@ theta: numpy.ndarray((ntheta,), dtype=numpy.float64) nthreads: int >= 0 the number of threads to use for the computation if 0, use as many threads as there are hardware threads available on the system +mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are assumed to be zero + | "DERIV1": same as "GRAD_ONLY", but spin is assumed to be 1 and a_lm are + muliplied by sqrt(l*(l+1)) Returns ------- -numpy.ndarray((ncomp, ntheta, nm), same dtype as `alm`) +numpy.ndarray((nleg, ntheta, nm), same dtype as `alm`) the Legendre coefficients. If `leg` was supplied, this will be the same object. + +Notes +----- +nleg = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *alm2leg_deriv1_DS = R"""( @@ -1263,16 +1195,14 @@ Transforms a set of Legendre coefficients to spherical harmonic coefficients Parameters ---------- -leg: numpy.ndarray((ncomp, ntheta, nm), dtype=numpy.complex64 or numpy.complex128) - ncomp must be 1 if spin is 0, else 2 -alm: None or numpy.ndarray((ncomp, x), same dtype as `leg`) +leg: numpy.ndarray((nleg, ntheta, nm), dtype=numpy.complex64 or numpy.complex128) +alm: None or numpy.ndarray((nalm, x), same dtype as `leg`) the set of spherical harmonic coefficients. The second dimension must be large enough to accommodate all entries, which are stored according to the parameters `lmax`, 'mval`, `mstart`, and `lstride`. if `None`, a new suitable array is allocated spin: int >= 0 the spin to use for the transform - if spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l moment of the transform (inclusive) mval: numpy.ndarray((nm,), dtype = numpy.uint64) @@ -1289,13 +1219,22 @@ theta: numpy.ndarray((ntheta,), dtype=numpy.float64) nthreads: int >= 0 the number of threads to use for the computation if 0, use as many threads as there are hardware threads available on the system +mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are not computed Returns ------- -numpy.ndarray((ncomp, *), same dtype as `leg`) +numpy.ndarray((nalm, *), same dtype as `leg`) the Legendre coefficients. if `alm` was supplied, this will be the same object If newly allocated, the smallest possible second dimensions will be chosen. + +Notes +----- +nleg = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *map2leg_DS = R"""( @@ -1388,11 +1327,11 @@ Transforms one or two sets of spherical harmonic coefficients to 2D maps. Parameters ---------- -alm: numpy.ndarray((ncomp, x), dtype=numpy.complex64 or numpy.complex128) +alm: numpy.ndarray((nalm, x), dtype=numpy.complex64 or numpy.complex128) the set of spherical harmonic coefficients. The second dimension must be large enough to accommodate all entries, which are stored according to the healpy convention. 
-map: numpy.ndarray((ncomp, ntheta, nphi), dtype=numpy.float of same accuracy as alm) +map: numpy.ndarray((nmaps, ntheta, nphi), dtype=numpy.float of same accuracy as alm) storage for the output map. If not supplied, a new array is allocated. ntheta, nphi: int > 0 @@ -1401,7 +1340,6 @@ ntheta, nphi: int > 0 If supplied, and `map` is also supplied, must match with the array dimensions spin: int >= 0 the spin to use for the transform. - If spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l moment of the transform (inclusive). mmax: int >= 0 and <= lmax @@ -1424,12 +1362,23 @@ geometry: one of "CC", "F1", "MW", "MWflip", "GL", "DH", "F2" nthreads: int >= 0 the number of threads to use for the computation. If 0, use as many threads as there are hardware threads available on the system +mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are assumed to be zero + | "DERIV1": same as "GRAD_ONLY", but spin is assumed to be 1 and a_lm are + muliplied by sqrt(l*(l+1)) Returns ------- -numpy.ndarray((ncomp, ntheta, nphi), dtype=numpy.float of same accuracy as alm) +numpy.ndarray((nmaps, ntheta, nphi), dtype=numpy.float of same accuracy as alm) the computed map. If the map parameter was specified, this is identical with map. + +Notes +----- +nmaps = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *synthesis_2d_deriv1_DS = R"""( @@ -1474,7 +1423,7 @@ nthreads: int >= 0 Returns ------- -numpy.ndarray((ncomp, ntheta, nphi), dtype=numpy.float of same accuracy as alm) +numpy.ndarray((2, ntheta, nphi), dtype=numpy.float of same accuracy as alm) the maps containing the derivatives with respect to theta and phi. If the map parameter was specified, this is identical with map. )"""; @@ -1485,16 +1434,15 @@ This is the adjoint operation of `synthesis_2D`. Parameters ---------- -alm: numpy.ndarray((ncomp, x), dtype=numpy.complex64 or numpy.complex128) +alm: numpy.ndarray((nalm, x), dtype=numpy.complex64 or numpy.complex128) storage for the spherical harmonic coefficients. The second dimension must be large enough to accommodate all entries, which are stored according to the healpy convention. If not supplied, a new array is allocated. -map: numpy.ndarray((ncomp, ntheta, nphi), dtype=numpy.float of same accuracy as alm) +map: numpy.ndarray((nmaps, ntheta, nphi), dtype=numpy.float of same accuracy as alm) The input map. spin: int >= 0 the spin to use for the transform. - If spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l (and m) moment of the transform (inclusive) mmax: int >= 0 and <= lmax @@ -1517,12 +1465,21 @@ geometry: one of "CC", "F1", "MW", "MWflip", "GL", "DH", "F2" nthreads: int >= 0 the number of threads to use for the computation. If 0, use as many threads as there are hardware threads available on the system +mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are not computed Returns ------- -numpy.ndarray((ncomp, x), dtype=numpy.complex64 or numpy.complex128) +numpy.ndarray((nalm, x), dtype=numpy.complex64 or numpy.complex128) the computed spherical harmonic coefficients If the `alm` parameter was specified, this is identical to `alm`. 
+ +Notes +----- +nmaps = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *analysis_2d_DS = R"""( @@ -1648,12 +1605,11 @@ Transforms (sets of) one or two sets of spherical harmonic coefficients to maps Parameters ---------- -alm: numpy.ndarray(([ntrans,] ncomp, x), dtype=numpy.complex64 or numpy.complex128) +alm: numpy.ndarray(([ntrans,] nalm, x), dtype=numpy.complex64 or numpy.complex128) the set(s) of spherical harmonic coefficients. - ncomp must be 1 if spin is 0, else 2. The last dimension must be large enough to accommodate all entries, which are stored according to the parameters `lmax`, 'mmax`, `mstart`, and `lstride`. -map: None or numpy.ndarray(([ntrans,] ncomp, x), dtype=numpy.float of same accuracy as `alm` +map: None or numpy.ndarray(([ntrans,] nmaps, x), dtype=numpy.float of same accuracy as `alm` the map pixel data. The last dimension must be large enough to accommodate all pixels, which are stored according to the parameters `nphi`, 'ringstart`, and `pixstride`. @@ -1682,18 +1638,28 @@ nthreads: int >= 0 if 0, use as many threads as there are hardware threads available on the system spin: int >= 0 the spin to use for the transform. - If spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l moment of the transform (inclusive). mmax: int >= 0 <= lmax the maximum m moment of the transform (inclusive). +mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are assumed to be zero + | "DERIV1": same as "GRAD_ONLY", but spin is assumed to be 1 and a_lm are + muliplied by sqrt(l*(l+1)) Returns ------- -numpy.ndarray(([ntrans,] ncomp, x), dtype=numpy.float of same accuracy as `alm`) +numpy.ndarray(([ntrans,] nmaps, x), dtype=numpy.float of same accuracy as `alm`) the map pixel data. If `map` was supplied, this will be the same object If newly allocated, the smallest possible last dimension will be chosen. + +Notes +----- +nmaps = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *adjoint_synthesis_DS = R"""( @@ -1702,12 +1668,12 @@ This is the adjoint operation of `synthesis`. Parameters ---------- -alm: None or numpy.ndarray(([ntrans,] ncomp, x), dtype=numpy.complex of same precision as `map`) +alm: None or numpy.ndarray(([ntrans,] nalm, x), dtype=numpy.complex of same precision as `map`) the set of spherical harmonic coefficients. The last dimension must be large enough to accommodate all entries, which are stored according to the parameters `lmax`, 'mmax`, `mstart`, and `lstride`. if `None`, a new suitable array is allocated -map: numpy.ndarray(([ntrans,] ncomp, x), dtype=numpy.float32 or numpy.float64 +map: numpy.ndarray(([ntrans,] nmaps, x), dtype=numpy.float32 or numpy.float64 The last dimension must be large enough to accommodate all pixels, which are stored according to the parameters `nphi`, `ringstart`, and `pixstride`. theta: numpy.ndarray((ntheta,), dtype=numpy.float64) @@ -1734,18 +1700,26 @@ nthreads: int >= 0 if 0, use as many threads as there are hardware threads available on the system spin: int >= 0 the spin to use for the transform. - If spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l moment of the transform (inclusive). mmax: int >= 0 <= lmax the maximum m moment of the transform (inclusive). 
+mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are not computed Returns ------- -numpy.ndarray(([ntrans,] ncomp, x), dtype=numpy.complex of same accuracy as `map`) +numpy.ndarray(([ntrans,] nalm, x), dtype=numpy.complex of same accuracy as `map`) the set(s) of spherical harmonic coefficients. If `alm` was supplied, this will be the same object If newly allocated, the smallest possible last dimension will be chosen. + +Notes +----- +nmaps = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *pseudo_analysis_DS = R"""( @@ -1875,13 +1849,12 @@ Evaluate a_lm at arbitrary positions on the sphere Parameters ---------- -alm: numpy.ndarray((ncomp, x), dtype=numpy.complex64 or numpy.complex128) +alm: numpy.ndarray((nalm, x), dtype=numpy.complex64 or numpy.complex128) the set(s) of spherical harmonic coefficients. The last dimension must be large enough to accommodate all entries, which are stored according to the healpy convention. spin: int >= 0 the spin to use for the transform. - If spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l moment of the transform (inclusive). mmax: int >= 0 and <= lmax @@ -1898,29 +1871,39 @@ epsilon : float nthreads: int >= 0 the number of threads to use for the computation if 0, use as many threads as there are hardware threads available on the system -map: None or numpy.ndarray((ncomp, npix), dtype=numpy.float of same accuracy as `alm` +map: None or numpy.ndarray((nmaps, npix), dtype=numpy.float of same accuracy as `alm` the map pixel data. If `None`, a new suitable array is allocated. sigma_min, sigma_max: float minimum and maximum allowed oversampling factors for the NUFFT component 1.2 <= sigma_min < sigma_max <= 2.5 +mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are assumed to be zero + | "DERIV1": same as "GRAD_ONLY", but spin is assumed to be 1 and a_lm are + muliplied by sqrt(l*(l+1)) Returns ------- -numpy.ndarray((ncomp, npix), dtype=numpy.float of same accuracy as `alm` +numpy.ndarray((nmaps, npix), dtype=numpy.float of same accuracy as `alm` the pixel values at the locations specified by `loc`. If the map parameter was specified, this is identical with map. + +Notes +----- +nmaps = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *adjoint_synthesis_general_DS = R"""( This is the adjoint operation of `synthesis_general`. Parameters ---------- -map: numpy.ndarray((ncomp, npix), dtype=numpy.float32 or numpy.float64 +map: numpy.ndarray((nmaps, npix), dtype=numpy.float32 or numpy.float64 The pixel values at the locations specified by `loc`. spin: int >= 0 the spin to use for the transform. - If spin==0, ncomp must be 1, otherwise 2 lmax: int >= 0 the maximum l moment of the transform (inclusive). mmax: int >= 0 and <= lmax @@ -1937,7 +1920,7 @@ epsilon : float nthreads: int >= 0 the number of threads to use for the computation if 0, use as many threads as there are hardware threads available on the system -alm: None or numpy.ndarray((ncomp, x), dtype=complex, same accuracy as `map`) +alm: None or numpy.ndarray((nalm, x), dtype=complex, same accuracy as `map`) the set(s) of spherical harmonic coefficients. The last dimension must be large enough to accommodate all entries, which are stored according to the healpy convention. 
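At the Python level, the grad-only variants of the general (arbitrary-location) transforms are used as in the following minimal sketch, which mirrors the adjointness test added in python/test/test_sht.py; lmax, npix, epsilon and the random inputs are illustrative and not taken from that test:

# Grad-only synthesis at arbitrary locations and its adjoint (illustrative sketch).
import numpy as np
import ducc0

rng = np.random.default_rng(42)
lmax = mmax = 50
spin = 2
npix = 1000
nalm = (lmax + 1) * (lmax + 2) // 2   # triangular a_lm storage with mmax == lmax

# random locations on the sphere: colatitude in [0, pi), longitude in [0, 2*pi)
loc = rng.uniform(0., 1., (npix, 2))
loc[:, 0] *= np.pi
loc[:, 1] *= 2 * np.pi

# one a_lm component in (gradient part only), two map components out
# (for a physically meaningful field the m == 0 modes would be real and
#  modes with l < spin zero; for this shape demonstration it does not matter)
alm = rng.standard_normal((1, nalm)) + 1j * rng.standard_normal((1, nalm))
maps = ducc0.sht.experimental.synthesis_general(
    alm=alm, spin=spin, lmax=lmax, mmax=mmax, loc=loc, epsilon=1e-6,
    nthreads=2, mode="GRAD_ONLY")
assert maps.shape == (2, npix)

# the adjoint maps the two map components back onto a single a_lm component
alm_adj = ducc0.sht.experimental.adjoint_synthesis_general(
    map=maps, spin=spin, lmax=lmax, mmax=mmax, loc=loc, epsilon=1e-6,
    nthreads=2, mode="GRAD_ONLY")
assert alm_adj.shape == (1, nalm)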
@@ -1945,12 +1928,21 @@ alm: None or numpy.ndarray((ncomp, x), dtype=complex, same accuracy as `map`) sigma_min, sigma_max: float minimum and maximum allowed oversampling factors for the NUFFT component 1.2 <= sigma_min < sigma_max <= 2.5 +mode: str + the transform mode + | "STANDARD": standard transform + | "GRAD_ONLY": only valid for spin>0, curl a_lm are not computed Returns ------- -numpy.ndarray((ncomp, x), dtype=complex, same accuracy as `map`) +numpy.ndarray((nalm, x), dtype=complex, same accuracy as `map`) the computed spherical harmonic coefficients If the `alm` parameter was specified, this is identical to `alm`. + +Notes +----- +nmaps = 1 if spin == 0 else 2 +nalm = 1 if spin == 0 else (2 if mode == "STANDARD" else 1) )"""; constexpr const char *pseudo_analysis_general_DS = R"""( @@ -2033,10 +2025,10 @@ void add_sht(py::module_ &msup) m2.def("synthesis", &Py_synthesis, synthesis_DS, py::kw_only(), "alm"_a, "theta"_a, "lmax"_a, "mstart"_a=None, "nphi"_a, "phi0"_a, "ringstart"_a, "spin"_a, - "lstride"_a=1, "pixstride"_a=1, "nthreads"_a=1, "map"_a=None, "mmax"_a=None); + "lstride"_a=1, "pixstride"_a=1, "nthreads"_a=1, "map"_a=None, "mmax"_a=None, "mode"_a="STANDARD"); m2.def("adjoint_synthesis", &Py_adjoint_synthesis, adjoint_synthesis_DS, py::kw_only(), "map"_a, "theta"_a, "lmax"_a, "mstart"_a=None, "nphi"_a, "phi0"_a, "ringstart"_a, "spin"_a, - "lstride"_a=1, "pixstride"_a=1, "nthreads"_a=1, "alm"_a=None, "mmax"_a=None); + "lstride"_a=1, "pixstride"_a=1, "nthreads"_a=1, "alm"_a=None, "mmax"_a=None, "mode"_a="STANDARD"); m2.def("pseudo_analysis", &Py_pseudo_analysis, pseudo_analysis_DS, py::kw_only(), "map"_a, "theta"_a, "lmax"_a, "mstart"_a=None, "nphi"_a, "phi0"_a, "ringstart"_a, "spin"_a, "lstride"_a=1, "pixstride"_a=1, "nthreads"_a=1, "alm"_a=None, "maxiter"_a, "epsilon"_a, "mmax"_a=None); @@ -2044,23 +2036,23 @@ void add_sht(py::module_ &msup) "lmax"_a, "mstart"_a=None, "nphi"_a, "phi0"_a, "ringstart"_a, "lstride"_a=1, "pixstride"_a=1, "nthreads"_a=1, "map"_a=None, "mmax"_a=None); - m2.def("synthesis_2d", &Py_synthesis_2d, synthesis_2d_DS, py::kw_only(), "alm"_a, "spin"_a, "lmax"_a, "geometry"_a, "ntheta"_a=None, "nphi"_a=None, "mmax"_a=None, "nthreads"_a=1, "map"_a=None); - m2.def("adjoint_synthesis_2d", &Py_adjoint_synthesis_2d, adjoint_synthesis_2d_DS, py::kw_only(), "map"_a, "spin"_a, "lmax"_a, "geometry"_a, "mmax"_a=None, "nthreads"_a=1, "alm"_a=None); + m2.def("synthesis_2d", &Py_synthesis_2d, synthesis_2d_DS, py::kw_only(), "alm"_a, "spin"_a, "lmax"_a, "geometry"_a, "ntheta"_a=None, "nphi"_a=None, "mmax"_a=None, "nthreads"_a=1, "map"_a=None, "mode"_a="STANDARD"); + m2.def("adjoint_synthesis_2d", &Py_adjoint_synthesis_2d, adjoint_synthesis_2d_DS, py::kw_only(), "map"_a, "spin"_a, "lmax"_a, "geometry"_a, "mmax"_a=None, "nthreads"_a=1, "alm"_a=None, "mode"_a="STANDARD"); m2.def("synthesis_2d_deriv1", &Py_synthesis_2d_deriv1, synthesis_2d_deriv1_DS, py::kw_only(), "alm"_a, "lmax"_a, "geometry"_a, "ntheta"_a=None, "nphi"_a=None, "mmax"_a=None, "nthreads"_a=1, "map"_a=None); m2.def("analysis_2d", &Py_analysis_2d, analysis_2d_DS, py::kw_only(), "map"_a, "spin"_a, "lmax"_a, "geometry"_a, "mmax"_a=None, "nthreads"_a=1, "alm"_a=None); m2.def("adjoint_analysis_2d", &Py_adjoint_analysis_2d, adjoint_analysis_2d_DS, py::kw_only(), "alm"_a, "spin"_a, "lmax"_a, "geometry"_a, "ntheta"_a=None, "nphi"_a=None, "mmax"_a=None, "nthreads"_a=1, "map"_a=None); // FIXME: maybe add mstart, lstride - m2.def("synthesis_general", &Py_synthesis_general, synthesis_general_DS, py::kw_only(), "alm"_a, 
"spin"_a, "lmax"_a, "loc"_a, "epsilon"_a=1e-5, "mmax"_a=None, "nthreads"_a=1, "map"_a=None, "sigma_min"_a=1.1, "sigma_max"_a=2.6); - m2.def("adjoint_synthesis_general", &Py_adjoint_synthesis_general, adjoint_synthesis_general_DS, py::kw_only(), "map"_a, "spin"_a, "lmax"_a, "loc"_a, "epsilon"_a=1e-5, "mmax"_a=None, "nthreads"_a=1, "alm"_a=None, "sigma_min"_a=1.1, "sigma_max"_a=2.6); + m2.def("synthesis_general", &Py_synthesis_general, synthesis_general_DS, py::kw_only(), "alm"_a, "spin"_a, "lmax"_a, "loc"_a, "epsilon"_a=1e-5, "mmax"_a=None, "nthreads"_a=1, "map"_a=None, "sigma_min"_a=1.1, "sigma_max"_a=2.6, "mode"_a="STANDARD"); + m2.def("adjoint_synthesis_general", &Py_adjoint_synthesis_general, adjoint_synthesis_general_DS, py::kw_only(), "map"_a, "spin"_a, "lmax"_a, "loc"_a, "epsilon"_a=1e-5, "mmax"_a=None, "nthreads"_a=1, "alm"_a=None, "sigma_min"_a=1.1, "sigma_max"_a=2.6, "mode"_a="STANDARD"); m2.def("pseudo_analysis_general", &Py_pseudo_analysis_general, pseudo_analysis_general_DS, py::kw_only(), "lmax"_a, "map"_a, "loc"_a, "spin"_a, "nthreads"_a, "maxiter"_a, "epsilon"_a=1e-5, "sigma_min"_a=1.1, "sigma_max"_a=2.6, "mmax"_a=None, "alm"_a=None); m2.def("GL_weights",&Py_GL_weights, "nlat"_a, "nlon"_a); m2.def("GL_thetas",&Py_GL_thetas, "nlat"_a); m2.def("get_gridweights", &Py_get_gridweights, "type"_a, "ntheta"_a); - m2.def("alm2leg", &Py_alm2leg, alm2leg_DS, py::kw_only(), "alm"_a, "lmax"_a, "theta"_a, "spin"_a=0, "mval"_a=None, "mstart"_a=None, "lstride"_a=1, "nthreads"_a=1, "leg"_a=None); + m2.def("alm2leg", &Py_alm2leg, alm2leg_DS, py::kw_only(), "alm"_a, "lmax"_a, "theta"_a, "spin"_a=0, "mval"_a=None, "mstart"_a=None, "lstride"_a=1, "nthreads"_a=1, "leg"_a=None, "mode"_a="STANDARD"); m2.def("alm2leg_deriv1", &Py_alm2leg_deriv1, alm2leg_deriv1_DS, py::kw_only(), "alm"_a, "lmax"_a, "theta"_a, "mval"_a=None, "mstart"_a=None, "lstride"_a=1, "nthreads"_a=1, "leg"_a=None); - m2.def("leg2alm", &Py_leg2alm, leg2alm_DS, py::kw_only(), "leg"_a, "lmax"_a, "theta"_a, "spin"_a=0, "mval"_a=None, "mstart"_a=None, "lstride"_a=1, "nthreads"_a=1, "alm"_a=None); + m2.def("leg2alm", &Py_leg2alm, leg2alm_DS, py::kw_only(), "leg"_a, "lmax"_a, "theta"_a, "spin"_a=0, "mval"_a=None, "mstart"_a=None, "lstride"_a=1, "nthreads"_a=1, "alm"_a=None, "mode"_a="STANDARD"); m2.def("map2leg", &Py_map2leg, map2leg_DS, py::kw_only(), "map"_a, "nphi"_a, "phi0"_a, "ringstart"_a, "mmax"_a, "pixstride"_a=1, "nthreads"_a=1, "leg"_a=None); m2.def("leg2map", &Py_leg2map, leg2map_DS, py::kw_only(), "leg"_a, "nphi"_a, "phi0"_a, "ringstart"_a, "pixstride"_a=1, "nthreads"_a=1, "map"_a=None); m.def("rotate_alm", &Py_rotate_alm, rotate_alm_DS, "alm"_a, "lmax"_a, "psi"_a, "theta"_a, diff --git a/python/test/test_sht.py b/python/test/test_sht.py index 4202bcdf..c3c8c244 100644 --- a/python/test/test_sht.py +++ b/python/test/test_sht.py @@ -131,6 +131,20 @@ def test_2d_adjoint(lmmax, geometry, spin, nthreads): v1 = np.sum([myalmdot(alm0[i], alm1[i], lmax) for i in range(ncomp)]) assert_(np.abs((v1-v2)/v1)<1e-10) + if spin > 0: + # test adjointness between synthesis and adjoint_synthesis (gradient only) + map1 = ducc0.sht.experimental.synthesis_2d(alm=alm0[:1], lmax=lmax, mmax=mmax, spin=spin, ntheta=nrings, nphi=nphi, nthreads=nthreads, geometry=geometry, mode="GRAD_ONLY") + v2 = np.sum([ducc0.misc.vdot(map0[i], map1[i]) for i in range(ncomp)]) + # compare grad-only alm2map with full alm2map where curl is set to zero + almx = alm0.copy() + almx[1] = 0 + map1b = ducc0.sht.experimental.synthesis_2d(alm=almx, lmax=lmax, mmax=mmax, 
spin=spin, ntheta=nrings, nphi=nphi, nthreads=nthreads, geometry=geometry) + assert_(ducc0.misc.l2error(map1, map1b)<1e-10) + del almx, map1 + alm1 = ducc0.sht.experimental.adjoint_synthesis_2d(lmax=lmax, mmax=mmax, spin=spin, map=map0, nthreads=nthreads, geometry=geometry, mode="GRAD_ONLY") + v1 = np.sum([myalmdot(alm0[i], alm1[i], lmax) for i in range(1)]) + assert_(np.abs((v1-v2)/v1)<1e-10) + # test adjointness between analysis and adjoint_analysis # naive version @@ -189,6 +203,14 @@ def test_healpix_adjoint(lmax, nside, spin, mmaxhalf, nthreads): v1 = np.sum([myalmdot(alm0[i], alm1[i], lmax) for i in range(ncomp)]) assert_(np.abs((v1-v2)/v1)<1e-10) + if spin > 0: + map1 = ducc0.sht.experimental.synthesis(alm=alm0[:1], lmax=lmax, spin=spin, nthreads=nthreads, mmax=mmax, mode="GRAD_ONLY", **geom) + v2 = np.sum([ducc0.misc.vdot(map0[i], map1[i]) for i in range(ncomp)]) + del map1 + alm1 = ducc0.sht.experimental.adjoint_synthesis(lmax=lmax, spin=spin, map=map0, nthreads=nthreads, mmax=mmax, mode="GRAD_ONLY", **geom) + v1 = np.sum([myalmdot(alm0[i], alm1[i], lmax) for i in range(1)]) + assert_(np.abs((v1-v2)/v1)<1e-10) + @pmp("lmax", tuple(range(0,70,3))) @pmp("nthreads", (0,1,2)) @@ -243,15 +265,21 @@ def test_adjointness_general(lmmax, npix, spin, nthreads): loc = rng.uniform(0., 1., (npix,2)) loc[:, 0] *= np.pi loc[:, 1] *= 2*np.pi - points1 = ducc0.sht.experimental.synthesis_general(lmax=lmax, mmax=mmax, alm=slm1, loc=loc, spin=spin, epsilon=epsilon, nthreads=nthreads) points2 = rng.uniform(-0.5, 0.5, (loc.shape[0],ncomp)).T + points1 = ducc0.sht.experimental.synthesis_general(lmax=lmax, mmax=mmax, alm=slm1, loc=loc, spin=spin, epsilon=epsilon, nthreads=nthreads) slm2 = ducc0.sht.experimental.adjoint_synthesis_general(lmax=lmax, mmax=mmax, map=points2, loc=loc, spin=spin, epsilon=epsilon, nthreads=nthreads) - print(slm1.shape, slm2.shape) v1 = np.sum([myalmdot(slm1[c, :], slm2[c, :], lmax) for c in range(ncomp)]) v2 = ducc0.misc.vdot(points2.real, points1.real) + ducc0.misc.vdot(points2.imag, points1.imag) assert_allclose(v1, v2, rtol=1e-9) + if spin > 0: + points1 = ducc0.sht.experimental.synthesis_general(lmax=lmax, mmax=mmax, alm=slm1[:1], loc=loc, spin=spin, epsilon=epsilon, nthreads=nthreads, mode="GRAD_ONLY") + slm2 = ducc0.sht.experimental.adjoint_synthesis_general(lmax=lmax, mmax=mmax, map=points2, loc=loc, spin=spin, epsilon=epsilon, nthreads=nthreads, mode="GRAD_ONLY") + v1 = np.sum([myalmdot(slm1[c, :], slm2[c, :], lmax) + for c in range(1)]) + v2 = ducc0.misc.vdot(points2.real, points1.real) + ducc0.misc.vdot(points2.imag, points1.imag) + assert_allclose(v1, v2, rtol=1e-9) @pmp('spin', (0, 1, 2)) @pmp('nthreads', (1, 4)) diff --git a/src/ducc0/sht/sht.cc b/src/ducc0/sht/sht.cc index 110d7b94..2d7222af 100644 --- a/src/ducc0/sht/sht.cc +++ b/src/ducc0/sht/sht.cc @@ -1133,7 +1133,7 @@ DUCC0_NOINLINE static void calc_map2alm_spin (dcmplx * DUCC0_RESTRICT alm, } -DUCC0_NOINLINE static void alm2map_deriv1_kernel(sxdata_v & DUCC0_RESTRICT d, +DUCC0_NOINLINE static void alm2map_spin_gradonly_kernel(sxdata_v & DUCC0_RESTRICT d, const vector &fx, const dcmplx * DUCC0_RESTRICT alm, size_t l, size_t lmax, size_t nv2) { @@ -1177,7 +1177,7 @@ DUCC0_NOINLINE static void alm2map_deriv1_kernel(sxdata_v & DUCC0_RESTRICT d, } } -DUCC0_NOINLINE static void calc_alm2map_deriv1(const dcmplx * DUCC0_RESTRICT alm, +DUCC0_NOINLINE static void calc_alm2map_spin_gradonly(const dcmplx * DUCC0_RESTRICT alm, const Ylmgen &gen, sxdata_v & DUCC0_RESTRICT d, size_t nth) { size_t l,lmax=gen.lmax; @@ 
-1240,7 +1240,7 @@ DUCC0_NOINLINE static void calc_alm2map_deriv1(const dcmplx * DUCC0_RESTRICT alm d.l1m[i] *= d.cfm[i]; d.l2m[i] *= d.cfm[i]; } - alm2map_deriv1_kernel(d, fx, alm, l, lmax, nv2); + alm2map_spin_gradonly_kernel(d, fx, alm, l, lmax, nv2); for (size_t i=0; i &fx, dcmplx * DUCC0_RESTRICT alm, + size_t l, size_t lmax, size_t nv2) + { + size_t lsave=l; + while (l<=lmax) + { + Tv fx10=fx[l+1].a,fx11=fx[l+1].b; + Tv fx20=fx[l+2].a,fx21=fx[l+2].b; + Tv agr1=0, agi1=0; + Tv agr2=0, agi2=0; + for (size_t i=0; ilmax) return; + + const auto &fx = gen.coef; + bool full_ieee=true; + for (size_t i=0; i=0) && + all_of(d.scm[i]>=0); + } + for (size_t i=0; i=0); + if (rescale(d.l1m[i], d.l2m[i], d.scm[i], sharp_ftol)) + getCorfac(d.scm[i], d.cfm[i]); + full_ieee &= all_of(d.scm[i]>=0); + } + vhsum_cmplx_special (agr1,agi1,agr2,agi2,&alm[l]); + l+=2; + } + if (l>lmax) return; + + for (size_t i=0; i DUCC0_NOINLINE static void inner_loop_a2m(SHT_mode mode, vmav,2> &almtmp, @@ -1374,9 +1485,10 @@ template DUCC0_NOINLINE static void inner_loop_a2m(SHT_mode mode, for (size_t i=0; i DUCC0_NOINLINE static void inner_loop_a2m(SHT_mode mode, } } -template DUCC0_NOINLINE static void inner_loop_m2a( +template DUCC0_NOINLINE static void inner_loop_m2a(SHT_mode mode, vmav,2> &almtmp, const cmav,3> &phase, const vector &rdata, Ylmgen &gen, size_t mi) @@ -1508,15 +1620,16 @@ template DUCC0_NOINLINE static void inner_loop_m2a( d.s.p1pr[i]=d.s.p1pi[i]=d.s.p2pr[i]=d.s.p2pi[i]=0.; d.s.p1mr[i]=d.s.p1mi[i]=d.s.p2mr[i]=d.s.p2mi[i]=0.; } - calc_map2alm_spin(almtmp.data(), gen, d.v, nth); + if (mode==STANDARD) + calc_map2alm_spin(almtmp.data(), gen, d.v, nth); + else + calc_map2alm_spin_gradonly(almtmp.data(), gen, d.v, nth); } } //adjust the a_lm for the new algorithm for (size_t l=gen.mhi; l<=gen.lmax; ++l) - { - almtmp(l,0)*=gen.alpha[l]; - almtmp(l,1)*=gen.alpha[l]; - } + for (size_t i=0; i void alm2leg( // associated Legendre transform MR_assert(nm==leg.shape(2), "nm mismatch"); auto nalm=alm.shape(0); auto mmax = get_mmax(mval, lmax); - if (mode==ALM2MAP_DERIV1) + if (mode==DERIV1) { spin=1; MR_assert(nalm==1, "need one a_lm component"); MR_assert(leg.shape(0)==2, "need two Legendre components"); } + else if (mode==GRAD_ONLY) + { + MR_assert(spin>0, "spin must be positive for grad-only SHTs"); + MR_assert(nalm==1, "need one a_lm component"); + MR_assert(leg.shape(0)==2, "need two Legendre components"); + } else { size_t ncomp = (spin==0) ? 1 : 2; @@ -1838,8 +1957,8 @@ template void alm2leg( // associated Legendre transform return; } - auto norm_l = (mode==ALM2MAP_DERIV1) ? Ylmgen::get_d1norm (lmax) : - Ylmgen::get_norm (lmax, spin); + auto norm_l = (mode==DERIV1) ? Ylmgen::get_d1norm (lmax) : + Ylmgen::get_norm (lmax, spin); auto rdata = make_ringdata(theta, lmax, spin); YlmBase base(lmax, mmax, spin); @@ -1875,7 +1994,8 @@ template void leg2alm( // associated Legendre transform const cmav &mstart, // (nm) ptrdiff_t lstride, const cmav &theta, // (nrings) - size_t nthreads) + size_t nthreads, + SHT_mode mode) { // sanity checks auto nrings=theta.shape(0); @@ -1884,9 +2004,21 @@ template void leg2alm( // associated Legendre transform MR_assert(nm==mstart.shape(0), "nm mismatch"); MR_assert(nm==leg.shape(2), "nm mismatch"); auto mmax = get_mmax(mval, lmax); - size_t ncomp = (spin==0) ? 
1 : 2; - MR_assert(alm.shape(0)==ncomp, "incorrect number of a_lm components"); - MR_assert(leg.shape(0)==ncomp, "incorrect number of Legendre components"); + auto nalm = alm.shape(0); + if (mode==DERIV1) + MR_fail("DERIV1 mode not supported in map->alm direction"); + else if (mode==GRAD_ONLY) + { + MR_assert(spin>0, "spin must be positive for grad-only SHTs"); + MR_assert(nalm==1, "need one a_lm component"); + MR_assert(leg.shape(0)==2, "need two Legendre components"); + } + else + { + size_t ncomp = (spin==0) ? 1 : 2; + MR_assert(nalm==ncomp, "incorrect number of a_lm components"); + MR_assert(leg.shape(0)==ncomp, "incorrect number of Legendre components"); + } bool npi, spi; size_t ntheta_tmp; @@ -1897,7 +2029,7 @@ template void leg2alm( // associated Legendre transform theta_tmp(i) = i*pi/(ntheta_tmp-1); auto leg_tmp(vmav,3>::build_noncritical({leg.shape(0), ntheta_tmp, leg.shape(2)}, UNINITIALIZED)); resample_theta(leg, npi, spi, leg_tmp, true, true, spin, nthreads, true); - leg2alm(alm, leg_tmp, spin, lmax, mval, mstart, lstride, theta_tmp, nthreads); + leg2alm(alm, leg_tmp, spin, lmax, mval, mstart, lstride, theta_tmp, nthreads, mode); return; } @@ -1908,22 +2040,22 @@ template void leg2alm( // associated Legendre transform ducc0::execDynamic(nm, nthreads, 1, [&](ducc0::Scheduler &sched) { Ylmgen gen(base); - vmav,2> almtmp({lmax+2,ncomp}, UNINITIALIZED); + vmav,2> almtmp({lmax+2,nalm}, UNINITIALIZED); while (auto rng=sched.getNext()) for(auto mi=rng.lo; mi(almtmp(l,ialm)*norm_l[l]); } }); /* end of parallel region */ @@ -2329,13 +2461,19 @@ void sanity_checks( (nphi.shape(0)==nrings) && (ringstart.shape(0)==nrings), "inconsistency in the number of rings"); - size_t ncomp = 1+(spin>0); - if (mode==ALM2MAP_DERIV1) + if ((mode==DERIV1) || (mode==GRAD_ONLY)) + { + MR_assert(spin>0, "DERIV and GRAD_ONLY modes require spin>0"); +cout << alm.shape(0) <<" " << map.shape(0)<< endl; MR_assert((alm.shape(0)==1) && (map.shape(0)==2), "inconsistent number of components"); + } else + { + size_t ncomp = 1+(spin>0); MR_assert((alm.shape(0)==ncomp) && (map.shape(0)==ncomp), "inconsistent number of components"); + } } template void synthesis( @@ -2462,9 +2600,10 @@ template void adjoint_synthesis( const cmav &phi0, // (nrings) const cmav &ringstart, // (nrings) ptrdiff_t pixstride, - size_t nthreads) + size_t nthreads, + SHT_mode mode) { - sanity_checks(alm, lmax, mstart, map, theta, phi0, nphi, ringstart, spin, MAP2ALM); + sanity_checks(alm, lmax, mstart, map, theta, phi0, nphi, ringstart, spin, mode); vmav mval({mstart.shape(0)}, UNINITIALIZED); for (size_t i=0; i void adjoint_synthesis( auto lego(subarray<3>(leg, {{},{0,ntheta_tmp},{}})); map2leg(map, legi, nphi, phi0, ringstart, pixstride, nthreads); resample_theta(legi, npi, spi, lego, true, true, spin, nthreads, true); - leg2alm(alm, lego, spin, lmax, mval, mstart, lstride, theta_tmp, nthreads); + leg2alm(alm, lego, spin, lmax, mval, mstart, lstride, theta_tmp, nthreads,mode); } else { - auto leg(vmav,3>::build_noncritical({alm.shape(0),theta.shape(0),mstart.shape(0)}, UNINITIALIZED)); + auto leg(vmav,3>::build_noncritical({map.shape(0),theta.shape(0),mstart.shape(0)}, UNINITIALIZED)); map2leg(map, leg, nphi, phi0, ringstart, pixstride, nthreads); - leg2alm(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads); + leg2alm(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads, mode); } } template tuple pseudo_analysis( @@ -2509,12 +2648,12 @@ template tuple pseudo_analysis( auto op = [&](const cmav,2> &xalm, vmav &xmap) { 
synthesis(xalm, xmap, spin, lmax, mstart, lstride, theta, nphi, phi0, - ringstart, pixstride, nthreads, ALM2MAP); + ringstart, pixstride, nthreads, STANDARD); }; auto op_adj = [&](const cmav &xmap, vmav,2> &xalm) { adjoint_synthesis(xalm, xmap, spin, lmax, mstart, lstride, theta, nphi, - phi0, ringstart, pixstride, nthreads); + phi0, ringstart, pixstride, nthreads, STANDARD); }; auto mapnorm = [&](const cmav &xmap) { @@ -2585,7 +2724,7 @@ template tuple pseudo_analysis( template void adjoint_synthesis_2d(vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, - const string &geometry, size_t nthreads) + const string &geometry, size_t nthreads, SHT_mode mode) { auto nphi = cmav::build_uniform({map.shape(1)}, map.shape(2)); auto phi0 = cmav::build_uniform({map.shape(1)}, 0.); @@ -2604,14 +2743,14 @@ template void adjoint_synthesis_2d(vmav,2> &alm, {map.stride(0), 1}); vmav theta({map.shape(1)}, UNINITIALIZED); get_ringtheta_2d(geometry, theta); - adjoint_synthesis(alm, map2, spin, lmax, mstart, 1, theta, nphi, phi0, ringstart, pixstride, nthreads); + adjoint_synthesis(alm, map2, spin, lmax, mstart, 1, theta, nphi, phi0, ringstart, pixstride, nthreads, mode); } template void adjoint_synthesis_2d(vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, - const string &geometry, size_t nthreads); + const string &geometry, size_t nthreads, SHT_mode mode); template void adjoint_synthesis_2d(vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, - const string &geometry, size_t nthreads); + const string &geometry, size_t nthreads, SHT_mode mode); template void analysis_2d( vmav,2> &alm, // (ncomp, *) @@ -2642,7 +2781,7 @@ template void analysis_2d( mval(i) = i; vmav theta({nphi.shape(0)}, UNINITIALIZED); get_ringtheta_2d(geometry, theta); - sanity_checks(alm, lmax, mstart, map, theta, phi0, nphi, ringstart, spin, MAP2ALM); + sanity_checks(alm, lmax, mstart, map, theta, phi0, nphi, ringstart, spin, STANDARD); if ((geometry=="CC")||(geometry=="F1")||(geometry=="MW")||(geometry=="MWflip")) { bool npi, spi; @@ -2672,7 +2811,7 @@ template void analysis_2d( vmav newtheta({ntheta_leg}, UNINITIALIZED); for (size_t i=0; i void analysis_2d( for (size_t k=0; k void adjoint_analysis_2d( mval(i) = i; vmav theta({nphi.shape(0)}, UNINITIALIZED); get_ringtheta_2d(geometry, theta); - sanity_checks(alm, lmax, mstart, map, theta, phi0, nphi, ringstart, spin, MAP2ALM); + sanity_checks(alm, lmax, mstart, map, theta, phi0, nphi, ringstart, spin, STANDARD); if ((geometry=="CC")||(geometry=="F1")||(geometry=="MW")||(geometry=="MWflip")) { bool npo, spo; @@ -2770,7 +2909,7 @@ template void adjoint_analysis_2d( vmav theta_tmp({ntheta_leg}, UNINITIALIZED); for (size_t i=0; i void adjoint_analysis_2d( { auto wgt = get_gridweights(geometry, theta.shape(0)); auto leg(vmav,3>::build_noncritical({map.shape(0), theta.shape(0), mstart.shape(0)}, UNINITIALIZED)); - alm2leg(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads); + alm2leg(alm, leg, spin, lmax, mval, mstart, lstride, theta, nthreads, STANDARD); for (size_t i=0; i,2> &alm, vmav void synthesis_general( const cmav,2> &alm, vmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads) + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode) { MR_assert(loc.shape(1)==2, "last dimension of loc must have size 2"); - size_t ncomp = (spin==0) ? 
1 : 2; - MR_assert(alm.shape(0)==ncomp, "number of components mismatch in alm"); + size_t nalm = (spin==0) ? 1 : ((mode==STANDARD) ? 2 : 1); + size_t nmaps = (spin==0) ? 1 : 2; + MR_assert(alm.shape(0)==nalm, "number of components mismatch in alm"); SphereInterpol inter(lmax, mmax, spin, loc.shape(0), sigma_min, sigma_max, epsilon, nthreads); - auto planes_ = vmav,3>::build_noncritical({ncomp, inter.Ntheta(), (inter.Nphi()+1)/2}, UNINITIALIZED); + auto planes_ = vmav,3>::build_noncritical({nmaps, inter.Ntheta(), (inter.Nphi()+1)/2}, UNINITIALIZED); vmav planes(reinterpret_cast(planes_.data()), - {ncomp, inter.Ntheta(), inter.Nphi()}, + {nmaps, inter.Ntheta(), inter.Nphi()}, {2*planes_.stride(0), 2*planes_.stride(1), 1}); - inter.getPlane(alm, planes); + inter.getPlane(alm, planes, mode); auto xtheta = subarray<1>(loc, {{},{0}}); auto xphi = subarray<1>(loc, {{},{1}}); inter.interpol(planes, 0, 0, xtheta, xphi, map); @@ -2849,41 +2989,41 @@ template void synthesis_general( template void synthesis_general( const cmav,2> &alm, vmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads); + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode); template void synthesis_general( const cmav,2> &alm, vmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads); + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode); template void adjoint_synthesis_general( vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads) + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode) { MR_assert(loc.shape(1)==2, "last dimension of loc must have size 2"); - size_t ncomp = (spin==0) ? 1 : 2; - MR_assert(map.shape(0)==ncomp, "number of components mismatch in map"); + size_t nmaps = (spin==0) ? 
1 : 2; + MR_assert(map.shape(0)==nmaps, "number of components mismatch in map"); SphereInterpol inter(lmax, mmax, spin, loc.shape(0), sigma_min, sigma_max, epsilon, nthreads); - auto planes_ = vmav,3>::build_noncritical({ncomp, inter.Ntheta(), (inter.Nphi()+1)/2}, UNINITIALIZED); + auto planes_ = vmav,3>::build_noncritical({nmaps, inter.Ntheta(), (inter.Nphi()+1)/2}, UNINITIALIZED); vmav planes(reinterpret_cast(planes_.data()), - {ncomp, inter.Ntheta(), inter.Nphi()}, + {nmaps, inter.Ntheta(), inter.Nphi()}, {2*planes_.stride(0), 2*planes_.stride(1), 1}); mav_apply([](auto &v){v=0;}, nthreads, planes); auto xtheta = subarray<1>(loc, {{},{0}}); auto xphi = subarray<1>(loc, {{},{1}}); inter.deinterpol(planes, 0, 0, xtheta, xphi, map); - inter.updateAlm(alm, planes); + inter.updateAlm(alm, planes, mode); } template void adjoint_synthesis_general( vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads); + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode); template void adjoint_synthesis_general( vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads); + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode); template tuple pseudo_analysis_general( vmav,2> &alm, // (ncomp, *) @@ -2907,12 +3047,12 @@ template tuple pseudo_analysis_gener auto op = [&](const cmav,2> &xalm, vmav &xmap) { synthesis_general(xalm, xmap, spin, lmax, mmax, loc, 1e-1*epsilon, - sigma_min, sigma_max, nthreads); + sigma_min, sigma_max, nthreads, STANDARD); }; auto op_adj = [&](const cmav &xmap, vmav,2> &xalm) { adjoint_synthesis_general(xalm, xmap, spin, lmax, mmax, loc, 1e-1*epsilon, - sigma_min, sigma_max, nthreads); + sigma_min, sigma_max, nthreads, STANDARD); }; auto mapnorm = [&](const cmav &xmap) { diff --git a/src/ducc0/sht/sht.h b/src/ducc0/sht/sht.h index a1d89a2a..3f7d06a2 100644 --- a/src/ducc0/sht/sht.h +++ b/src/ducc0/sht/sht.h @@ -37,10 +37,7 @@ namespace detail_sht { using namespace std; -enum SHT_mode { MAP2ALM, - ALM2MAP, - ALM2MAP_DERIV1 - }; +enum SHT_mode { STANDARD, GRAD_ONLY, DERIV1 }; void get_gridweights(const string &type, vmav &wgt); vmav get_gridweights(const string &type, size_t nrings); @@ -55,7 +52,7 @@ template void alm2leg( // associated Legendre transform ptrdiff_t lstride, const cmav &theta, // (nrings) size_t nthreads, - SHT_mode mode=ALM2MAP); + SHT_mode mode); template void leg2alm( // associated Legendre transform vmav,2> &alm, // (ncomp, lmidx) const cmav,3> &leg, // (ncomp, nrings, nm) @@ -65,7 +62,8 @@ template void leg2alm( // associated Legendre transform const cmav &mstart, // (nm) ptrdiff_t lstride, const cmav &theta, // (nrings) - size_t nthreads); + size_t nthreads, + SHT_mode mode); template void map2leg( // FFT const cmav &map, // (ncomp, pix) @@ -97,7 +95,7 @@ template void synthesis( const cmav &ringstart, // (nrings) ptrdiff_t pixstride, size_t nthreads, - SHT_mode mode=ALM2MAP); + SHT_mode mode); template void adjoint_synthesis( vmav,2> &alm, // (ncomp, *) @@ -111,7 +109,8 @@ template void adjoint_synthesis( const cmav &phi0, // (nrings) const cmav &ringstart, // (nrings) ptrdiff_t pixstride, - size_t nthreads); + size_t nthreads, + SHT_mode mode); template tuple pseudo_analysis( vmav,2> &alm, // (ncomp, *) @@ -127,14 +126,14 @@ template tuple pseudo_analysis( ptrdiff_t pixstride, size_t nthreads, size_t 
maxiter, - double epsilom); + double epsilon); template void synthesis_2d(const cmav,2> &alm, vmav &map, - size_t spin, size_t lmax, size_t mmax, const string &geometry, size_t nthreads, SHT_mode mode=ALM2MAP); + size_t spin, size_t lmax, size_t mmax, const string &geometry, size_t nthreads, SHT_mode mode); template void adjoint_synthesis_2d(vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, - const string &geometry, size_t nthreads); + const string &geometry, size_t nthreads, SHT_mode mode); template void analysis_2d(vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, @@ -147,12 +146,12 @@ template void adjoint_analysis_2d(const cmav,2> &alm, template void synthesis_general( const cmav,2> &alm, vmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads); + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode); template void adjoint_synthesis_general( vmav,2> &alm, const cmav &map, size_t spin, size_t lmax, size_t mmax, const cmav &loc, - double epsilon, double sigma_min, double sigma_max, size_t nthreads); + double epsilon, double sigma_min, double sigma_max, size_t nthreads, SHT_mode mode); template tuple pseudo_analysis_general( vmav,2> &alm, // (ncomp, *) @@ -168,9 +167,9 @@ template tuple pseudo_analysis_gener } using detail_sht::SHT_mode; -using detail_sht::ALM2MAP; -using detail_sht::ALM2MAP_DERIV1; -using detail_sht::MAP2ALM; +using detail_sht::STANDARD; +using detail_sht::GRAD_ONLY; +using detail_sht::DERIV1; using detail_sht::get_gridweights; using detail_sht::alm2leg; using detail_sht::leg2alm; diff --git a/src/ducc0/sht/sphere_interpol.h b/src/ducc0/sht/sphere_interpol.h index 1a1cc4c2..af6c2a51 100644 --- a/src/ducc0/sht/sphere_interpol.h +++ b/src/ducc0/sht/sphere_interpol.h @@ -643,11 +643,9 @@ template class SphereInterpol return res; } - void getPlane(const cmav,2> &valm, vmav &planes) const + void getPlane(const cmav,2> &valm, vmav &planes, SHT_mode mode) const { size_t nplanes=1+(spin>0); - auto ncomp = valm.shape(0); - MR_assert(ncomp==nplanes, "number of components mismatch"); Alm_Base ialm(lmax, mmax); MR_assert(ialm.Num_Alms()==valm.shape(1), "bad array dimension"); MR_assert(planes.conformable({nplanes, Ntheta(), Nphi()}), "bad planes shape"); @@ -659,7 +657,7 @@ template class SphereInterpol MR_assert((planes.stride(0)&1)==0, "stride must be even"); MR_assert(2*(mmax+1)<=nphi_b, "aargh"); vmav,3> leg(reinterpret_cast *>(&planes(0,nbtheta,nbphi-1)), - {ncomp, ntheta_s, mmax+1}, {subplanes.stride(0)/2, subplanes.stride(1)/2, 1}); + {nplanes, ntheta_s, mmax+1}, {subplanes.stride(0)/2, subplanes.stride(1)/2, 1}); vmav theta({ntheta_s}, UNINITIALIZED); for (size_t i=0; i class SphereInterpol mstart(i) = ofs-i; ofs += lmax+1-i; } - alm2leg(valm, leg, spin, lmax, mval, mstart, 1, theta, nthreads); + alm2leg(valm, leg, spin, lmax, mval, mstart, 1, theta, nthreads, mode); // make halfcomplex for (size_t iplane=0; iplane class SphereInterpol deinterpolx(kernel->support(), cube, itheta0, iphi0, theta, phi, signal); } - void updateAlm(vmav,2> &valm, vmav &planes) const + void updateAlm(vmav,2> &valm, vmav &planes, SHT_mode mode) const { size_t nplanes=1+(spin>0); - auto ncomp = valm.shape(0); - MR_assert(ncomp>0, "need at least one component"); Alm_Base ialm(lmax, mmax); MR_assert(ialm.Num_Alms()==valm.shape(1), "bad array dimension"); MR_assert(planes.conformable({nplanes, Ntheta(), Nphi()}), "bad planes shape"); @@ -785,7 +781,7 @@ 
template class SphereInterpol MR_assert((planes.stride(0)&1)==0, "stride must be even"); MR_assert(2*(mmax+1)<=nphi_b, "aargh"); vmav,3> leg(reinterpret_cast *>(&planes(0,nbtheta,nbphi-1)), - {ncomp, ntheta_s, mmax+1}, {subplanes.stride(0)/2, subplanes.stride(1)/2, 1}); + {nplanes, ntheta_s, mmax+1}, {subplanes.stride(0)/2, subplanes.stride(1)/2, 1}); vmav theta({ntheta_s}, UNINITIALIZED); for (size_t i=0; i class SphereInterpol planes(iplane, nbtheta+itheta, nbphi-1) = planes(iplane, nbtheta+itheta, nbphi); planes(iplane, nbtheta+itheta, nbphi) = T(0); } - leg2alm(valm, leg, spin, lmax, mval, mstart, 1, theta, nthreads); + leg2alm(valm, leg, spin, lmax, mval, mstart, 1, theta, nthreads, mode); } - void updateAlm(vmav,1> &alm, vmav &planes) const + void updateAlm(vmav,1> &alm, vmav &planes, SHT_mode mode) const { vmav,2> valm(alm.data(), {1,alm.shape(0)}, {0,alm.stride(0)}); - updateAlm(valm, planes); + updateAlm(valm, planes, mode); } }; diff --git a/src/ducc0/sht/totalconvolve.h b/src/ducc0/sht/totalconvolve.h index 58b9e4f0..d240c9c6 100644 --- a/src/ducc0/sht/totalconvolve.h +++ b/src/ducc0/sht/totalconvolve.h @@ -543,7 +543,7 @@ template class ConvolverPlan } } auto subplanes=subarray<3>(planes,{{0, nplanes}, {nbtheta, nbtheta+ntheta_s}, {nbphi, nbphi+nphi_s}}); - synthesis_2d(aarr, subplanes, mbeam, lmax, lmax, "CC", nthreads); + synthesis_2d(aarr, subplanes, mbeam, lmax, lmax, "CC", nthreads, STANDARD); for (size_t iplane=0; iplane(planes, {{iplane},{nbtheta, nbtheta+ntheta_b}, {nbphi, nbphi+nphi_b}}); @@ -659,7 +659,7 @@ template class ConvolverPlan auto subplanes=subarray<3>(planes, {{0, nplanes}, {nbtheta, nbtheta+ntheta_s}, {nbphi,nbphi+nphi_s}}); vmav,2> aarr({nplanes, base.Num_Alms()}, UNINITIALIZED); - adjoint_synthesis_2d(aarr, subplanes, mbeam, lmax, lmax, "CC", nthreads); + adjoint_synthesis_2d(aarr, subplanes, mbeam, lmax, lmax, "CC", nthreads, STANDARD); for (size_t m=0; m<=lmax; ++m) for (size_t l=m; l<=lmax; ++l) if (l>=mbeam)
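The following is a minimal standalone sketch, not part of the patch, restating the component-count convention implied by the new SHT_mode values (STANDARD, GRAD_ONLY, DERIV1) as enforced in sanity_checks() above: STANDARD keeps the previous behaviour (one component for spin 0, two for spin>0 on both the a_lm and map sides), while GRAD_ONLY and DERIV1 require spin>0 and pair a single a_lm component with two map/Legendre components. The helper names below are invented for illustration only.

#include <cassert>
#include <cstddef>

enum SHT_mode { STANDARD, GRAD_ONLY, DERIV1 };

// a_lm components expected on the harmonic side of a transform
size_t expected_nalm(size_t spin, SHT_mode mode)
  { return (mode==STANDARD) ? 1+(spin>0) : 1; }

// map (or Legendre) components expected on the spatial side
size_t expected_nmaps(size_t spin, SHT_mode mode)
  { return (mode==STANDARD) ? 1+(spin>0) : 2; }

int main()
  {
  // STANDARD: one component at spin 0, two (e.g. E/B and Q/U) at spin>0.
  assert(expected_nalm(0, STANDARD)==1 && expected_nmaps(0, STANDARD)==1);
  assert(expected_nalm(2, STANDARD)==2 && expected_nmaps(2, STANDARD)==2);
  // GRAD_ONLY and DERIV1 require spin>0: one a_lm set, two output maps.
  assert(expected_nalm(2, GRAD_ONLY)==1 && expected_nmaps(2, GRAD_ONLY)==2);
  assert(expected_nalm(1, DERIV1)==1 && expected_nmaps(1, DERIV1)==2);
  return 0;
  }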