Skip to content

Commit

Permalink
Fix generalized expression composition
Browse files Browse the repository at this point in the history
Some cases were not properly handled, which prevented correct compilation of an
interesting test case from

        https://github.com/dionhaefner/pyhpc-benchmarks/blob/master/benchmarks/isoneutral_mixing
  • Loading branch information
serge-sans-paille committed Jan 1, 2022
1 parent 831e2c8 commit 745b4a0
Show file tree
Hide file tree
Showing 4 changed files with 314 additions and 7 deletions.
15 changes: 8 additions & 7 deletions pythran/pythonic/include/types/numpy_expr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -561,14 +561,14 @@ namespace types
template <class S>
constexpr size_t count_none(size_t I)
{
return std::is_same<S, none_type>::value;
return I == 0 ? 0 : std::is_same<S, none_type>::value;
}

template <class S, class Sp, class... Ss>
constexpr size_t count_none(size_t I)
{
return std::is_same<S, none_type>::value +
(I == 0 ? 0 : count_none<Sp, Ss...>(I - 1));
return I == 0 ? 0 : (std::is_same<S, none_type>::value +
count_none<Sp, Ss...>(I - 1));
}

template <class BT, class T>
Expand All @@ -586,10 +586,11 @@ namespace types
-> decltype(arg(std::get<J>(ss)...))
{
// we need to adapt_slice to take broadcasting into account
return arg(adapt_slice(std::get<J>(ss),
shp.template shape<J - count_none<S...>(J)>(),
arg.template shape<clamp(J - count_none<S...>(J),
Arg::value - 1)>())...);
return arg(adapt_slice(
std::get<J>(ss),
shp.template shape<clamp(J - count_none<S...>(J), Shp::value - 1)>(),
arg.template shape<clamp(J - count_none<S...>(J),
Arg::value - 1)>())...);
}

/* Expression template for numpy expressions - binary operators
Expand Down
16 changes: 16 additions & 0 deletions pythran/pythonic/include/types/numpy_gexpr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,22 @@ namespace types
"all slices are normalized");
};

template <class... T1>
struct merge_gexpr<std::tuple<>, std::tuple<none_type, T1...>> {
template <size_t I, class S>
auto run(S const &s, std::tuple<> const &t0,
std::tuple<none_type, T1...> const &t1)
-> decltype(tuple_push_head(
std::get<0>(t1), merge_gexpr<std::tuple<>, std::tuple<T1...>>{}
.template run<I + 1>(s, t0, tuple_tail(t1))))
{
return tuple_push_head(
std::get<0>(t1),
merge_gexpr<std::tuple<>, std::tuple<T1...>>{}.template run<I + 1>(
s, t0, tuple_tail(t1)));
}
};

template <class S0, class... T0, class... T1>
struct merge_gexpr<std::tuple<S0, T0...>, std::tuple<none_type, T1...>> {
template <size_t I, class S>
Expand Down
280 changes: 280 additions & 0 deletions pythran/tests/cases/isoneutral.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,280 @@
#from: https://github.com/dionhaefner/pyhpc-benchmarks/blob/master/benchmarks/isoneutral_mixing
import numpy as np

def get_drhodT(salt, temp, p):
rho0 = 1024.0
z0 = 0.0
theta0 = 283.0 - 273.15
grav = 9.81
betaT = 1.67e-4
betaTs = 1e-5
gammas = 1.1e-8

zz = -p - z0
thetas = temp - theta0
return -(betaTs * thetas + betaT * (1 - gammas * grav * zz * rho0)) * rho0


def get_drhodS(salt, temp, p):
betaS = 0.78e-3
rho0 = 1024.0
return betaS * rho0 * np.ones_like(temp)

def dm_taper(sx, iso_slopec, iso_dslope):
"""
tapering function for isopycnal slopes
"""
return 0.5 * (1.0 + np.tanh((-np.abs(sx) + iso_slopec) / iso_dslope))


#pythran export isoneutral_diffusion_pre(
# float[:,:,:],
# float[:,:,:],
# float[:,:,:],
# float[:,:,:],
#
# float[:],
# float[:],
# float[:],
# float[:],
# float[:],
# float[:],
# float[:],
# float[:],
#
# float[:,:,:,:],
# float[:,:,:,:],
#
# float[:],
#
# float[:,:,:],
# float[:,:,:],
# float[:,:,:],
# float[:,:,:],
#
# float[:,:,:,:,:],
# float[:,:,:,:,:],
# float[:,:,:,:,:],
# float[:,:,:,:,:],
#)

def isoneutral_diffusion_pre(
maskT,
maskU,
maskV,
maskW,
dxt,
dxu,
dyt,
dyu,
dzt,
dzw,
cost,
cosu,
salt,
temp,
zt,
K_iso,
K_11,
K_22,
K_33,
Ai_ez,
Ai_nz,
Ai_bx,
Ai_by,
):
"""
Isopycnal diffusion for tracer
following functional formulation by Griffies et al
Code adopted from MOM2.1
"""
epsln = 1e-20
iso_slopec = 1e-3
iso_dslope = 1e-3
K_iso_steep = 50.0
tau = 0

dTdx = np.zeros_like(K_11)
dSdx = np.zeros_like(K_11)
dTdy = np.zeros_like(K_11)
dSdy = np.zeros_like(K_11)
dTdz = np.zeros_like(K_11)
dSdz = np.zeros_like(K_11)

"""
drho_dt and drho_ds at centers of T cells
"""
drdT = maskT * get_drhodT(salt[:, :, :, tau], temp[:, :, :, tau], np.abs(zt))
drdS = maskT * get_drhodS(salt[:, :, :, tau], temp[:, :, :, tau], np.abs(zt))

"""
gradients at top face of T cells
"""
dTdz[:, :, :-1] = (
maskW[:, :, :-1]
* (temp[:, :, 1:, tau] - temp[:, :, :-1, tau])
/ dzw[np.newaxis, np.newaxis, :-1]
)
dSdz[:, :, :-1] = (
maskW[:, :, :-1]
* (salt[:, :, 1:, tau] - salt[:, :, :-1, tau])
/ dzw[np.newaxis, np.newaxis, :-1]
)

"""
gradients at eastern face of T cells
"""
dTdx[:-1, :, :] = (
maskU[:-1, :, :]
* (temp[1:, :, :, tau] - temp[:-1, :, :, tau])
/ (dxu[:-1, np.newaxis, np.newaxis] * cost[np.newaxis, :, np.newaxis])
)
dSdx[:-1, :, :] = (
maskU[:-1, :, :]
* (salt[1:, :, :, tau] - salt[:-1, :, :, tau])
/ (dxu[:-1, np.newaxis, np.newaxis] * cost[np.newaxis, :, np.newaxis])
)

"""
gradients at northern face of T cells
"""
dTdy[:, :-1, :] = (
maskV[:, :-1, :]
* (temp[:, 1:, :, tau] - temp[:, :-1, :, tau])
/ dyu[np.newaxis, :-1, np.newaxis]
)
dSdy[:, :-1, :] = (
maskV[:, :-1, :]
* (salt[:, 1:, :, tau] - salt[:, :-1, :, tau])
/ dyu[np.newaxis, :-1, np.newaxis]
)


"""
Compute Ai_ez and K11 on center of east face of T cell.
"""
diffloc = np.zeros_like(K_11)
diffloc[1:-2, 2:-2, 1:] = 0.25 * (
K_iso[1:-2, 2:-2, 1:]
+ K_iso[1:-2, 2:-2, :-1]
+ K_iso[2:-1, 2:-2, 1:]
+ K_iso[2:-1, 2:-2, :-1]
)
diffloc[1:-2, 2:-2, 0] = 0.5 * (K_iso[1:-2, 2:-2, 0] + K_iso[2:-1, 2:-2, 0])

sumz = np.zeros_like(K_11)[1:-2, 2:-2]
for kr in range(2):
ki = 0 if kr == 1 else 1
for ip in range(2):
drodxe = (
drdT[1 + ip : -2 + ip, 2:-2, ki:] * dTdx[1:-2, 2:-2, ki:]
+ drdS[1 + ip : -2 + ip, 2:-2, ki:] * dSdx[1:-2, 2:-2, ki:]
)
drodze = (
drdT[1 + ip : -2 + ip, 2:-2, ki:]
* dTdz[1 + ip : -2 + ip, 2:-2, : -1 + kr or None]
+ drdS[1 + ip : -2 + ip, 2:-2, ki:]
* dSdz[1 + ip : -2 + ip, 2:-2, : -1 + kr or None]
)
sxe = -drodxe / (np.minimum(0.0, drodze) - epsln)
taper = dm_taper(sxe, iso_slopec, iso_dslope)
sumz[:, :, ki:] += (
dzw[np.newaxis, np.newaxis, : -1 + kr or None]
* maskU[1:-2, 2:-2, ki:]
* np.maximum(K_iso_steep, diffloc[1:-2, 2:-2, ki:] * taper)
)
Ai_ez[1:-2, 2:-2, ki:, ip, kr] = taper * sxe * maskU[1:-2, 2:-2, ki:]
K_11[1:-2, 2:-2, :] = sumz / (4.0 * dzt[np.newaxis, np.newaxis, :])

"""
Compute Ai_nz and K_22 on center of north face of T cell.
"""
diffloc[:,:,:] = 0
diffloc[2:-2, 1:-2, 1:] = 0.25 * (
K_iso[2:-2, 1:-2, 1:]
+ K_iso[2:-2, 1:-2, :-1]
+ K_iso[2:-2, 2:-1, 1:]
+ K_iso[2:-2, 2:-1, :-1]
)
diffloc[2:-2, 1:-2, 0] = 0.5 * (K_iso[2:-2, 1:-2, 0] + K_iso[2:-2, 2:-1, 0])

sumz = np.zeros_like(K_11)[2:-2, 1:-2]
for kr in range(2):
ki = 0 if kr == 1 else 1
for jp in range(2):
drodyn = (
drdT[2:-2, 1 + jp : -2 + jp, ki:] * dTdy[2:-2, 1:-2, ki:]
+ drdS[2:-2, 1 + jp : -2 + jp, ki:] * dSdy[2:-2, 1:-2, ki:]
)
drodzn = (
drdT[2:-2, 1 + jp : -2 + jp, ki:]
* dTdz[2:-2, 1 + jp : -2 + jp, : -1 + kr or None]
+ drdS[2:-2, 1 + jp : -2 + jp, ki:]
* dSdz[2:-2, 1 + jp : -2 + jp, : -1 + kr or None]
)
syn = -drodyn / (np.minimum(0.0, drodzn) - epsln)
taper = dm_taper(syn, iso_slopec, iso_dslope)
sumz[:, :, ki:] += (
dzw[np.newaxis, np.newaxis, : -1 + kr or None]
* maskV[2:-2, 1:-2, ki:]
* np.maximum(K_iso_steep, diffloc[2:-2, 1:-2, ki:] * taper)
)
Ai_nz[2:-2, 1:-2, ki:, jp, kr] = taper * syn * maskV[2:-2, 1:-2, ki:]
K_22[2:-2, 1:-2, :] = sumz / (4.0 * dzt[np.newaxis, np.newaxis, :])

"""
compute Ai_bx, Ai_by and K33 on top face of T cell.
"""
sumx = np.zeros_like(K_11)[2:-2, 2:-2, :-1]
sumy = np.zeros_like(K_11)[2:-2, 2:-2, :-1]

for kr in range(2):
drodzb = (
drdT[2:-2, 2:-2, kr : -1 + kr or None] * dTdz[2:-2, 2:-2, :-1]
+ drdS[2:-2, 2:-2, kr : -1 + kr or None] * dSdz[2:-2, 2:-2, :-1]
)

# eastward slopes at the top of T cells
for ip in range(2):
drodxb = (
drdT[2:-2, 2:-2, kr : -1 + kr or None]
* dTdx[1 + ip : -3 + ip, 2:-2, kr : -1 + kr or None]
+ drdS[2:-2, 2:-2, kr : -1 + kr or None]
* dSdx[1 + ip : -3 + ip, 2:-2, kr : -1 + kr or None]
)
sxb = -drodxb / (np.minimum(0.0, drodzb) - epsln)
taper = dm_taper(sxb, iso_slopec, iso_dslope)
sumx += (
dxu[1 + ip : -3 + ip, np.newaxis, np.newaxis]
* K_iso[2:-2, 2:-2, :-1]
* taper
* sxb ** 2
* maskW[2:-2, 2:-2, :-1]
)
Ai_bx[2:-2, 2:-2, :-1, ip, kr] = taper * sxb * maskW[2:-2, 2:-2, :-1]

# northward slopes at the top of T cells
for jp in range(2):
facty = cosu[1 + jp : -3 + jp] * dyu[1 + jp : -3 + jp]
drodyb = (
drdT[2:-2, 2:-2, kr : -1 + kr or None]
* dTdy[2:-2, 1 + jp : -3 + jp, kr : -1 + kr or None]
+ drdS[2:-2, 2:-2, kr : -1 + kr or None]
* dSdy[2:-2, 1 + jp : -3 + jp, kr : -1 + kr or None]
)
syb = -drodyb / (np.minimum(0.0, drodzb) - epsln)
taper = dm_taper(syb, iso_slopec, iso_dslope)
sumy += (
facty[np.newaxis, :, np.newaxis]
* K_iso[2:-2, 2:-2, :-1]
* taper
* syb ** 2
* maskW[2:-2, 2:-2, :-1]
)
Ai_by[2:-2, 2:-2, :-1, jp, kr] = taper * syb * maskW[2:-2, 2:-2, :-1]

K_33[2:-2, 2:-2, :-1] = sumx / (4 * dxt[2:-2, np.newaxis, np.newaxis]) + sumy / (
4 * dyt[np.newaxis, 2:-2, np.newaxis] * cost[np.newaxis, 2:-2, np.newaxis]
)
K_33[2:-2, 2:-2, -1] = 0.0

10 changes: 10 additions & 0 deletions pythran/tests/test_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,16 @@ def test_gexpr_composition19(self):
numpy.arange(24),
gexpr_composition19=[NDArray[int, :]])

def test_gexpr_composition20(self):
self.run_test("def gexpr_composition20(a): return (a + 1)[None, 1:, None]",
numpy.arange(24),
gexpr_composition20=[NDArray[int, :]])

def test_gexpr_composition21(self):
self.run_test("def gexpr_composition21(a): return (a[:-1] + 1)[None, 1:, None]",
numpy.arange(24),
gexpr_composition21=[NDArray[int, :]])


def test_gexpr_copy0(self):
self.run_test("def gexpr_copy0(a,b): a[:,0] = b[:,0]; return a",
Expand Down

0 comments on commit 745b4a0

Please sign in to comment.