diff --git a/doc/release/1.10.0-notes.rst b/doc/release/1.10.0-notes.rst index 43dc4b5c6d0d..ec54b1f14e9b 100644 --- a/doc/release/1.10.0-notes.rst +++ b/doc/release/1.10.0-notes.rst @@ -126,6 +126,19 @@ interpolation behavior. NumPy arrays are supported as input for ``pad_width``, and an exception is raised if its values are not of integral type. +More system C99 complex functions detected and used +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +All of the functions ``in complex.h`` are now detected. There are new +fallback implementations of the following functions. + +* npy_ctan, +* npy_cacos, npy_casin, npy_catan +* npy_ccosh, npy_csinh, npy_ctanh, +* npy_cacosh, npy_casinh, npy_catanh + +As a result of these improvements, there will be some small changes in +returned values, especially for corner cases. + Changes ======= diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index 0137e05566af..8f5d8e6c7ff1 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -259,7 +259,7 @@ float npy_nextafterf(float x, float y); float npy_spacingf(float x); /* - * float C99 math functions + * long double C99 math functions */ npy_longdouble npy_sinl(npy_longdouble x); @@ -425,6 +425,19 @@ npy_cdouble npy_csqrt(npy_cdouble z); npy_cdouble npy_ccos(npy_cdouble z); npy_cdouble npy_csin(npy_cdouble z); +npy_cdouble npy_ctan(npy_cdouble z); + +npy_cdouble npy_ccosh(npy_cdouble z); +npy_cdouble npy_csinh(npy_cdouble z); +npy_cdouble npy_ctanh(npy_cdouble z); + +npy_cdouble npy_cacos(npy_cdouble z); +npy_cdouble npy_casin(npy_cdouble z); +npy_cdouble npy_catan(npy_cdouble z); + +npy_cdouble npy_cacosh(npy_cdouble z); +npy_cdouble npy_casinh(npy_cdouble z); +npy_cdouble npy_catanh(npy_cdouble z); /* * Single precision complex functions @@ -440,6 +453,20 @@ npy_cfloat npy_csqrtf(npy_cfloat z); npy_cfloat npy_ccosf(npy_cfloat z); npy_cfloat npy_csinf(npy_cfloat z); +npy_cfloat npy_ctanf(npy_cfloat z); + 
+npy_cfloat npy_ccoshf(npy_cfloat z); +npy_cfloat npy_csinhf(npy_cfloat z); +npy_cfloat npy_ctanhf(npy_cfloat z); + +npy_cfloat npy_cacosf(npy_cfloat z); +npy_cfloat npy_casinf(npy_cfloat z); +npy_cfloat npy_catanf(npy_cfloat z); + +npy_cfloat npy_cacoshf(npy_cfloat z); +npy_cfloat npy_casinhf(npy_cfloat z); +npy_cfloat npy_catanhf(npy_cfloat z); + /* * Extended precision complex functions @@ -455,6 +482,20 @@ npy_clongdouble npy_csqrtl(npy_clongdouble z); npy_clongdouble npy_ccosl(npy_clongdouble z); npy_clongdouble npy_csinl(npy_clongdouble z); +npy_clongdouble npy_ctanl(npy_clongdouble z); + +npy_clongdouble npy_ccoshl(npy_clongdouble z); +npy_clongdouble npy_csinhl(npy_clongdouble z); +npy_clongdouble npy_ctanhl(npy_clongdouble z); + +npy_clongdouble npy_cacosl(npy_clongdouble z); +npy_clongdouble npy_casinl(npy_clongdouble z); +npy_clongdouble npy_catanl(npy_clongdouble z); + +npy_clongdouble npy_cacoshl(npy_clongdouble z); +npy_clongdouble npy_casinhl(npy_clongdouble z); +npy_clongdouble npy_catanhl(npy_clongdouble z); + /* * Functions that set the floating point error diff --git a/numpy/core/setup_common.py b/numpy/core/setup_common.py index 0b18bc6c62fa..6abbe5ff2d64 100644 --- a/numpy/core/setup_common.py +++ b/numpy/core/setup_common.py @@ -107,6 +107,7 @@ def check_api_version(apiversion, codegen_dir): # sse headers only enabled automatically on amd64/x32 builds "xmmintrin.h", # SSE "emmintrin.h", # SSE2 + "features.h", # for glibc version linux ] # optional gcc compiler builtins and their call arguments and optional a @@ -138,23 +139,29 @@ def check_api_version(apiversion, codegen_dir): OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"] # Subset of OPTIONAL_STDFUNCS which may alreay have HAVE_* defined by Python.h -OPTIONAL_STDFUNCS_MAYBE = ["expm1", "log1p", "acosh", "atanh", "asinh", "hypot", - "copysign", "ftello", "fseeko"] +OPTIONAL_STDFUNCS_MAYBE = [ + "expm1", "log1p", "acosh", "atanh", "asinh", "hypot", "copysign", + "ftello", 
"fseeko" + ] # C99 functions: float and long double versions -C99_FUNCS = ["sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", "floor", - "ceil", "rint", "trunc", "sqrt", "log10", "log", "log1p", "exp", - "expm1", "asin", "acos", "atan", "asinh", "acosh", "atanh", - "hypot", "atan2", "pow", "fmod", "modf", 'frexp', 'ldexp', - "exp2", "log2", "copysign", "nextafter", "cbrt"] - +C99_FUNCS = [ + "sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs", "floor", "ceil", + "rint", "trunc", "sqrt", "log10", "log", "log1p", "exp", "expm1", + "asin", "acos", "atan", "asinh", "acosh", "atanh", "hypot", "atan2", + "pow", "fmod", "modf", 'frexp', 'ldexp', "exp2", "log2", "copysign", + "nextafter", "cbrt" + ] C99_FUNCS_SINGLE = [f + 'f' for f in C99_FUNCS] C99_FUNCS_EXTENDED = [f + 'l' for f in C99_FUNCS] - -C99_COMPLEX_TYPES = ['complex double', 'complex float', 'complex long double'] - -C99_COMPLEX_FUNCS = ['creal', 'cimag', 'cabs', 'carg', 'cexp', 'csqrt', 'clog', - 'ccos', 'csin', 'cpow'] +C99_COMPLEX_TYPES = [ + 'complex double', 'complex float', 'complex long double' + ] +C99_COMPLEX_FUNCS = [ + "cabs", "cacos", "cacosh", "carg", "casin", "casinh", "catan", + "catanh", "ccos", "ccosh", "cexp", "cimag", "clog", "conj", "cpow", + "cproj", "creal", "csin", "csinh", "csqrt", "ctan", "ctanh" + ] def fname2def(name): return "HAVE_%s" % name.upper() diff --git a/numpy/core/src/npymath/npy_math_complex.c.src b/numpy/core/src/npymath/npy_math_complex.c.src index 9cbfd32aeeb4..94cb7e29d865 100644 --- a/numpy/core/src/npymath/npy_math_complex.c.src +++ b/numpy/core/src/npymath/npy_math_complex.c.src @@ -1,10 +1,13 @@ /* + * vim: syntax=c + * * Implement some C99-compatible complex math functions * - * Most of the code is taken from the msun library in FreeBSD (HEAD @ 30th June - * 2009), under the following license: + * Most of the code is taken from the msun library in FreeBSD (HEAD @ 4th + * October 2013), under the following license: * - * Copyright (c) 2007 David Schultz + * 
Copyright (c) 2007, 2011 David Schultz + * Copyright (c) 2012 Stephen Montgomery-Smith * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,9 +34,12 @@ #include "npy_math_common.h" #include "npy_math_private.h" -/*========================================================== - * Custom implementation of missing complex C99 functions - *=========================================================*/ + +#define raise_inexact() do { volatile npy_float junk = 1 + tiny; } while(0) + + +static npy_float tiny = 3.9443045e-31f; + /**begin repeat * #type = npy_float, npy_double, npy_longdouble# @@ -41,23 +47,171 @@ * #c = f, , l# * #C = F, , L# * #TMAX = FLT_MAX, DBL_MAX, LDBL_MAX# + * #TMIN = FLT_MIN, DBL_MIN, LDBL_MIN# + * #TMANT_DIG = FLT_MANT_DIG, DBL_MANT_DIG, LDBL_MANT_DIG# + * #TEPS = FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON# + * #precision = 1, 2, 3# */ + +/*========================================================== + * Constants + *=========================================================*/ +static const @ctype@ c_1@c@ = {1.0@C@, 0.0}; +static const @ctype@ c_half@c@ = {0.5@C@, 0.0}; +static const @ctype@ c_i@c@ = {0.0, 1.0@C@}; +static const @ctype@ c_ihalf@c@ = {0.0, 0.5@C@}; + +/*========================================================== + * Helper functions + * + * These are necessary because we do not count on using a + * C99 compiler. 
+ *=========================================================*/ +static NPY_INLINE +@ctype@ +cadd@c@(@ctype@ a, @ctype@ b) +{ + return npy_cpack@c@(npy_creal@c@(a) + npy_creal@c@(b), + npy_cimag@c@(a) + npy_cimag@c@(b)); +} + +static NPY_INLINE +@ctype@ +csub@c@(@ctype@ a, @ctype@ b) +{ + return npy_cpack@c@(npy_creal@c@(a) - npy_creal@c@(b), + npy_cimag@c@(a) - npy_cimag@c@(b)); +} + +static NPY_INLINE +@ctype@ +cmul@c@(@ctype@ a, @ctype@ b) +{ + @type@ ar, ai, br, bi; + ar = npy_creal@c@(a); + ai = npy_cimag@c@(a); + br = npy_creal@c@(b); + bi = npy_cimag@c@(b); + return npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br); +} + +static NPY_INLINE +@ctype@ +cdiv@c@(@ctype@ a, @ctype@ b) +{ + @type@ ar, ai, br, bi, abs_br, abs_bi; + ar = npy_creal@c@(a); + ai = npy_cimag@c@(a); + br = npy_creal@c@(b); + bi = npy_cimag@c@(b); + abs_br = npy_fabs@c@(br); + abs_bi = npy_fabs@c@(bi); + + if (abs_br >= abs_bi) { + if (abs_br == 0 && abs_bi == 0) { + /* divide by zeros should yield a complex inf or nan */ + return npy_cpack@c@(ar/abs_br, ai/abs_bi); + } + else { + @type@ rat = bi/br; + @type@ scl = 1.0@C@/(br+bi*rat); + return npy_cpack@c@((ar + ai*rat)*scl, (ai - ar*rat)*scl); + } + } + else { + @type@ rat = br/bi; + @type@ scl = 1.0@C@/(bi + br*rat); + return npy_cpack@c@((ar*rat + ai)*scl, (ai*rat - ar)*scl); + } +} + +static NPY_INLINE +@ctype@ +cneg@c@(@ctype@ a) +{ + return npy_cpack@c@(-npy_creal@c@(a), -npy_cimag@c@(a)); +} + +static NPY_INLINE +@ctype@ +cmuli@c@(@ctype@ a) +{ + return npy_cpack@c@(-npy_cimag@c@(a), npy_creal@c@(a)); +} + +/*========================================================== + * Custom implementation of missing complex C99 functions + *=========================================================*/ + #ifndef HAVE_CABS@C@ -@type@ npy_cabs@c@(@ctype@ z) +@type@ +npy_cabs@c@(@ctype@ z) { return npy_hypot@c@(npy_creal@c@(z), npy_cimag@c@(z)); } #endif #ifndef HAVE_CARG@C@ -@type@ npy_carg@c@(@ctype@ z) +@type@ +npy_carg@c@(@ctype@ z) { return 
npy_atan2@c@(npy_cimag@c@(z), npy_creal@c@(z)); } #endif +/* + * cexp and (ccos, csin)h functions need to calculate exp scaled by another + * number. This can be difficult if exp(x) overflows. By doing this way, we + * don't risk overflowing exp. This likely raises floating-point exceptions, + * if we decide that we care. + * + * This is only useful over a limited range, (see below) an expects that the + * input values are in this range. + * + * This is based on the technique used in FreeBSD's __frexp_exp and + * __ldexp_(c)exp functions by David Schultz. + * + * SCALED_CEXP_LOWER = log(FLT_MAX) + * SCALED_CEXP_UPPER = log(2) + log(FLT_MAX) - log(FLT_TRUE_MIN), + * where FLT_TRUE_MIN is the smallest possible subnormal number. + */ + +#define SCALED_CEXP_LOWERF 88.722839f +#define SCALED_CEXP_UPPERF 192.69492f +#define SCALED_CEXP_LOWER 710.47586007394386 +#define SCALED_CEXP_UPPER 1454.9159319953251 +#define SCALED_CEXP_LOWERL 11357.216553474703895L +#define SCALED_CEXP_UPPERL 22756.021937783004509L + +static +@ctype@ +_npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt) +{ +#if @precision@ == 1 + const npy_int k = 235; +#endif +#if @precision@ == 2 + const npy_int k = 1799; +#endif +#if @precision@ == 3 + const npy_int k = 19547; +#endif + const @type@ kln2 = k * NPY_LOGE2@c@; + @type@ mant, mantcos, mantsin; + npy_int ex, excos, exsin; + + mant = npy_frexp@c@(npy_exp@c@(x - kln2), &ex); + mantcos = npy_frexp@c@(npy_cos@c@(y), &excos); + mantsin = npy_frexp@c@(npy_sin@c@(y), &exsin); + + expt += ex + k; + return npy_cpack@c@( npy_ldexp@c@(mant * mantcos, expt + excos), + npy_ldexp@c@(mant * mantsin, expt + exsin)); +} + #ifndef HAVE_CEXP@C@ -@ctype@ npy_cexp@c@(@ctype@ z) +@ctype@ +npy_cexp@c@(@ctype@ z) { @type@ x, c, s; @type@ r, i; @@ -67,46 +221,60 @@ i = npy_cimag@c@(z); if (npy_isfinite(r)) { - x = npy_exp@c@(r); + if (r >= SCALED_CEXP_LOWER@C@ && r <= SCALED_CEXP_UPPER@C@) { + ret = _npy_scaled_cexp@c@(r, i, 0); + } + else { + x = npy_exp@c@(r); - c = 
npy_cos@c@(i); - s = npy_sin@c@(i); + c = npy_cos@c@(i); + s = npy_sin@c@(i); - if (npy_isfinite(i)) { - ret = npy_cpack@c@(x * c, x * s); - } else { - ret = npy_cpack@c@(NPY_NAN, npy_copysign@c@(NPY_NAN, i)); + if (npy_isfinite(i)) { + ret = npy_cpack@c@(x * c, x * s); + } + else { + ret = npy_cpack@c@(NPY_NAN@C@, npy_copysign@c@(NPY_NAN@C@, i)); + } } - } else if (npy_isnan(r)) { + } + else if (npy_isnan(r)) { /* r is nan */ if (i == 0) { - ret = npy_cpack@c@(r, 0); - } else { - ret = npy_cpack@c@(r, npy_copysign@c@(NPY_NAN, i)); + ret = z; + } + else { + ret = npy_cpack@c@(r, npy_copysign@c@(NPY_NAN@C@, i)); } - } else { + } + else { /* r is +- inf */ if (r > 0) { if (i == 0) { ret = npy_cpack@c@(r, i); - } else if (npy_isfinite(i)) { + } + else if (npy_isfinite(i)) { c = npy_cos@c@(i); s = npy_sin@c@(i); ret = npy_cpack@c@(r * c, r * s); - } else { + } + else { /* x = +inf, y = +-inf | nan */ - ret = npy_cpack@c@(r, NPY_NAN); + npy_set_floatstatus_invalid(); + ret = npy_cpack@c@(r, NPY_NAN@C@); } - } else { + } + else { if (npy_isfinite(i)) { x = npy_exp@c@(r); c = npy_cos@c@(i); s = npy_sin@c@(i); ret = npy_cpack@c@(x * c, x * s); - } else { + } + else { /* x = -inf, y = nan | +i inf */ ret = npy_cpack@c@(0, 0); } @@ -118,9 +286,72 @@ #endif #ifndef HAVE_CLOG@C@ -@ctype@ npy_clog@c@(@ctype@ z) +/* algorithm from cpython, rev. d86f5686cef9 + * + * The usual formula for the real part is log(hypot(z.real, z.imag)). + * There are four situations where this formula is potentially + * problematic: + * + * (1) the absolute value of z is subnormal. Then hypot is subnormal, + * so has fewer than the usual number of bits of accuracy, hence may + * have large relative error. This then gives a large absolute error + * in the log. This can be solved by rescaling z by a suitable power + * of 2. + * + * (2) the absolute value of z is greater than DBL_MAX (e.g. when both + * z.real and z.imag are within a factor of 1/sqrt(2) of DBL_MAX) + * Again, rescaling solves this. 
+ * + * (3) the absolute value of z is close to 1. In this case it's + * difficult to achieve good accuracy, at least in part because a + * change of 1ulp in the real or imaginary part of z can result in a + * change of billions of ulps in the correctly rounded answer. + * + * (4) z = 0. The simplest thing to do here is to call the + * floating-point log with an argument of 0, and let its behaviour + * (returning -infinity, signaling a floating-point exception, setting + * errno, or whatever) determine that of c_log. So the usual formula + * is fine here. +*/ +@ctype@ +npy_clog@c@(@ctype@ z) { - return npy_cpack@c@(npy_log@c@ (npy_cabs@c@ (z)), npy_carg@c@ (z)); + @type@ ax = npy_fabs@c@(npy_creal@c@(z)); + @type@ ay = npy_fabs@c@(npy_cimag@c@(z)); + @type@ rr, ri; + + if (ax > @TMAX@/4 || ay > @TMAX@/4) { + rr = npy_log@c@(npy_hypot@c@(ax/2, ay/2)) + NPY_LOGE2@c@; + } + else if (ax < @TMIN@ && ay < @TMIN@) { + if (ax > 0 || ay > 0) { + /* catch cases where hypot(ax, ay) is subnormal */ + rr = npy_log@c@(npy_hypot@c@(npy_ldexp@c@(ax, @TMANT_DIG@), + npy_ldexp@c@(ay, @TMANT_DIG@))) - @TMANT_DIG@*NPY_LOGE2@c@; + } + else { + /* log(+/-0 +/- 0i) */ + /* raise divide-by-zero floating point exception */ + rr = -1.0@c@ / npy_creal@c@(z); + rr = npy_copysign@c@(rr, -1); + ri = npy_carg@c@(z); + return npy_cpack@c@(rr, ri); + } + } + else { + @type@ h = npy_hypot@c@(ax, ay); + if (0.71 <= h && h <= 1.73) { + @type@ am = ax > ay ? ax : ay; /* max(ax, ay) */ + @type@ an = ax > ay ? ay : ax; /* min(ax, ay) */ + rr = npy_log1p@c@((am-1)*(am+1)+an*an)/2; + } + else { + rr = npy_log@c@(h); + } + } + ri = npy_carg@c@(z); + + return npy_cpack@c@(rr, ri); } #endif @@ -129,7 +360,8 @@ /* We risk spurious overflow for components >= DBL_MAX / (1 + sqrt(2)). */ #define THRESH (@TMAX@ / (1 + NPY_SQRT2@c@)) -@ctype@ npy_csqrt@c@(@ctype@ z) +@ctype@ +npy_csqrt@c@(@ctype@ z) { @ctype@ result; @type@ a, b; @@ -140,10 +372,12 @@ b = npy_cimag@c@(z); /* Handle special cases. 
*/ - if (a == 0 && b == 0) + if (a == 0 && b == 0) { return (npy_cpack@c@(0, b)); - if (npy_isinf(b)) - return (npy_cpack@c@(NPY_INFINITY, b)); + } + if (npy_isinf(b)) { + return (npy_cpack@c@(NPY_INFINITY@C@, b)); + } if (npy_isnan(a)) { t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ return (npy_cpack@c@(a, t)); /* return NaN + NaN i */ @@ -155,10 +389,12 @@ * csqrt(-inf + NaN i) = NaN +- inf i * csqrt(-inf + y i) = 0 + inf i */ - if (npy_signbit(a)) + if (npy_signbit(a)) { return (npy_cpack@c@(npy_fabs@c@(b - b), npy_copysign@c@(a, b))); - else + } + else { return (npy_cpack@c@(a, npy_copysign@c@(b - b, b))); + } } /* * The remaining special case (b is NaN) is handled just fine by @@ -170,61 +406,1333 @@ a *= 0.25; b *= 0.25; scale = 1; - } else { + } + else { scale = 0; } /* Algorithm 312, CACM vol 10, Oct 1967. */ if (a >= 0) { - t = npy_sqrt@c@((a + npy_hypot@c@(a, b)) * 0.5); + t = npy_sqrt@c@((a + npy_hypot@c@(a, b)) * 0.5@c@); result = npy_cpack@c@(t, b / (2 * t)); - } else { - t = npy_sqrt@c@((-a + npy_hypot@c@(a, b)) * 0.5); + } + else { + t = npy_sqrt@c@((-a + npy_hypot@c@(a, b)) * 0.5@c@); result = npy_cpack@c@(npy_fabs@c@(b) / (2 * t), npy_copysign@c@(t, b)); } /* Rescale. */ - if (scale) + if (scale) { return (npy_cpack@c@(npy_creal@c@(result) * 2, npy_cimag@c@(result))); - else + } + else { return (result); + } } #undef THRESH #endif -#ifndef HAVE_CPOW@C@ -@ctype@ npy_cpow@c@ (@ctype@ x, @ctype@ y) +/* + * Always use this function because of the multiplication for small + * integer powers, but in the body use cpow if it is available. 
+ */ + +/* private function for use in npy_pow{f, ,l} */ +#ifdef HAVE_CPOW@C@ +static @ctype@ +sys_cpow@c@(@ctype@ x, @ctype@ y) { - @ctype@ b; - @type@ br, bi, yr, yi; + __@ctype@_to_c99_cast xcast; + __@ctype@_to_c99_cast ycast; + __@ctype@_to_c99_cast ret; + xcast.npy_z = x; + ycast.npy_z = y; + ret.c99_z = cpow@c@(xcast.c99_z, ycast.c99_z); + return ret.npy_z; +} +#endif - yr = npy_creal@c@(y); - yi = npy_cimag@c@(y); - b = npy_clog@c@(x); - br = npy_creal@c@(b); - bi = npy_cimag@c@(b); - return npy_cexp@c@(npy_cpack@c@(br * yr - bi * yi, br * yi + bi * yr)); -} +@ctype@ +npy_cpow@c@ (@ctype@ a, @ctype@ b) +{ + npy_intp n; + @type@ ar = npy_creal@c@(a); + @type@ br = npy_creal@c@(b); + @type@ ai = npy_cimag@c@(a); + @type@ bi = npy_cimag@c@(b); + @ctype@ r; + + if (br == 0. && bi == 0.) { + return npy_cpack@c@(1., 0.); + } + if (ar == 0. && ai == 0.) { + if (br > 0 && bi == 0) { + return npy_cpack@c@(0., 0.); + } + else { + volatile @type@ tmp = NPY_INFINITY@C@; + /* + * NB: there are four complex zeros; c0 = (+-0, +-0), so that + * unlike for reals, c0**p, with `p` negative is in general + * ill-defined. + * + * c0**z with z complex is also ill-defined. 
+ */ + r = npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); + + /* Raise invalid */ + tmp -= NPY_INFINITY@C@; + ar = tmp; + return r; + } + } + if (bi == 0 && (n=(npy_intp)br) == br) { + if (n == 1) { + /* unroll: handle inf better */ + return npy_cpack@c@(ar, ai); + } + else if (n == 2) { + /* unroll: handle inf better */ + return cmul@c@(a, a); + } + else if (n == 3) { + /* unroll: handle inf better */ + return cmul@c@(a, cmul@c@(a, a)); + } + else if (n > -100 && n < 100) { + @ctype@ p, aa; + npy_intp mask = 1; + if (n < 0) { + n = -n; + } + aa = c_1@c@; + p = npy_cpack@c@(ar, ai); + while (1) { + if (n & mask) { + aa = cmul@c@(aa,p); + } + mask <<= 1; + if (n < mask || mask <= 0) { + break; + } + p = cmul@c@(p,p); + } + r = npy_cpack@c@(npy_creal@c@(aa), npy_cimag@c@(aa)); + if (br < 0) { + r = cdiv@c@(c_1@c@, r); + } + return r; + } + } + +#ifdef HAVE_CPOW@C@ + return sys_cpow@c@(a, b); + +#else + { + @ctype@ loga = npy_clog@c@(a); + + ar = npy_creal@c@(loga); + ai = npy_cimag@c@(loga); + return npy_cexp@c@(npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br)); + } + #endif +} + #ifndef HAVE_CCOS@C@ -@ctype@ npy_ccos@c@(@ctype@ z) +@ctype@ +npy_ccos@c@(@ctype@ z) { - @type@ x, y; + /* ccos(z) = ccosh(I * z) */ + return npy_ccosh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); +} +#endif + +#ifndef HAVE_CSIN@C@ +@ctype@ +npy_csin@c@(@ctype@ z) +{ + /* csin(z) = -I * csinh(I * z) */ + z = npy_csinh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); + return npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z)); +} +#endif + +#ifndef HAVE_CTAN@C@ +@ctype@ +npy_ctan@c@(@ctype@ z) +{ + /* ctan(z) = -I * ctanh(I * z) */ + z = npy_ctanh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); + return (npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z))); +} +#endif + +#ifndef HAVE_CCOSH@C@ +/* + * Taken from the msun library in FreeBSD, rev 226599. + * + * Hyperbolic cosine of a complex argument z = x + i y. + * + * cosh(z) = cosh(x+iy) + * = cosh(x) cos(y) + i sinh(x) sin(y). 
+ * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. + * + * CCOSH_BIG is chosen such that + * spacing(0.5 * exp(CCOSH_BIG)) > 0.5*exp(-CCOSH_BIG) + * although the exact value assigned to CCOSH_BIG is not so important + */ +@ctype@ +npy_ccosh@c@(@ctype@ z) +{ +#if @precision@ == 1 + const npy_float CCOSH_BIG = 9.0f; + const npy_float CCOSH_HUGE = 1.70141183e+38f; +#endif +#if @precision@ == 2 + const npy_double CCOSH_BIG = 22.0; + const npy_double CCOSH_HUGE = 8.9884656743115795e+307; +#endif +#if @precision@ >= 3 +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + const npy_longdouble CCOSH_BIG = 22.0L; + const npy_longdouble CCOSH_HUGE = 8.9884656743115795e+307L; +#else + const npy_longdouble CCOSH_BIG = 24.0L; + const npy_longdouble CCOSH_HUGE = 5.94865747678615882543e+4931L; +#endif +#endif + + @type@ x, y, h, absx; + npy_int xfinite, yfinite; + x = npy_creal@c@(z); y = npy_cimag@c@(z); - return npy_cpack@c@(npy_cos@c@(x) * npy_cosh@c@(y), -(npy_sin@c@(x) * npy_sinh@c@(y))); + + xfinite = npy_isfinite(x); + yfinite = npy_isfinite(y); + + /* Handle the nearly-non-exceptional cases where x and y are finite. 
*/ + if (xfinite && yfinite) { + if (y == 0) { + return npy_cpack@c@(npy_cosh@c@(x), x * y); + } + absx = npy_fabs@c@(x); + if (absx < CCOSH_BIG) { + /* small x: normal case */ + return npy_cpack@c@(npy_cosh@c@(x) * npy_cos@c@(y), + npy_sinh@c@(x) * npy_sin@c@(y)); + } + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (absx < SCALED_CEXP_LOWER@C@) { + /* x < 710: exp(|x|) won't overflow */ + h = npy_exp@c@(absx) * 0.5@c@; + return npy_cpack@c@(h * npy_cos@c@(y), + npy_copysign@c@(h, x) * npy_sin@c@(y)); + } + else if (absx < SCALED_CEXP_UPPER@C@) { + /* x < 1455: scale to avoid overflow */ + z = _npy_scaled_cexp@c@(absx, y, -1); + return npy_cpack@c@(npy_creal@c@(z), + npy_cimag@c@(z) * npy_copysign@c@(1, x)); + } + else { + /* x >= 1455: the result always overflows */ + h = CCOSH_HUGE * x; + return npy_cpack@c@(h * h * npy_cos@c@(y), h * npy_sin@c@(y)); + } + } + + /* + * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as d(NaN). + */ + if (x == 0 && !yfinite) { + return npy_cpack@c@(y - y, npy_copysign@c@(0, x * (y - y))); + } + + /* + * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. + * + * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. + * The sign of 0 in the result is unspecified. + */ + if (y == 0 && !xfinite) { + return npy_cpack@c@(x * x, npy_copysign@c@(0, x) * y); + } + + /* + * cosh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * cosh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (xfinite && !yfinite) { + return npy_cpack@c@(y - y, x * (y - y)); + } + + /* + * cosh(+-Inf + I NaN) = +Inf + I d(NaN). 
+ * + * cosh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) + */ + if (npy_isinf(x)) { + if (!yfinite) { + return npy_cpack@c@(x * x, x * (y - y)); + } + return npy_cpack@c@((x * x) * npy_cos@c@(y), x * npy_sin@c@(y)); + } + + /* + * cosh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * cosh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * cosh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y)); } +#undef CCOSH_BIG +#undef CCOSH_HUGE #endif -#ifndef HAVE_CSIN@C@ -@ctype@ npy_csin@c@(@ctype@ z) +#ifndef HAVE_CSINH@C@ +/* + * Taken from the msun library in FreeBSD, rev 226599. + * + * Hyperbolic sine of a complex argument z = x + i y. + * + * sinh(z) = sinh(x+iy) + * = sinh(x) cos(y) + i cosh(x) sin(y). + * + * Exceptional values are noted in the comments within the source code. + * These values and the return value were taken from n1124.pdf. 
+ */ +@ctype@ +npy_csinh@c@(@ctype@ z) +{ +#if @precision@ == 1 + const npy_float CSINH_BIG = 9.0f; + const npy_float CSINH_HUGE = 1.70141183e+38f; +#endif +#if @precision@ == 2 + const npy_double CSINH_BIG = 22.0; + const npy_double CSINH_HUGE = 8.9884656743115795e+307; +#endif +#if @precision@ >= 3 +#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE + const npy_longdouble CSINH_BIG = 22.0L; + const npy_longdouble CSINH_HUGE = 8.9884656743115795e+307L; +#else + const npy_longdouble CSINH_BIG = 24.0L; + const npy_longdouble CSINH_HUGE = 5.94865747678615882543e+4931L; +#endif +#endif + + @type@ x, y, h, absx; + npy_int xfinite, yfinite; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + + xfinite = npy_isfinite(x); + yfinite = npy_isfinite(y); + + /* Handle the nearly-non-exceptional cases where x and y are finite. */ + if (xfinite && yfinite) { + if (y == 0) { + return npy_cpack@c@(npy_sinh@c@(x), y); + } + absx = npy_fabs@c@(x); + if (absx < CSINH_BIG) { + /* small x: normal case */ + return npy_cpack@c@(npy_sinh@c@(x) * npy_cos@c@(y), + npy_cosh@c@(x) * npy_sin@c@(y)); + } + + /* |x| >= 22, so cosh(x) ~= exp(|x|) */ + if (absx < SCALED_CEXP_LOWER@C@) { + /* x < 710: exp(|x|) won't overflow */ + h = npy_exp@c@(npy_fabs@c@(x)) * 0.5@c@; + return npy_cpack@c@(npy_copysign@c@(h, x) * npy_cos@c@(y), + h * npy_sin@c@(y)); + } + else if (x < SCALED_CEXP_UPPER@C@) { + /* x < 1455: scale to avoid overflow */ + z = _npy_scaled_cexp@c@(absx, y, -1); + return npy_cpack@c@(npy_creal@c@(z) * npy_copysign@c@(1, x), + npy_cimag@c@(z)); + } + else { + /* x >= 1455: the result always overflows */ + h = CSINH_HUGE * x; + return npy_cpack@c@(h * npy_cos@c@(y), h * h * npy_sin@c@(y)); + } + } + + /* + * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. + * The sign of 0 in the result is unspecified. Choice = normally + * the same as dNaN. Raise the invalid floating-point exception. + * + * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). + * The sign of 0 in the result is unspecified. 
Choice = normally + * the same as d(NaN). + */ + if (x == 0 && !yfinite) { + return npy_cpack@c@(npy_copysign@c@(0, x * (y - y)), y - y); + } + + /* + * sinh(+-Inf +- I 0) = +-Inf + I +-0. + * + * sinh(NaN +- I 0) = d(NaN) + I +-0. + */ + if (y == 0 && !xfinite) { + if (npy_isnan(x)) { + return z; + } + return npy_cpack@c@(x, npy_copysign@c@(0, y)); + } + + /* + * sinh(x +- I Inf) = dNaN + I dNaN. + * Raise the invalid floating-point exception for finite nonzero x. + * + * sinh(x + I NaN) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero x. Choice = don't raise (except for signaling NaNs). + */ + if (xfinite && !yfinite) { + return npy_cpack@c@(y - y, x * (y - y)); + } + + /* + * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). + * The sign of Inf in the result is unspecified. Choice = normally + * the same as d(NaN). + * + * sinh(+-Inf +- I Inf) = +Inf + I dNaN. + * The sign of Inf in the result is unspecified. Choice = always +. + * Raise the invalid floating-point exception. + * + * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) + */ + if (!xfinite && !npy_isnan(x)) { + if (!yfinite) { + return npy_cpack@c@(x * x, x * (y - y)); + } + return npy_cpack@c@(x * npy_cos@c@(y), + NPY_INFINITY@C@ * npy_sin@c@(y)); + } + + /* + * sinh(NaN + I NaN) = d(NaN) + I d(NaN). + * + * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception. + * Choice = raise. + * + * sinh(NaN + I y) = d(NaN) + I d(NaN). + * Optionally raises the invalid floating-point exception for finite + * nonzero y. Choice = don't raise (except for signaling NaNs). + */ + return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y)); +} +#undef CSINH_BIG +#undef CSINH_HUGE +#endif + +#ifndef HAVE_CTANH@C@ +/* + * Taken from the msun library in FreeBSD, rev 226600. + * + * Hyperbolic tangent of a complex argument z = x + i y. + * + * The algorithm is from: + * + * W. Kahan. 
Branch Cuts for Complex Elementary Functions or Much + * Ado About Nothing's Sign Bit. In The State of the Art in + * Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987. + * + * Method: + * + * Let t = tan(x) + * beta = 1/cos^2(y) + * s = sinh(x) + * rho = cosh(x) + * + * We have: + * + * tanh(z) = sinh(z) / cosh(z) + * + * sinh(x) cos(y) + i cosh(x) sin(y) + * = --------------------------------- + * cosh(x) cos(y) + i sinh(x) sin(y) + * + * cosh(x) sinh(x) / cos^2(y) + i tan(y) + * = ------------------------------------- + * 1 + sinh^2(x) / cos^2(y) + * + * beta rho s + i t + * = ---------------- + * 1 + beta s^2 + * + * Modifications: + * + * I omitted the original algorithm's handling of overflow in tan(x) after + * verifying with nearpi.c that this can't happen in IEEE single or double + * precision. I also handle large x differently. + */ + +#define TANH_HUGE 22.0 +#define TANHF_HUGE 11.0F +#define TANHL_HUGE 42.0L + +@ctype@ +npy_ctanh@c@(@ctype@ z) { @type@ x, y; + @type@ t, beta, s, rho, denom; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + + /* + * ctanh(NaN + i 0) = NaN + i 0 + * + * ctanh(NaN + i y) = NaN + i NaN for y != 0 + * + * The imaginary part has the sign of x*sin(2*y), but there's no + * special effort to get this right. + * + * ctanh(+-Inf +- i Inf) = +-1 +- 0 + * + * ctanh(+-Inf + i y) = +-1 + 0 sin(2y) for y finite + * + * The imaginary part of the sign is unspecified. This special + * case is only needed to avoid a spurious invalid exception when + * y is infinite. + */ + if (!npy_isfinite(x)) { + if (npy_isnan(x)) { + return npy_cpack@c@(x, (y == 0 ? y : x * y)); + } + return npy_cpack@c@(npy_copysign@c@(1,x), + npy_copysign@c@(0, + npy_isinf(y) ? 
+ y : npy_sin@c@(y) * npy_cos@c@(y))); + } + + /* + * ctanh(x + i NAN) = NaN + i NaN + * ctanh(x +- i Inf) = NaN + i NaN + */ + if (!npy_isfinite(y)) { + return (npy_cpack@c@(y - y, y - y)); + } + + /* + * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the + * approximation sinh^2(huge) ~= exp(2*huge) / 4. + * We use a modified formula to avoid spurious overflow. + */ + if (npy_fabs@c@(x) >= TANH@C@_HUGE) { + @type@ exp_mx = npy_exp@c@(-npy_fabs@c@(x)); + return npy_cpack@c@(npy_copysign@c@(1, x), + 4 * npy_sin@c@(y) * npy_cos@c@(y) * + exp_mx * exp_mx); + } + + /* Kahan's algorithm */ + t = npy_tan@c@(y); + beta = 1 + t * t; /* = 1 / cos^2(y) */ + s = npy_sinh@c@(x); + rho = npy_sqrt@c@(1 + s * s); /* = cosh(x) */ + denom = 1 + beta * s * s; + return (npy_cpack@c@((beta * rho * s) / denom, t / denom)); +} +#undef TANH_HUGE +#undef TANHF_HUGE +#undef TANHL_HUGE +#endif + +#if !defined (HAVE_CACOS@C@) || !defined (HAVE_CASINH@C@) +/* + * Complex inverse trig functions taken from the msum library in FreeBSD + * revision 251404 + * + * The algorithm is very close to that in "Implementing the complex arcsine + * and arccosine functions using exception handling" by T. E. Hull, Thomas F. + * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on + * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335, + * http://dl.acm.org/citation.cfm?id=275324. + * + * Throughout we use the convention z = x + I*y. + * + * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) + * where + * A = (|z+I| + |z-I|) / 2 + * B = (|z+I| - |z-I|) / 2 = y/A + * + * These formulas become numerically unstable: + * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that + * is, Re(casinh(z)) is close to 0); + * (b) for Im(casinh(z)) when z is close to either of the intervals + * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is + * close to PI/2). 
+ * + * These numerical problems are overcome by defining + * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 + * Then if A < A_crossover, we use + * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) + * A-1 = f(x, 1+y) + f(x, 1-y) + * and if B > B_crossover, we use + * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) + * A-y = f(x, y+1) + f(x, y-1) + * where without loss of generality we have assumed that x and y are + * non-negative. + * + * Much of the difficulty comes because the intermediate computations may + * produce overflows or underflows. This is dealt with in the paper by Hull + * et al by using exception handling. We do this by detecting when + * computations risk underflow or overflow. The hardest part is handling the + * underflows when computing f(a, b). + * + * Note that the function f(a, b) does not appear explicitly in the paper by + * Hull et al, but the idea may be found on pages 308 and 309. Introducing the + * function f(a, b) allows us to concentrate many of the clever tricks in this + * paper into one function. + */ + +/* + * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. + * Pass hypot(a, b) as the third argument. + */ +static NPY_INLINE @type@ +_f@c@(@type@ a, @type@ b, @type@ hypot_a_b) +{ + if (b < 0) { + return ((hypot_a_b - b) / 2); + } + if (b == 0) { + return (a / 2); + } + return (a * a / (hypot_a_b + b) / 2); +} + +/* + * All the hard work is contained in this function. + * x and y are assumed positive or zero, and less than RECIP_EPSILON. + * Upon return: + * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). + * B_is_usable is set to 1 if the value of B is usable. + * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. + * If returning sqrt_A2my2 has potential to result in an underflow, it is + * rescaled, and new_y is similarly rescaled. 
+ */
+static NPY_INLINE void
+_do_hard_work@c@(@type@ x, @type@ y, @type@ *rx,
+    npy_int *B_is_usable, @type@ *B, @type@ *sqrt_A2my2, @type@ *new_y)
+{
+#if @precision@ == 1
+    const npy_float A_crossover = 10.0f;
+    const npy_float B_crossover = 0.6417f;
+    const npy_float FOUR_SQRT_MIN = 4.3368086899420177e-19f;
+#endif
+#if @precision@ == 2
+    const npy_double A_crossover = 10.0;
+    const npy_double B_crossover = 0.6417;
+    const npy_double FOUR_SQRT_MIN = 5.9666725849601654e-154;
+#endif
+#if @precision@ == 3
+    const npy_longdouble A_crossover = 10.0l;
+    const npy_longdouble B_crossover = 0.6417l;
+#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE
+    const npy_longdouble FOUR_SQRT_MIN = 5.9666725849601654e-154;
+#else
+    const npy_longdouble FOUR_SQRT_MIN = 7.3344154702193886625e-2466l;
+#endif
+#endif
+    @type@ R, S, A; /* A, B, R, and S are as in Hull et al. */
+    @type@ Am1, Amy; /* A-1, A-y. */
+
+    R = npy_hypot@c@(x, y + 1); /* |z+I| */
+    S = npy_hypot@c@(x, y - 1); /* |z-I| */
+
+    /* A = (|z+I| + |z-I|) / 2 */
+    A = (R + S) / 2;
+    /*
+     * Mathematically A >= 1. There is a small chance that this will not
+     * be so because of rounding errors. So we will make certain it is
+     * so.
+     */
+    if (A < 1) {
+        A = 1;
+    }
+
+    if (A < A_crossover) {
+        /*
+         * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y).
+         * rx = log1p(Am1 + sqrt(Am1*(A+1)))
+         */
+        if (y == 1 && x < @TEPS@ * @TEPS@ / 128) {
+            /*
+             * fp is of order x^2, and fm = x/2.
+             * A = 1 (inexactly).
+             */
+            *rx = npy_sqrt@c@(x);
+        }
+        else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) {
+            /*
+             * Underflow will not occur because
+             * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN
+             */
+            Am1 = _f@c@(x, 1 + y, R) + _f@c@(x, 1 - y, S);
+            *rx = npy_log1p@c@(Am1 + npy_sqrt@c@(Am1 * (A + 1)));
+        }
+        else if (y < 1) {
+            /*
+             * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and
+             * A = 1 (inexactly).
+             */
+            *rx = x / npy_sqrt@c@((1 - y) * (1 + y));
+        }
+        else { /* if (y > 1) */
+            /*
+             * A-1 = y-1 (inexactly).
+ */ + *rx = npy_log1p@c@((y - 1) + npy_sqrt@c@((y - 1) * (y + 1))); + } + } + else { + *rx = npy_log@c@(A + npy_sqrt@c@(A * A - 1)); + } + + *new_y = y; + + if (y < FOUR_SQRT_MIN) { + /* + * Avoid a possible underflow caused by y/A. For casinh this + * would be legitimate, but will be picked up by invoking atan2 + * later on. For cacos this would not be legitimate. + */ + *B_is_usable = 0; + *sqrt_A2my2 = A * (2 / @TEPS@); + *new_y = y * (2 / @TEPS@); + return; + } + + /* B = (|z+I| - |z-I|) / 2 = y/A */ + *B = y / A; + *B_is_usable = 1; + + if (*B > B_crossover) { + *B_is_usable = 0; + /* + * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). + * sqrt_A2my2 = sqrt(Amy*(A+y)) + */ + if (y == 1 && x < @TEPS@ / 128) { + /* + * fp is of order x^2, and fm = x/2. + * A = 1 (inexactly). + */ + *sqrt_A2my2 = npy_sqrt@c@(x) * npy_sqrt@c@((A + y) / 2); + } + else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) { + /* + * Underflow will not occur because + * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN + * and + * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN + */ + Amy = _f@c@(x, y + 1, R) + _f@c@(x, y - 1, S); + *sqrt_A2my2 = npy_sqrt@c@(Amy * (A + y)); + } + else if (y > 1) { + /* + * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and + * A = y (inexactly). + * + * y < RECIP_EPSILON. So the following + * scaling should avoid any underflow problems. + */ + *sqrt_A2my2 = x * (4 / @TEPS@ / @TEPS@) * y / + npy_sqrt@c@((y + 1) * (y - 1)); + *new_y = y * (4 / @TEPS@ / @TEPS@); + } + else { /* if (y < 1) */ + /* + * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and + * A = 1 (inexactly). + */ + *sqrt_A2my2 = npy_sqrt@c@((1 - y) * (1 + y)); + } + } +} + +/* + * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. 
+ */
+static NPY_INLINE void
+_clog_for_large_values@c@(@type@ x, @type@ y,
+                          @type@ *rr, @type@ *ri)
+{
+#if @precision@ == 1
+    const npy_float QUARTER_SQRT_MAX = 4.611685743549481e+18f;
+    const npy_float SQRT_MIN = 1.0842021724855044e-19f;
+#endif
+#if @precision@ == 2
+    const npy_double QUARTER_SQRT_MAX = 3.3519519824856489e+153;
+    const npy_double SQRT_MIN = 1.4916681462400413e-154;
+#endif
+#if @precision@ == 3
+#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE
+    const npy_longdouble QUARTER_SQRT_MAX = 3.3519519824856489e+153;
+    const npy_longdouble SQRT_MIN = 1.4916681462400413e-154;
+#else
+    const npy_longdouble QUARTER_SQRT_MAX = 2.7268703390485398235e+2465l;
+    const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l;
+#endif
+#endif
+    @type@ ax, ay, t;
+
+    ax = npy_fabs@c@(x);
+    ay = npy_fabs@c@(y);
+    if (ax < ay) {
+        t = ax;
+        ax = ay;
+        ay = t;
+    }
+
+    /*
+     * Avoid overflow in hypot() when x and y are both very large.
+     * Divide x and y by E, and then add 1 to the logarithm. This depends
+     * on E being larger than sqrt(2).
+     * Dividing by E causes an insignificant loss of accuracy; however
+     * this method is still poor since it is unnecessarily slow.
+     */
+    if (ax > @TMAX@ / 2) {
+        *rr = npy_log@c@(npy_hypot@c@(x / NPY_E@c@, y / NPY_E@c@)) + 1;
+    }
+    /*
+     * Avoid overflow when x or y is large. Avoid underflow when x or
+     * y is small.
+     */
+    else if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) {
+        *rr = npy_log@c@(npy_hypot@c@(x, y));
+    }
+    else {
+        *rr = npy_log@c@(ax * ax + ay * ay) / 2;
+    }
+    *ri = npy_atan2@c@(y, x);
+}
+#endif
+
+#ifndef HAVE_CACOS@C@
+@ctype@
+npy_cacos@c@(@ctype@ z)
+{
+#if @precision@ == 1
+    /* this is sqrt(6*EPS) */
+    const npy_float SQRT_6_EPSILON = 8.4572793338e-4f;
+    /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT.
*/ + const volatile npy_float pio2_lo = 7.5497899549e-9f; +#endif +#if @precision@ == 2 + const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; + const volatile npy_double pio2_lo = 6.1232339957367659e-17; +#endif +#if @precision@ == 3 + const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; + const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; +#endif + const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; + const @type@ pio2_hi = NPY_PI_2@c@; + @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2mx2, new_x; + npy_int sx, sy; + npy_int B_is_usable; + x = npy_creal@c@(z); y = npy_cimag@c@(z); - return npy_cpack@c@(npy_sin@c@(x) * npy_cosh@c@(y), npy_cos@c@(x) * npy_sinh@c@(y)); + sx = npy_signbit(x); + sy = npy_signbit(y); + ax = npy_fabs@c@(x); + ay = npy_fabs@c@(y); + + if (npy_isnan(x) || npy_isnan(y)) { + /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ + if (npy_isinf(x)) { + return npy_cpack@c@(y + y, -NPY_INFINITY@C@); + } + /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ + if (npy_isinf(y)) { + return npy_cpack@c@(x + x, -y); + } + /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ + if (x == 0) { + return npy_cpack@c@(pio2_hi + pio2_lo, y + y); + } + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + _clog_for_large_values@c@(x, y, &wx, &wy); + rx = npy_fabs@c@(wy); + ry = wx + NPY_LOGE2@c@; + if (sy == 0) { + ry = -ry; + } + return npy_cpack@c@(rx, ry); + } + + /* Avoid spuriously raising inexact for z = 1. */ + if (x == 1 && y == 0) { + return npy_cpack@c@(0, -y); + } + + /* All remaining cases are inexact. 
*/ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { + return npy_cpack@c@(pio2_hi - (x - pio2_lo), -y); + } + + _do_hard_work@c@(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); + if (B_is_usable) { + if (sx == 0) { + rx = npy_acos@c@(B); + } + else { + rx = npy_acos@c@(-B); + } + } + else { + if (sx == 0) { + rx = npy_atan2@c@(sqrt_A2mx2, new_x); + } + else { + rx = npy_atan2@c@(sqrt_A2mx2, -new_x); + } + } + if (sy == 0) { + ry = -ry; + } + return npy_cpack@c@(rx, ry); +} +#endif + +#ifndef HAVE_CASIN@C@ +@ctype@ +npy_casin@c@(@ctype@ z) +{ + /* casin(z) = I * conj( casinh(I * conj(z)) ) */ + z = npy_casinh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); + return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); +} +#endif + +#ifndef HAVE_CATAN@C@ +@ctype@ +npy_catan@c@(@ctype@ z) +{ + /* catan(z) = I * conj( catanh(I * conj(z)) ) */ + z = npy_catanh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); + return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); +} +#endif + +#ifndef HAVE_CACOSH@C@ +@ctype@ +npy_cacosh@c@(@ctype@ z) +{ + /* + * cacosh(z) = I*cacos(z) or -I*cacos(z) + * where the sign is chosen so Re(cacosh(z)) >= 0. 
+ */ + @ctype@ w; + @type@ rx, ry; + + w = npy_cacos@c@(z); + rx = npy_creal@c@(w); + ry = npy_cimag@c@(w); + /* cacosh(NaN + I*NaN) = NaN + I*NaN */ + if (npy_isnan(rx) && npy_isnan(ry)) { + return npy_cpack@c@(ry, rx); + } + /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ + /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ + if (npy_isnan(rx)) { + return npy_cpack@c@(npy_fabs@c@(ry), rx); + } + /* cacosh(0 + I*NaN) = NaN + I*NaN */ + if (npy_isnan(ry)) { + return npy_cpack@c@(ry, ry); + } + return npy_cpack@c@(npy_fabs@c@(ry), npy_copysign@c@(rx, npy_cimag@c@(z))); +} +#endif + +#ifndef HAVE_CASINH@C@ +/* + * casinh(z) = z + O(z^3) as z -> 0 + * + * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity + * The above formula works for the imaginary part as well, because + * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) + * as z -> infinity, uniformly in y + */ +@ctype@ +npy_casinh@c@(@ctype@ z) +{ +#if @precision@ == 1 + /* this is sqrt(6*EPS) */ + const npy_float SQRT_6_EPSILON = 8.4572793338e-4f; + /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. 
*/ + const volatile npy_float pio2_lo = 7.5497899549e-9f; +#endif +#if @precision@ == 2 + const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; + const volatile npy_double pio2_lo = 6.1232339957367659e-17; +#endif +#if @precision@ == 3 + const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; + const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; +#endif + const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; + const @type@ pio2_hi = NPY_PI_2@c@; + @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2my2, new_y; + npy_int B_is_usable; + + x = npy_creal@c@(z); + y = npy_cimag@c@(z); + ax = npy_fabs@c@(x); + ay = npy_fabs@c@(y); + + if (npy_isnan(x) || npy_isnan(y)) { + /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ + if (npy_isinf(x)) { + return npy_cpack@c@(x, y + y); + } + /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ + if (npy_isinf(y)) { + return npy_cpack@c@(y, x + x); + } + /* casinh(NaN + I*0) = NaN + I*0 */ + if (y == 0) { + return npy_cpack@c@(x + x, y); + } + /* + * All other cases involving NaN return NaN + I*NaN. + * C99 leaves it optional whether to raise invalid if one of + * the arguments is not NaN, so we opt not to raise it. + */ + return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); + } + + if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { + /* clog...() will raise inexact unless x or y is infinite. */ + if (npy_signbit(x) == 0) { + _clog_for_large_values@c@(x, y, &wx, &wy); + wx += NPY_LOGE2@c@; + } + else { + _clog_for_large_values@c@(-x, -y, &wx, &wy); + wx += NPY_LOGE2@c@; + } + return npy_cpack@c@(npy_copysign@c@(wx, x), npy_copysign@c@(wy, y)); + } + + /* Avoid spuriously raising inexact for z = 0. */ + if (x == 0 && y == 0) { + return (z); + } + + /* All remaining cases are inexact. 
*/ + raise_inexact(); + + if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { + return (z); + } + + _do_hard_work@c@(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); + if (B_is_usable) { + ry = npy_asin@c@(B); + } + else { + ry = npy_atan2@c@(new_y, sqrt_A2my2); + } + return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); +} +#endif + +#ifndef HAVE_CATANH@C@ +/* + * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). + * Assumes x*x and y*y will not overflow. + * Assumes x and y are finite. + * Assumes y is non-negative. + * Assumes fabs(x) >= DBL_EPSILON. + */ +static NPY_INLINE @type@ +_sum_squares@c@(@type@ x, @type@ y) +{ +#if @precision@ == 1 +const npy_float SQRT_MIN = 1.0842022e-19f; +#endif +#if @precision@ == 2 +const npy_double SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ +#endif +#if @precision@ == 3 +/* this is correct for 80 bit long doubles */ +const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; +#endif + /* Avoid underflow when y is small. */ + if (y < SQRT_MIN) { + return (x * x); + } + + return (x * x + y * y); +} + +/* + * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). + * Assumes x and y are not NaN, and one of x and y is larger than + * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use + * the code creal(1/z), because the imaginary part may produce an unwanted + * underflow. + * This is only called in a context where inexact is always raised before + * the call, so no effort is made to avoid or force inexact. 
+ */ +#if @precision@ == 1 +#define BIAS (FLT_MAX_EXP - 1) +#define CUTOFF (FLT_MANT_DIG / 2 + 1) +static NPY_INLINE npy_float +_real_part_reciprocalf(npy_float x, npy_float y) +{ + npy_float scale; + npy_uint32 hx, hy; + npy_int32 ix, iy; + + GET_FLOAT_WORD(hx, x); + ix = hx & 0x7f800000; + GET_FLOAT_WORD(hy, y); + iy = hy & 0x7f800000; + if (ix - iy >= CUTOFF << 23 || npy_isinf(x)) { + return (1 / x); + } + if (iy - ix >= CUTOFF << 23) { + return (x / y / y); + } + if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) { + return (x / (x * x + y * y)); + } + SET_FLOAT_WORD(scale, 0x7f800000 - ix); + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} +#undef BIAS +#undef CUTOFF +#endif +#if @precision@ == 2 +#define BIAS (DBL_MAX_EXP - 1) +/* more guard digits are useful iff there is extra precision. */ +#define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ +static NPY_INLINE npy_double +_real_part_reciprocal(npy_double x, npy_double y) +{ + npy_double scale; + npy_uint32 hx, hy; + npy_int32 ix, iy; + + /* + * This code is inspired by the C99 document n1124.pdf, Section G.5.1, + * example 2. 
+ */ + GET_HIGH_WORD(hx, x); + ix = hx & 0x7ff00000; + GET_HIGH_WORD(hy, y); + iy = hy & 0x7ff00000; + if (ix - iy >= CUTOFF << 20 || npy_isinf(x)) { + /* +-Inf -> +-0 is special */ + return (1 / x); + } + if (iy - ix >= CUTOFF << 20) { + /* should avoid double div, but hard */ + return (x / y / y); + } + if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) { + return (x / (x * x + y * y)); + } + scale = 1; + SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ + x *= scale; + y *= scale; + return (x / (x * x + y * y) * scale); +} +#undef BIAS +#undef CUTOFF +#endif +#if @precision@ == 3 +#ifndef HAVE_LDOUBLE_DOUBLE_DOUBLE_BE +#define BIAS (LDBL_MAX_EXP - 1) +#define CUTOFF (LDBL_MANT_DIG / 2 + 1) +static NPY_INLINE npy_longdouble +_real_part_reciprocall(npy_longdouble x, + npy_longdouble y) +{ + npy_longdouble scale; + union IEEEl2bitsrep ux, uy, us; + npy_int32 ix, iy; + + ux.e = x; + ix = GET_LDOUBLE_EXP(ux); + uy.e = y; + iy = GET_LDOUBLE_EXP(uy); + if (ix - iy >= CUTOFF || npy_isinf(x)) { + return (1/x); + } + if (iy - ix >= CUTOFF) { + return (x/y/y); + } + if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF) { + return (x/(x*x + y*y)); + } + us.e = 1; + SET_LDOUBLE_EXP(us, 0x7fff - ix); + scale = us.e; + x *= scale; + y *= scale; + return (x/(x*x + y*y) * scale); +} +#undef BIAS +#undef CUTOFF +#else +static NPY_INLINE npy_longdouble +_real_part_reciprocall(npy_longdouble x, + npy_longdouble y) +{ + return x/(x*x + y*y); +} +#endif +#endif + +@ctype@ +npy_catanh@c@(@ctype@ z) +{ +#if @precision@ == 1 + /* this is sqrt(3*EPS) */ + const npy_float SQRT_3_EPSILON = 5.9801995673e-4f; + /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. 
*/
+    const volatile npy_float pio2_lo = 7.5497899549e-9f;
+#endif
+#if @precision@ == 2
+    const npy_double SQRT_3_EPSILON = 2.5809568279517849e-8;
+    const volatile npy_double pio2_lo = 6.1232339957367659e-17;
+#endif
+#if @precision@ == 3
+    const npy_longdouble SQRT_3_EPSILON = 5.70316273435758915310e-10l;
+    const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l;
+#endif
+    const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@;
+    const @type@ pio2_hi = NPY_PI_2@c@;
+    @type@ x, y, ax, ay, rx, ry;
+
+    x = npy_creal@c@(z);
+    y = npy_cimag@c@(z);
+    ax = npy_fabs@c@(x);
+    ay = npy_fabs@c@(y);
+
+    /* This helps handle many cases. */
+    if (y == 0 && ax <= 1) {
+        return npy_cpack@c@(npy_atanh@c@(x), y);
+    }
+
+    /* To ensure the same accuracy as atan(), and to filter out z = 0. */
+    if (x == 0) {
+        return npy_cpack@c@(x, npy_atan@c@(y));
+    }
+
+    if (npy_isnan(x) || npy_isnan(y)) {
+        /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */
+        if (npy_isinf(x)) {
+            return npy_cpack@c@(npy_copysign@c@(0, x), y + y);
+        }
+        /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */
+        if (npy_isinf(y)) {
+            return npy_cpack@c@(npy_copysign@c@(0, x),
+                                npy_copysign@c@(pio2_hi + pio2_lo, y));
+        }
+        /*
+         * All other cases involving NaN return NaN + I*NaN.
+         * C99 leaves it optional whether to raise invalid if one of
+         * the arguments is not NaN, so we opt not to raise it.
+         */
+        return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@);
+    }
+
+    if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) {
+        return npy_cpack@c@(_real_part_reciprocal@c@(x, y),
+                            npy_copysign@c@(pio2_hi + pio2_lo, y));
+    }
+
+    if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) {
+        /*
+         * z = 0 was filtered out above. All other cases must raise
+         * inexact, but this is the only case that needs to do it
+         * explicitly.
+ */ + raise_inexact(); + return (z); + } + + if (ax == 1 && ay < @TEPS@) { + rx = (NPY_LOGE2@c@ - npy_log@c@(ay)) / 2; + } + else { + rx = npy_log1p@c@(4 * ax / _sum_squares@c@(ax - 1, ay)) / 4; + } + + if (ax == 1) { + ry = npy_atan2@c@(2, -ay) / 2; + } + else if (ay < @TEPS@) { + ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax)) / 2; + } + else { + ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; + } + + return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); } #endif /**end repeat**/ @@ -245,7 +1753,8 @@ * #KIND = CABS,CARG# */ #ifdef HAVE_@KIND@@C@ -@type@ npy_@kind@@c@(@ctype@ z) +@type@ +npy_@kind@@c@(@ctype@ z) { __@ctype@_to_c99_cast z1; z1.npy_z = z; @@ -255,11 +1764,14 @@ /**end repeat1**/ /**begin repeat1 - * #kind = cexp,clog,csqrt,ccos,csin# - * #KIND = CEXP,CLOG,CSQRT,CCOS,CSIN# + * #kind = cexp,clog,csqrt,ccos,csin,ctan,ccosh,csinh,ctanh, + * cacos,casin,catan,cacosh,casinh,catanh# + * #KIND = CEXP,CLOG,CSQRT,CCOS,CSIN,CTAN,CCOSH,CSINH,CTANH, + * CACOS,CASIN,CATAN,CACOSH,CASINH,CATANH# */ #ifdef HAVE_@KIND@@C@ -@ctype@ npy_@kind@@c@(@ctype@ z) +@ctype@ +npy_@kind@@c@(@ctype@ z) { __@ctype@_to_c99_cast z1; __@ctype@_to_c99_cast ret; @@ -270,22 +1782,5 @@ #endif /**end repeat1**/ -/**begin repeat1 - * #kind = cpow# - * #KIND = CPOW# - */ -#ifdef HAVE_@KIND@@C@ -@ctype@ npy_@kind@@c@(@ctype@ x, @ctype@ y) -{ - __@ctype@_to_c99_cast xcast; - __@ctype@_to_c99_cast ycast; - __@ctype@_to_c99_cast ret; - xcast.npy_z = x; - ycast.npy_z = y; - ret.c99_z = @kind@@c@(xcast.c99_z, ycast.c99_z); - return ret.npy_z; -} -#endif -/**end repeat1**/ /**end repeat**/ diff --git a/numpy/core/src/private/npy_config.h b/numpy/core/src/private/npy_config.h index 882913e2f201..6e98dc7e9907 100644 --- a/numpy/core/src/private/npy_config.h +++ b/numpy/core/src/private/npy_config.h @@ -4,12 +4,6 @@ #include "config.h" #include "numpy/numpyconfig.h" -/* Disable broken MS math functions */ -#if defined(_MSC_VER) || defined(__MINGW32_VERSION) -#undef 
HAVE_ATAN2 -#undef HAVE_HYPOT -#endif - /* * largest alignment the copy loops might require * required as string, void and complex types might get copied using larger @@ -21,9 +15,56 @@ */ #define NPY_MAX_COPY_ALIGNMENT 16 +/* blacklist */ + /* Disable broken Sun Workshop Pro math functions */ #ifdef __SUNPRO_C + +#undef HAVE_ATAN2 +#undef HAVE_ATAN2F +#undef HAVE_ATAN2L + +#endif + +/* Disable broken MS math functions */ +#if defined(_MSC_VER) || defined(__MINGW32_VERSION) + #undef HAVE_ATAN2 +#undef HAVE_ATAN2F +#undef HAVE_ATAN2L + +#undef HAVE_HYPOT +#undef HAVE_HYPOTF +#undef HAVE_HYPOTL + #endif +/* Disable broken gnu trig functions on linux */ +#if defined(__linux__) && defined(__GNUC__) + +#if defined(HAVE_FEATURES_H) +#include +#define TRIG_OK __GLIBC_PREREQ(2, 16) +#else +#define TRIG_OK 0 +#endif + +#if !TRIG_OK +#undef HAVE_CASIN +#undef HAVE_CASINF +#undef HAVE_CASINL +#undef HAVE_CASINH +#undef HAVE_CASINHF +#undef HAVE_CASINHL +#undef HAVE_CATAN +#undef HAVE_CATANF +#undef HAVE_CATANL +#undef HAVE_CATANH +#undef HAVE_CATANHF +#undef HAVE_CATANHL +#endif +#undef TRIG_OK + +#endif /* defined(__linux__) && defined(__GNUC__) */ + #endif diff --git a/numpy/core/src/umath/funcs.inc.src b/numpy/core/src/umath/funcs.inc.src index 3aad44c9feec..f0fa3e9dd81c 100644 --- a/numpy/core/src/umath/funcs.inc.src +++ b/numpy/core/src/umath/funcs.inc.src @@ -177,49 +177,8 @@ npy_ObjectLogicalNot(PyObject *i1) * #ctype = npy_cfloat, npy_cdouble, npy_clongdouble# * #ftype = npy_float, npy_double, npy_longdouble# * #c = f, ,l# - * #C = F, ,L# - * #precision = 1,2,4# */ -/* - * Perform the operation result := 1 + coef * x * result, - * with real coefficient `coef`. 
- */ -#define SERIES_HORNER_TERM@c@(result, x, coef) \ - do { \ - nc_prod@c@((result), (x), (result)); \ - (result)->real *= (coef); \ - (result)->imag *= (coef); \ - nc_sum@c@((result), &nc_1@c@, (result)); \ - } while(0) - -/* constants */ -static @ctype@ nc_1@c@ = {1., 0.}; -static @ctype@ nc_half@c@ = {0.5, 0.}; -static @ctype@ nc_i@c@ = {0., 1.}; -static @ctype@ nc_i2@c@ = {0., 0.5}; -/* - * static @ctype@ nc_mi@c@ = {0.0@c@, -1.0@c@}; - * static @ctype@ nc_pi2@c@ = {NPY_PI_2@c@., 0.0@c@}; - */ - - -static void -nc_sum@c@(@ctype@ *a, @ctype@ *b, @ctype@ *r) -{ - r->real = a->real + b->real; - r->imag = a->imag + b->imag; - return; -} - -static void -nc_diff@c@(@ctype@ *a, @ctype@ *b, @ctype@ *r) -{ - r->real = a->real - b->real; - r->imag = a->imag - b->imag; - return; -} - static void nc_neg@c@(@ctype@ *a, @ctype@ *r) { @@ -228,26 +187,6 @@ nc_neg@c@(@ctype@ *a, @ctype@ *r) return; } -static void -nc_prod@c@(@ctype@ *a, @ctype@ *b, @ctype@ *r) -{ - @ftype@ ar=a->real, br=b->real, ai=a->imag, bi=b->imag; - r->real = ar*br - ai*bi; - r->imag = ar*bi + ai*br; - return; -} - -static void -nc_quot@c@(@ctype@ *a, @ctype@ *b, @ctype@ *r) -{ - - @ftype@ ar=a->real, br=b->real, ai=a->imag, bi=b->imag; - @ftype@ d = br*br + bi*bi; - r->real = (ar*br + ai*bi)/d; - r->imag = (ai*br - ar*bi)/d; - return; -} - static void nc_sqrt@c@(@ctype@ *x, @ctype@ *r) { @@ -307,164 +246,28 @@ nc_expm1@c@(@ctype@ *x, @ctype@ *r) static void nc_pow@c@(@ctype@ *a, @ctype@ *b, @ctype@ *r) { - npy_intp n; - @ftype@ ar = npy_creal@c@(*a); - @ftype@ br = npy_creal@c@(*b); - @ftype@ ai = npy_cimag@c@(*a); - @ftype@ bi = npy_cimag@c@(*b); - - if (br == 0. && bi == 0.) { - *r = npy_cpack@c@(1., 0.); - return; - } - if (ar == 0. && ai == 0.) { - if (br > 0 && bi == 0) { - *r = npy_cpack@c@(0., 0.); - } - else { - volatile @ftype@ tmp = NPY_INFINITY; - /* NB: there are four complex zeros; c0 = (+-0, +-0), so that unlike - * for reals, c0**p, with `p` negative is in general - * ill-defined. 
- * - * c0**z with z complex is also ill-defined. - */ - *r = npy_cpack@c@(NPY_NAN, NPY_NAN); - - /* Raise invalid */ - tmp -= NPY_INFINITY; - ar = tmp; - } - return; - } - if (bi == 0 && (n=(npy_intp)br) == br) { - if (n == 1) { - /* unroll: handle inf better */ - *r = npy_cpack@c@(ar, ai); - return; - } - else if (n == 2) { - /* unroll: handle inf better */ - nc_prod@c@(a, a, r); - return; - } - else if (n == 3) { - /* unroll: handle inf better */ - nc_prod@c@(a, a, r); - nc_prod@c@(a, r, r); - return; - } - else if (n > -100 && n < 100) { - @ctype@ p, aa; - npy_intp mask = 1; - if (n < 0) n = -n; - aa = nc_1@c@; - p = npy_cpack@c@(ar, ai); - while (1) { - if (n & mask) - nc_prod@c@(&aa,&p,&aa); - mask <<= 1; - if (n < mask || mask <= 0) break; - nc_prod@c@(&p,&p,&p); - } - *r = npy_cpack@c@(npy_creal@c@(aa), npy_cimag@c@(aa)); - if (br < 0) nc_quot@c@(&nc_1@c@, r, r); - return; - } - } - - *r = npy_cpow@c@(*a, *b); + *r = npy_cpow@c@(*a, *b); return; } - -static void -nc_prodi@c@(@ctype@ *x, @ctype@ *r) -{ - @ftype@ xr = x->real; - r->real = -x->imag; - r->imag = xr; - return; -} - - static void nc_acos@c@(@ctype@ *x, @ctype@ *r) { - /* - * return nc_neg(nc_prodi(nc_log(nc_sum(x,nc_prod(nc_i, - * nc_sqrt(nc_diff(nc_1,nc_prod(x,x)))))))); - */ - nc_prod@c@(x,x,r); - nc_diff@c@(&nc_1@c@, r, r); - nc_sqrt@c@(r, r); - nc_prodi@c@(r, r); - nc_sum@c@(x, r, r); - nc_log@c@(r, r); - nc_prodi@c@(r, r); - nc_neg@c@(r, r); + *r = npy_cacos@c@(*x); return; } static void nc_acosh@c@(@ctype@ *x, @ctype@ *r) { - /* - * return nc_log(nc_sum(x, - * nc_prod(nc_sqrt(nc_sum(x,nc_1)), nc_sqrt(nc_diff(x,nc_1))))); - */ - @ctype@ t; - - nc_sum@c@(x, &nc_1@c@, &t); - nc_sqrt@c@(&t, &t); - nc_diff@c@(x, &nc_1@c@, r); - nc_sqrt@c@(r, r); - nc_prod@c@(&t, r, r); - nc_sum@c@(x, r, r); - nc_log@c@(r, r); + *r = npy_cacosh@c@(*x); return; } static void nc_asin@c@(@ctype@ *x, @ctype@ *r) { - /* - * return nc_neg(nc_prodi(nc_log(nc_sum(nc_prod(nc_i,x), - * 
nc_sqrt(nc_diff(nc_1,nc_prod(x,x))))))); - */ - if (fabs(x->real) > 1e-3 || fabs(x->imag) > 1e-3) { - @ctype@ a, *pa=&a; - nc_prod@c@(x, x, r); - nc_diff@c@(&nc_1@c@, r, r); - nc_sqrt@c@(r, r); - nc_prodi@c@(x, pa); - nc_sum@c@(pa, r, r); - nc_log@c@(r, r); - nc_prodi@c@(r, r); - nc_neg@c@(r, r); - } - else { - /* - * Small arguments: series expansion, to avoid loss of precision - * asin(x) = x [1 + (1/6) x^2 [1 + (9/20) x^2 [1 + ...]]] - * - * |x| < 1e-3 => |rel. error| < 1e-18 (f), 1e-24, 1e-36 (l) - */ - @ctype@ x2; - nc_prod@c@(x, x, &x2); - - *r = nc_1@c@; -#if @precision@ >= 3 - SERIES_HORNER_TERM@c@(r, &x2, 81.0@C@/110); - SERIES_HORNER_TERM@c@(r, &x2, 49.0@C@/72); -#endif -#if @precision@ >= 2 - SERIES_HORNER_TERM@c@(r, &x2, 25.0@C@/42); -#endif - SERIES_HORNER_TERM@c@(r, &x2, 9.0@C@/20); - SERIES_HORNER_TERM@c@(r, &x2, 1.0@C@/6); - nc_prod@c@(r, x, r); - } + *r = npy_casin@c@(*x); return; } @@ -472,134 +275,35 @@ nc_asin@c@(@ctype@ *x, @ctype@ *r) static void nc_asinh@c@(@ctype@ *x, @ctype@ *r) { - /* - * return nc_log(nc_sum(nc_sqrt(nc_sum(nc_1,nc_prod(x,x))),x)); - */ - if (fabs(x->real) > 1e-3 || fabs(x->imag) > 1e-3) { - nc_prod@c@(x, x, r); - nc_sum@c@(&nc_1@c@, r, r); - nc_sqrt@c@(r, r); - nc_sum@c@(r, x, r); - nc_log@c@(r, r); - } - else { - /* - * Small arguments: series expansion, to avoid loss of precision - * asinh(x) = x [1 - (1/6) x^2 [1 - (9/20) x^2 [1 - ...]]] - * - * |x| < 1e-3 => |rel. 
error| < 1e-18 (f), 1e-24, 1e-36 (l) - */ - @ctype@ x2; - nc_prod@c@(x, x, &x2); - - *r = nc_1@c@; -#if @precision@ >= 3 - SERIES_HORNER_TERM@c@(r, &x2, -81.0@C@/110); - SERIES_HORNER_TERM@c@(r, &x2, -49.0@C@/72); -#endif -#if @precision@ >= 2 - SERIES_HORNER_TERM@c@(r, &x2, -25.0@C@/42); -#endif - SERIES_HORNER_TERM@c@(r, &x2, -9.0@C@/20); - SERIES_HORNER_TERM@c@(r, &x2, -1.0@C@/6); - nc_prod@c@(r, x, r); - } + *r = npy_casinh@c@(*x); return; } static void nc_atan@c@(@ctype@ *x, @ctype@ *r) { - /* - * return nc_prod(nc_i2,nc_log(nc_quot(nc_sum(nc_i,x),nc_diff(nc_i,x)))); - */ - if (fabs(x->real) > 1e-3 || fabs(x->imag) > 1e-3) { - @ctype@ a, *pa=&a; - nc_diff@c@(&nc_i@c@, x, pa); - nc_sum@c@(&nc_i@c@, x, r); - nc_quot@c@(r, pa, r); - nc_log@c@(r,r); - nc_prod@c@(&nc_i2@c@, r, r); - } - else { - /* - * Small arguments: series expansion, to avoid loss of precision - * atan(x) = x [1 - (1/3) x^2 [1 - (3/5) x^2 [1 - ...]]] - * - * |x| < 1e-3 => |rel. error| < 1e-18 (f), 1e-24, 1e-36 (l) - */ - @ctype@ x2; - nc_prod@c@(x, x, &x2); - - *r = nc_1@c@; -#if @precision@ >= 3 - SERIES_HORNER_TERM@c@(r, &x2, -9.0@C@/11); - SERIES_HORNER_TERM@c@(r, &x2, -7.0@C@/9); -#endif -#if @precision@ >= 2 - SERIES_HORNER_TERM@c@(r, &x2, -5.0@C@/7); -#endif - SERIES_HORNER_TERM@c@(r, &x2, -3.0@C@/5); - SERIES_HORNER_TERM@c@(r, &x2, -1.0@C@/3); - nc_prod@c@(r, x, r); - } + *r = npy_catan@c@(*x); return; } static void nc_atanh@c@(@ctype@ *x, @ctype@ *r) { - /* - * return nc_prod(nc_half,nc_log(nc_quot(nc_sum(nc_1,x),nc_diff(nc_1,x)))); - */ - if (fabs(x->real) > 1e-3 || fabs(x->imag) > 1e-3) { - @ctype@ a, *pa=&a; - nc_diff@c@(&nc_1@c@, x, r); - nc_sum@c@(&nc_1@c@, x, pa); - nc_quot@c@(pa, r, r); - nc_log@c@(r, r); - nc_prod@c@(&nc_half@c@, r, r); - } - else { - /* - * Small arguments: series expansion, to avoid loss of precision - * atan(x) = x [1 + (1/3) x^2 [1 + (3/5) x^2 [1 + ...]]] - * - * |x| < 1e-3 => |rel. 
error| < 1e-18 (f), 1e-24, 1e-36 (l) - */ - @ctype@ x2; - nc_prod@c@(x, x, &x2); - - *r = nc_1@c@; -#if @precision@ >= 3 - SERIES_HORNER_TERM@c@(r, &x2, 9.0@C@/11); - SERIES_HORNER_TERM@c@(r, &x2, 7.0@C@/9); -#endif -#if @precision@ >= 2 - SERIES_HORNER_TERM@c@(r, &x2, 5.0@C@/7); -#endif - SERIES_HORNER_TERM@c@(r, &x2, 3.0@C@/5); - SERIES_HORNER_TERM@c@(r, &x2, 1.0@C@/3); - nc_prod@c@(r, x, r); - } + *r = npy_catanh@c@(*x); return; } static void nc_cos@c@(@ctype@ *x, @ctype@ *r) { - @ftype@ xr=x->real, xi=x->imag; - r->real = npy_cos@c@(xr)*npy_cosh@c@(xi); - r->imag = -npy_sin@c@(xr)*npy_sinh@c@(xi); + *r = npy_ccos@c@(*x); return; } static void nc_cosh@c@(@ctype@ *x, @ctype@ *r) { - @ftype@ xr=x->real, xi=x->imag; - r->real = npy_cos@c@(xi)*npy_cosh@c@(xr); - r->imag = npy_sin@c@(xi)*npy_sinh@c@(xr); + *r = npy_ccosh@c@(*x); return; } @@ -624,63 +328,29 @@ nc_log2@c@(@ctype@ *x, @ctype@ *r) static void nc_sin@c@(@ctype@ *x, @ctype@ *r) { - @ftype@ xr=x->real, xi=x->imag; - r->real = npy_sin@c@(xr)*npy_cosh@c@(xi); - r->imag = npy_cos@c@(xr)*npy_sinh@c@(xi); + *r = npy_csin@c@(*x); return; } static void nc_sinh@c@(@ctype@ *x, @ctype@ *r) { - @ftype@ xr=x->real, xi=x->imag; - r->real = npy_cos@c@(xi)*npy_sinh@c@(xr); - r->imag = npy_sin@c@(xi)*npy_cosh@c@(xr); + *r = npy_csinh@c@(*x); return; } static void nc_tan@c@(@ctype@ *x, @ctype@ *r) { - @ftype@ sr,cr,shi,chi; - @ftype@ rs,is,rc,ic; - @ftype@ d; - @ftype@ xr=x->real, xi=x->imag; - sr = npy_sin@c@(xr); - cr = npy_cos@c@(xr); - shi = npy_sinh@c@(xi); - chi = npy_cosh@c@(xi); - rs = sr*chi; - is = cr*shi; - rc = cr*chi; - ic = -sr*shi; - d = rc*rc + ic*ic; - r->real = (rs*rc+is*ic)/d; - r->imag = (is*rc-rs*ic)/d; - return; + *r = npy_ctan@c@(*x); + return; } static void nc_tanh@c@(@ctype@ *x, @ctype@ *r) { - @ftype@ si,ci,shr,chr; - @ftype@ rs,is,rc,ic; - @ftype@ d; - @ftype@ xr=x->real, xi=x->imag; - si = npy_sin@c@(xi); - ci = npy_cos@c@(xi); - shr = npy_sinh@c@(xr); - chr = npy_cosh@c@(xr); - rs = ci*shr; - 
is = si*chr; - rc = ci*chr; - ic = si*shr; - d = rc*rc + ic*ic; - r->real = (rs*rc+is*ic)/d; - r->imag = (is*rc-rs*ic)/d; + *r = npy_ctanh@c@(*x); return; } -#undef SERIES_HORNER_TERM@c@ - /**end repeat**/ diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index c71b7b6585a6..092953872774 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -1363,45 +1363,53 @@ def test_precisions_consistent(self) : def test_branch_cuts(self): # check branch cuts and continuity on them - yield _check_branch_cut, np.log, -0.5, 1j, 1, -1 - yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1 - yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1 - yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1 - yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1 + yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True + yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True + yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True + yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True + yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, -1j], 1, -1 - yield _check_branch_cut, np.arccos, [ -2, 2], [1j, -1j], 1, -1 - yield _check_branch_cut, np.arctan, [-2j, 2j], [1, -1 ], -1, 1 + yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True + yield _check_branch_cut, np.arccos, [ -2, 2], [1j, 1j], 1, -1, True + yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1 ], -1, 1, True - yield _check_branch_cut, np.arcsinh, [-2j, 2j], [-1, 1], -1, 1 - yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1 - yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, -1j], 1, -1 + yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True + yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True + yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True # check against bogus branch cuts: assert continuity between quadrants - yield _check_branch_cut, 
np.arcsin, [-2j, 2j], [ 1, 1], 1, 1 - yield _check_branch_cut, np.arccos, [-2j, 2j], [ 1, 1], 1, 1 + yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1 + yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1, 1], 1, 1 yield _check_branch_cut, np.arctan, [ -2, 2], [1j, 1j], 1, 1 yield _check_branch_cut, np.arcsinh, [ -2, 2, 0], [1j, 1j, 1 ], 1, 1 - yield _check_branch_cut, np.arccosh, [-2j, 2j, 2], [1, 1, 1j], 1, 1 - yield _check_branch_cut, np.arctanh, [-2j, 2j, 0], [1, 1, 1j], 1, 1 + yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1 + yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1 - @dec.knownfailureif(True, "These branch cuts are known to fail") - def test_branch_cuts_failing(self): - # XXX: signed zero not OK with ICC on 64-bit platform for log, see - # http://permalink.gmane.org/gmane.comp.python.numeric.general/25335 - yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.log1p, -1.5, 1j, 1, -1, True - # XXX: signed zeros are not OK for sqrt or for the arc* functions - yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True - yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, -1j], 1, -1, True - yield _check_branch_cut, np.arccos, [ -2, 2], [1j, -1j], 1, -1, True - yield _check_branch_cut, np.arctan, [-2j, 2j], [1, -1 ], -1, 1, True - yield _check_branch_cut, np.arcsinh, [-2j, 2j], [-1, 1], -1, 1, True - yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True - yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, -1j], 1, -1, True + def test_branch_cuts_complex64(self): + # check branch cuts and continuity on them + yield _check_branch_cut, np.log, -0.5, 1j, 1, -1, True, np.complex64 + yield _check_branch_cut, np.log2, -0.5, 1j, 1, -1, True, np.complex64 + yield _check_branch_cut, np.log10, -0.5, 1j, 1, -1, True, np.complex64 + yield _check_branch_cut, 
np.log1p, -1.5, 1j, 1, -1, True, np.complex64 + yield _check_branch_cut, np.sqrt, -0.5, 1j, 1, -1, True, np.complex64 + + yield _check_branch_cut, np.arcsin, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64 + yield _check_branch_cut, np.arccos, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64 + yield _check_branch_cut, np.arctan, [0-2j, 2j], [1, 1 ], -1, 1, True, np.complex64 + + yield _check_branch_cut, np.arcsinh, [0-2j, 2j], [1, 1], -1, 1, True, np.complex64 + yield _check_branch_cut, np.arccosh, [ -1, 0.5], [1j, 1j], 1, -1, True, np.complex64 + yield _check_branch_cut, np.arctanh, [ -2, 2], [1j, 1j], 1, -1, True, np.complex64 + + # check against bogus branch cuts: assert continuity between quadrants + yield _check_branch_cut, np.arcsin, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64 + yield _check_branch_cut, np.arccos, [0-2j, 2j], [ 1, 1], 1, 1, False, np.complex64 + yield _check_branch_cut, np.arctan, [ -2, 2], [1j, 1j], 1, 1, False, np.complex64 + + yield _check_branch_cut, np.arcsinh, [ -2, 2, 0], [1j, 1j, 1 ], 1, 1, False, np.complex64 + yield _check_branch_cut, np.arccosh, [0-2j, 2j, 2], [1, 1, 1j], 1, 1, False, np.complex64 + yield _check_branch_cut, np.arctanh, [0-2j, 2j, 0], [1, 1, 1j], 1, 1, False, np.complex64 def test_against_cmath(self): import cmath, sys @@ -1466,7 +1474,7 @@ def check(x, rtol): # So, give more leeway for long complex tests here: check(x_series, 50*eps) else: - check(x_series, 2*eps) + check(x_series, 2.1*eps) check(x_basic, 2*eps/1e-3) # Check a few points @@ -1565,8 +1573,12 @@ def _check_branch_cut(f, x0, dx, re_sign=1, im_sign=-1, sig_zero_ok=False, x0 = np.atleast_1d(x0).astype(dtype) dx = np.atleast_1d(dx).astype(dtype) - scale = np.finfo(dtype).eps * 1e3 - atol = 1e-4 + if np.dtype(dtype).char == 'F': + scale = np.finfo(dtype).eps * 1e2 + atol = np.float32(1e-2) + else: + scale = np.finfo(dtype).eps * 1e3 + atol = 1e-4 y0 = f(x0) yp = f(x0 + dx*scale*np.absolute(x0)/np.absolute(dx)) @@ -1581,16 +1593,20 @@ def 
_check_branch_cut(f, x0, dx, re_sign=1, im_sign=-1, sig_zero_ok=False, # check that signed zeros also work as a displacement jr = (x0.real == 0) & (dx.real != 0) ji = (x0.imag == 0) & (dx.imag != 0) - - x = -x0 - x.real[jr] = 0.*dx.real - x.imag[ji] = 0.*dx.imag - x = -x - ym = f(x) - ym = ym[jr | ji] - y0 = y0[jr | ji] - assert_(np.all(np.absolute(y0.real - ym.real*re_sign) < atol), (y0, ym)) - assert_(np.all(np.absolute(y0.imag - ym.imag*im_sign) < atol), (y0, ym)) + if np.any(jr): + x = x0[jr] + x.real = np.NZERO + ym = f(x) + assert_(np.all(np.absolute(y0[jr].real - ym.real*re_sign) < atol), (y0[jr], ym)) + assert_(np.all(np.absolute(y0[jr].imag - ym.imag*im_sign) < atol), (y0[jr], ym)) + + + if np.any(ji): + x = x0[ji] + x.imag = np.NZERO + ym = f(x) + assert_(np.all(np.absolute(y0[ji].real - ym.real*re_sign) < atol), (y0[ji], ym)) + assert_(np.all(np.absolute(y0[ji].imag - ym.imag*im_sign) < atol), (y0[ji], ym)) def test_copysign(): assert_(np.copysign(1, -1) == -1)