Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Tree: 5704885160
Fetching contributors…

Cannot retrieve contributors at this time

737 lines (612 sloc) 22.602 kB
""" Utilties for casting numpy values in various ways
Most routines work round some numpy oddities in floating point precision and
casting. Others work round numpy casting to and from python ints
"""
from platform import processor, machine
import numpy as np
class CastingError(Exception):
    """ Error raised when a requested cast cannot be carried out

    For example, ``float_to_int`` raises this when NaNs are present in the
    input and ``nan2zero`` is False.
    """
def float_to_int(arr, int_type, nan2zero=True, infmax=False):
    """ Convert floating point array `arr` to type `int_type`

    * Rounds numbers to nearest integer
    * Clips values to prevent overflows when casting
    * Converts NaN to 0 (for `nan2zero` == True)

    Casting floats to integers is delicate because the result is undefined
    and platform specific for float values outside the range of `int_type`.
    Define ``shared_min`` to be the minimum value that can be exactly
    represented in both the float type of `arr` and `int_type`. Define
    ``shared_max`` to be the equivalent maximum value.  To avoid undefined
    results we threshold `arr` at ``shared_min`` and ``shared_max``.

    Parameters
    ----------
    arr : array-like
        Array of floating point type
    int_type : object
        Numpy integer type
    nan2zero : {True, False, None}
        Whether to convert NaN value to zero.  Default is True.  If False, and
        NaNs are present, raise CastingError. If None, do not check for NaN
        values and pass through directly to the ``astype`` casting mechanism.
        In this last case, the resulting value is undefined.
    infmax : {False, True}
        If True, set np.inf values in `arr` to be `int_type` integer maximum
        value, -np.inf as `int_type` integer minimum. If False, +inf becomes
        ``shared_max`` and -inf becomes ``shared_min`` as defined above.
        Therefore False gives faster conversion at the expense of infs that
        are further from infinity.

    Returns
    -------
    iarr : ndarray
        of type `int_type`, with the same shape as `arr`

    Raises
    ------
    CastingError
        If `nan2zero` is false (but not None) and `arr` contains NaNs.

    Examples
    --------
    >>> float_to_int([np.nan, np.inf, -np.inf, 1.1, 6.6], np.int16)
    array([     0,  32767, -32768,      1,      7], dtype=int16)

    Notes
    -----
    Numpy relies on the C library to cast from float to int using the standard
    ``astype`` method of the array.

    Quoting from section F4 of the C99 standard:

        If the floating value is infinite or NaN or if the integral part of the
        floating value exceeds the range of the integer type, then the
        "invalid" floating-point exception is raised and the resulting value
        is unspecified.

    Hence we threshold at ``shared_min`` and ``shared_max`` to avoid casting to
    values that are undefined.

    See: http://en.wikipedia.org/wiki/C99 . There are links to the C99 standard
    from that page.
    """
    arr = np.asarray(arr)
    flt_type = arr.dtype.type
    int_type = np.dtype(int_type).type
    # Deal with scalar as input; fancy indexing needs 1D
    shape = arr.shape
    arr = np.atleast_1d(arr)
    mn, mx = shared_range(flt_type, int_type)
    if nan2zero is None:
        # Caller asked us not to look for NaNs at all
        seen_nans = False
    else:
        nans = np.isnan(arr)
        seen_nans = np.any(nans)
        # Truthiness test rather than ``== False`` so that 0 and numpy
        # booleans are treated the same way as the Python False
        if not nan2zero and seen_nans:
            raise CastingError('NaNs in array, nan2zero is False')
    # Round, then clip into the range both types can represent, then cast
    iarr = np.clip(np.rint(arr), mn, mx).astype(int_type)
    if seen_nans:
        iarr[nans] = 0
    if not infmax:
        return iarr.reshape(shape)
    ii = np.iinfo(int_type)
    iarr[arr == np.inf] = ii.max
    # -inf was already clipped to mn; only patch it if mn is not the true
    # integer minimum
    if ii.min != int(mn):
        iarr[arr == -np.inf] = ii.min
    return iarr.reshape(shape)
# Cache of ``shared_range`` results, keyed by the (float type, integer type)
# pair, so the exact-range search is only done once per pair
_SHARED_RANGES = {}
def shared_range(flt_type, int_type):
    """ Min and max in float type that are >=min, <=max in integer type

    This is not as easy as it sounds, because the float type may not be able to
    exactly represent the max or min integer values, so we have to find the
    next exactly representable floating point value to do the thresholding.

    Results are stored in the module-level ``_SHARED_RANGES`` cache.

    Parameters
    ----------
    flt_type : dtype specifier
        A dtype specifier referring to a numpy floating point type.  For
        example, ``f4``, ``np.dtype('f4')``, ``np.float32`` are equivalent.
    int_type : dtype specifier
        A dtype specifier referring to a numpy integer type.  For example,
        ``i4``, ``np.dtype('i4')``, ``np.int32`` are equivalent

    Returns
    -------
    mn : object
        Number of type `flt_type` that is the minimum value in the range of
        `int_type`, such that ``mn.astype(int_type)`` >= min of `int_type`
    mx : object
        Number of type `flt_type` that is the maximum value in the range of
        `int_type`, such that ``mx.astype(int_type)`` <= max of `int_type`

    Examples
    --------
    >>> shared_range(np.float32, np.int32) == (-2147483648.0, 2147483520.0)
    True
    >>> shared_range('f4', 'i4') == (-2147483648.0, 2147483520.0)
    True
    """
    flt_type = np.dtype(flt_type).type
    int_type = np.dtype(int_type).type
    cache_key = (flt_type, int_type)
    # Reuse a previously computed range when we have one
    if cache_key in _SHARED_RANGES:
        return _SHARED_RANGES[cache_key]
    int_info = np.iinfo(int_type)
    flt_info = np.finfo(flt_type)
    # Smallest exactly representable float that is >= the integer minimum
    lower = ceil_exact(int_info.min, flt_type)
    if lower == -np.inf:
        lower = flt_info.min
    # Largest exactly representable float that is <= the integer maximum
    upper = floor_exact(int_info.max, flt_type)
    if upper == np.inf:
        upper = flt_info.max
    _SHARED_RANGES[cache_key] = (lower, upper)
    return lower, upper
# ----------------------------------------------------------------------------
# Routines to work out the next lowest representable integer in floating point
# types.
# ----------------------------------------------------------------------------
try:
_float16 = np.float16
except AttributeError: # float16 not present in np < 1.6
_float16 = None
class FloatingError(Exception):
    """ Error raised for floating point problems in this module

    Raised by ``type_info`` for floating point types it does not recognize and
    by ``as_int`` for values that are not integers.
    """
def on_powerpc():
    """ True if we are running on a Power PC platform

    Has to deal with older Macs and IBM POWER7 series among others, so both
    the processor name and the machine string are consulted.

    Returns
    -------
    tf : bool
        True if ``platform.processor()`` is 'powerpc' or
        ``platform.machine()`` starts with 'ppc'.
    """
    if processor() == 'powerpc':
        return True
    return machine().startswith('ppc')
def type_info(np_type):
    """ Return dict with min, max, nexp, nmant, width for numpy type `np_type`

    Type can be integer in which case nexp and nmant are None.

    Parameters
    ----------
    np_type : numpy type specifier
        Any specifier for a numpy dtype

    Returns
    -------
    info : dict
        with fields ``min`` (minimum value), ``max`` (maximum value), ``nexp``
        (exponent width), ``nmant`` (significand precision not including
        implicit first digit), ``minexp`` (minimum exponent), ``maxexp``
        (maximum exponent), ``width`` (width in bytes). (``nexp``, ``nmant``,
        ``minexp``, ``maxexp``) are None for integer types. Both ``min`` and
        ``max`` are of type `np_type`.

    Raises
    ------
    FloatingError : for floating point types we don't recognize

    Notes
    -----
    You might be thinking that ``np.finfo`` does this job, and it does, except
    for PPC long doubles (http://projects.scipy.org/numpy/ticket/2077) and
    float96 on Windows compiled with Mingw. This routine protects against such
    errors in ``np.finfo`` by only accepting values that we know are likely to
    be correct.

    The complex long double type is referred to as ``np.clongdouble``; the
    ``np.longcomplex`` alias for the same type was removed in NumPy 2.0.
    """
    dt = np.dtype(np_type)
    np_type = dt.type
    width = dt.itemsize
    try:  # integer type
        info = np.iinfo(dt)
    except ValueError:
        pass
    else:
        return dict(min=np_type(info.min), max=np_type(info.max), minexp=None,
                    maxexp=None, nmant=None, nexp=None, width=width)
    info = np.finfo(dt)
    # Trust the standard IEEE types
    nmant, nexp = info.nmant, info.nexp
    ret = dict(min=np_type(info.min),
               max=np_type(info.max),
               nmant=nmant,
               nexp=nexp,
               minexp=info.minexp,
               maxexp=info.maxexp,
               width=width)
    if np_type in (_float16, np.float32, np.float64,
                   np.complex64, np.complex128):
        return ret
    info_64 = np.finfo(np.float64)
    if dt.kind == 'c':
        assert np_type is np.clongdouble
        # Compare on the width of one (real or imaginary) component
        vals = (nmant, nexp, width // 2)
    else:
        assert np_type is np.longdouble
        vals = (nmant, nexp, width)
    if vals in ((112, 15, 16),  # binary128
                (info_64.nmant, info_64.nexp, 8),  # float64
                (63, 15, 12), (63, 15, 16)):  # Intel extended 80
        return ret  # these are OK without modification
    # The remaining types are longdoubles with bad finfo values.  Some we
    # correct, others we wait to hear of errors.
    # We start with float64 as basis
    ret = type_info(np.float64)
    if vals in ((52, 15, 12),  # windows float96
                (52, 15, 16)):  # windows float128?
        # On windows 32 bit at least, float96 is Intel 80 storage but operating
        # at float64 precision. The finfo values give nexp == 15 (as for intel
        # 80) but in calculations nexp in fact appears to be 11 as for float64
        ret.update(dict(width=width))
        return ret
    # Oh dear, we don't recognize the type information.  Try some known types
    # and then give up. At this stage we're expecting exotic longdouble or
    # their complex equivalent.
    if np_type not in (np.longdouble, np.clongdouble) or width not in (16, 32):
        raise FloatingError('We had not expected type %s' % np_type)
    if (vals == (1, 1, 16) and on_powerpc() and
            _check_maxexp(np.longdouble, 1024)):
        # double pair on PPC.  The _check_nmant routine does not work for this
        # type, hence the powerpc platform check instead
        ret.update(dict(nmant=106, width=width))
    elif (_check_nmant(np.longdouble, 52) and
          _check_maxexp(np.longdouble, 11)):
        # Got float64 despite everything
        pass
    elif (_check_nmant(np.longdouble, 112) and
          _check_maxexp(np.longdouble, 16384)):
        # binary 128, but with some busted type information. The complex long
        # double seems to break here too, so we need to use np.longdouble and
        # complexify
        two = np.longdouble(2)
        # See: http://matthew-brett.github.com/pydagogue/floating_point.html
        max_val = (two ** 113 - 1) / (two ** 112) * two ** 16383
        if np_type is np.clongdouble:
            max_val += 0j
        ret = dict(min=-max_val,
                   max=max_val,
                   nmant=112,
                   nexp=15,
                   minexp=-16382,
                   maxexp=16384,
                   width=width)
    else:  # don't recognize the type
        raise FloatingError('We had not expected long double type %s '
                            'with info %s' % (np_type, info))
    return ret
def _check_nmant(np_type, nmant):
""" True if fp type `np_type` seems to have `nmant` significand digits
Note 'digits' does not include implicit digits. And in fact if there are no
implicit digits, the `nmant` number is one less than the actual digits.
Assumes base 2 representation.
Parameters
----------
np_type : numpy type specifier
Any specifier for a numpy dtype
nmant : int
Number of digits to test against
Returns
-------
tf : bool
True if `nmant` is the correct number of significand digits, false
otherwise
"""
np_type = np.dtype(np_type).type
max_contig = np_type(2 ** (nmant + 1)) # maximum of contiguous integers
tests = max_contig + np.array([-2, -1, 0, 1, 2], dtype=np_type)
return np.all(tests - max_contig == [-2, -1, 0, 0, 2])
def _check_maxexp(np_type, maxexp):
""" True if fp type `np_type` seems to have `maxexp` maximum exponent
We're testing "maxexp" as returned by numpy. This value is set to one
greater than the maximum power of 2 that `np_type` can represent.
Assumes base 2 representation. Very crude check
Parameters
----------
np_type : numpy type specifier
Any specifier for a numpy dtype
maxexp : int
Maximum exponent to test against
Returns
-------
tf : bool
True if `maxexp` is the correct maximum exponent, False otherwise.
"""
dt = np.dtype(np_type)
np_type = dt.type
two = np_type(2).reshape((1,)) # to avoid upcasting
return (np.isfinite(two ** (maxexp - 1)) and
not np.isfinite(two ** maxexp))
def as_int(x, check=True):
    """ Return python integer representation of number

    This is useful because the numpy int(val) mechanism is broken for large
    values in np.longdouble.

    It is also useful to work around a numpy 1.4.1 bug in conversion of uints
    to python ints.

    This routine will still raise an OverflowError for values that are outside
    the range of float64.

    Parameters
    ----------
    x : object
        integer, unsigned integer or floating point value
    check : {True, False}
        If True, raise error for values that are not integers

    Returns
    -------
    i : int
        Python integer

    Raises
    ------
    FloatingError
        If `check` is True and `x` is not an integer value.

    Examples
    --------
    >>> as_int(2.0)
    2
    >>> as_int(-2.0)
    -2
    >>> as_int(2.1) #doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
        ...
    FloatingError: Not an integer: 2.1
    >>> as_int(2.1, check=False)
    2
    """
    value = np.array(x)
    if value.dtype.kind in ('i', 'u'):
        # Going via the string form works around a nasty numpy 1.4.1 bug such
        # that:
        # >>> int(np.uint32(2**32-1)
        # -1
        return int(str(value))
    direct = int(value)
    if direct == value:
        # Straight conversion was exact; nothing more to do
        return direct
    floored = np.floor(value)
    if check and floored != value:
        raise FloatingError('Not an integer: %s' % value)
    if floored.dtype.type != np.longdouble:
        return int(value)
    # Subtract float64 chunks until we have all of the number. If the int is
    # too large, it will overflow
    total = 0
    while floored != 0:
        chunk = np.float64(floored)
        floored -= chunk
        total += int(chunk)
    return total
def int_to_float(val, flt_type):
    """ Convert integer `val` to floating point type `flt_type`

    Why is this so complicated?

    At least in numpy <= 1.6.1, numpy longdoubles do not correctly convert to
    ints, and ints do not correctly convert to longdoubles.  Specifically, in
    both cases, the values seem to go through float64 conversion on the way, so
    to convert better, we need to split into float64s and sum up the result.

    Parameters
    ----------
    val : int
        Integer value
    flt_type : dtype specifier
        Any specifier for a numpy floating point dtype, for example
        ``np.float32``, ``'f4'`` or ``np.dtype('f4')``.

    Returns
    -------
    f : numpy scalar
        of type `flt_type`

    Raises
    ------
    OverflowError
        If `val` (or a remaining part of it, for longdouble) is too large to
        convert to float64.
    """
    # Normalize so that strings and dtype instances work as well as types
    flt_type = np.dtype(flt_type).type
    if flt_type is not np.longdouble:
        return flt_type(val)
    # Build the longdouble up from float64-sized pieces; each pass removes the
    # part of `val` that float64 can hold
    faval = np.longdouble(0)
    while val != 0:
        f64 = np.float64(val)
        faval += f64
        val -= int(f64)
    return faval
def floor_exact(val, flt_type):
    """ Return nearest exact integer <= `val` in float type `flt_type`

    Parameters
    ----------
    val : int
        We have to pass val as an int rather than the floating point type
        because large integers cast as floating point may be rounded by the
        casting process.
    flt_type : numpy type
        numpy float type.

    Returns
    -------
    floor_val : object
        value of floating point type `flt_type`, that is the nearest exact
        integer in this type such that `floor_val` <= `val`.  Thus if `val` is
        exact in `flt_type`, `floor_val` == `val`.  If `val` is too large to
        convert, the result is +/- inf with the sign of `val`.

    Examples
    --------
    Obviously 2 is within the range of representable integers for float32

    >>> floor_exact(2, np.float32)
    2.0

    As is 2**24-1 (the number of significand digits is 23 + 1 implicit)

    >>> floor_exact(2**24-1, np.float32) == 2**24-1
    True

    But 2**24+1 gives a number that float32 can't represent exactly

    >>> floor_exact(2**24+1, np.float32) == 2**24
    True

    As for the numpy floor function, negatives floor towards -inf

    >>> floor_exact(-2**24-1, np.float32) == -2**24-2
    True
    """
    val = int(val)
    flt_type = np.dtype(flt_type).type
    try:  # int_to_float deals with longdouble safely
        as_float = int_to_float(val, flt_type)
    except OverflowError:
        # Too big to convert at all: report infinity of the matching sign
        return (1 if val > 0 else -1) * np.inf
    if not np.isfinite(as_float):
        return as_float
    info = type_info(flt_type)
    if val - as_int(as_float) >= 0:  # floating point value <= val
        return as_float
    # Float casting made the value go up; step back down by one gap between
    # representable values at this magnitude
    step = 2**(floor_log2(val) - info['nmant'])
    assert step > 1
    return as_float - flt_type(step)
def ceil_exact(val, flt_type):
    """ Return nearest exact integer >= `val` in float type `flt_type`

    Implemented as the negated ``floor_exact`` of ``-val``.

    Parameters
    ----------
    val : int
        We have to pass val as an int rather than the floating point type
        because large integers cast as floating point may be rounded by the
        casting process.
    flt_type : numpy type
        numpy float type.

    Returns
    -------
    ceil_val : object
        value of floating point type `flt_type`, that is the nearest exact
        integer in this type such that `ceil_val` >= `val`.  Thus if `val` is
        exact in `flt_type`, `ceil_val` == `val`.

    Examples
    --------
    Obviously 2 is within the range of representable integers for float32

    >>> ceil_exact(2, np.float32)
    2.0

    As is 2**24-1 (the number of significand digits is 23 + 1 implicit)

    >>> ceil_exact(2**24-1, np.float32) == 2**24-1
    True

    But 2**24+1 gives a number that float32 can't represent exactly

    >>> ceil_exact(2**24+1, np.float32) == 2**24+2
    True

    As for the numpy ceil function, negatives ceil towards inf

    >>> ceil_exact(-2**24-1, np.float32) == -2**24
    True
    """
    floor_of_negated = floor_exact(-val, flt_type)
    return -floor_of_negated
def int_abs(arr):
    """ Absolute values of array taking care of max negative int values

    Parameters
    ----------
    arr : array-like

    Returns
    -------
    abs_arr : array
        array the same shape as `arr` in which all negative numbers have been
        changed to positive numbers with the magnitude.  Signed integer input
        gives output of the unsigned type of the same width; unsigned input is
        returned as is; other types go through ``np.absolute``.

    Examples
    --------
    This kind of thing is confusing in base numpy:

    >>> import numpy as np
    >>> np.abs(np.int8(-128))
    -128

    ``int_abs`` fixes that:

    >>> int_abs(np.int8(-128))
    128
    >>> int_abs(np.array([-128, 127], dtype=np.int8))
    array([128, 127], dtype=uint8)
    >>> int_abs(np.array([-128, 127], dtype=np.float32))
    array([ 128.,  127.], dtype=float32)
    """
    # asarray avoids a copy when possible but, unlike ``copy=False`` on
    # NumPy >= 2.0, does not raise when a copy is required (lists, scalars)
    arr = np.asarray(arr)
    dt = arr.dtype
    if dt.kind == 'u':
        return arr
    if dt.kind != 'i':
        return np.absolute(arr)
    # Unsigned type of the same width can hold abs() of the most negative int
    out = arr.astype(np.dtype(dt.str.replace('i', 'u')))
    # Pick the negated value where arr is negative, writing into the unsigned
    # output so the wrapped-around minimum lands on its true magnitude
    return np.choose(arr < 0, (arr, arr * -1), out=out)
def floor_log2(x):
    """ floor of log2 of abs(`x`)

    Embarrassingly, from http://en.wikipedia.org/wiki/Binary_logarithm

    Python and numpy integers are handled exactly via ``int.bit_length``;
    converting to a Python int first means fixed-width numpy integers cannot
    overflow.  Other values use repeated halving / doubling.

    Parameters
    ----------
    x : int or float

    Returns
    -------
    L : None or int
        floor of base 2 log of `x`.  None if `x` == 0.

    Examples
    --------
    >>> floor_log2(2**9+1)
    9
    >>> floor_log2(-2**9+1)
    8
    >>> floor_log2(0.5)
    -1
    >>> floor_log2(0) is None
    True
    """
    if isinstance(x, (int, np.integer)):
        magnitude = abs(int(x))
        if magnitude == 0:
            return None
        # An n-bit positive integer lies in [2**(n-1), 2**n)
        return magnitude.bit_length() - 1
    ip = 0
    rem = abs(x)
    if rem == 0:
        return None
    if rem > 1:
        # Halve until we drop below 2, counting the halvings
        while rem >= 2:
            ip += 1
            rem //= 2
        return ip
    # Values below 1: double until we reach 1, counting downwards
    while rem < 1:
        ip -= 1
        rem *= 2
    return ip
def best_float():
    """ Floating point type with best precision

    This is nearly always np.longdouble, except on Windows, where np.longdouble
    is Intel80 storage, but with float64 precision for calculations.  In that
    case we return float64 on the basis it's the fastest and smallest at the
    highest precision.

    Returns
    -------
    best_type : numpy type
        floating point type with highest precision
    """
    longdouble_nmant = type_info(np.longdouble)['nmant']
    float64_nmant = type_info(np.float64)['nmant']
    if longdouble_nmant <= float64_nmant:
        # longdouble buys no extra precision
        return np.float64
    if machine() == 'sparc64':  # sparc has crazy-slow float128
        return np.float64
    return np.longdouble
def have_binary128():
    """ True if we have a binary128 IEEE longdouble

    Returns
    -------
    tf : bool
        True if ``type_info`` reports 112 significand digits and a maximum
        exponent of 16384 for np.longdouble.
    """
    info = type_info(np.longdouble)
    signature = (info['nmant'], info['maxexp'])
    return signature == (112, 16384)
def ok_floats():
    """ Return floating point types sorted by precision

    Remove longdouble if it has no higher precision than float64

    Returns
    -------
    floats : list
        numpy floating point types, ordered by their ``nmant`` value from
        ``type_info``.
    """
    try:
        candidates = list(np.sctypes['float'])
    except AttributeError:
        # np.sctypes was removed in NumPy 2.0; build the list by name instead
        candidates = []
        for name in ('float16', 'float32', 'float64', 'longdouble'):
            flt = getattr(np, name, None)
            if flt is not None and flt not in candidates:
                candidates.append(flt)
    floats = sorted(candidates, key=lambda f: type_info(f)['nmant'])
    if best_float() != np.longdouble and np.longdouble in floats:
        floats.remove(np.longdouble)
    return floats
# Result of ``ok_floats()``, computed once when the module is imported
OK_FLOATS = ok_floats()
def able_int_type(values):
    """ Find the smallest integer numpy type to contain sequence `values`

    Prefers uint to int if minimum is >= 0

    Parameters
    ----------
    values : sequence
        sequence of integer values.  Any iterable is accepted; it is read
        once into a list.

    Returns
    -------
    itype : None or numpy type
        numpy integer type or None if no integer type holds all `values`

    Examples
    --------
    >>> able_int_type([0, 1]) == np.uint8
    True
    >>> able_int_type([-1, 1]) == np.int8
    True
    """
    # Materialize once: the values are scanned several times below
    values = list(values)
    # Any fractional part rules out every integer type
    if any(v % 1 for v in values):
        return None
    mn = min(values)
    mx = max(values)
    # Explicit type lists (smallest first) rather than np.sctypes, which was
    # removed in NumPy 2.0
    if mn >= 0:
        for ityp in (np.uint8, np.uint16, np.uint32, np.uint64):
            if mx <= np.iinfo(ityp).max:
                return ityp
    for ityp in (np.int8, np.int16, np.int32, np.int64):
        info = np.iinfo(ityp)
        if mn >= info.min and mx <= info.max:
            return ityp
    return None
def ulp(val=np.float64(1.0)):
    """ Return gap between `val` and nearest representable number of same type

    This is the value of a unit in the last place (ULP), and is similar in
    meaning to the MATLAB eps function.

    Parameters
    ----------
    val : scalar, optional
        scalar value of any numpy type.  Default is 1.0 (float64)

    Returns
    -------
    ulp_val : scalar
        gap between `val` and nearest representable number of same type.
        ``np.nan`` for non-finite `val`, 1 for integer types.

    Notes
    -----
    The wikipedia article on machine epsilon points out that the term *epsilon*
    can be used in the sense of a unit in the last place (ULP), or as the
    maximum relative rounding error.  The MATLAB ``eps`` function uses the ULP
    meaning, but this function is ``ulp`` rather than ``eps`` to avoid
    confusion between different meanings of *eps*.
    """
    value = np.array(val)
    if not np.isfinite(value):
        return np.nan
    if value.dtype.kind in ('i', 'u'):
        # Neighbouring integers are always one apart
        return 1
    magnitude = np.abs(value)
    info = type_info(value.dtype)
    exponent = floor_log2(magnitude)
    lowest_exponent = info['minexp']
    if exponent is None or exponent < lowest_exponent:  # subnormal
        exponent = lowest_exponent
    # 'nmant' value does not include implicit first bit
    return 2**(exponent - info['nmant'])
Jump to Line
Something went wrong with that request. Please try again.