Skip to content

Commit

Permalink
Add half_t and bhalf_t limits (kokkos#5778)
Browse files Browse the repository at this point in the history
* Add half_t and bhalf_t limits

* Try using constexpr

* Revert "Try using constexpr"

This reverts commit 1b399bd.

* Fix norm_min_helper value type

* Add bias to epsilon when bhalf_t is float

* Remove bias. Prevent compiler from optimizing out cast.

* Fix typo

* Attempt to fix CI Werror

* core/unit_test: Add inline comment

* Add half_t docs
  • Loading branch information
e10harvey committed Mar 30, 2023
1 parent 89bdbaa commit 0476985
Show file tree
Hide file tree
Showing 2 changed files with 337 additions and 11 deletions.
317 changes: 316 additions & 1 deletion core/src/Kokkos_Half.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
#define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_HALF
#endif

#include <type_traits>
#include <Kokkos_Macros.hpp>
#include <Kokkos_NumericTraits.hpp>

#include <type_traits>
#include <iosfwd> // istream & ostream for extraction and insertion ops
#include <limits>
#include <string>

Expand Down Expand Up @@ -1005,6 +1007,319 @@ cast_from_bhalf(bhalf_t val) {
#else
#define KOKKOS_BHALF_T_IS_FLOAT false
#endif // KOKKOS_IMPL_BHALF_TYPE_DEFINED
////////////// BEGIN HALF_T (binary16) limits //////////////
// clang-format off
// '\brief:' below are from the libc definitions for float and double:
// https://www.gnu.org/software/libc/manual/html_node/Floating-Point-Parameters.html
//
// The arithmetic encoding and equations below are derived from:
// Ref1: https://en.wikipedia.org/wiki/Single-precision_floating-point_format
// Ref2: https://en.wikipedia.org/wiki/Exponent_bias
// Ref3: https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html
//
// Some background on the magic numbers 2**10=1024 and 2**15=32768 used below:
//
// IMPORTANT: For IEEE754 encodings, see Ref1.
//
// For binary16, we have B = 2 and p = 11 (10 stored fraction bits plus the implicit leading bit), with 2**16 possible bit patterns.
// The binary16 format is: [s e e e e e f f f f f f f f f f]
// bit index: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
// s: signed bit (1 bit)
// e: exponent bits (5 bits)
// f: fractional bits (10 bits)
//
// E_bias = 2**(n_exponent_bits - 1) - 1 = 2**(5 - 1) - 1 = 15
// E_subnormal = 00000 (base2)
// E_infinity = 11111 (base2)
// E_min = 1 - E_bias = 1 - 15 = -14
// E_max = 2**5 - 2 - E_bias = 30 - 15 = 15 (the all-ones exponent 11111 is reserved for infinity and NaN)
//
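// 2**10=1024 is the denominator of the smallest fraction step in binary16,
// i.e. the weight of the least significant fraction bit (bit 0):
// [s e e e e e f f f f f f f f f f]
// [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
// which contributes 2**-10 = 1 / 2**10 to the significand.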
//
//
// 2**15 is the largest exponent factor representable in binary16, for example the
// largest integer value representable in binary16 is:
// [s e e e e e f f f f f f f f f f]
// [0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1]
// which is: 2**(2**4 + 2**3 + 2**2 + 2**1 - 15) * (1 + 2**-10 + 2**-9 + 2**-8 + 2**-7 + 2**-6 + 2**-5 + 2**-4 + 2**-3 + 2**-2 + 2**-1) =
// 2**15 * (1 + 0.9990234375) =
// 65504.0
//

/// \brief: Infinity.
///
/// base2 encoding: bits [10,14] set
/// #define KOKKOS_IMPL_HALF_T_HUGE_VALH 0x7c00
/// Binary16 encoding:
/// [s e e e e e f f f f f f f f f f]
/// [0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0]
/// bit index: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
#if defined(KOKKOS_HALF_T_IS_FLOAT) && !KOKKOS_HALF_T_IS_FLOAT

/// \brief: Lowest (most negative) finite half precision number
///
/// This is the negation of the largest finite value representable in binary16.
///
/// Binary16 encoding:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [1  1  1  1  1  0  1 1 1 1 1 1 1 1 1 1]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// and in base10: -1 * 2**(30 - 15) * (1 + (2**10 - 1) / 2**10)
///              = -2**15 * 1.9990234375
///              = -65504
template <>
struct Kokkos::Experimental::Impl::finite_min_helper<
    Kokkos::Experimental::half_t> {
  static constexpr float value{-65504.0F};
};

/// \brief: Largest finite half precision number
///
/// Biased exponent 11110 (the largest one not reserved for infinity/NaN)
/// combined with an all-ones fraction.
///
/// Binary16 encoding:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [0  1  1  1  1  0  1 1 1 1 1 1 1 1 1 1]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// and in base10: 1 * 2**(30 - 15) * (1 + (2**10 - 1) / 2**10)
///              = 2**15 * 1.9990234375
///              = 65504
template <>
struct Kokkos::Experimental::Impl::finite_max_helper<
    Kokkos::Experimental::half_t> {
  static constexpr float value{65504.0F};
};

/// \brief: Machine epsilon for half precision, i.e. the gap between 1 and
/// the next larger value representable in binary16
///
/// Encoding of the smallest binary16 number greater than 1:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [0  0  1  1  1  1  0 0 0 0 0 0 0 0 0 1]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// and in base10: 1 * 2**(15 - 15) * (1 + 2**-10)
///              = 1.0009765625
///
/// Hence epsilon = 1.0009765625 - 1 = 2**-10 = 0.0009765625.
template <>
struct Kokkos::Experimental::Impl::epsilon_helper<
    Kokkos::Experimental::half_t> {
  static constexpr float value{0.0009765625F};
};

/// \brief: Largest possible rounding error, measured in ULPs
///
/// The maximum rounding error of half a unit in the last place is used.
///
/// Reference: https://docs.oracle.com/cd/E19957-01/806-3568/ncg_goldberg.html#689
template <>
struct Kokkos::Experimental::Impl::round_error_helper<
    Kokkos::Experimental::half_t> {
  static constexpr float value{0.5F};
};

/// \brief: Smallest positive normalized half precision number
///
/// Stdc defines this as the minimum normalized positive floating
/// point number that is representable in the type (here: binary16).
///
/// Encoding of the smallest positive normalized binary16 number:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [0  0  0  0  0  1  0 0 0 0 0 0 0 0 0 0]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// and in base10: 1 * 2**(1 - 15) * (1 + 0)
///              = 2**-14
///              = 0.00006103515625
template <>
struct Kokkos::Experimental::Impl::norm_min_helper<
    Kokkos::Experimental::half_t> {
  static constexpr float value{0.00006103515625F};
};

/// \brief: Quiet NaN (not-a-number) for half precision
///
/// In the IEEE 754 binary formats a NaN has all exponent bits set and a
/// non-zero fraction. As recommended by IEEE 754-2008, a quiet NaN has the
/// most significant fraction bit set. (All exponent bits set with a zero
/// fraction encodes infinity, not NaN.)
///
/// One quiet NaN in binary16:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [0  1  1  1  1  1  1 0 0 0 0 0 0 0 0 0]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// The helper stores its value as a float, so a float quiet NaN is used
/// rather than an integer bit pattern: initializing a float from an integer
/// literal converts the integer's numeric value (0xfc000 would become
/// 1032192.0F) and can never produce a NaN.
/// NOTE(review): assumes the float -> half_t conversion maps NaN to NaN - confirm.
template <>
struct Kokkos::Experimental::Impl::quiet_NaN_helper<
    Kokkos::Experimental::half_t> {
  static constexpr float value = std::numeric_limits<float>::quiet_NaN();
};

/// \brief: Signaling NaN (not-a-number) for half precision
///
/// In the IEEE 754 binary formats a NaN has all exponent bits set and a
/// non-zero fraction. As recommended by IEEE 754-2008, a signaling NaN has
/// the most significant fraction bit clear and at least one other fraction
/// bit set.
///
/// One signaling NaN in binary16:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [0  1  1  1  1  1  0 1 0 0 0 0 0 0 0 0]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// The helper stores its value as a float, so a float signaling NaN is used
/// rather than an integer bit pattern: initializing a float from an integer
/// literal converts the integer's numeric value (0xfe000 would become
/// 1040384.0F) and can never produce a NaN.
/// NOTE(review): a float -> half_t conversion may quiet a signaling NaN on
/// some platforms - confirm whether callers rely on the signaling property.
template <>
struct Kokkos::Experimental::Impl::signaling_NaN_helper<
    Kokkos::Experimental::half_t> {
  static constexpr float value = std::numeric_limits<float>::signaling_NaN();
};

/// \brief: Number of digits in the mantissa that can be represented
/// without losing precision.
///
/// Stdc defines this as the number of base-RADIX digits in the floating point
/// mantissa of the data type (here: binary16).
///
/// Binary16 stores 10 fraction bits and has one implicit leading 1,
/// giving 10 + 1 = 11 digits.
template <>
struct Kokkos::Experimental::Impl::digits_helper<Kokkos::Experimental::half_t> {
  static constexpr int value{11};
};

/// \brief: "The number of base-10 digits that can be represented by the type T without change"
/// Reference: https://en.cppreference.com/w/cpp/types/numeric_limits/digits10.
///
/// "For base-radix types, it is the value of digits() (digits - 1 for floating-point types) multiplied by log10(radix) and rounded down."
/// Reference: https://en.cppreference.com/w/cpp/types/numeric_limits/digits10.
///
/// For binary16 this is: floor((11 - 1) * log10(2)) = floor(3.01...) = 3
template <>
struct Kokkos::Experimental::Impl::digits10_helper<
    Kokkos::Experimental::half_t> {
  static constexpr int value{3};
};

/// \brief: Base (radix) of the exponent representation.
///
/// Stdc defines this as the value of the base, or radix, of the exponent
/// representation. Binary16 is a base-2 format.
template <>
struct Kokkos::Experimental::Impl::radix_helper<Kokkos::Experimental::half_t> {
  static constexpr int value{2};
};

/// \brief: Smallest possible exponent value
///
/// Stdc defines this as the smallest possible exponent value for the type.
/// More precisely, it is the minimum negative integer such that the radix
/// raised to this power minus 1 can be represented as a normalized floating
/// point number of the type (here: binary16).
///
/// Smallest positive normalized number in binary16:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [0  0  0  0  0  1  0 0 0 0 0 0 0 0 0 0]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// and in base10: 1 * 2**(1 - 15) * (1 + 0)
///              = 2**-14
///
/// Adding the offset of one from (C11 5.2.4.2.2) to -14 gives -13.
template <>
struct Kokkos::Experimental::Impl::min_exponent_helper<
    Kokkos::Experimental::half_t> {
  static constexpr int value{-13};
};

/// \brief: Largest possible exponent value
///
/// Largest power of two representable in binary16:
///             [s  e  e  e  e  e  f f f f f f f f f f]
///             [0  1  1  1  1  0  0 0 0 0 0 0 0 0 0 0]
/// bit index:   15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
///
/// and in base10: 1 * 2**(2**4 + 2**3 + 2**2 + 2**1 - 15) * (1 + 0)
///              = 2**(30 - 15)
///              = 2**15
///
/// Adding the offset of one from (C11 5.2.4.2.2) to 15 gives 16.
template <>
struct Kokkos::Experimental::Impl::max_exponent_helper<
    Kokkos::Experimental::half_t> {
  static constexpr int value{16};
};
#endif
////////////// END HALF_T (binary16) limits //////////////

////////////// BEGIN BHALF_T (bfloat16) limits //////////////
#if defined(KOKKOS_BHALF_T_IS_FLOAT) && !KOKKOS_BHALF_T_IS_FLOAT
// Lowest (most negative) finite bhalf number; the negation of the largest
// finite bfloat16 value. The double literal is converted to float.
template <>
struct Kokkos::Experimental::Impl::finite_min_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr float value = static_cast<float>(-3.38953139e38);
};
// Largest finite bhalf number. The double literal is converted to float.
template <>
struct Kokkos::Experimental::Impl::finite_max_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr float value = static_cast<float>(3.38953139e38);
};
// Machine epsilon for bhalf: 1/2^7 = 2**-7, the gap between 1 and the next
// larger value when 7 fraction bits are stored.
template <>
struct Kokkos::Experimental::Impl::epsilon_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr float value{0.0078125F};
};
// Largest possible rounding error, in ULPs (half a unit in the last place).
template <>
struct Kokkos::Experimental::Impl::round_error_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr float value{0.5F};
};
// Smallest positive normalized bhalf number (2**-126).
// The double literal is converted to float.
template <>
struct Kokkos::Experimental::Impl::norm_min_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr float value = static_cast<float>(1.1754494351e-38);
};
// Quiet NaN (not-a-number) for bhalf.
//
// The helper stores its value as a float, so a float quiet NaN is used rather
// than an integer bit pattern: initializing a float from an integer literal
// converts the integer's numeric value (0x7fc000 would become 8372224.0F) and
// can never produce a NaN.
// NOTE(review): assumes the float -> bhalf_t conversion maps NaN to NaN - confirm.
template <>
struct Kokkos::Experimental::Impl::quiet_NaN_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr float value = std::numeric_limits<float>::quiet_NaN();
};
// Signaling NaN (not-a-number) for bhalf.
//
// The helper stores its value as a float, so a float signaling NaN is used
// rather than an integer bit pattern: initializing a float from an integer
// literal converts the integer's numeric value (0x7fe000 would become
// 8380416.0F) and can never produce a NaN.
// NOTE(review): a float -> bhalf_t conversion may quiet a signaling NaN on
// some platforms - confirm whether callers rely on the signaling property.
template <>
struct Kokkos::Experimental::Impl::signaling_NaN_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr float value = std::numeric_limits<float>::signaling_NaN();
};
// Number of base-radix digits in the mantissa that can be represented
// without losing precision.
//
// bfloat16 stores 7 fraction bits and has one implicit leading 1, giving
// 7 + 1 = 8 digits (the same counting that yields 11 for binary16).
template <>
struct Kokkos::Experimental::Impl::digits_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr int value = 8;
};
// Number of base-10 digits that can be represented without change:
// floor((digits - 1) * log10(radix)) with digits = 8 and radix = 2, i.e.
// floor(7 * log10(2)) = floor(2.107...) = 2
template <>
struct Kokkos::Experimental::Impl::digits10_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr int value = 2;
};
// Base (radix) of the exponent representation; bhalf is a base-2 format.
template <>
struct Kokkos::Experimental::Impl::radix_helper<Kokkos::Experimental::bhalf_t> {
  static constexpr int value{2};
};
// Smallest possible exponent value, including the offset of one
// from (C11 5.2.4.2.2): -126 + 1 = -125.
template <>
struct Kokkos::Experimental::Impl::min_exponent_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr int value{-125};
};
// Largest possible exponent value, including the offset of one
// from (C11 5.2.4.2.2): 127 + 1 = 128.
template <>
struct Kokkos::Experimental::Impl::max_exponent_helper<
    Kokkos::Experimental::bhalf_t> {
  static constexpr int value{128};
};
#endif
////////////// END BHALF_T (bfloat16) limits //////////////

#ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_HALF
#undef KOKKOS_IMPL_PUBLIC_INCLUDE
#undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_HALF
Expand Down
31 changes: 21 additions & 10 deletions core/unit_test/TestHalfOperators.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,26 @@
#ifndef TESTHALFOPERATOR_HPP_
#define TESTHALFOPERATOR_HPP_
namespace Test {
#define FP16_EPSILON 0.0009765625F // 1/2^10
#define BF16_EPSILON 0.0078125F // 1/2^7
// Make half_t / bhalf_t usable unqualified in the test helpers below.
using namespace Kokkos::Experimental;
// Execution space under test, supplied through the TEST_EXECSPACE macro.
using ExecutionSpace = TEST_EXECSPACE;
// Element type of the views declared below.
using ScalarType = double;
// 1D view of ScalarType associated with the execution space under test.
using ViewType = Kokkos::View<ScalarType*, ExecutionSpace>;
// 1D view of ScalarType in Kokkos::HostSpace.
using ViewTypeHost = Kokkos::View<ScalarType*, Kokkos::HostSpace>;
// Identity helper: takes a half_t by const reference and hands the very same
// reference back to the caller.
KOKKOS_FUNCTION
const half_t& accept_ref(const half_t& a) {
  return a;
}
// Takes a half_t by const reference and returns it converted to double via
// static_cast. Used to compute the expected PASS_BY_REF value.
KOKKOS_FUNCTION
double accept_ref_expected(const half_t& a) {
  const double widened = static_cast<double>(a);
  return widened;
}
#if !KOKKOS_BHALF_T_IS_FLOAT
// Identity helper: takes a bhalf_t by const reference and hands the very same
// reference back to the caller.
KOKKOS_FUNCTION
const bhalf_t& accept_ref(const bhalf_t& a) {
  return a;
}
// Takes a bhalf_t by const reference and returns it converted to double via
// static_cast.
KOKKOS_FUNCTION
double accept_ref_expected(const bhalf_t& a) {
  const double widened = static_cast<double>(a);
  return widened;
}
#endif // !KOKKOS_BHALF_T_IS_FLOAT

enum OP_TESTS {
Expand Down Expand Up @@ -886,8 +894,16 @@ struct Functor_TestHalfOperators {
// actual_lhs(TW) = h_lhs <=> h_rhs; // Need C++20?
// expected_lhs(TW) = d_lhs <=> d_rhs; // Need C++20?

actual_lhs(PASS_BY_REF) = static_cast<double>(accept_ref(h_lhs));
expected_lhs(PASS_BY_REF) = d_lhs;
actual_lhs(PASS_BY_REF) = static_cast<double>(accept_ref(h_lhs));

// Use accept_ref and accept_ref_expected to ensure the compiler
// does not optimize out the casts half_type -> double -> half_type.
// Note that these casts are accompanied by rounding. For the bhalf_t
// epsilon, these rounding policies used for casting is enough to cause
// the unit tests to fail.
// In short, one cannot simply assign static_cast<double>(h_lhs) to
// expected_lhs(PASS_BY_REF).
expected_lhs(PASS_BY_REF) = accept_ref_expected(h_lhs);

half_tmp = static_cast<float>(h_lhs);
tmp_ptr = &(tmp_lhs = half_tmp);
Expand All @@ -910,12 +926,7 @@ struct Functor_TestHalfOperators {

template <class half_type>
void __test_half_operators(half_type h_lhs, half_type h_rhs) {
double epsilon = FLT_EPSILON;

if (std::is_same<half_type, Kokkos::Experimental::half_t>::value)
epsilon = FP16_EPSILON;
if (std::is_same<half_type, Kokkos::Experimental::bhalf_t>::value)
epsilon = BF16_EPSILON;
double epsilon = Kokkos::Experimental::epsilon<half_type>::value;

Functor_TestHalfOperators<ViewType, half_type> f_device(h_lhs, h_rhs);
Functor_TestHalfOperators<ViewTypeHost, half_type> f_host(h_lhs, h_rhs);
Expand Down

0 comments on commit 0476985

Please sign in to comment.