diff --git a/core/base/CMakeLists.txt b/core/base/CMakeLists.txt index df701fedeaa12..71168452e0e1d 100644 --- a/core/base/CMakeLists.txt +++ b/core/base/CMakeLists.txt @@ -199,6 +199,7 @@ set(BASE_SOURCES if(root7) set(BASE_HEADER_DIRS inc/ v7/inc/) list(APPEND BASE_HEADERS + ROOT/RFloat16.hxx ROOT/RDirectoryEntry.hxx ROOT/RIndexIter.hxx) target_sources(Core PRIVATE v7/src/RDirectory.cxx) diff --git a/core/base/v7/inc/ROOT/RFloat16.hxx b/core/base/v7/inc/ROOT/RFloat16.hxx new file mode 100644 index 0000000000000..6e1d109d39021 --- /dev/null +++ b/core/base/v7/inc/ROOT/RFloat16.hxx @@ -0,0 +1,154 @@ +// @(#)root/base + +/************************************************************************* + * Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. * + * All rights reserved. * + * * + * For the licensing terms see $ROOTSYS/LICENSE. * + * For the list of contributors see $ROOTSYS/README/CREDITS. * + *************************************************************************/ + +#include +#include + +#ifndef ROOT_RFloat16 +#define ROOT_RFloat16 + +/** + * Conversion functions between full- and half-precision floats. The code used here is taken (with some modifications) + * from the `half` C++ library (https://half.sourceforge.net/index.html), distributed under the MIT license. + * + * Original license: + * + * The MIT License + * + * Copyright (c) 2012-2021 Christian Rau + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef HALF_ENABLE_F16C_INTRINSICS +/// Enable F16C intruction set intrinsics. +/// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting +/// between half-precision and single-precision values which may result in improved performance. This will not perform +/// additional checks for support of the F16C instruction set, so an appropriate target platform is required when +/// enabling this feature. +/// +/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on +/// supporting platforms. +#define HALF_ENABLE_F16C_INTRINSICS __F16C__ +#endif +#if HALF_ENABLE_F16C_INTRINSICS +#include +#endif + +namespace ROOT { +namespace Experimental { +namespace Internal { +//////////////////////////////////////////////////////////////////////////////// +/// \brief Get the half-precision overflow. +/// +/// \param[in] value Half-precision value with sign bit only +/// +/// \return Rounded overflowing half-precision value +constexpr std::uint16_t GetOverflowedValue(std::uint16_t value = 0) +{ + return (value | 0x7C00); +} + +//////////////////////////////////////////////////////////////////////////////// +/// \brief Round the given half-precision number to the nearest representable value. +/// +/// \param[in] value The finite half-precision number to round +/// \param[in] guardBit The most significant discarded bit +/// \param[in] stickyBit Logical OR of all but the most significant discarded bits +/// +/// \return The nearest-rounded half-precision value +constexpr std::uint16_t GetRoundedValue(std::uint16_t value, int guardBit, int stickyBit) +{ + return (value + (guardBit & (stickyBit | value))); +} + +//////////////////////////////////////////////////////////////////////////////// +/// \brief Convert an IEEE single-precision float to half-precision. +/// +/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf). +/// +/// \param[in] value The single-precision value to convert +/// +/// \return The converted half-precision value +inline std::uint16_t FloatToHalf(float value) +{ +#if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), _MM_FROUND_TO_NEAREST_INT)); +#else + std::uint32_t fbits; + std::memcpy(&fbits, &value, sizeof(float)); + + std::uint16_t sign = (fbits >> 16) & 0x8000; + fbits &= 0x7FFFFFFF; + if (fbits >= 0x7F800000) + return sign | 0x7C00 | ((fbits > 0x7F800000) ? (0x200 | ((fbits >> 13) & 0x3FF)) : 0); + if (fbits >= 0x47800000) + return GetOverflowedValue(sign); + if (fbits >= 0x38800000) + return GetRoundedValue(sign | (((fbits >> 23) - 112) << 10) | ((fbits >> 13) & 0x3FF), (fbits >> 12) & 1, + (fbits & 0xFFF) != 0); + if (fbits >= 0x33000000) { + int i = 125 - (fbits >> 23); + fbits = (fbits & 0x7FFFFF) | 0x800000; + return GetRoundedValue(sign | (fbits >> (i + 1)), (fbits >> i) & 1, + (fbits & ((static_cast(1) << i) - 1)) != 0); + } + + return sign; +#endif +} + +//////////////////////////////////////////////////////////////////////////////// +/// \brief Convert an IEEE half-precision float to single-precision. +/// +/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf). +/// +/// \param[in] value The half-precision value to convert +/// +/// \return The converted single-precision value +inline float HalfToFloat(std::uint16_t value) +{ +#if HALF_ENABLE_F16C_INTRINSICS + return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value))); +#else + std::uint32_t fbits = static_cast(value & 0x8000) << 16; + int abs = value & 0x7FFF; + if (abs) { + fbits |= 0x38000000 << static_cast(abs >= 0x7C00); + for (; abs < 0x400; abs <<= 1, fbits -= 0x800000) + ; + fbits += static_cast(abs) << 13; + } + float out; + std::memcpy(&out, &fbits, sizeof(float)); + return out; +#endif +} +} // namespace Internal +} // namespace Experimental +} // namespace ROOT + +#endif // ROOT_RFloat16 diff --git a/tree/ntuple/v7/inc/ROOT/RColumnElement.hxx b/tree/ntuple/v7/inc/ROOT/RColumnElement.hxx index 2893b1682720a..dd48afb44f1ab 100644 --- a/tree/ntuple/v7/inc/ROOT/RColumnElement.hxx +++ b/tree/ntuple/v7/inc/ROOT/RColumnElement.hxx @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -583,6 +584,39 @@ public: void Unpack(void *dst, void *src, std::size_t count) const final; }; +template <> +class RColumnElement : public RColumnElementBase { +public: + static constexpr bool kIsMappable = false; + static constexpr std::size_t kSize = sizeof(float); + static constexpr std::size_t kBitsOnStorage = 16; + RColumnElement() : RColumnElementBase(kSize) {} + bool IsMappable() const final { return kIsMappable; } + std::size_t GetBitsOnStorage() const final { return kBitsOnStorage; } + + void Pack(void *dst, void *src, std::size_t count) const final + { + float *floatArray = reinterpret_cast(src); + std::uint16_t *uint16Array = reinterpret_cast(dst); + + for (std::size_t i = 0; i < count; ++i) { + uint16Array[i] = Internal::FloatToHalf(floatArray[i]); + ByteSwapIfNecessary(uint16Array[i]); + } + } + + void Unpack(void *dst, void *src, std::size_t count) const final + { + float *floatArray = reinterpret_cast(dst); + std::uint16_t *uint16Array = reinterpret_cast(src); + + for (std::size_t i = 0; i < count; ++i) { + ByteSwapIfNecessary(floatArray[i]); + floatArray[i] = Internal::HalfToFloat(uint16Array[i]); + } + } +}; + #define __RCOLUMNELEMENT_SPEC_BODY(CppT, BaseT, BitsOnStorage) \ static constexpr std::size_t kSize = sizeof(CppT); \ static constexpr std::size_t kBitsOnStorage = BitsOnStorage; \ @@ -703,6 +737,7 @@ std::unique_ptr RColumnElementBase::Generate(EColumnType typ case EColumnType::kBit: return std::make_unique>(); case EColumnType::kReal64: return std::make_unique>(); case EColumnType::kReal32: return std::make_unique>(); + case EColumnType::kReal16: return std::make_unique>(); case EColumnType::kInt64: return std::make_unique>(); case EColumnType::kUInt64: return std::make_unique>(); case EColumnType::kInt32: return std::make_unique>(); diff --git a/tree/ntuple/v7/inc/ROOT/RField.hxx b/tree/ntuple/v7/inc/ROOT/RField.hxx index 726853f74cc58..668d56a0fffe6 100644 --- a/tree/ntuple/v7/inc/ROOT/RField.hxx +++ b/tree/ntuple/v7/inc/ROOT/RField.hxx @@ -1643,8 +1643,9 @@ public: size_t GetValueSize() const final { return sizeof(float); } size_t GetAlignment() const final { return alignof(float); } void AcceptVisitor(Detail::RFieldVisitor &visitor) const final; -}; + void SetHalfPrecision(); +}; template <> class RField : public Detail::RFieldBase { diff --git a/tree/ntuple/v7/src/RColumnElement.cxx b/tree/ntuple/v7/src/RColumnElement.cxx index 6222c6bc89823..de1352eac669e 100644 --- a/tree/ntuple/v7/src/RColumnElement.cxx +++ b/tree/ntuple/v7/src/RColumnElement.cxx @@ -36,6 +36,8 @@ ROOT::Experimental::Detail::RColumnElementBase::Generate(EColumnType type) case EColumnType::kBit: return std::make_unique>(); case EColumnType::kReal64: return std::make_unique>(); case EColumnType::kReal32: return std::make_unique>(); + // TODO: Change to std::float16_t in-memory type once available (from C++23). + case EColumnType::kReal16: return std::make_unique>(); case EColumnType::kInt64: return std::make_unique>(); case EColumnType::kUInt64: return std::make_unique>(); case EColumnType::kInt32: return std::make_unique>(); @@ -72,6 +74,7 @@ std::size_t ROOT::Experimental::Detail::RColumnElementBase::GetBitsOnStorage(ECo case EColumnType::kBit: return 1; case EColumnType::kReal64: return 64; case EColumnType::kReal32: return 32; + case EColumnType::kReal16: return 16; case EColumnType::kInt64: return 64; case EColumnType::kUInt64: return 64; case EColumnType::kInt32: return 32; @@ -106,6 +109,7 @@ std::string ROOT::Experimental::Detail::RColumnElementBase::GetTypeName(EColumnT case EColumnType::kBit: return "Bit"; case EColumnType::kReal64: return "Real64"; case EColumnType::kReal32: return "Real32"; + case EColumnType::kReal16: return "Real16"; case EColumnType::kInt64: return "Int64"; case EColumnType::kUInt64: return "UInt64"; case EColumnType::kInt32: return "Int32"; diff --git a/tree/ntuple/v7/src/RField.cxx b/tree/ntuple/v7/src/RField.cxx index 2751d43b67b4b..04857827880c1 100644 --- a/tree/ntuple/v7/src/RField.cxx +++ b/tree/ntuple/v7/src/RField.cxx @@ -966,7 +966,8 @@ void ROOT::Experimental::RField::AcceptVisitor(Detail::RFieldVisitor &visi const ROOT::Experimental::Detail::RFieldBase::RColumnRepresentations & ROOT::Experimental::RField::GetColumnRepresentations() const { - static RColumnRepresentations representations({{EColumnType::kSplitReal32}, {EColumnType::kReal32}}, {}); + static RColumnRepresentations representations( + {{EColumnType::kSplitReal32}, {EColumnType::kReal32}, {EColumnType::kReal16}}, {}); return representations; } @@ -986,6 +987,10 @@ void ROOT::Experimental::RField::AcceptVisitor(Detail::RFieldVisitor &vis visitor.VisitFloatField(*this); } +void ROOT::Experimental::RField::SetHalfPrecision() +{ + SetColumnRepresentative({EColumnType::kReal16}); +} //------------------------------------------------------------------------------ diff --git a/tree/ntuple/v7/test/ntuple_packing.cxx b/tree/ntuple/v7/test/ntuple_packing.cxx index de4cfde00e5e9..67d3af09aae9f 100644 --- a/tree/ntuple/v7/test/ntuple_packing.cxx +++ b/tree/ntuple/v7/test/ntuple_packing.cxx @@ -90,6 +90,31 @@ TEST(Packing, Bitfield) } } +TEST(Packing, HalfPrecisionFloat) +{ + ROOT::Experimental::Detail::RColumnElement element; + element.Pack(nullptr, nullptr, 0); + element.Unpack(nullptr, nullptr, 0); + + float in = 3.14; + std::uint16_t b = 0; + element.Pack(&b, &in, 1); + EXPECT_EQ(0x4248, b); // Expected bit representation: 0b01000010 01001000 + float out = 0.; + element.Unpack(&out, &b, 1); + EXPECT_FLOAT_EQ(3.140625, out); + + float in4[] = {0.1, 0.2, 0.3, 0.4}; + std::uint64_t b4; + element.Pack(&b4, &in4, 4); + float out4[] = {0., 0., 0., 0.}; + element.Unpack(&out4, &b4, 4); + EXPECT_FLOAT_EQ(0.099975586, out4[0]); + EXPECT_FLOAT_EQ(0.199951171, out4[1]); + EXPECT_FLOAT_EQ(0.300048828, out4[2]); + EXPECT_FLOAT_EQ(0.399902343, out4[3]); +} + TEST(Packing, RColumnSwitch) { ROOT::Experimental::Detail::RColumnElement(*model, "uint32"); AddField(*model, "uint64"); AddField(*model, "float"); + AddField(*model, "float16"); AddField(*model, "double"); AddField(*model, "index32"); AddField(*model, "index64"); @@ -218,9 +244,10 @@ TEST(Packing, OnDiskEncoding) *e->Get("uint16") = 1; *e->Get("uint32") = 0x00010203; *e->Get("uint64") = 0x0001020304050607L; - *e->Get("float") = std::nextafterf(1.f, 2.f); // 0 01111111 00000000000000000000001 == 0x3f800001 - *e->Get("double") = std::nextafter(1., 2.); // 0x3ff0 0000 0000 0001 - *e->Get("index32") = 39916801; // 0x0261 1501 + *e->Get("float") = std::nextafterf(1.f, 2.f); // 0 01111111 00000000000000000000001 == 0x3f800001 + *e->Get("float16") = std::nextafterf(1.f, 2.f); // 0 01111111 00000000000000000000001 == 0x3f800001 + *e->Get("double") = std::nextafter(1., 2.); // 0x3ff0 0000 0000 0001 + *e->Get("index32") = 39916801; // 0x0261 1501 *e->Get("index64") = 0x0706050403020100L; e->Get("str")->assign("abc"); @@ -233,6 +260,7 @@ TEST(Packing, OnDiskEncoding) *e->Get("uint32") = 0x04050607; *e->Get("uint64") = 0x08090a0b0c0d0e0fL; *e->Get("float") = std::nextafterf(1.f, 0.f); // 0 01111110 11111111111111111111111 = 0x3f7fffff + *e->Get("float16") = std::nextafterf(0.1f, 0.2f); // 0 01111011 10011001100110011001110 = 0x3dccccce *e->Get("double") = std::numeric_limits::max(); // 0x7fef ffff ffff ffff *e->Get("index32") = 39916808; // d(previous) == 7 *e->Get("index64") = 0x070605040302010DL; // d(previous) == 13 @@ -282,6 +310,10 @@ TEST(Packing, OnDiskEncoding) unsigned char expFloat[] = {0x01, 0xff, 0x00, 0xff, 0x80, 0x7f, 0x3f, 0x3f}; EXPECT_EQ(memcmp(sealedPage.fBuffer, expFloat, sizeof(expFloat)), 0); + source->LoadSealedPage(fnGetColumnId("float16"), RClusterIndex(0, 0), sealedPage); + unsigned char expFloat16[] = {0x00, 0x3c, 0x66, 0x2e}; + EXPECT_EQ(memcmp(sealedPage.fBuffer, expFloat16, sizeof(expFloat16)), 0); + source->LoadSealedPage(fnGetColumnId("double"), RClusterIndex(0, 0), sealedPage); unsigned char expDouble[] = {0x01, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0xf0, 0xef, 0x3f, 0x7f}; diff --git a/tree/ntuple/v7/test/ntuple_types.cxx b/tree/ntuple/v7/test/ntuple_types.cxx index 30fdcbfa32c39..948f94a904049 100644 --- a/tree/ntuple/v7/test/ntuple_types.cxx +++ b/tree/ntuple/v7/test/ntuple_types.cxx @@ -886,6 +886,58 @@ TEST(RNTuple, Casting) EXPECT_EQ(137, *fieldCast2); } +TEST(RNTuple, HalfPrecisionFloat) +{ + FileRaii fileGuard("test_ntuple_half_precision_float.root"); + + // TODO: Add std::float16 tests once available (from C++23) + auto f1Fld = RFieldBase::Create("f1", "float").Unwrap(); + dynamic_cast *>(f1Fld.get())->SetHalfPrecision(); + EXPECT_EQ(EColumnType::kReal16, f1Fld->GetColumnRepresentative()[0]); + EXPECT_EQ("float", f1Fld->GetType()); + + auto fVecFld = RFieldBase::Create("fVec", "std::vector").Unwrap(); + dynamic_cast *>(fVecFld->GetSubFields()[0])->SetHalfPrecision(); + EXPECT_EQ(EColumnType::kReal16, fVecFld->GetSubFields()[0]->GetColumnRepresentative()[0]); + + auto model = RNTupleModel::Create(); + model->AddField(std::move(f1Fld)); + model->AddField(std::move(fVecFld)); + + { + auto writer = RNTupleWriter::Recreate(std::move(model), "ntuple", fileGuard.GetPath()); + auto f1 = writer->GetModel()->GetDefaultEntry()->Get("f1"); + auto fVec = writer->GetModel()->GetDefaultEntry()->Get>("fVec"); + *f1 = 0.1f; + *fVec = {0.1f, 0.2f}; + writer->Fill(); + *f1 = 4245.5f; + *fVec = {std::numeric_limits::max(), std::numeric_limits::min(), + std::numeric_limits::lowest(), std::numeric_limits::denorm_min()}; + writer->Fill(); + } + + auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath()); + + EXPECT_EQ(4, ROOT::Experimental::Detail::RColumnElementBase::Generate(EColumnType::kReal16)->GetSize()); + + const auto *desc = reader->GetDescriptor(); + EXPECT_EQ(EColumnType::kReal16, (*desc->GetColumnIterable(desc->FindFieldId("f1")).begin()).GetModel().GetType()); + + auto f1 = reader->GetModel()->GetDefaultEntry()->Get("f1"); + auto fVec = reader->GetModel()->GetDefaultEntry()->Get>("fVec"); + reader->LoadEntry(0); + EXPECT_FLOAT_EQ(0.0999755859375f, *f1); + EXPECT_FLOAT_EQ(0.0999755859375f, (*fVec)[0]); + EXPECT_FLOAT_EQ(0.199951171875f, (*fVec)[1]); + reader->LoadEntry(1); + EXPECT_FLOAT_EQ(4244.f, *f1); + EXPECT_FLOAT_EQ(INFINITY, (*fVec)[0]); + EXPECT_FLOAT_EQ(0.0f, (*fVec)[1]); + EXPECT_FLOAT_EQ(-INFINITY, (*fVec)[2]); + EXPECT_FLOAT_EQ(0.0f, (*fVec)[3]); +} + TEST(RNTuple, Double32) { FileRaii fileGuard("test_ntuple_double32.root");