Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/base/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ set(BASE_SOURCES
if(root7)
set(BASE_HEADER_DIRS inc/ v7/inc/)
list(APPEND BASE_HEADERS
ROOT/RFloat16.hxx
ROOT/RDirectoryEntry.hxx
ROOT/RIndexIter.hxx)
target_sources(Core PRIVATE v7/src/RDirectory.cxx)
Expand Down
154 changes: 154 additions & 0 deletions core/base/v7/inc/ROOT/RFloat16.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
// @(#)root/base

/*************************************************************************
* Copyright (C) 1995-2023, Rene Brun and Fons Rademakers. *
* All rights reserved. *
* *
* For the licensing terms see $ROOTSYS/LICENSE. *
* For the list of contributors see $ROOTSYS/README/CREDITS. *
*************************************************************************/

#include <cstdint>
#include <cstring>

#ifndef ROOT_RFloat16
#define ROOT_RFloat16

/**
* Conversion functions between full- and half-precision floats. The code used here is taken (with some modifications)
* from the `half` C++ library (https://half.sourceforge.net/index.html), distributed under the MIT license.
*
* Original license:
*
* The MIT License
*
* Copyright (c) 2012-2021 Christian Rau
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/

#ifndef HALF_ENABLE_F16C_INTRINSICS
/// Enable F16C intruction set intrinsics.
/// Defining this to 1 enables the use of [F16C compiler intrinsics](https://en.wikipedia.org/wiki/F16C) for converting
/// between half-precision and single-precision values which may result in improved performance. This will not perform
/// additional checks for support of the F16C instruction set, so an appropriate target platform is required when
/// enabling this feature.
///
/// Unless predefined it will be enabled automatically when the `__F16C__` symbol is defined, which some compilers do on
/// supporting platforms.
#define HALF_ENABLE_F16C_INTRINSICS __F16C__
#endif
#if HALF_ENABLE_F16C_INTRINSICS
#include <immintrin.h>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious, does this actually enable intrinsics on a "standard platform"?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not by default. ROOT would have to be compiled with -mf16c, but this doesn't seem to play nice with projectroot.test.test_TFormulaTests when added to the CMake configuration. It might be worth it to further investigate and come up with a solution, though.

#endif

namespace ROOT {
namespace Experimental {
namespace Internal {
////////////////////////////////////////////////////////////////////////////////
/// \brief Get the half-precision overflow.
///
/// \param[in] value Half-precision value with sign bit only
///
/// \return Rounded overflowing half-precision value
constexpr std::uint16_t GetOverflowedValue(std::uint16_t value = 0)
{
return (value | 0x7C00);
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Round the given half-precision number to the nearest representable value.
///
/// \param[in] value The finite half-precision number to round
/// \param[in] guardBit The most significant discarded bit
/// \param[in] stickyBit Logical OR of all but the most significant discarded bits
///
/// \return The nearest-rounded half-precision value
constexpr std::uint16_t GetRoundedValue(std::uint16_t value, int guardBit, int stickyBit)
{
return (value + (guardBit & (stickyBit | value)));
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Convert an IEEE single-precision float to half-precision.
///
/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf).
///
/// \param[in] value The single-precision value to convert
///
/// \return The converted half-precision value
inline std::uint16_t FloatToHalf(float value)
{
#if HALF_ENABLE_F16C_INTRINSICS
return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value), _MM_FROUND_TO_NEAREST_INT));
#else
std::uint32_t fbits;
std::memcpy(&fbits, &value, sizeof(float));

std::uint16_t sign = (fbits >> 16) & 0x8000;
fbits &= 0x7FFFFFFF;
if (fbits >= 0x7F800000)
return sign | 0x7C00 | ((fbits > 0x7F800000) ? (0x200 | ((fbits >> 13) & 0x3FF)) : 0);
if (fbits >= 0x47800000)
return GetOverflowedValue(sign);
if (fbits >= 0x38800000)
return GetRoundedValue(sign | (((fbits >> 23) - 112) << 10) | ((fbits >> 13) & 0x3FF), (fbits >> 12) & 1,
(fbits & 0xFFF) != 0);
if (fbits >= 0x33000000) {
int i = 125 - (fbits >> 23);
fbits = (fbits & 0x7FFFFF) | 0x800000;
return GetRoundedValue(sign | (fbits >> (i + 1)), (fbits >> i) & 1,
(fbits & ((static_cast<std::uint32_t>(1) << i) - 1)) != 0);
}

return sign;
#endif
}

////////////////////////////////////////////////////////////////////////////////
/// \brief Convert an IEEE half-precision float to single-precision.
///
/// Credit for this goes to [Jeroen van der Zijp](http://fox-toolkit.org/ftp/fasthalffloatconversion.pdf).
///
/// \param[in] value The half-precision value to convert
///
/// \return The converted single-precision value
inline float HalfToFloat(std::uint16_t value)
{
#if HALF_ENABLE_F16C_INTRINSICS
return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value)));
#else
std::uint32_t fbits = static_cast<std::uint32_t>(value & 0x8000) << 16;
int abs = value & 0x7FFF;
if (abs) {
fbits |= 0x38000000 << static_cast<unsigned>(abs >= 0x7C00);
for (; abs < 0x400; abs <<= 1, fbits -= 0x800000)
;
fbits += static_cast<std::uint32_t>(abs) << 13;
}
float out;
std::memcpy(&out, &fbits, sizeof(float));
return out;
#endif
}
} // namespace Internal
} // namespace Experimental
} // namespace ROOT

#endif // ROOT_RFloat16
35 changes: 35 additions & 0 deletions tree/ntuple/v7/inc/ROOT/RColumnElement.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <ROOT/RColumnModel.hxx>
#include <ROOT/RConfig.hxx>
#include <ROOT/RError.hxx>
#include <ROOT/RFloat16.hxx>
#include <ROOT/RNTupleUtil.hxx>

#include <Byteswap.h>
Expand Down Expand Up @@ -583,6 +584,39 @@ public:
void Unpack(void *dst, void *src, std::size_t count) const final;
};

template <>
class RColumnElement<float, EColumnType::kReal16> : public RColumnElementBase {
public:
static constexpr bool kIsMappable = false;
static constexpr std::size_t kSize = sizeof(float);
static constexpr std::size_t kBitsOnStorage = 16;
RColumnElement() : RColumnElementBase(kSize) {}
bool IsMappable() const final { return kIsMappable; }
std::size_t GetBitsOnStorage() const final { return kBitsOnStorage; }

void Pack(void *dst, void *src, std::size_t count) const final
{
float *floatArray = reinterpret_cast<float *>(src);
std::uint16_t *uint16Array = reinterpret_cast<std::uint16_t *>(dst);

for (std::size_t i = 0; i < count; ++i) {
uint16Array[i] = Internal::FloatToHalf(floatArray[i]);
ByteSwapIfNecessary(uint16Array[i]);
}
}

void Unpack(void *dst, void *src, std::size_t count) const final
{
float *floatArray = reinterpret_cast<float *>(dst);
std::uint16_t *uint16Array = reinterpret_cast<std::uint16_t *>(src);

for (std::size_t i = 0; i < count; ++i) {
ByteSwapIfNecessary(floatArray[i]);
floatArray[i] = Internal::HalfToFloat(uint16Array[i]);
}
}
};

#define __RCOLUMNELEMENT_SPEC_BODY(CppT, BaseT, BitsOnStorage) \
static constexpr std::size_t kSize = sizeof(CppT); \
static constexpr std::size_t kBitsOnStorage = BitsOnStorage; \
Expand Down Expand Up @@ -703,6 +737,7 @@ std::unique_ptr<RColumnElementBase> RColumnElementBase::Generate(EColumnType typ
case EColumnType::kBit: return std::make_unique<RColumnElement<CppT, EColumnType::kBit>>();
case EColumnType::kReal64: return std::make_unique<RColumnElement<CppT, EColumnType::kReal64>>();
case EColumnType::kReal32: return std::make_unique<RColumnElement<CppT, EColumnType::kReal32>>();
case EColumnType::kReal16: return std::make_unique<RColumnElement<CppT, EColumnType::kReal16>>();
case EColumnType::kInt64: return std::make_unique<RColumnElement<CppT, EColumnType::kInt64>>();
case EColumnType::kUInt64: return std::make_unique<RColumnElement<CppT, EColumnType::kUInt64>>();
case EColumnType::kInt32: return std::make_unique<RColumnElement<CppT, EColumnType::kInt32>>();
Expand Down
3 changes: 2 additions & 1 deletion tree/ntuple/v7/inc/ROOT/RField.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -1643,8 +1643,9 @@ public:
size_t GetValueSize() const final { return sizeof(float); }
size_t GetAlignment() const final { return alignof(float); }
void AcceptVisitor(Detail::RFieldVisitor &visitor) const final;
};

void SetHalfPrecision();
};

template <>
class RField<double> : public Detail::RFieldBase {
Expand Down
4 changes: 4 additions & 0 deletions tree/ntuple/v7/src/RColumnElement.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ ROOT::Experimental::Detail::RColumnElementBase::Generate<void>(EColumnType type)
case EColumnType::kBit: return std::make_unique<RColumnElement<bool, EColumnType::kBit>>();
case EColumnType::kReal64: return std::make_unique<RColumnElement<double, EColumnType::kReal64>>();
case EColumnType::kReal32: return std::make_unique<RColumnElement<float, EColumnType::kReal32>>();
// TODO: Change to std::float16_t in-memory type once available (from C++23).
case EColumnType::kReal16: return std::make_unique<RColumnElement<float, EColumnType::kReal16>>();
case EColumnType::kInt64: return std::make_unique<RColumnElement<std::int64_t, EColumnType::kInt64>>();
case EColumnType::kUInt64: return std::make_unique<RColumnElement<std::uint64_t, EColumnType::kUInt64>>();
case EColumnType::kInt32: return std::make_unique<RColumnElement<std::int32_t, EColumnType::kInt32>>();
Expand Down Expand Up @@ -72,6 +74,7 @@ std::size_t ROOT::Experimental::Detail::RColumnElementBase::GetBitsOnStorage(ECo
case EColumnType::kBit: return 1;
case EColumnType::kReal64: return 64;
case EColumnType::kReal32: return 32;
case EColumnType::kReal16: return 16;
case EColumnType::kInt64: return 64;
case EColumnType::kUInt64: return 64;
case EColumnType::kInt32: return 32;
Expand Down Expand Up @@ -106,6 +109,7 @@ std::string ROOT::Experimental::Detail::RColumnElementBase::GetTypeName(EColumnT
case EColumnType::kBit: return "Bit";
case EColumnType::kReal64: return "Real64";
case EColumnType::kReal32: return "Real32";
case EColumnType::kReal16: return "Real16";
case EColumnType::kInt64: return "Int64";
case EColumnType::kUInt64: return "UInt64";
case EColumnType::kInt32: return "Int32";
Expand Down
7 changes: 6 additions & 1 deletion tree/ntuple/v7/src/RField.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,8 @@ void ROOT::Experimental::RField<bool>::AcceptVisitor(Detail::RFieldVisitor &visi
const ROOT::Experimental::Detail::RFieldBase::RColumnRepresentations &
ROOT::Experimental::RField<float>::GetColumnRepresentations() const
{
static RColumnRepresentations representations({{EColumnType::kSplitReal32}, {EColumnType::kReal32}}, {});
static RColumnRepresentations representations(
{{EColumnType::kSplitReal32}, {EColumnType::kReal32}, {EColumnType::kReal16}}, {});
return representations;
}

Expand All @@ -986,6 +987,10 @@ void ROOT::Experimental::RField<float>::AcceptVisitor(Detail::RFieldVisitor &vis
visitor.VisitFloatField(*this);
}

void ROOT::Experimental::RField<float>::SetHalfPrecision()
{
SetColumnRepresentative({EColumnType::kReal16});
}

//------------------------------------------------------------------------------

Expand Down
38 changes: 35 additions & 3 deletions tree/ntuple/v7/test/ntuple_packing.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,31 @@ TEST(Packing, Bitfield)
}
}

TEST(Packing, HalfPrecisionFloat)
{
ROOT::Experimental::Detail::RColumnElement<float, ROOT::Experimental::EColumnType::kReal16> element;
element.Pack(nullptr, nullptr, 0);
element.Unpack(nullptr, nullptr, 0);

float in = 3.14;
std::uint16_t b = 0;
element.Pack(&b, &in, 1);
EXPECT_EQ(0x4248, b); // Expected bit representation: 0b01000010 01001000
float out = 0.;
element.Unpack(&out, &b, 1);
EXPECT_FLOAT_EQ(3.140625, out);

float in4[] = {0.1, 0.2, 0.3, 0.4};
std::uint64_t b4;
element.Pack(&b4, &in4, 4);
float out4[] = {0., 0., 0., 0.};
element.Unpack(&out4, &b4, 4);
EXPECT_FLOAT_EQ(0.099975586, out4[0]);
EXPECT_FLOAT_EQ(0.199951171, out4[1]);
EXPECT_FLOAT_EQ(0.300048828, out4[2]);
EXPECT_FLOAT_EQ(0.399902343, out4[3]);
}

TEST(Packing, RColumnSwitch)
{
ROOT::Experimental::Detail::RColumnElement<ROOT::Experimental::RColumnSwitch,
Expand Down Expand Up @@ -201,6 +226,7 @@ TEST(Packing, OnDiskEncoding)
AddField<std::uint32_t, ROOT::Experimental::EColumnType::kSplitUInt32>(*model, "uint32");
AddField<std::uint64_t, ROOT::Experimental::EColumnType::kSplitUInt64>(*model, "uint64");
AddField<float, ROOT::Experimental::EColumnType::kSplitReal32>(*model, "float");
AddField<float, ROOT::Experimental::EColumnType::kReal16>(*model, "float16");
AddField<double, ROOT::Experimental::EColumnType::kSplitReal64>(*model, "double");
AddField<ClusterSize_t, ROOT::Experimental::EColumnType::kSplitIndex32>(*model, "index32");
AddField<ClusterSize_t, ROOT::Experimental::EColumnType::kSplitIndex64>(*model, "index64");
Expand All @@ -218,9 +244,10 @@ TEST(Packing, OnDiskEncoding)
*e->Get<std::uint16_t>("uint16") = 1;
*e->Get<std::uint32_t>("uint32") = 0x00010203;
*e->Get<std::uint64_t>("uint64") = 0x0001020304050607L;
*e->Get<float>("float") = std::nextafterf(1.f, 2.f); // 0 01111111 00000000000000000000001 == 0x3f800001
*e->Get<double>("double") = std::nextafter(1., 2.); // 0x3ff0 0000 0000 0001
*e->Get<ClusterSize_t>("index32") = 39916801; // 0x0261 1501
*e->Get<float>("float") = std::nextafterf(1.f, 2.f); // 0 01111111 00000000000000000000001 == 0x3f800001
*e->Get<float>("float16") = std::nextafterf(1.f, 2.f); // 0 01111111 00000000000000000000001 == 0x3f800001
*e->Get<double>("double") = std::nextafter(1., 2.); // 0x3ff0 0000 0000 0001
*e->Get<ClusterSize_t>("index32") = 39916801; // 0x0261 1501
*e->Get<ClusterSize_t>("index64") = 0x0706050403020100L;
e->Get<std::string>("str")->assign("abc");

Expand All @@ -233,6 +260,7 @@ TEST(Packing, OnDiskEncoding)
*e->Get<std::uint32_t>("uint32") = 0x04050607;
*e->Get<std::uint64_t>("uint64") = 0x08090a0b0c0d0e0fL;
*e->Get<float>("float") = std::nextafterf(1.f, 0.f); // 0 01111110 11111111111111111111111 = 0x3f7fffff
*e->Get<float>("float16") = std::nextafterf(0.1f, 0.2f); // 0 01111011 10011001100110011001110 = 0x3dccccce
*e->Get<double>("double") = std::numeric_limits<double>::max(); // 0x7fef ffff ffff ffff
*e->Get<ClusterSize_t>("index32") = 39916808; // d(previous) == 7
*e->Get<ClusterSize_t>("index64") = 0x070605040302010DL; // d(previous) == 13
Expand Down Expand Up @@ -282,6 +310,10 @@ TEST(Packing, OnDiskEncoding)
unsigned char expFloat[] = {0x01, 0xff, 0x00, 0xff, 0x80, 0x7f, 0x3f, 0x3f};
EXPECT_EQ(memcmp(sealedPage.fBuffer, expFloat, sizeof(expFloat)), 0);

source->LoadSealedPage(fnGetColumnId("float16"), RClusterIndex(0, 0), sealedPage);
unsigned char expFloat16[] = {0x00, 0x3c, 0x66, 0x2e};
EXPECT_EQ(memcmp(sealedPage.fBuffer, expFloat16, sizeof(expFloat16)), 0);

source->LoadSealedPage(fnGetColumnId("double"), RClusterIndex(0, 0), sealedPage);
unsigned char expDouble[] = {0x01, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,
0x00, 0xff, 0x00, 0xff, 0xf0, 0xef, 0x3f, 0x7f};
Expand Down
Loading