Merge pull request #25387 from fengyuentau:complete-float16_t-renaming
Rename remaining float16_t for future-proofing #25387

Resolves comment: #25217 (comment).

`std::float16_t` and `std::bfloat16_t` are introduced in C++23: https://en.cppreference.com/w/cpp/types/floating-point. To stay future-proof (and to avoid clashing with the `float16_t` typedef that `arm_neon.h` already provides), the remaining internal uses are switched to `__fp16` in the NEON kernels and `cv::hfloat` elsewhere.
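
As a rough illustration (not part of this patch; only `cv::hfloat` and the guard shown in the cvdef.h hunk below come from the codebase), the sketch below shows how the possible owners of a half-precision name coexist once OpenCV stops declaring `float16_t` unconditionally:

```cpp
// Hypothetical user-side sketch: cv::hfloat never collides with the
// platform/standard half types, while the legacy cv::float16_t alias is
// only declared when none of them is already in play (see cvdef.h below).
#include <opencv2/core.hpp>

#ifdef __ARM_NEON
#include <arm_neon.h>   // arm_neon.h itself can declare float16_t (= __fp16)
#endif

#ifdef __STDCPP_FLOAT16_T__
#include <stdfloat>     // C++23: std::float16_t
#endif

int main()
{
    cv::hfloat h(0.5f);               // OpenCV's storage-only half type
    float f = static_cast<float>(h);  // explicit conversion back to float32
    return f == 0.5f ? 0 : 1;
}
```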

### Pull Request Readiness Checklist

See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request

- [x] I agree to contribute to the project under Apache 2 License.
- [x] To the best of my knowledge, the proposed patch is not based on code under GPL or another license that is incompatible with OpenCV
- [x] The PR is proposed to the proper branch
- [x] There is a reference to the original bug report and related work
- [x] There are accuracy tests, performance tests and test data in the opencv_extra repository, if applicable
      The patch to opencv_extra has the same branch name.
- [x] The feature is well documented and sample code can be built with the project CMake
fengyuentau committed Apr 11, 2024
1 parent 2c5b296 commit 197626a
Showing 7 changed files with 58 additions and 61 deletions.
2 changes: 2 additions & 0 deletions modules/core/include/opencv2/core/cvdef.h
@@ -900,7 +900,9 @@ inline hfloat hfloatFromBits(ushort w) {
#endif
}

+ #if !defined(__OPENCV_BUILD) && !(defined __STDCPP_FLOAT16_T__) && !(defined __ARM_NEON)
typedef hfloat float16_t;
+ #endif

}
#endif
14 changes: 6 additions & 8 deletions modules/dnn/src/layers/cpu_kernels/conv_block.simd.hpp
@@ -494,10 +494,9 @@ void convBlockMR1_F32(int np, const float * a, const float * b, float *c, const
void convBlock_F16(int np, const char * _a, const char * _b, char * _c, int ldc, bool init_c, int width,
const int convMR_fp16, const int convNR_fp16)
{
- typedef __fp16 float16_t;
- const float16_t* a = (const float16_t*)_a;
- const float16_t* b = (const float16_t*)_b;
- float16_t* c = (float16_t*)_c;
+ const __fp16* a = (const __fp16*)_a;
+ const __fp16* b = (const __fp16*)_b;
+ __fp16* c = (__fp16*)_c;
CV_Assert(convMR_fp16 == 8 && convNR_fp16 == 24);

float16x8_t c00 = vdupq_n_f16(0), c01 = c00, c02 = c00;
@@ -638,12 +637,11 @@ void convBlock_F16(int np, const char * _a, const char * _b, char * _c, int ldc,
void convBlockMR1_F16(int np, const char* _a, const char* _b, float *c, const float _bias, bool init_c,
const float minval, const float maxval, bool ifMinMaxAct, const int width, const int convNR_FP16)
{
- typedef __fp16 float16_t;
CV_Assert(convNR_FP16 == 24); // CONV_NR_FP16 = 24
- const float16_t* a = (const float16_t*)_a;
- const float16_t* b = (const float16_t*)_b;
+ const __fp16* a = (const __fp16*)_a;
+ const __fp16* b = (const __fp16*)_b;

- const float16_t bias = (float16_t)_bias;
+ const __fp16 bias = (__fp16)_bias;

float16x8_t c0 = vdupq_n_f16(bias), c1 = c0, c2 = c0;

2 changes: 1 addition & 1 deletion modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.cpp
@@ -85,7 +85,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _outpu
// works at FP 16.
CONV_WINO_ATOM = CONV_WINO_ATOM_F16;
CONV_WINO_NATOMS = CONV_WINO_NATOMS_F16;
- esz = sizeof(float16_t);
+ esz = sizeof(__fp16);
}
#endif

13 changes: 5 additions & 8 deletions modules/dnn/src/layers/cpu_kernels/conv_winograd_f63.simd.hpp
@@ -435,10 +435,9 @@ void winofunc_AtXA_8x8_F32(const float* inptr, int inpstep,
void winofunc_accum_F16(const char* _inwptr, const char* _wptr, char* _outbuf, int Cg, int iblock,
const int winoIblock, const int winoKblock, const int winoAtomF16, const int winoNatomF16)
{
- typedef __fp16 float16_t;
- const float16_t* inwptr = (const float16_t*)_inwptr;
- const float16_t* wptr = (const float16_t*)_wptr;
- float16_t* outbuf = (float16_t*)_outbuf;
+ const __fp16* inwptr = (const __fp16*)_inwptr;
+ const __fp16* wptr = (const __fp16*)_wptr;
+ __fp16* outbuf = (__fp16*)_outbuf;

CV_Assert(winoIblock == 6 && winoKblock == 4 && winoAtomF16 == 8);

@@ -591,8 +590,7 @@ void winofunc_accum_F16(const char* _inwptr, const char* _wptr, char* _outbuf, i
void winofunc_BtXB_8x8_F16(const float * inptr, int inpstep,
char * _outptr, int Cg, const int winoIblock, const int winoAtomF16)
{
- typedef __fp16 float16_t;
- float16_t* outptr = (float16_t*)_outptr;
+ __fp16* outptr = (__fp16*)_outptr;
float32x4_t x00 = vld1q_f32(inptr), x01 = vld1q_f32(inptr + 4);
float32x4_t x10 = vld1q_f32(inptr + inpstep), x11 = vld1q_f32(inptr + inpstep + 4);
float32x4_t x20 = vld1q_f32(inptr + inpstep*2), x21 = vld1q_f32(inptr + inpstep*2 + 4);
@@ -757,8 +755,7 @@ void winofunc_AtXA_8x8_F16(const char* _inptr, int inpstep,
float * bpptr, int bpstep, float* outptr, int outstep,
float bias, float minval, float maxval, bool ifMinMaxAct)
{
- typedef __fp16 float16_t;
- const float16_t* inptr = (const float16_t*)_inptr;
+ const __fp16* inptr = (const __fp16*)_inptr;

float32x4_t x00 = vcvt_f32_f16(vld1_f16(inptr)), x01 = vcvt_f32_f16(vld1_f16(inptr + 4));
float32x4_t x10 = vcvt_f32_f16(vld1_f16(inptr + inpstep)), x11 = vcvt_f32_f16(vld1_f16(inptr + inpstep + 4));
68 changes: 34 additions & 34 deletions modules/dnn/src/layers/cpu_kernels/convolution.cpp
@@ -26,7 +26,7 @@ void convBlockMR1_F32(int np, const float* a, const float* b, float *c, const fl

#ifdef CONV_ARM_FP16
// Fast convert float 32 to float16
- static inline void _cvt32f16f(const float* src, float16_t* dst, int len)
+ static inline void _cvt32f16f(const float* src, __fp16* dst, int len)
{
int j = 0;
const int VECSZ = 4;
@@ -60,7 +60,7 @@ static inline void _cvt32f16f(const float* src, float16_t* dst, int len)
vst1_f16(dst_FP16 + j, hv);
}
for( ; j < len; j++ )
- dst[j] = float16_t(src[j]);
+ dst[j] = __fp16(src[j]);
}
#endif

@@ -74,12 +74,12 @@ float* FastConv::getWeightsWino()
return alignPtr(weightsWinoBuf.data(), VEC_ALIGN);
}

- float16_t* FastConv::getWeightsFP16()
+ hfloat* FastConv::getWeightsFP16()
{
return alignPtr(weightsBuf_FP16.data(), VEC_ALIGN);
}

- float16_t* FastConv::getWeightsWinoFP16()
+ hfloat* FastConv::getWeightsWinoFP16()
{
return alignPtr(weightsWinoBuf_FP16.data(), VEC_ALIGN);
}
@@ -209,7 +209,7 @@ Ptr<FastConv> initFastConv(
if (conv->useFP16)
{
conv->weightsBuf_FP16.resize(nweights + VEC_ALIGN);
- auto weightsPtr_FP16 = conv->getWeightsFP16();
+ auto weightsPtr_FP16 = (__fp16*)conv->getWeightsFP16();

parallel_for_(Range(0, C), [&](const Range& r0){
for(int c = r0.start; c < r0.end; c++)
@@ -269,11 +269,11 @@

float* wptrWino = nullptr;
#ifdef CONV_ARM_FP16
- float16_t* wptrWino_FP16 = nullptr;
+ __fp16* wptrWino_FP16 = nullptr;
if (conv->useFP16)
{
conv->weightsWinoBuf_FP16.resize(nweights + VEC_ALIGN);
- wptrWino_FP16 = conv->getWeightsWinoFP16();
+ wptrWino_FP16 = (__fp16*)conv->getWeightsWinoFP16();
}
else
#endif
@@ -323,15 +323,15 @@ Ptr<FastConv> initFastConv(
#ifdef CONV_ARM_FP16
if (conv->useFP16)
{
- float16_t* wptr = wptrWino_FP16 + (g*Kg_nblocks + ki) * Cg *CONV_WINO_KBLOCK*CONV_WINO_AREA +
+ __fp16* wptr = wptrWino_FP16 + (g*Kg_nblocks + ki) * Cg *CONV_WINO_KBLOCK*CONV_WINO_AREA +
(c*CONV_WINO_KBLOCK + dk)*CONV_WINO_ATOM_F16;
for (int i = 0; i < CONV_WINO_NATOMS_F16; i++,
wptr += Cg * CONV_WINO_KBLOCK * CONV_WINO_ATOM_F16)
{
CV_Assert(wptrWino_FP16 <= wptr && wptr + CONV_WINO_ATOM_F16 <= wptrWino_FP16 + nweights);
for (int j = 0; j < CONV_WINO_ATOM_F16; j++)
{
- wptr[j] = (float16_t)kernelTm[i * CONV_WINO_ATOM_F16 + j];
+ wptr[j] = (__fp16)kernelTm[i * CONV_WINO_ATOM_F16 + j];
}
}
}
@@ -367,12 +367,12 @@ Ptr<FastConv> initFastConv(
int numStripsMR_FP16 = (Kg + CONV_MR_FP16 - 1) / CONV_MR_FP16;
int Kg_aligned_FP16 = numStripsMR_FP16 * CONV_MR_FP16;
size_t nweights_FP16 = ngroups * Kg_aligned_FP16 * DkHkWkCg;
- float16_t* weightsPtr_FP16 = nullptr;
+ __fp16* weightsPtr_FP16 = nullptr;

if (conv->useFP16)
{
conv->weightsBuf_FP16.resize(nweights_FP16 + VEC_ALIGN);
- weightsPtr_FP16 = conv->getWeightsFP16();
+ weightsPtr_FP16 = (__fp16*)conv->getWeightsFP16();
}
else
#endif
@@ -394,7 +394,7 @@ Ptr<FastConv> initFastConv(
int startK = si * CONV_MR_FP16;
CV_Assert(startK < Kg_aligned_FP16);

- float16_t* packed_wptr = weightsPtr_FP16 + DkHkWkCg * (startK + g * Kg_aligned_FP16);
+ __fp16* packed_wptr = weightsPtr_FP16 + DkHkWkCg * (startK + g * Kg_aligned_FP16);
int dk = Kg - startK < CONV_MR_FP16 ? Kg - startK : CONV_MR_FP16; // check if we need zero padding.

int k_idx = g*Kg + startK;
@@ -405,9 +405,9 @@ Ptr<FastConv> initFastConv(
const float* wptr = srcWeights + wstep * k_idx + c*Hk*Wk*Dk + hwd;
int k = 0;
for(; k < dk; k++, wptr += wstep)
- packed_wptr[k] = (float16_t)(*wptr);
+ packed_wptr[k] = (__fp16)(*wptr);
for(; k < CONV_MR_FP16; k++)
- packed_wptr[k] = (float16_t)0.f;
+ packed_wptr[k] = (__fp16)0.f;
}
}
}});
@@ -467,8 +467,8 @@ static inline void packData8(char*& inpbuf, float*& inptrIn, int& in_w, int& x0,
float* inptrInC = (float* )inptrIn;

#ifdef CONV_ARM_FP16
- float16_t* inpbufC_FP16 = (float16_t *)inpbufC;
- if (esz == sizeof(float16_t))
+ __fp16* inpbufC_FP16 = (__fp16 *)inpbufC;
+ if (esz == sizeof(__fp16))
{
if (stride_w == 1)
{
@@ -565,16 +565,16 @@ static inline void packData2(char *& inpbuf, float*& inptrIn, int& in_w, int& x0
float* inptrInC = inptrIn;

#ifdef CONV_ARM_FP16
- float16_t* inpbufC_FP16 = (float16_t *)inpbufC;
- if (esz == sizeof(float16_t))
+ __fp16* inpbufC_FP16 = (__fp16 *)inpbufC;
+ if (esz == sizeof(__fp16))
{
for (int k = 0; k < ksize; k++)
{
int k1 = ofstab[k];
float v0 = inptrInC[k1];
float v1 = inptrInC[k1 + stride_w];
- inpbufC_FP16[k*CONV_NR_FP16] = (float16_t)v0;
- inpbufC_FP16[k*CONV_NR_FP16+1] = (float16_t)v1;
+ inpbufC_FP16[k*CONV_NR_FP16] = (__fp16)v0;
+ inpbufC_FP16[k*CONV_NR_FP16+1] = (__fp16)v1;
}
} else
#endif
@@ -630,7 +630,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
if (useFP16)
{
for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz)
- _cvt32f16f(inptr, (float16_t *)inpbuf, CONV_NR);
+ _cvt32f16f(inptr, (__fp16 *)inpbuf, CONV_NR);
}
else
#endif
@@ -644,7 +644,7 @@
{
for (int c = 0; c < Cg; c++, inptr += inp_planesize, inpbuf += CONV_NR_esz)
{
- _cvt32f16f(inptr, (float16_t *)inpbuf, slice_len);
+ _cvt32f16f(inptr, (__fp16 *)inpbuf, slice_len);
}
}
else
@@ -704,11 +704,11 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
#ifdef CONV_ARM_FP16
if (useFP16)
{
- float16_t* inpbufC = (float16_t *)inpbuf + s0;
+ __fp16* inpbufC = (__fp16 *)inpbuf + s0;
for (int w = w0; w < w1; w++)
{
int imgofs = w*dilation_w;
- inpbufC[w*CONV_NR] = (float16_t)inptrInC[imgofs];
+ inpbufC[w*CONV_NR] = (__fp16)inptrInC[imgofs];
}
}
else
@@ -765,14 +765,14 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
#ifdef CONV_ARM_FP16
if (useFP16)
{
- float16_t* inpbufC = (float16_t *)inpbuf + s0;
+ __fp16* inpbufC = (__fp16 *)inpbuf + s0;

for (int h = h0; h < h1; h++)
{
for (int w = w0; w < w1; w++)
{
int imgofs = h*(dilation_h*Wi) + w*dilation_w;
- inpbufC[(h*Wk + w)*CONV_NR] = (float16_t)inptrInC[imgofs];
+ inpbufC[(h*Wk + w)*CONV_NR] = (__fp16)inptrInC[imgofs];
}
}
}
@@ -838,7 +838,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
#ifdef CONV_ARM_FP16
if (useFP16)
{
- float16_t* inpbufC = (float16_t* )inpbuf + s0;
+ __fp16* inpbufC = (__fp16* )inpbuf + s0;

for ( int d = d0; d < d1; d++)
{
@@ -847,7 +847,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
for (int w = w0; w < w1; w++)
{
int imgofs = d*dilation_d*HWi + h*(dilation_h*Wi) + w*dilation_w;
- inpbufC[((d*Hk + h)*Wk + w)*CONV_NR] = (float16_t)inptrInC[imgofs];
+ inpbufC[((d*Hk + h)*Wk + w)*CONV_NR] = (__fp16)inptrInC[imgofs];
}
}
}
@@ -889,7 +889,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
{
float* inpbuf_ki = (float* )inpbuf + k * CONV_NR * Cg + i;
#ifdef CONV_ARM_FP16
- float16_t * inpbuf_ki_FP16 = (float16_t *)inpbuf + k * CONV_NR * Cg + i;
+ __fp16 * inpbuf_ki_FP16 = (__fp16 *)inpbuf + k * CONV_NR * Cg + i;
#endif

int zi = z0 * stride_d + dz - pad_front;
@@ -1053,7 +1053,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
if (useFP16)
{
for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR, inptr_ki += inp_planesize)
- inpbuf_ki_FP16[0] = (float16_t)(*inptr_ki);
+ inpbuf_ki_FP16[0] = (__fp16)(*inptr_ki);
}
else
#endif
@@ -1069,7 +1069,7 @@ static inline void packInputData(char* inpbuf_task, float* inp, const int* ofsta
if (useFP16)
{
for (int c = 0; c < Cg; c++, inpbuf_ki_FP16 += CONV_NR)
- inpbuf_ki_FP16[0] = (float16_t)0.f;
+ inpbuf_ki_FP16[0] = (__fp16)0.f;
}
else
#endif
@@ -1257,7 +1257,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co
// works at FP 16.
CONV_NR = CONV_NR_FP16;
CONV_MR = CONV_MR_FP16;
- esz = sizeof(float16_t);
+ esz = sizeof(__fp16);
}
#endif

@@ -1511,7 +1511,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co

char *wptr = weights + (k0_block * DkHkWkCg + c0 * CONV_MR) * esz;
float *cptr = cbuf_task + stripe * CONV_NR;
- float16_t* cptr_f16 = (float16_t*)cbuf_task + stripe*CONV_NR;
+ hfloat* cptr_f16 = (hfloat*)cbuf_task + stripe*CONV_NR;
for (int k = k0_block; k < k1_block; k += CONV_MR,
wptr += DkHkWkCg * CONV_MR * esz, cptr += CONV_MR * ldc, cptr_f16 += CONV_MR * ldc)
{
@@ -1547,7 +1547,7 @@ void runFastConv(InputArray _input, OutputArray _output, const Ptr<FastConv>& co

size_t outofs = ((n * ngroups + g) * Kg + k0_block) * out_planesize + zyx0;
const float *cptr = cbuf_task;
- const float16_t *cptr_fp16 = (const float16_t *)cbuf_task;
+ const hfloat *cptr_fp16 = (const hfloat *)cbuf_task;
float *outptr = out + outofs;
const float *pbptr = fusedAddPtr0 ? fusedAddPtr0 + outofs : 0;

8 changes: 4 additions & 4 deletions modules/dnn/src/layers/cpu_kernels/convolution.hpp
@@ -62,10 +62,10 @@ struct FastConv
float* getWeights();
float* getWeightsWino();

- std::vector<float16_t> weightsBuf_FP16;
- std::vector<float16_t> weightsWinoBuf_FP16;
- float16_t* getWeightsFP16();
- float16_t* getWeightsWinoFP16();
+ std::vector<hfloat> weightsBuf_FP16;
+ std::vector<hfloat> weightsWinoBuf_FP16;
+ hfloat* getWeightsFP16();
+ hfloat* getWeightsWinoFP16();

int conv_type;
int conv_dim; // Flag for conv1d, conv2d, or conv3d.
12 changes: 6 additions & 6 deletions modules/dnn/src/onnx/onnx_graph_simplifier.cpp
@@ -1742,12 +1742,12 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
#endif
const ::google::protobuf::RepeatedField<int32_t> field = tensor_proto.int32_data();

- AutoBuffer<float16_t, 16> aligned_val;
+ AutoBuffer<hfloat, 16> aligned_val;
size_t sz = tensor_proto.int32_data().size();
aligned_val.allocate(sz);
- float16_t* bufPtr = aligned_val.data();
+ hfloat* bufPtr = aligned_val.data();

- float16_t *fp16Ptr = (float16_t *)field.data();
+ hfloat *fp16Ptr = (hfloat *)field.data();
for (int i = 0; i < sz; i++)
{
bufPtr[i] = fp16Ptr[i*2 + offset];
@@ -1759,11 +1759,11 @@ Mat getMatFromTensor(const opencv_onnx::TensorProto& tensor_proto)
char* val = const_cast<char*>(tensor_proto.raw_data().c_str());
#if CV_STRONG_ALIGNMENT
// Aligned pointer is required.
- AutoBuffer<float16_t, 16> aligned_val;
- if (!isAligned<sizeof(float16_t)>(val))
+ AutoBuffer<hfloat, 16> aligned_val;
+ if (!isAligned<sizeof(hfloat)>(val))
{
size_t sz = tensor_proto.raw_data().size();
- aligned_val.allocate(divUp(sz, sizeof(float16_t)));
+ aligned_val.allocate(divUp(sz, sizeof(hfloat)));
memcpy(aligned_val.data(), val, sz);
val = (char*)aligned_val.data();
}
