From 7fb3666428168327216488ec33c4af28530ab77e Mon Sep 17 00:00:00 2001
From: "TONG, Zhigao" <tongzhigao@pingcap.com>
Date: Sat, 11 May 2024 18:19:09 +0800
Subject: [PATCH] expression: fix casting REAL type to STRING type (#16975)

close tikv/tikv#16974

expression: fix casting REAL type to STRING type
- unify the behavior of converting real types to strings in tidb#53129 and tikv.
- use ryu lib in tikv to implement casting real type to str type.
- use decimal exponent format when `abs(num) >= 1e15 or (num is not zero and abs(num) < 1e-15)`

Signed-off-by: TONG, Zhigao <tongzhigao@pingcap.com>
---
 Cargo.lock                                  |   5 +-
 components/tidb_query_expr/Cargo.toml       |   1 +
 components/tidb_query_expr/src/impl_cast.rs | 475 ++++++++++++++++++--
 3 files changed, 449 insertions(+), 32 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index e164bb68e1c..70ce103f6b7 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5030,9 +5030,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.4"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed3d612bc64430efeb3f7ee6ef26d590dce0c43249217bddc62112540c7941e1"
+checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
 
 [[package]]
 name = "safemem"
@@ -6425,6 +6425,7 @@ dependencies = [
  "profiler",
  "protobuf",
  "regex",
+ "ryu",
  "safemem",
  "serde",
  "serde_json",
diff --git a/components/tidb_query_expr/Cargo.toml b/components/tidb_query_expr/Cargo.toml
index 29c25957c69..5c3fa3744bd 100644
--- a/components/tidb_query_expr/Cargo.toml
+++ b/components/tidb_query_expr/Cargo.toml
@@ -22,6 +22,7 @@ num-traits = "0.2"
 openssl = { workspace = true }
 protobuf = "2"
 regex = "1.1"
+ryu = "1.0"
 safemem = { version = "0.3", default-features = false }
 serde = "1.0"
 serde_json = "1.0"
diff --git a/components/tidb_query_expr/src/impl_cast.rs b/components/tidb_query_expr/src/impl_cast.rs
index da5da737453..20cfe93ccee 100644
--- a/components/tidb_query_expr/src/impl_cast.rs
+++ b/components/tidb_query_expr/src/impl_cast.rs
@@ -133,8 +133,11 @@ fn get_cast_fn_rpn_meta(
             }
         }
         (EvalType::Real, EvalType::Bytes) => {
-            if FieldTypeAccessor::tp(from_field_type) == FieldTypeTp::Float {
+            let tp = FieldTypeAccessor::tp(from_field_type);
+            if tp == FieldTypeTp::Float {
                 cast_float_real_as_string_fn_meta()
+            } else if tp == FieldTypeTp::Double {
+                cast_double_real_as_string_fn_meta()
             } else {
                 cast_any_as_string_fn_meta::<Real>()
             }
@@ -697,6 +700,238 @@ fn cast_uint_as_string(
     }
 }
 
+mod ryu_strconv {
+
+    pub trait FloatExpFormat: ryu::Float {
+        fn is_exp_format(&self) -> bool;
+    }
+    impl FloatExpFormat for f32 {
+        fn is_exp_format(&self) -> bool {
+            const EXP_FORMAT_BIG: f32 = 1e15;
+            const EXP_FORMAT_SMALL: f32 = 1e-15;
+
+            let abs = self.abs();
+            (abs) >= EXP_FORMAT_BIG || ((abs) != 0.0 && (abs) < EXP_FORMAT_SMALL)
+        }
+    }
+    impl FloatExpFormat for f64 {
+        fn is_exp_format(&self) -> bool {
+            const EXP_FORMAT_BIG: f64 = 1e15;
+            const EXP_FORMAT_SMALL: f64 = 1e-15;
+
+            let abs = self.abs();
+            (abs) >= EXP_FORMAT_BIG || ((abs) != 0.0 && (abs) < EXP_FORMAT_SMALL)
+        }
+    }
+
+    pub fn format_float<F: FloatExpFormat>(f: F) -> String {
+        let mut b = ryu::Buffer::new();
+        let str = {
+            let str = b.format(f);
+            if str == "NaN" {
+                return "NaN".to_owned();
+            } else if str == "-inf" {
+                return "-Inf".to_owned();
+            } else if str == "inf" {
+                return "+Inf".to_owned();
+            }
+            // strip a trailing ".0" so integral values print without a fraction
+            let ss: &[u8] = str.as_bytes();
+            let mut new_str = str;
+            if ss.len() >= 2 {
+                let i = ss.len() - 2;
+                if ss[i] == b'.' && ss[i + 1] == b'0' {
+                    new_str = &str[..i];
+                }
+            };
+            new_str
+        };
+
+        let ss: &[u8] = str.as_bytes();
+        let mut exp_pos = -1i32;
+        let neg = ss[0] == b'-';
+        let (mut bg, mut ed) = (0usize, ss.len());
+        if neg {
+            bg += 1;
+        }
+
+        // zero ("0" or "-0") needs no further formatting; return it as is
+        if ed - bg == 1 && ss[bg] == b'0' {
+            return str.to_owned();
+        }
+
+        // check whether the string is already in exponent format
+        for i in 0..ss.len() {
+            if ss[i] == b'e' {
+                exp_pos = i as i32;
+                break;
+            }
+        }
+
+        // check whether exponent format is required; return early if the string already matches
+        let is_exp_format = f.is_exp_format();
+        if is_exp_format {
+            if exp_pos >= 0 {
+                return str.to_owned();
+            }
+        } else if exp_pos < 0 {
+            return str.to_owned();
+        }
+
+        let mut exp10 = 0i32;
+
+        if exp_pos >= 0 {
+            exp10 = str[exp_pos as usize + 1..].parse().unwrap();
+            ed = exp_pos as usize;
+        }
+
+        let (mut int_bg, mut int_ed) = (bg, ed);
+        let (mut float_bg, float_ed) = (ed, ed);
+
+        for i in bg..ed {
+            if ss[i] == b'.' {
+                int_ed = i;
+                float_bg = i + 1;
+                break;
+            }
+        }
+
+        if int_ed - int_bg > 1 {
+            exp10 += (int_ed - (int_bg + 1)) as i32;
+        } else if ss[int_bg] == b'0' {
+            int_bg += 1;
+
+            let mut new_float_bg = float_bg;
+            for i in float_bg..float_ed {
+                exp10 -= 1;
+                if ss[i] != b'0' {
+                    new_float_bg = i;
+                    break;
+                }
+            }
+            float_bg = new_float_bg;
+        }
+
+        {
+            let mut t = Buff::new();
+            if neg {
+                t.put_neg();
+            }
+
+            if is_exp_format {
+                if int_ed > int_bg {
+                    t.put(ss[int_bg]);
+                    int_bg += 1;
+                    t.put_dot();
+                    t.put_slice(&ss[int_bg..int_ed]);
+                    t.put_slice(&ss[float_bg..float_ed]);
+                } else {
+                    t.put(ss[float_bg]);
+                    float_bg += 1;
+                    t.put_dot();
+                    t.put_slice(&ss[float_bg..float_ed]);
+                }
+                t.trim_tail_zero();
+                t.trim();
+                t.put_exp10(exp10);
+            } else if exp10 < 0 {
+                exp10 = -exp10;
+                t.put_zero();
+                t.put_dot();
+                exp10 -= 1;
+
+                while exp10 != 0 {
+                    t.put_zero();
+                    exp10 -= 1;
+                }
+                t.put_slice(&ss[int_bg..int_ed]);
+                t.put_slice(&ss[float_bg..float_ed]);
+            } else {
+                debug_assert_eq!(int_ed - int_bg, 1);
+                t.put_slice(&ss[int_bg..int_ed]);
+                if exp10 < (float_ed - float_bg) as i32 {
+                    t.put_slice(&ss[float_bg..float_bg + exp10 as usize]);
+                    t.put_dot();
+                    float_bg += exp10 as usize;
+                    t.put_slice(&ss[float_bg..float_ed]);
+                } else {
+                    t.put_slice(&ss[float_bg..float_ed]);
+                    exp10 -= (float_ed - float_bg) as i32;
+                    while exp10 != 0 {
+                        t.put_zero();
+                        exp10 -= 1;
+                    }
+                }
+            }
+
+            t.into_string()
+        }
+    }
+
+    struct Buff {
+        buff: [u8; 35],
+        size: usize,
+    }
+
+    impl Buff {
+        fn new() -> Self {
+            Self {
+                buff: [0u8; 35],
+                size: 0,
+            }
+        }
+        fn trim(&mut self) {
+            if self.buff[self.size - 1] == b'.' {
+                self.size -= 1;
+            }
+        }
+        fn trim_tail_zero(&mut self) {
+            while self.size > 0 && self.buff[self.size - 1] == b'0' {
+                self.size -= 1;
+            }
+        }
+        fn put_slice(&mut self, s: &[u8]) {
+            self.buff[self.size..self.size + s.len()].copy_from_slice(s);
+            self.size += s.len();
+        }
+        fn put(&mut self, c: u8) {
+            self.buff[self.size] = c;
+            self.size += 1;
+        }
+        fn put_zero(&mut self) {
+            self.put(b'0')
+        }
+        fn put_dot(&mut self) {
+            self.put(b'.')
+        }
+        fn put_neg(&mut self) {
+            self.put(b'-')
+        }
+        fn into_string(self) -> String {
+            String::from_utf8(self.buff[..self.size].to_vec()).unwrap()
+        }
+        fn put_exp10(&mut self, mut e10: i32) {
+            self.put(b'e');
+            let mut str_e10: [u8; 5] = [0; 5];
+            let mut str_e10_size = 0;
+            if e10 < 0 {
+                e10 = -e10;
+                self.put_neg();
+            }
+            while e10 != 0 {
+                str_e10[str_e10_size] = (e10 % 10 + (b'0' as i32)) as u8;
+                str_e10_size += 1;
+                e10 /= 10;
+            }
+            let mut p = str_e10_size as i32 - 1;
+            while p >= 0 {
+                self.put(str_e10[p as usize]);
+                p -= 1;
+            }
+        }
+    }
+}
+
 #[rpn_fn(nullable, capture = [ctx, extra])]
 #[inline]
 fn cast_float_real_as_string(
@@ -708,7 +943,24 @@ fn cast_float_real_as_string(
         None => Ok(None),
         Some(val) => {
             let val = val.into_inner() as f32;
-            let val = val.to_string().into_bytes();
+            let val = ryu_strconv::format_float(val).into_bytes();
+            cast_as_string_helper(ctx, extra, val)
+        }
+    }
+}
+
+#[rpn_fn(nullable, capture = [ctx, extra])]
+#[inline]
+fn cast_double_real_as_string(
+    ctx: &mut EvalContext,
+    extra: &RpnFnCallExtra,
+    val: Option<&Real>,
+) -> Result<Option<Bytes>> {
+    match val {
+        None => Ok(None),
+        Some(val) => {
+            let val = val.into_inner();
+            let val = ryu_strconv::format_float(val).into_bytes();
             cast_as_string_helper(ctx, extra, val)
         }
     }
@@ -4357,37 +4609,200 @@ mod tests {
     }
 
     #[test]
-    fn test_float_real_as_string() {
-        test_none_with_ctx_and_extra(cast_float_real_as_string);
+    fn test_real_as_string() {
+        {
+            test_none_with_ctx_and_extra(cast_float_real_as_string);
 
-        let cs: Vec<(f32, Vec<u8>, String)> = vec![
-            (
-                f32::MAX,
-                f32::MAX.to_string().into_bytes(),
-                f32::MAX.to_string(),
-            ),
-            (1.0f32, 1.0f32.to_string().into_bytes(), 1.0f32.to_string()),
-            (
-                1.1113f32,
-                1.1113f32.to_string().into_bytes(),
-                1.1113f32.to_string(),
-            ),
-            (0.1f32, 0.1f32.to_string().into_bytes(), 0.1f32.to_string()),
-        ];
+            let cs: Vec<(f32, String)> = vec![
+                (f32::NAN, "NaN".to_string()),
+                (f32::INFINITY, "+Inf".to_string()),
+                (-f32::INFINITY, "-Inf".to_string()),
+            ];
 
-        let ref_cs = helper_get_cs_ref(&cs);
+            for (val, s) in &cs {
+                assert_eq!(*s, ryu_strconv::format_float(*val));
+            }
 
-        test_as_string_helper(
-            ref_cs,
-            |ctx, extra, val| {
-                cast_float_real_as_string(
-                    ctx,
-                    extra,
-                    val.map(|x| Real::new(f64::from(*x)).unwrap()).as_ref(),
-                )
-            },
-            "cast_float_real_as_string",
-        );
+            let cs: Vec<(f32, String)> = vec![
+                (f32::NAN, "NaN".to_string()),
+                (f32::INFINITY, "inf".to_string()),
+                (-f32::INFINITY, "-inf".to_string()),
+            ];
+
+            for (val, s) in &cs {
+                let mut b = ryu::Buffer::new();
+                assert_eq!(*s, b.format(*val));
+            }
+
+            assert_eq!(
+                4474.7812f64.to_string(),
+                ryu_strconv::format_float(4474.7812f64)
+            );
+
+            assert_eq!(4474.7812f32.to_string(), "4474.7813".to_string());
+
+            assert_eq!(
+                "4474.7812".to_string(),
+                ryu_strconv::format_float(4474.7812f32)
+            );
+
+            let cs: Vec<(f32, Vec<u8>, String)> = vec![
+                (1e15, "1e15".to_string().into_bytes(), "1e15".to_string()),
+                (-1e15, "-1e15".to_string().into_bytes(), "-1e15".to_string()),
+                (
+                    9.99999e14,
+                    "999999000000000".to_string().into_bytes(),
+                    "999999000000000".to_string(),
+                ),
+                (
+                    -9.99999e14,
+                    "-999999000000000".to_string().into_bytes(),
+                    "-999999000000000".to_string(),
+                ),
+                (
+                    1e15 - 1.0,
+                    "1e15".to_string().into_bytes(),
+                    "1e15".to_string(),
+                ),
+                (
+                    f32::MIN,
+                    "-3.4028235e38".to_string().into_bytes(),
+                    "-3.4028235e38".to_string(),
+                ),
+                (
+                    f32::MAX,
+                    "3.4028235e38".to_string().into_bytes(),
+                    "3.4028235e38".to_string(),
+                ),
+                (
+                    f32::MIN_POSITIVE,
+                    "1.1754944e-38".to_string().into_bytes(),
+                    "1.1754944e-38".to_string(),
+                ),
+                (-00000.0, "-0".to_string().into_bytes(), "-0".to_string()),
+                (00000.0, "0".to_string().into_bytes(), "0".to_string()),
+                (1.0f32, "1".to_string().into_bytes(), "1".to_string()),
+                (
+                    -123456789123000.0f32,
+                    "-123456790000000".to_string().into_bytes(),
+                    "-123456790000000".to_string(),
+                ),
+                (
+                    1e-15f32,
+                    "0.000000000000001".to_string().into_bytes(),
+                    "0.000000000000001".to_string(),
+                ),
+                (
+                    9.9999e-16f32,
+                    "9.9999e-16".to_string().into_bytes(),
+                    "9.9999e-16".to_string(),
+                ),
+                (
+                    1.23456789123000e-9f64 as f32,
+                    "0.0000000012345679".to_string().into_bytes(),
+                    "0.0000000012345679".to_string(),
+                ),
+            ];
+
+            let ref_cs = helper_get_cs_ref(&cs);
+
+            test_as_string_helper(
+                ref_cs,
+                |ctx, extra, val| {
+                    cast_float_real_as_string(
+                        ctx,
+                        extra,
+                        val.map(|x| Real::new((*x).into()).unwrap()).as_ref(),
+                    )
+                },
+                "cast_float_real_as_string",
+            );
+        }
+        {
+            test_none_with_ctx_and_extra(cast_double_real_as_string);
+
+            let cs: Vec<(f64, String)> = vec![
+                (f64::NAN, "NaN".to_string()),
+                (f64::INFINITY, "+Inf".to_string()),
+                (-f64::INFINITY, "-Inf".to_string()),
+            ];
+
+            for (val, s) in &cs {
+                assert_eq!(*s, ryu_strconv::format_float(*val));
+            }
+
+            let cs: Vec<(f64, Vec<u8>, String)> = vec![
+                (1e15, "1e15".to_string().into_bytes(), "1e15".to_string()),
+                (-1e15, "-1e15".to_string().into_bytes(), "-1e15".to_string()),
+                (
+                    9.99999e14,
+                    "999999000000000".to_string().into_bytes(),
+                    "999999000000000".to_string(),
+                ),
+                (
+                    -9.99999e14,
+                    "-999999000000000".to_string().into_bytes(),
+                    "-999999000000000".to_string(),
+                ),
+                (
+                    1e15 - 1.0,
+                    "999999999999999".to_string().into_bytes(),
+                    "999999999999999".to_string(),
+                ),
+                (
+                    f64::MIN,
+                    "-1.7976931348623157e308".to_string().into_bytes(),
+                    "-1.7976931348623157e308".to_string(),
+                ),
+                (
+                    f64::MAX,
+                    "1.7976931348623157e308".to_string().into_bytes(),
+                    "1.7976931348623157e308".to_string(),
+                ),
+                (
+                    f64::MIN_POSITIVE,
+                    "2.2250738585072014e-308".to_string().into_bytes(),
+                    "2.2250738585072014e-308".to_string(),
+                ),
+                (-00000.0, "-0".to_string().into_bytes(), "-0".to_string()),
+                (00000.0, "0".to_string().into_bytes(), "0".to_string()),
+                (1.0, "1".to_string().into_bytes(), "1".to_string()),
+                (
+                    -123456789123000.0,
+                    "-123456789123000".to_string().into_bytes(),
+                    "-123456789123000".to_string(),
+                ),
+                (
+                    1e-15,
+                    "0.000000000000001".to_string().into_bytes(),
+                    "0.000000000000001".to_string(),
+                ),
+                (
+                    9.9999e-16,
+                    "9.9999e-16".to_string().into_bytes(),
+                    "9.9999e-16".to_string(),
+                ),
+                (
+                    1.23456789123000e-9,
+                    "0.00000000123456789123".to_string().into_bytes(),
+                    "0.00000000123456789123".to_string(),
+                ),
+            ];
+
+            let ref_cs = helper_get_cs_ref(&cs);
+
+            test_as_string_helper(
+                ref_cs,
+                |ctx, extra, val| {
+                    cast_double_real_as_string(
+                        ctx,
+                        extra,
+                        val.map(|x| Real::new(*x).unwrap()).as_ref(),
+                    )
+                },
+                "cast_double_real_as_string",
+            );
+        }
     }
 
     #[test]