Add big and little endian parsers

ocaml-multicore · Jan 6, 2023 · 9998c86 · 9998c86
1 parent 5407ed6
commit 9998c86
Show file tree

Hide file tree

Showing 4 changed files with 230 additions and 0 deletions.
diff --git a/fuzz/fuzz_buf_read.ml b/fuzz/fuzz_buf_read.ml
@@ -136,6 +136,64 @@ module Model = struct
     match line t with
     | line -> line :: lines t
     | exception End_of_file -> []
+
+  module BE = struct
+    (* Reference decoders: fetch the bytes with [take], then decode them with [String]. *)
+    let uint16 t =
+      let s = take 2 t in
+      String.get_uint16_be s 0
+
+    let uint32 t =
+      let s = take 4 t in
+      String.get_int32_be s 0
+
+    (* Built from three unsigned 16-bit words; the word at offset 0 is the most significant. *)
+    let uint48 t =
+      let s = take 6 t in
+      let word off = Int64.of_int (String.get_uint16_be s off) in
+      let hi = Int64.shift_left (word 0) 32 in
+      let mid = Int64.shift_left (word 2) 16 in
+      Int64.add hi (Int64.add mid (word 4))
+
+    let uint64 t =
+      let s = take 8 t in
+      String.get_int64_be s 0
+
+    let float t =
+      String.get_int32_be (take 4 t) 0 |> Int32.float_of_bits
+
+    let double t =
+      String.get_int64_be (take 8 t) 0 |> Int64.float_of_bits
+  end
+
+  module LE = struct
+    (* Reference decoders: fetch the bytes with [take], then decode them with [String]. *)
+    let uint16 t =
+      let s = take 2 t in
+      String.get_uint16_le s 0
+
+    let uint32 t =
+      let s = take 4 t in
+      String.get_int32_le s 0
+
+    (* Built from three unsigned 16-bit words; the word at offset 0 is the least significant. *)
+    let uint48 t =
+      let s = take 6 t in
+      let word off = Int64.of_int (String.get_uint16_le s off) in
+      let mid = Int64.shift_left (word 2) 16 in
+      let hi = Int64.shift_left (word 4) 32 in
+      Int64.add hi (Int64.add mid (word 0))
+
+    let uint64 t =
+      let s = take 8 t in
+      String.get_int64_le s 0
+
+    let float t =
+      String.get_int32_le (take 4 t) 0 |> Int32.float_of_bits
+
+    let double t =
+      String.get_int64_le (take 8 t) 0 |> Int64.float_of_bits
+  end
 end
 
 type op = Op : 'a Crowbar.printer * 'a Buf_read.parser * (Model.t -> 'a) -> op
@@ -162,6 +220,18 @@ let op =
     "skip", Crowbar.(map [int]) (fun n -> Op (unit, Buf_read.skip n, Model.skip n));
     "end_of_input", Crowbar.const @@ Op (unit, Buf_read.end_of_input, Model.end_of_input);
     "lines", Crowbar.const @@ Op (Fmt.Dump.(list string), (Buf_read.(map List.of_seq lines)), Model.lines);
+    "be_uint16", Crowbar.const @@ Op (Fmt.int, (Buf_read.BE.uint16), Model.BE.uint16);
+    "be_uint32", Crowbar.const @@ Op (Fmt.int32, (Buf_read.BE.uint32), Model.BE.uint32);
+    "be_uint48", Crowbar.const @@ Op (Fmt.int64, (Buf_read.BE.uint48), Model.BE.uint48);
+    "be_uint64", Crowbar.const @@ Op (Fmt.int64, (Buf_read.BE.uint64), Model.BE.uint64);
+    "be_float", Crowbar.const @@ Op (Fmt.float, (Buf_read.BE.float), Model.BE.float);
+    "be_double", Crowbar.const @@ Op (Fmt.float, (Buf_read.BE.double), Model.BE.double);
+    "le_uint16", Crowbar.const @@ Op (Fmt.int, (Buf_read.LE.uint16), Model.LE.uint16);
+    "le_uint32", Crowbar.const @@ Op (Fmt.int32, (Buf_read.LE.uint32), Model.LE.uint32);
+    "le_uint48", Crowbar.const @@ Op (Fmt.int64, (Buf_read.LE.uint48), Model.LE.uint48);
+    "le_uint64", Crowbar.const @@ Op (Fmt.int64, (Buf_read.LE.uint64), Model.LE.uint64);
+    "le_float", Crowbar.const @@ Op (Fmt.float, (Buf_read.LE.float), Model.LE.float);
+    "le_double", Crowbar.const @@ Op (Fmt.float, (Buf_read.LE.double), Model.LE.double);
   ]
 
 let catch f x =

diff --git a/lib_eio/buf_read.ml b/lib_eio/buf_read.ml
@@ -143,6 +143,92 @@ let as_flow t =
 let get t i =
   Bigarray.Array1.get t.buf (t.pos + i)
 
+module BE = struct
+  (* Every parser follows the same steps: [ensure t n], decode [n] bytes of [t.buf]
+     starting at [t.pos], then [consume t n] before returning the decoded value. *)
+  let uint16 t =
+    ensure t 2;
+    let v = Bigstringaf.get_int16_be t.buf t.pos in
+    consume t 2;
+    v
+
+  let uint32 t =
+    ensure t 4;
+    let v = Bigstringaf.get_int32_be t.buf t.pos in
+    consume t 4;
+    v
+
+  let uint48 t =
+    ensure t 6;
+    (* [Int64.of_int32] sign-extends, so the mask keeps only the 32 bits that were read. *)
+    let hi = Int64.logand 0xffffffffL (Int64.of_int32 (Bigstringaf.get_int32_be t.buf t.pos)) in
+    let lo = Int64.of_int (Bigstringaf.get_int16_be t.buf (t.pos + 4)) in
+    consume t 6;
+    Int64.logor (Int64.shift_left hi 16) lo
+
+  let uint64 t =
+    ensure t 8;
+    let v = Bigstringaf.get_int64_be t.buf t.pos in
+    consume t 8;
+    v
+
+  (* The 4 bytes are read as an [int32] bit pattern and converted with [Int32.float_of_bits]. *)
+  let float t =
+    ensure t 4;
+    let bits = Bigstringaf.unsafe_get_int32_be t.buf t.pos in
+    consume t 4;
+    Int32.float_of_bits bits
+
+  let double t =
+    ensure t 8;
+    let bits = Bigstringaf.unsafe_get_int64_be t.buf t.pos in
+    consume t 8;
+    Int64.float_of_bits bits
+end
+
+module LE = struct
+  (* Every parser follows the same steps: [ensure t n], decode [n] bytes of [t.buf]
+     starting at [t.pos], then [consume t n] before returning the decoded value. *)
+  let uint16 t =
+    ensure t 2;
+    let v = Bigstringaf.get_int16_le t.buf t.pos in
+    consume t 2;
+    v
+
+  let uint32 t =
+    ensure t 4;
+    let v = Bigstringaf.get_int32_le t.buf t.pos in
+    consume t 4;
+    v
+
+  let uint48 t =
+    ensure t 6;
+    (* [Int64.of_int32] sign-extends, so the mask keeps only the 32 bits that were read. *)
+    let lo = Int64.logand 0xffffffffL (Int64.of_int32 (Bigstringaf.get_int32_le t.buf t.pos)) in
+    let hi = Int64.of_int (Bigstringaf.get_int16_le t.buf (t.pos + 4)) in
+    consume t 6;
+    Int64.logor (Int64.shift_left hi 32) lo
+
+  let uint64 t =
+    ensure t 8;
+    let v = Bigstringaf.get_int64_le t.buf t.pos in
+    consume t 8;
+    v
+
+  (* The 4 bytes are read as an [int32] bit pattern and converted with [Int32.float_of_bits]. *)
+  let float t =
+    ensure t 4;
+    let bits = Bigstringaf.unsafe_get_int32_le t.buf t.pos in
+    consume t 4;
+    Int32.float_of_bits bits
+
+  let double t =
+    ensure t 8;
+    let bits = Bigstringaf.unsafe_get_int64_le t.buf t.pos in
+    consume t 8;
+    Int64.float_of_bits bits
+end
+
 let char c t =
   ensure t 1;
   let c2 = get t 0 in

diff --git a/lib_eio/buf_read.mli b/lib_eio/buf_read.mli
@@ -107,6 +107,48 @@ val string : string -> unit parser
 
     @raise Failure if [s] is not a prefix of the stream. *)
 
+(** Big-endian parsers for fixed-width binary values. *)
+module BE : sig
+  val uint16 : int parser
+  (** [uint16] parses the next 2 bytes as the lower 16 bits of an [int] in big-endian byte order. *)
+
+  val uint32 : int32 parser
+  (** [uint32] parses the next 4 bytes as an [int32] in big-endian byte order. [int32] is signed, so inputs with the top bit set yield negative values. *)
+
+  val uint48 : int64 parser
+  (** [uint48] parses the next 6 bytes as a 48-bit unsigned big-endian integer, returned in the low 48 bits of an [int64]. *)
+
+  val uint64 : int64 parser
+  (** [uint64] parses the next 8 bytes as an [int64] in big-endian byte order. [int64] is signed, so inputs with the top bit set yield negative values. *)
+
+  val float : float parser
+  (** [float] parses the next 4 bytes, in big-endian byte order, as a 32-bit pattern converted to a [float] with [Int32.float_of_bits]. *)
+
+  val double : float parser
+  (** [double] parses the next 8 bytes, in big-endian byte order, as a 64-bit pattern converted to a [float] with [Int64.float_of_bits]. *)
+end
+
+(** Little-endian parsers for fixed-width binary values. *)
+module LE : sig
+  val uint16 : int parser
+  (** [uint16] parses the next 2 bytes as the lower 16 bits of an [int] in little-endian byte order. *)
+
+  val uint32 : int32 parser
+  (** [uint32] parses the next 4 bytes as an [int32] in little-endian byte order. [int32] is signed, so inputs with the top bit set yield negative values. *)
+
+  val uint48 : int64 parser
+  (** [uint48] parses the next 6 bytes as a 48-bit unsigned little-endian integer, returned in the low 48 bits of an [int64]. *)
+
+  val uint64 : int64 parser
+  (** [uint64] parses the next 8 bytes as an [int64] in little-endian byte order. [int64] is signed, so inputs with the top bit set yield negative values. *)
+
+  val float : float parser
+  (** [float] parses the next 4 bytes, in little-endian byte order, as a 32-bit pattern converted to a [float] with [Int32.float_of_bits]. *)
+
+  val double : float parser
+  (** [double] parses the next 8 bytes, in little-endian byte order, as a 64-bit pattern converted to a [float] with [Int64.float_of_bits]. *)
+end
+
 val take : int -> string parser
 (** [take n] takes exactly [n] bytes from the input. *)
 

diff --git a/tests/buf_reader.md b/tests/buf_reader.md
@@ -393,6 +393,38 @@ Exception: Failure "skip_while1".
 - : string = "bbbccc"
 ```
 
+## Big Endian
+```ocaml
+# R.parse_string_exn R.BE.uint16 "\128\001" |> Printf.sprintf "0x%x";;
+- : string = "0x8001"
+# R.parse_string_exn R.BE.uint32 "\128\064\032\001" |> Printf.sprintf "0x%lx";;
+- : string = "0x80402001"
+# R.parse_string_exn R.BE.uint48 "\128\064\032\016\008\001" |> Printf.sprintf "0x%Lx";;
+- : string = "0x804020100801"
+# R.parse_string_exn R.BE.uint64 "\128\064\032\016\008\004\002\001" |> Printf.sprintf "0x%Lx";;
+- : string = "0x8040201008040201"
+# R.parse_string_exn R.BE.float "\128\064\032\001" |> Printf.sprintf "%e";;
+- : string = "-5.888953e-39"
+# R.parse_string_exn R.BE.double "\128\064\032\016\008\004\002\001" |> Printf.sprintf "%e";;
+- : string = "-1.793993e-307"
+```
+
+## Little Endian
+```ocaml
+# R.parse_string_exn R.LE.uint16 "\128\001" |> Printf.sprintf "0x%x";;
+- : string = "0x180"
+# R.parse_string_exn R.LE.uint32 "\128\064\032\001" |> Printf.sprintf "0x%lx";;
+- : string = "0x1204080"
+# R.parse_string_exn R.LE.uint48 "\128\064\032\016\008\001" |> Printf.sprintf "0x%Lx";;
+- : string = "0x10810204080"
+# R.parse_string_exn R.LE.uint64 "\128\064\032\016\008\004\002\001" |> Printf.sprintf "0x%Lx";;
+- : string = "0x102040810204080"
+# R.parse_string_exn R.LE.float "\128\064\032\001" |> Printf.sprintf "%e";;
+- : string = "2.943364e-38"
+# R.parse_string_exn R.LE.double "\128\064\032\016\008\004\002\001" |> Printf.sprintf "%e";;
+- : string = "8.209689e-304"
+```
+
 ## Take all
 
 ```ocaml