Skip to content
Browse files

Flatten stdlib Genlex and Marshal

  • Loading branch information...
1 parent aa007e1 commit 9671dac8287edfddcae675b755f4ba4ea5301e55 @thelema thelema committed May 16, 2012
Showing with 234 additions and 30 deletions.
  1. +5 −6 src/batLexing.ml
  2. +153 −1 src/batLexing.mli
  3. +17 −14 src/batMarshal.ml
  4. +57 −7 src/batMarshal.mli
  5. +2 −2 src/batteries.ml
View
11 src/batLexing.ml
@@ -20,10 +20,9 @@
*)
- open BatIO
- open Lexing
- let from_input inp =
- from_function (fun s n -> try input inp s 0 n with No_more_input -> 0)
-
- let from_channel = from_input
+open BatIO
+include Lexing
+let from_input inp = (* build a lexer buffer whose refill function pulls characters from the input [inp] *)
+ from_function (fun s n -> try input inp s 0 n with No_more_input -> 0) (* No_more_input is turned into a count of 0, which from_function interprets as end of input *)
+let from_channel = from_input (* same function as from_input, exposed under the name from_channel *)
View
154 src/batLexing.mli
@@ -18,7 +18,7 @@
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*)
-open Lexing
+
(** Simple lexing using ocaml conventions
This module extends Stdlib's
@@ -28,12 +28,164 @@ open Lexing
*)
+(** The run-time library for lexers generated by [ocamllex]. *)
+
+(** {6 Positions} *)
+
+type position = Lexing.position = {
+ pos_fname : string;
+ pos_lnum : int;
+ pos_bol : int;
+ pos_cnum : int;
+}
+(** A value of type [position] describes a point in a source file.
+ [pos_fname] is the file name; [pos_lnum] is the line number;
+ [pos_bol] is the offset of the beginning of the line (number
+ of characters between the beginning of the file and the beginning
+ of the line); [pos_cnum] is the offset of the position (number of
+ characters between the beginning of the file and the position).
+
+ See the documentation of type [lexbuf] for information about
+ how the lexing engine will manage positions.
+ *)
+
+val dummy_pos : position;;
+(** A value of type [position], guaranteed to be different from any
+ valid position.
+ *)
+
+
+(** {6 Lexer buffers} *)
+
+
+type lexbuf = Lexing.lexbuf =
+ { refill_buff : lexbuf -> unit;
+ mutable lex_buffer : string;
+ mutable lex_buffer_len : int;
+ mutable lex_abs_pos : int;
+ mutable lex_start_pos : int;
+ mutable lex_curr_pos : int;
+ mutable lex_last_pos : int;
+ mutable lex_last_action : int;
+ mutable lex_eof_reached : bool;
+ mutable lex_mem : int array;
+ mutable lex_start_p : position;
+ mutable lex_curr_p : position;
+ }
+(** The type of lexer buffers. A lexer buffer is the argument passed
+ to the scanning functions defined by the generated scanners.
+ The lexer buffer holds the current state of the scanner, plus
+ a function to refill the buffer from the input.
+
+ At each token, the lexing engine will copy [lex_curr_p] to
+ [lex_start_p], then change the [pos_cnum] field
+ of [lex_curr_p] by updating it with the number of characters read
+ since the start of the [lexbuf]. The other fields are left
+ unchanged by the lexing engine. In order to keep them
+ accurate, they must be initialised before the first use of the
+ lexbuf, and updated by the relevant lexer actions (i.e. at each
+ end of line -- see also [new_line]).
+ *)
val from_input : BatIO.input -> lexbuf
(** Create a lexer buffer on the given input.
[Lexing.from_input inp] returns a lexer buffer which reads
from the input [inp], at the current reading position. *)
+val from_string : string -> lexbuf
+(** Create a lexer buffer which reads from
+ the given string. Reading starts from the first character in
+ the string. An end-of-input condition is generated when the
+ end of the string is reached. *)
+
+val from_function : (string -> int -> int) -> lexbuf
+(** Create a lexer buffer with the given function as its reading method.
+ When the scanner needs more characters, it will call the given
+ function, giving it a character string [s] and a character
+ count [n]. The function should put [n] characters or less in [s],
+ starting at character number 0, and return the number of characters
+ provided. A return value of 0 means end of input. *)
+
+
+(** {6 Functions for lexer semantic actions} *)
+
+
+(** The following functions can be called from the semantic actions
+ of lexer definitions (the ML code enclosed in braces that
+ computes the value returned by lexing functions). They give
+ access to the character string matched by the regular expression
+ associated with the semantic action. These functions must be
+ applied to the argument [lexbuf], which, in the code generated by
+ [ocamllex], is bound to the lexer buffer passed to the parsing
+ function. *)
+
+val lexeme : lexbuf -> string
+(** [Lexing.lexeme lexbuf] returns the string matched by
+ the regular expression. *)
+
+val lexeme_char : lexbuf -> int -> char
+(** [Lexing.lexeme_char lexbuf i] returns character number [i] in
+ the matched string. *)
+
+val lexeme_start : lexbuf -> int
+(** [Lexing.lexeme_start lexbuf] returns the offset in the
+ input stream of the first character of the matched string.
+ The first character of the stream has offset 0. *)
+
+val lexeme_end : lexbuf -> int
+(** [Lexing.lexeme_end lexbuf] returns the offset in the input stream
+ of the character following the last character of the matched
+ string. The first character of the stream has offset 0. *)
+
+val lexeme_start_p : lexbuf -> position
+(** Like [lexeme_start], but return a complete [position] instead
+ of an offset. *)
+
+val lexeme_end_p : lexbuf -> position
+(** Like [lexeme_end], but return a complete [position] instead
+ of an offset. *)
+
+val new_line : lexbuf -> unit
+(** Update the [lex_curr_p] field of the lexbuf to reflect the start
+ of a new line. You can call this function in the semantic action
+ of the rule that matches the end-of-line character.
+ @since 3.11.0
+*)
+
+(** {6 Miscellaneous functions} *)
+
+val flush_input : lexbuf -> unit
+(** Discard the contents of the buffer and reset the current
+ position to 0. The next use of the lexbuf will trigger a
+ refill. *)
+
+(**/**)
+
+(** {6 } *)
+
+(** The following definitions are used by the generated scanners only.
+ They are not intended to be used by user programs. *)
+
+val sub_lexeme : lexbuf -> int -> int -> string
+val sub_lexeme_opt : lexbuf -> int -> int -> string option
+val sub_lexeme_char : lexbuf -> int -> char
+val sub_lexeme_char_opt : lexbuf -> int -> char option
+
+type lex_tables = Lexing.lex_tables =
+ { lex_base : string;
+ lex_backtrk : string;
+ lex_default : string;
+ lex_trans : string;
+ lex_check : string;
+ lex_base_code : string;
+ lex_backtrk_code : string;
+ lex_default_code : string;
+ lex_trans_code : string;
+ lex_check_code : string;
+ lex_code: string;}
+
+val engine : lex_tables -> int -> lexbuf -> int
+val new_engine : lex_tables -> int -> lexbuf -> int
(** {6 Deprecated}*)
View
31 src/batMarshal.ml
@@ -20,21 +20,24 @@
*)
-open Marshal
+include Marshal
- let output out ?(sharing=true) ?(closures=false) v =
- let buf = to_string v ((if sharing then [] else [No_sharing]) @ (if closures then [Closures] else [])) in
- BatInnerIO.nwrite out buf
+let output out ?(sharing=true) ?(closures=false) v = (* marshal [v] and write its byte representation to [out]; [sharing] defaults to true, [closures] to false *)
+ let flags = match sharing, closures with (* translate the two booleans into the extern_flags list expected by to_string *)
+ | true, false -> [] (* defaults: no flag needed *)
+ | true, true -> [Closures] (* closures requested *)
+ | false, false -> [No_sharing] (* sharing disabled *)
+ | false, true -> [No_sharing; Closures] (* sharing disabled and closures requested *)
+ in
+ let buf = to_string v flags in (* byte representation of [v] as a string *)
+ BatInnerIO.nwrite out buf (* presumably writes the whole string to [out] -- confirm against BatInnerIO.nwrite *)
- let input inp =
- let header = BatInnerIO.really_nread inp header_size in
- let size = data_size header 0 in
- from_string (header ^ (BatInnerIO.really_nread inp size)) 0
+let input inp = (* read one marshaled value from [inp] and reconstruct it *)
+ let header = BatInnerIO.really_nread inp header_size in (* first fetch the fixed-size header; presumably reads exactly header_size characters -- confirm against BatInnerIO.really_nread *)
+ let size = data_size header 0 in (* length of the data part, decoded from the header starting at offset 0 *)
+ from_string (header ^ (BatInnerIO.really_nread inp size)) 0 (* read the data part, glue it to the header, and unmarshal from offset 0 *)
- let to_channel out v flags =
- output out ~sharing:(not (List.mem No_sharing flags))
- ~closures:(List.mem Closures flags)
- v
-
- let from_channel = input
+let to_channel out v flags = (* variant of output taking an explicit extern_flags list; marked deprecated in the interface *)
+ BatInnerIO.nwrite out (to_string v flags) (* marshal [v] with [flags] unchanged and write the resulting string to [out] *)
+let from_channel = input (* same function as input, exposed under the name from_channel *)
View
64 src/batMarshal.mli
@@ -51,16 +51,16 @@
mode will cause unmarshaling errors on platforms where text
channels behave differently than binary channels, e.g. Windows.
- This module extends Stdlib's
- {{:http://caml.inria.fr/pub/docs/manual-ocaml/libref/Marshal.html}Marshal}
- module, go there for documentation on the rest of the functions
- and types.
-
@author Xavier Leroy (base module)
@author David Teller
*)
-open Marshal
+
+type extern_flags = Marshal.extern_flags =
+ No_sharing (** Don't preserve sharing *)
+ | Closures (** Send function closures *)
+(** The flags to the [Marshal.to_*] functions below. *)
+
val output: _ BatInnerIO.output -> ?sharing:bool -> ?closures:bool -> 'a -> unit
(** [output out v] writes the representation of [v] on [out].
@@ -87,16 +87,66 @@ val output: _ BatInnerIO.output -> ?sharing:bool -> ?closures:bool -> 'a -> unit
un-marshaling time, using an MD5 digest of the code transmitted
along with the code position.) *)
+external to_string :
+ 'a -> extern_flags list -> string = "caml_output_value_to_string"
+(** [Marshal.to_string v flags] returns a string containing
+ the representation of [v] as a sequence of bytes.
+ The [flags] argument has the same meaning as for
+ {!Marshal.to_channel}. *)
+
+val to_buffer : string -> int -> int -> 'a -> extern_flags list -> int
+(** [Marshal.to_buffer buff ofs len v flags] marshals the value [v],
+ storing its byte representation in the string [buff],
+ starting at character number [ofs], and writing at most
+ [len] characters. It returns the number of characters
+ actually written to the string. If the byte representation
+ of [v] does not fit in [len] characters, the exception [Failure]
+ is raised. *)
+
val input : BatInnerIO.input -> 'a
(** [input inp] reads from [inp] the
byte representation of a structured value, as produced by
one of the [Marshal.to_*] functions, and reconstructs and
returns the corresponding value.*)
+val from_string : string -> int -> 'a
+(** [Marshal.from_string buff ofs] unmarshals a structured value
+ like {!Marshal.from_channel} does, except that the byte
+ representation is not read from a channel, but taken from
+ the string [buff], starting at position [ofs]. *)
+
+val header_size : int
+(** The bytes representing a marshaled value are composed of
+ a fixed-size header and a variable-sized data part,
+ whose size can be determined from the header.
+ {!Marshal.header_size} is the size, in characters, of the header.
+ {!Marshal.data_size}[ buff ofs] is the size, in characters,
+ of the data part, assuming a valid header is stored in
+ [buff] starting at position [ofs].
+ Finally, {!Marshal.total_size}[ buff ofs] is the total size,
+ in characters, of the marshaled value.
+ Both {!Marshal.data_size} and {!Marshal.total_size} raise [Failure]
+ if [buff], [ofs] does not contain a valid header.
+
+ To read the byte representation of a marshaled value into
+ a string buffer, the program needs to read first
+ {!Marshal.header_size} characters into the buffer,
+ then determine the length of the remainder of the
+ representation using {!Marshal.data_size},
+ make sure the buffer is large enough to hold the remaining
+ data, then read it, and finally call {!Marshal.from_string}
+ to unmarshal the value. *)
+
+val data_size : string -> int -> int
+(** See {!Marshal.header_size}.*)
+
+val total_size : string -> int -> int
+(** See {!Marshal.header_size}.*)
+
+(** {6 Deprecated} *)
val to_channel : _ BatInnerIO.output -> 'a -> extern_flags list -> unit
(** @deprecated Use {!output} instead *)
val from_channel : BatInnerIO.input -> 'a
(** @deprecated Use {!input} instead *)
-
View
4 src/batteries.ml
@@ -64,11 +64,11 @@ module Hashtbl = BatHashtbl
module Int32 = BatInt32
module Int64 = BatInt64
(* Lazy *)
-module Lexing = struct include Lexing include BatLexing end
+module Lexing = BatLexing
module List = BatList
(* ListLabels *)
module Map = BatMap
-module Marshal = struct include Marshal include BatMarshal end
+module Marshal = BatMarshal
(* MoreLabels *)
module Nativeint = BatNativeint
module Oo = struct include Oo include BatOo end

0 comments on commit 9671dac

Please sign in to comment.
Something went wrong with that request. Please try again.