Skip to content

Commit

Permalink
Flatten stdlib Genlex into BatGenlex
Browse files Browse the repository at this point in the history
  • Loading branch information
thelema committed May 16, 2012
1 parent 8e5373c commit aa007e1
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 65 deletions.
2 changes: 2 additions & 0 deletions src/batFormat.ml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
open BatIO
include Format

(* internal functions *)

let output_of out = fun s i o -> ignore (really_output out s i o)
let flush_of out = BatInnerIO.get_flush out
let newline_of out = fun () -> BatInnerIO.write out '\n'
Expand Down
23 changes: 11 additions & 12 deletions src/batGenlex.ml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
open BatPervasives
open BatParserCo
open BatCharParser
open Genlex

include Genlex


let string_of_token = function
Expand Down Expand Up @@ -217,7 +218,7 @@ let to_enum_filter kwd_table =
in
fun input -> BatEnum.from_while (fun _count -> next_token {position = 0; content = input})


let to_stream_filter (kwd_table:t) (x:char Stream.t) : token Stream.t =
(BatStream.of_enum (to_enum_filter kwd_table (BatStream.enum x)))

Expand Down Expand Up @@ -323,15 +324,15 @@ struct
let char =
if case_sensitive then char
else case_char

let string =
if case_sensitive then string
else case_string

let adapt_case =
if case_sensitive then identity
else String.lowercase

let string_compare =
if case_sensitive then String.compare
else BatString.icompare
Expand Down Expand Up @@ -386,12 +387,12 @@ struct
ignore_zero_plus (either
[ satisfy BatChar.is_whitespace >>= (fun _ -> return ());
comment ])

let to_symbol p =
p >>= fun r ->
whitespaces >>= fun _ ->
return (BatString.of_list r)

let lexeme p =
p >>= fun r ->
whitespaces >>= fun _ ->
Expand Down Expand Up @@ -452,7 +453,7 @@ struct
| c -> return c
) >>= fun c ->
BatCharParser.char '\'' >>= fun _ -> return c

let string_literal = label "String Literal"
(lexeme
(BatCharParser.char '"' >>>
Expand All @@ -468,8 +469,8 @@ struct
in content [] >>= fun c ->
(*Printf.eprintf "Sending full string %S\n" (String.of_list (List.rev c));*)
return (BatString.of_list (List.rev c))))


let integer =
label "OCaml-style integer" (
lexeme(maybe (BatCharParser.char '-') >>= fun sign ->
Expand Down Expand Up @@ -507,7 +508,7 @@ struct
| None -> absolute
| Some _ -> ~-. absolute)
))

let number =
( float >>= fun f -> return (`Float f))
<|>( integer >>= fun i -> return (`Integer i) )
Expand Down Expand Up @@ -541,5 +542,3 @@ struct
end

end


141 changes: 89 additions & 52 deletions src/batGenlex.mli
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,54 @@
roughly the lexical conventions of Caml, but is parameterized by the
set of keywords of your language.
This module extends Stdlib's
{{:http://caml.inria.fr/pub/docs/manual-ocaml/libref/Genlex.html}Genlex}
module, go there for documentation on the rest of the functions
and types.
Example: a lexer suitable for a desk calculator is obtained by
{[ let lexer = make_lexer ["+";"-";"*";"/";"let";"="; "("; ")"] ]}
The associated parser would be a function from [token stream]
to, for instance, [int], and would have rules such as:
{[
let parse_expr = parser
[< 'Int n >] -> n
| [< 'Kwd "("; n = parse_expr; 'Kwd ")" >] -> n
| [< n1 = parse_expr; n2 = parse_remainder n1 >] -> n2
and parse_remainder n1 = parser
[< 'Kwd "+"; n2 = parse_expr >] -> n1+n2
| ...
]}
@author Jacques Garrigue
@author David Teller
*)

open Genlex

(** The type of tokens. The lexical classes are: [Int] and [Float]
for integer and floating-point numbers; [String] for
string literals, enclosed in double quotes; [Char] for
character literals, enclosed in single quotes; [Ident] for
identifiers (either sequences of letters, digits, underscores
and quotes, or sequences of ``operator characters'' such as
[+], [*], etc); and [Kwd] for keywords (either identifiers or
single ``special characters'' such as [(], [}], etc). *)
type token = Genlex.token =
Kwd of string
| Ident of string
| Int of int
| Float of float
| String of string
| Char of char

val make_lexer : string list -> char Stream.t -> token Stream.t
(** Construct the lexer function. The first argument is the list of
keywords. An identifier [s] is returned as [Kwd s] if [s]
belongs to this list, and as [Ident s] otherwise.
A special character [s] is returned as [Kwd s] if [s]
belongs to this list, and cause a lexical error (exception
[Parse_error]) otherwise. Blanks and newlines are skipped.
Comments delimited by [(*] and [*)] are skipped as well,
and can be nested. *)

(* {6 Batteries extensions to genlex } *)
type lexer_error =
| IllegalCharacter of char
| NotReallyAChar
Expand Down Expand Up @@ -65,101 +102,101 @@ val string_of_token : token -> string

(**{6 Extending to other languages}*)
open BatCharParser

module Languages :
sig
module type Definition =
sig
val comment_delimiters : (string * string) option
val line_comment_start : string option
val nested_comments : bool
val ident_start : (char, char, position) BatParserCo.t
val ident_letter : (char, char, position) BatParserCo.t
val op_start : (char, char, position) BatParserCo.t
val op_letter : (char, char, position) BatParserCo.t
val reserved_names : string list
val case_sensitive : bool
module type Definition =
sig
val comment_delimiters : (string * string) option
val line_comment_start : string option
val nested_comments : bool
val ident_start : (char, char, position) BatParserCo.t
val ident_letter : (char, char, position) BatParserCo.t
val op_start : (char, char, position) BatParserCo.t
val op_letter : (char, char, position) BatParserCo.t
val reserved_names : string list
val case_sensitive : bool
(**[true] if the language is case-sensitive, [false] otherwise.
If the language is not case-sensitive, every identifier is returned
as lower-case.*)
end
end

module Library :
sig
module OCaml : Definition
module C : Definition
end
module Library :
sig
module OCaml : Definition
module C : Definition
end

module Make(M:Definition) :
sig
(**Create a lexer from a language definition*)
module Make(M:Definition) :
sig
(**Create a lexer from a language definition*)

(** {6 High-level API} *)
(** {6 High-level API} *)

(** Drop comments, present reserved operators and reserved
(** Drop comments, present reserved operators and reserved
names as [Kwd], operators and identifiers as [Ident],
integer numbers as [Int], floating-point numbers as
[Float] and characters as [Char].
If the language is not [case_sensitive], identifiers and
keywords are returned in lower-case.
*)
val feed : (char, position) BatParserCo.Source.t -> (token, position) BatParserCo.Source.t
*)
val feed : (char, position) BatParserCo.Source.t -> (token, position) BatParserCo.Source.t


(** {6 Medium-level API} *)
val start : (char, unit, position) BatParserCo.t
(** {6 Medium-level API} *)
val start : (char, unit, position) BatParserCo.t
(**Remove any leading whitespaces*)


val ident : (char, string, position) BatParserCo.t
val ident : (char, string, position) BatParserCo.t
(**Accepts any non-reserved identifier/operator.
If the language is not [case_sensitive], the identifier
is returned in lower-case.*)

val kwd : (char, string, position) BatParserCo.t
val kwd : (char, string, position) BatParserCo.t
(**Accepts any identifier.
If the language is not [case_sensitive], the identifier
is returned in lower-case.*)

val identifier : string -> (char, unit, position) BatParserCo.t
val keyword : string -> (char, unit, position) BatParserCo.t
val identifier : string -> (char, unit, position) BatParserCo.t
val keyword : string -> (char, unit, position) BatParserCo.t

val char_literal : (char, char, position) BatParserCo.t
val char_literal : (char, char, position) BatParserCo.t
(**Accepts a character literal, i.e. one character
(or an escape) between two quotes.*)

val string_literal :(char, string, position) BatParserCo.t
val string_literal :(char, string, position) BatParserCo.t
(**Accepts a string, i.e. one sequence of
characters or escapes between two double
quotes, on one line.*)

val integer: (char, int , position) BatParserCo.t
val integer: (char, int , position) BatParserCo.t
(**Parse an integer.*)

val float: (char, float , position) BatParserCo.t
val float: (char, float , position) BatParserCo.t
(**Parse a floating-point number.*)

val number: (char, [`Float of float | `Integer of int] , position) BatParserCo.t
val number: (char, [`Float of float | `Integer of int] , position) BatParserCo.t
(**Parse either an integer or a floating-point number.*)

(** {6 Low-level API} *)
val char : char -> (char, char , position) BatParserCo.t
(** {6 Low-level API} *)
val char : char -> (char, char , position) BatParserCo.t
(** As {!CharParser.char}, but case-insensitive if specified
by {!case_sensitive}. *)

val string : string -> (char, string, position) BatParserCo.t
val string : string -> (char, string, position) BatParserCo.t
(** As {!CharParser.string}, but case-insensitive if specified
by {!case_sensitive}. *)

val line_comment : (char, unit , position) BatParserCo.t
val multiline_comment : (char, unit , position) BatParserCo.t
val comment : (char, unit , position) BatParserCo.t
val whitespaces : (char, unit , position) BatParserCo.t
(* val lexeme : (char, 'a , position) BatParserCo.t -> (char, 'a , position) BatParserCo.t*)
(**Apply this filter to your own parsers if you want them
val line_comment : (char, unit , position) BatParserCo.t
val multiline_comment : (char, unit , position) BatParserCo.t
val comment : (char, unit , position) BatParserCo.t
val whitespaces : (char, unit , position) BatParserCo.t
(* val lexeme : (char, 'a , position) BatParserCo.t -> (char, 'a , position) BatParserCo.t*)
(**Apply this filter to your own parsers if you want them
to ignore following comments.*)

end
end

end

2 changes: 1 addition & 1 deletion src/batteries.ml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ module Digest = BatDigest
(* Filename *)
module Format = BatFormat
module Gc = BatGc
module Genlex = struct include Genlex include BatGenlex end
module Genlex = BatGenlex
module Hashtbl = BatHashtbl
module Int32 = BatInt32
module Int64 = BatInt64
Expand Down

0 comments on commit aa007e1

Please sign in to comment.