Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
feat: add Seq-based iterators
close #158
  • Loading branch information
c-cube committed Jul 21, 2018
1 parent 159c491 commit 9791b31
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 56 deletions.
130 changes: 74 additions & 56 deletions lib/core.ml
Expand Up @@ -1059,8 +1059,9 @@ module Mark = struct
end

(* Generator: a closure returning the next element at each call, and [None]
   once there is nothing left to return. *)
type 'a gen = unit -> 'a option
(* Alias for the standard delayed-list type [Seq.t]. *)
type 'a seq = 'a Seq.t

let all_gen ?(pos=0) ?len re s =
let all_seq ?(pos=0) ?len re s : _ seq =
if pos < 0 then invalid_arg "Re.all";
(* index of the first position we do not consider.
pos < limit is an invariant *)
Expand All @@ -1072,27 +1073,36 @@ let all_gen ?(pos=0) ?len re s =
in
(* iterate on matches. When a match is found, search for the next
one just after its end *)
let pos = ref pos in
fun () ->
if !pos >= limit
then None (* no more matches *)
let rec aux pos () =
if pos >= limit
then Seq.Nil (* no more matches *)
else
match match_str ~groups:true ~partial:false re s
~pos:!pos ~len:(limit - !pos) with
~pos ~len:(limit - pos) with
| Match substr ->
let p1, p2 = Group.offset substr 0 in
pos := if p1=p2 then p2+1 else p2;
Some substr
let pos = if p1=p2 then p2+1 else p2 in
Seq.Cons (substr, aux pos)
| Running
| Failed -> None
| Failed -> Seq.Nil
in
aux pos

(* Generator flavour of [all_seq]: the sequence of matches is kept behind a
   mutable cursor, so that every call to the returned closure yields the next
   match, and [None] once the sequence has no more elements. *)
let all_gen ?pos ?len re s =
  let cursor = ref (all_seq ?pos ?len re s) in
  let next () =
    match !cursor () with
    | Seq.Cons (groups, rest) ->
      (* step past the match we are about to hand out *)
      cursor := rest;
      Some groups
    | Seq.Nil -> None
  in
  next

let all ?pos ?len re s =
let l = ref [] in
let g = all_gen ?pos ?len re s in
let rec iter () = match g() with
| None -> List.rev !l
| Some sub -> l := sub :: !l; iter ()
in iter ()
Seq.fold_left (fun l x -> x :: l) [] (all_seq ?pos ?len re s) |> List.rev

(* Sequence of matched substrings: group 0 of every match produced by
   [all_seq], computed as the sequence is consumed. *)
let matches_seq ?pos ?len re s : _ seq =
  let whole_match groups = Group.get groups 0 in
  Seq.map whole_match (all_seq ?pos ?len re s)

let matches_gen ?pos ?len re s =
let g = all_gen ?pos ?len re s in
Expand All @@ -1114,7 +1124,7 @@ type split_token =
| `Delim of groups
]

let split_full_gen ?(pos=0) ?len re s =
let split_full_seq ?(pos=0) ?len re s : _ seq =
if pos < 0 then invalid_arg "Re.split";
let limit = match len with
| None -> String.length s
Expand All @@ -1126,53 +1136,62 @@ let split_full_gen ?(pos=0) ?len re s =
pos: first position after last match of [re]
limit: first index we ignore (pos < limit is an invariant) *)
let pos0 = pos in
let state = ref `Idle in
let i = ref pos and pos = ref pos in
let next () = match !state with
| `Idle when !pos >= limit ->
if !i < limit then (
let sub = String.sub s !i (limit - !i) in
incr i;
Some (`Text sub)
) else None
let rec aux state i pos () = match state with
| `Idle when pos >= limit ->
if i < limit then (
let sub = String.sub s i (limit - i) in
Seq.Cons (`Text sub, aux state (i+1) pos)
) else Seq.Nil
| `Idle ->
begin match match_str ~groups:true ~partial:false re s ~pos:!pos
~len:(limit - !pos) with
begin match match_str ~groups:true ~partial:false re s ~pos
~len:(limit - pos) with
| Match substr ->
let p1, p2 = Group.offset substr 0 in
pos := if p1=p2 then p2+1 else p2;
let old_i = !i in
i := p2;
let pos = if p1=p2 then p2+1 else p2 in
let old_i = i in
let i = p2 in
if p1 > pos0 then (
(* string does not start by a delimiter *)
let text = String.sub s old_i (p1 - old_i) in
state := `Yield (`Delim substr);
Some (`Text text)
) else Some (`Delim substr)
| Running -> None
let state = `Yield (`Delim substr) in
Seq.Cons (`Text text, aux state i pos)
) else Seq.Cons (`Delim substr, aux state i pos)
| Running -> Seq.Nil
| Failed ->
if !i < limit
if i < limit
then (
let text = String.sub s !i (limit - !i) in
i := limit;
Some (`Text text) (* yield last string *)
let text = String.sub s i (limit - i) in
(* yield last string *)
Seq.Cons (`Text text, aux state limit pos)
) else
None
Seq.Nil
end
| `Yield x ->
state := `Idle;
Seq.Cons (x, aux `Idle i pos)
in
aux `Idle pos pos

(* Generator flavour of [split_full_seq]: each call to the returned closure
   yields the next token of the sequence, and [None] once the sequence has no
   more elements. *)
let split_full_gen ?pos ?len re s : _ gen =
  let cursor = ref (split_full_seq ?pos ?len re s) in
  let next () =
    match !cursor () with
    | Seq.Cons (token, rest) ->
      (* step past the token we are about to hand out *)
      cursor := rest;
      Some token
    | Seq.Nil -> None
  in
  next
in next

let split_full ?pos ?len re s =
let l = ref [] in
let g = split_full_gen ?pos ?len re s in
let rec iter () = match g() with
| None -> List.rev !l
| Some s -> l := s :: !l; iter ()
in iter ()

let split_gen ?pos ?len re s =
Seq.fold_left (fun l x -> x :: l) [] (split_full_seq ?pos ?len re s)
|> List.rev

(* Text-only view of [split_full_seq]: [`Delim] tokens are dropped and the
   payload of every [`Text] token is kept, in order. *)
let split_seq ?pos ?len re s : _ seq =
  let text_only = function
    | `Text txt -> Some txt
    | `Delim _ -> None
  in
  Seq.filter_map text_only (split_full_seq ?pos ?len re s)

let split_gen ?pos ?len re s : _ gen =
let g = split_full_gen ?pos ?len re s in
let rec next() = match g() with
| None -> None
Expand All @@ -1181,13 +1200,12 @@ let split_gen ?pos ?len re s =
in next

let split ?pos ?len re s =
let l = ref [] in
let g = split_full_gen ?pos ?len re s in
let rec iter () = match g() with
| None -> List.rev !l
| Some (`Delim _) -> iter()
| Some (`Text s) -> l := s :: !l; iter ()
in iter ()
Seq.fold_left
(fun l x -> match x with
| `Delim _ -> l
| `Text s -> s :: l)
[] (split_full_seq ?pos ?len re s)
|> List.rev

let replace ?(pos=0) ?len ?(all=true) re ~f s =
if pos < 0 then invalid_arg "Re.replace";
Expand Down
27 changes: 27 additions & 0 deletions lib/core.mli
Expand Up @@ -125,6 +125,7 @@ end
(** {2 High Level Operations} *)

type 'a gen = unit -> 'a option
(** Generator: a closure returning the next element at each call, and [None]
    once there is nothing left to return. *)
type 'a seq = 'a Seq.t
(** Alias for the standard delayed-list type [Seq.t]. *)

val all :
?pos:int -> (** Default: 0 *)
Expand All @@ -139,6 +140,13 @@ val all_gen :
re -> string -> Group.t gen
(** Same as {!all} but returns a generator *)

val all_seq :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> Group.t seq
(** Same as {!all}, but returns the matches as a [Seq.t] instead of a list.
    The search for a match is only performed when the corresponding element
    of the sequence is forced.
    @raise Invalid_argument if [pos] is negative.
    @since NEXT_RELEASE *)

val matches :
?pos:int -> (** Default: 0 *)
?len:int ->
Expand All @@ -153,6 +161,13 @@ val matches_gen :
re -> string -> string gen
(** Same as {!matches}, but returns a generator. *)

val matches_seq :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string seq
(** Same as {!matches}, but returns an iterator: a [Seq.t] holding, for each
    element of {!all_seq}, the substring matched by group 0.
    @raise Invalid_argument if [pos] is negative.
    @since NEXT_RELEASE *)

val split :
?pos:int -> (** Default: 0 *)
?len:int ->
Expand All @@ -166,6 +181,12 @@ val split_gen :
?len:int ->
re -> string -> string gen

val split_seq :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> string seq
(** Same as {!split}, but returns the pieces of text as a [Seq.t] instead of
    a list: the elements are the payloads of the [`Text] tokens of
    {!split_full_seq}, the delimiters being dropped.
    @raise Invalid_argument if [pos] is negative.
    @since NEXT_RELEASE *)

type split_token =
[ `Text of string (** Text between delimiters *)
| `Delim of Group.t (** Delimiter *)
Expand All @@ -181,6 +202,12 @@ val split_full_gen :
?len:int ->
re -> string -> split_token gen

val split_full_seq :
?pos:int -> (** Default: 0 *)
?len:int ->
re -> string -> split_token seq
(** Same as {!split_full}, but returns the tokens as a [Seq.t] instead of a
    list. Tokens are computed as the sequence is forced.
    @raise Invalid_argument if [pos] is negative.
    @since NEXT_RELEASE *)

val replace :
?pos:int -> (** Default: 0 *)
?len:int ->
Expand Down
1 change: 1 addition & 0 deletions lib/jbuild
Expand Up @@ -3,4 +3,5 @@
(library
((name re)
(synopsis "Pure OCaml regular expression library")
(libraries (seq))
(public_name re)))
1 change: 1 addition & 0 deletions re.opam
Expand Up @@ -22,6 +22,7 @@ build-test: [["jbuilder" "runtest" "-p" name "-j" jobs]]
depends: [
"jbuilder" {build & >= "1.0+beta10"}
"ounit" {test}
"seq"
]

available: [ocaml-version >= "4.02.3"]

0 comments on commit 9791b31

Please sign in to comment.