Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ Unreleased
----------
* Add `Re.split_delim` (#233)
* Fix handling of empty matches in splitting and substitution functions (#233)
* Add support for character classes in `Re.Posix` (#263)

1.11.0 (19-Aug-2023)
--------------------
Expand Down
54 changes: 11 additions & 43 deletions lib/perl.ml
Original file line number Diff line number Diff line change
Expand Up @@ -25,38 +25,13 @@ module Re = Core
exception Parse_error = Parse_buffer.Parse_error
exception Not_supported

let posix_class_of_string = function
| "alpha" -> Re.alpha
| "alnum" -> Re.alnum
| "ascii" -> Re.ascii
| "blank" -> Re.blank
| "cntrl" -> Re.cntrl
| "digit" -> Re.digit
| "lower" -> Re.lower
| "print" -> Re.print
| "space" -> Re.space
| "upper" -> Re.upper
| "word" -> Re.wordc
| "punct" -> Re.punct
| "graph" -> Re.graph
| "xdigit" -> Re.xdigit
| class_ -> invalid_arg ("Invalid pcre class: " ^ class_)

let posix_class_strings =
[ "alpha" ; "alnum" ; "ascii"
; "blank" ; "cntrl" ; "digit"
; "lower" ; "print" ; "space"
; "upper" ; "word" ; "punct"
; "graph" ; "xdigit" ]

let parse multiline dollar_endonly dotall ungreedy s =
let buf = Parse_buffer.create s in
let accept = Parse_buffer.accept buf in
let eos () = Parse_buffer.eos buf in
let test c = Parse_buffer.test buf c in
let unget () = Parse_buffer.unget buf in
let get () = Parse_buffer.get buf in
let accept_s = Parse_buffer.accept_s buf in
let greedy_mod r =
let gr = accept '?' in
let gr = if ungreedy then not gr else gr in
Expand Down Expand Up @@ -222,24 +197,17 @@ let parse multiline dollar_endonly dotall ungreedy s =
let c = get () in
if c = '[' then begin
if accept '=' then raise Not_supported;
if accept ':' then
let compl = accept '^' in
let cls =
try List.find accept_s posix_class_strings
with Not_found -> raise Parse_error in
if not (accept_s ":]") then raise Parse_error;
let re =
let posix_class = posix_class_of_string cls in
if compl then Re.compl [posix_class] else posix_class in
`Set (re)
else if accept '.' then begin
if eos () then raise Parse_error;
let c = get () in
if not (accept '.') then raise Not_supported;
if not (accept ']') then raise Parse_error;
`Char c
end else
`Char c
match Posix_class.parse buf with
| Some set -> `Set set
| None ->
if accept '.' then begin
if eos () then raise Parse_error;
let c = get () in
if not (accept '.') then raise Not_supported;
if not (accept ']') then raise Parse_error;
`Char c
end else
`Char c
end else if c = '\\' then begin
if eos () then raise Parse_error;
let c = get () in
Expand Down
45 changes: 25 additions & 20 deletions lib/posix.ml
Original file line number Diff line number Diff line change
Expand Up @@ -97,32 +97,37 @@ let parse newline s =
end
and bracket s =
if s <> [] && accept ']' then s else begin
let c = char () in
if accept '-' then begin
if accept ']' then Re.char c :: Re.char '-' :: s else begin
let c' = char () in
bracket (Re.rg c c' :: s)
end
end else
bracket (Re.char c :: s)
match char () with
| `Char c ->
if accept '-' then begin
if accept ']' then Re.char c :: Re.char '-' :: s else begin
match char () with
`Char c' ->
bracket (Re.rg c c' :: s)
| `Set st' ->
bracket (Re.char c :: Re.char '-' :: st' :: s)
end
end else
bracket (Re.char c :: s)
| `Set st -> bracket (st :: s)
end
and char () =
if eos () then raise Parse_error;
let c = get () in
if c = '[' then begin
if accept '=' then raise Not_supported
else if accept ':' then begin
raise Not_supported (*XXX*)
end else if accept '.' then begin
if eos () then raise Parse_error;
let c = get () in
if not (accept '.') then raise Not_supported;
if not (accept ']') then raise Parse_error;
c
end else
c
match Posix_class.parse buf with
| Some set -> `Set set
| None ->
if accept '.' then begin
if eos () then raise Parse_error;
let c = get () in
if not (accept '.') then raise Not_supported;
if not (accept ']') then raise Parse_error;
`Char c
end else
`Char c
end else
c
`Char c
in
let res = regexp () in
if not (eos ()) then raise Parse_error;
Expand Down
40 changes: 40 additions & 0 deletions lib/posix_class.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
module Re = Core

let of_name = function
| "alpha" -> Re.alpha
| "alnum" -> Re.alnum
| "ascii" -> Re.ascii
| "blank" -> Re.blank
| "cntrl" -> Re.cntrl
| "digit" -> Re.digit
| "lower" -> Re.lower
| "print" -> Re.print
| "space" -> Re.space
| "upper" -> Re.upper
| "word" -> Re.wordc
| "punct" -> Re.punct
| "graph" -> Re.graph
| "xdigit" -> Re.xdigit
| class_ -> invalid_arg ("Invalid pcre class: " ^ class_)

let names =
[ "alpha" ; "alnum" ; "ascii"
; "blank" ; "cntrl" ; "digit"
; "lower" ; "print" ; "space"
; "upper" ; "word" ; "punct"
; "graph" ; "xdigit" ]

let parse buf =
let accept = Parse_buffer.accept buf in
let accept_s = Parse_buffer.accept_s buf in
match accept ':' with
| false -> None
| true ->
let compl = accept '^' in
let cls =
try List.find accept_s names
with Not_found -> raise Parse_buffer.Parse_error
in
if not (accept_s ":]") then raise Parse_buffer.Parse_error;
let posix_class = of_name cls in
Some (if compl then Re.compl [posix_class] else posix_class)
3 changes: 3 additions & 0 deletions lib/posix_class.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
val names : string list
val of_name : string -> Core.t
val parse : Parse_buffer.t -> Core.t option
10 changes: 7 additions & 3 deletions lib_test/test_posix.ml
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
open OUnit

let execp = Re.execp

let test_class_space () =
match Re.Posix.compile_pat {|a[[:space:]]b|} with
| exception Re.Posix.Not_supported -> ()
| (_ : Re.re) -> assert false
let re = Re.Posix.compile_pat {|a[[:space:]]b|} in
let exec = Re.execp re in
assert_bool "matches with space" (exec "a b");
assert_bool "does not match without a space" (not (exec "ab"));
assert_bool "does not match with a different char" (not (exec "a_b"))

let suite = "posix" >:::
[ "regression 213" >:: test_class_space
Expand Down