Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ANSI escape sequence parser for coloured logs #466

Merged
merged 1 commit into from
Feb 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions ci/src/cI_char_stream.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
type t = string * int
type span = string * int * int

let of_string s = (s, 0)
let to_string (s, i) = String.sub s i (String.length s - i)

let skip (s, a) = (s, a + 1)
let skip_all (s, _) = (s, String.length s)

let string_of_span (s, a, b) = String.sub s a (b - a)

let (--) (s, a) (_, b) = assert (b >= a); (s, a, b)

let find (base, off) c =
try Some (base, String.index_from base off c)
with Not_found -> None

let avail (base, off) = String.length base - off

let is_empty (base, off) = String.length base = off

let next (base, off) =
if String.length base = off then None
else Some (base.[off], (base, off + 1))

let equal (a:t) (b:t) = a = b
40 changes: 40 additions & 0 deletions ci/src/cI_char_stream.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
type t
(** A base string with an index into it, representing the rest of the string from that point. *)

val of_string : string -> t
(** [of_string s] is a cursor at the start of [s]. *)

val to_string : t -> string
(** [to_string t] is the substring from [t] to the end of the input. *)

val skip : t -> t
(** [skip t] is the stream without its first character. [t] must be non-empty. *)

val skip_all : t -> t
(** [skip_all t] is the empty stream at the end of [t]. *)

val find : t -> char -> t option
(** [find t c] is a stream from the first occurance of [c] in [t], if any. *)

val avail : t -> int
(** [avail t] is the number of remaining characters in the stream. *)

val is_empty : t -> bool
(** [is_empty t] is [avail t = 0]. *)

val next : t -> (char * t) option
(** [next t] is [Some (c, t2)], where [c] is the next character in the stream and [t2] is [skip t],
or [None] if [is_empty t]. *)

val equal : t -> t -> bool
(** [equal a b] is [true] iff the streams [a] and [b] are at the same offset in the same base string. *)

type span = string * int * int
(** [(s, a, b)] represents the span of [s] from index [a] up to but excluding [b]. *)

val (--) : t -> t -> span
(** [a -- b] is the span from [a] (inclusive) to [b] (exclusive).
[a] must not have a higher offset than [b]. *)

val string_of_span : span -> string
(** [string_of_span (s, a, b)] is the sub-string of [s] from [a] to [b]. *)
125 changes: 125 additions & 0 deletions ci/src/cI_escape_parser.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
module Stream = CI_char_stream

type colour =
[ `Black
| `Blue
| `Cyan
| `Green
| `Magenta
| `Red
| `White
| `Yellow ]

type sgr =
[ `BgCol of [`Default | colour]
| `Bold
| `FgCol of [`Default | colour]
| `Italic
| `NoBold
| `NoItalic
| `NoReverse
| `NoUnderline
| `Reset
| `Reverse
| `Underline ]

type escape =
[ `Reset
| `Ctrl of
[ `SelectGraphicRendition of sgr list]
]

let is_param_byte c =
let c = Char.code c in
c land 0xf0 = 0x30

let is_im_byte c =
let c = Char.code c in
c land 0xf0 = 0x40

let is_final_byte c =
let c = Char.code c in
c >= 0x40 && c <= 0x7e

exception Unknown_escape

let colour = function
| 0 -> `Black
| 1 -> `Red
| 2 -> `Green
| 3 -> `Yellow
| 4 -> `Blue
| 5 -> `Magenta
| 6 -> `Cyan
| 7 -> `White
| _ -> raise Unknown_escape

let sgr = function
| "" -> `Reset
| x ->
match int_of_string x with
| exception _ -> raise Unknown_escape
| 0 -> `Reset
| 1 -> `Bold
| 3 -> `Italic
| 4 -> `Underline
| 7 -> `Reverse
| 22 -> `NoBold
| 23 -> `NoItalic
| 24 -> `NoUnderline
| 27 -> `NoReverse
| x when x >= 30 && x <= 37 -> `FgCol (colour (x - 30))
| 39 -> `FgCol `Default
| x when x >= 40 && x <= 47 -> `BgCol (colour (x - 40))
| 49 -> `BgCol `Default
| _ -> raise Unknown_escape


let parse_ctrl ~params = function
| "m" -> `SelectGraphicRendition (List.map sgr params)
| _ -> raise Unknown_escape

let read_intermediates ~params start =
let rec aux s =
match Stream.next s with
| None -> `Incomplete (* No final byte *)
| Some (x, s) when is_im_byte x -> aux s
| Some (x, s2) when is_final_byte x ->
let func = Stream.(start -- s2 |> string_of_span) in
let params = Astring.String.cuts ~sep:";" params in
begin
try `Escape (`Ctrl (parse_ctrl ~params func), s2)
with Unknown_escape -> `Invalid s2
end
| Some _ -> `Invalid s
in
aux start

let read_params start =
let rec aux s =
match Stream.next s with
| None -> `Incomplete (* No final byte *)
| Some (x, s) when is_param_byte x -> aux s
| Some _ ->
let params = Stream.(start -- s |> string_of_span) in
read_intermediates ~params s
in
aux start

(* Parse [esc], an escape sequence. *)
let parse_escape esc =
match Stream.(next (Stream.skip esc)) with
| Some ('[', s) -> read_params s (* [esc] is a control sequence *)
| Some (']', s) -> `Invalid s (* [esc] is a operating system command sequence (todo) *)
| Some ('c', s) -> `Escape (`Reset, s)
| Some (_, s) -> `Invalid s (* TODO: other types of escape *)
| None -> `Incomplete

let parse input =
(* In theory, we could also get the 8-bit escape character encoded as two
UTF-8 bytes, but for now we just process the "<ESC>[" sequence, which
seems to be what everyone is using. *)
match Stream.find input '\x1b' with
| None -> `Literal (Stream.skip_all input)
| Some i when Stream.equal input i -> parse_escape input
| Some i -> `Literal i
39 changes: 39 additions & 0 deletions ci/src/cI_escape_parser.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
type colour =
[ `Black
| `Blue
| `Cyan
| `Green
| `Magenta
| `Red
| `White
| `Yellow ]

type sgr =
[ `BgCol of [`Default | colour]
| `Bold
| `FgCol of [`Default | colour]
| `Italic
| `NoBold
| `NoItalic
| `NoReverse
| `NoUnderline
| `Reset
| `Reverse
| `Underline ]

type escape =
[ `Reset
| `Ctrl of [ `SelectGraphicRendition of sgr list] ]

val parse : CI_char_stream.t ->
[ `Literal of CI_char_stream.t
| `Escape of escape * CI_char_stream.t
| `Invalid of CI_char_stream.t
| `Incomplete ]
(** [parse stream] returns the first token in [stream] and the stream directly after it,
or [`Incomplete] if more data is required to parse the first token.
[`Literal s2] indicates that everything between [stream] and [s2] should be output as literal text.
[`Escape (e, s2)] indicates that the first token was escape sequence [e].
[`Invalid s2] indicates that the first token was malformed or not understood and processing should continue
from [s2].
*)
Loading