Skip to content

Commit

Permalink
Add ANSI escape sequence parser for coloured logs
Browse files Browse the repository at this point in the history
Also, we now stream saved logs too.

Signed-off-by: Thomas Leonard <thomas.leonard@docker.com>
  • Loading branch information
Thomas Leonard committed Jan 31, 2017
1 parent f12f08b commit 4514ec9
Show file tree
Hide file tree
Showing 10 changed files with 421 additions and 11 deletions.
26 changes: 26 additions & 0 deletions ci/src/cI_char_stream.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
type t = string * int
type span = string * int * int

let of_string s = (s, 0)
let to_string (s, i) = String.sub s i (String.length s - i)

let skip (s, a) = (s, a + 1)
let skip_all (s, _) = (s, String.length s)

let string_of_span (s, a, b) = String.sub s a (b - a)

let (--) (s, a) (_, b) = assert (b >= a); (s, a, b)

let find (base, off) c =
try Some (base, String.index_from base off c)
with Not_found -> None

let avail (base, off) = String.length base - off

let is_empty (base, off) = String.length base = off

let next (base, off) =
if String.length base = off then None
else Some (base.[off], (base, off + 1))

let equal (a:t) (b:t) = a = b
40 changes: 40 additions & 0 deletions ci/src/cI_char_stream.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
type t
(** A base string with an index into it, representing the rest of the string from that point. *)

val of_string : string -> t
(** [of_string s] is a cursor at the start of [s]. *)

val to_string : t -> string
(** [to_string t] is the substring from [t] to the end of the input. *)

val skip : t -> t
(** [skip t] is the stream without its first character. [t] must be non-empty. *)

val skip_all : t -> t
(** [skip_all t] is the empty stream at the end of [t]. *)

val find : t -> char -> t option
(** [find t c] is a stream from the first occurance of [c] in [t], if any. *)

val avail : t -> int
(** [avail t] is the number of remaining characters in the stream. *)

val is_empty : t -> bool
(** [is_empty t] is [avail t = 0]. *)

val next : t -> (char * t) option
(** [next t] is [Some (c, t2)], where [c] is the next character in the stream and [t2] is [skip t],
or [None] if [is_empty t]. *)

val equal : t -> t -> bool
(** [equal a b] is [true] iff the streams [a] and [b] are at the same offset in the same base string. *)

type span = string * int * int
(** [(s, a, b)] represents the span of [s] from index [a] up to but excluding [b]. *)

val (--) : t -> t -> span
(** [a -- b] is the span from [a] (inclusive) to [b] (exclusive).
[a] must not have a higher offset than [b]. *)

val string_of_span : span -> string
(** [string_of_span (s, a, b)] is the sub-string of [s] from [a] to [b]. *)
134 changes: 134 additions & 0 deletions ci/src/cI_escape_parser.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
module Stream = CI_char_stream

type colour =
[ `Black
| `Blue
| `Cyan
| `Green
| `Magenta
| `Red
| `White
| `Yellow ]

type sgr =
[ `BgCol of [`Default | colour]
| `Bold
| `FgCol of [`Default | colour]
| `Italic
| `NoBold
| `NoItalic
| `NoReverse
| `NoUnderline
| `Reset
| `Reverse
| `Underline ]

type escape =
[ `Reset
| `Ctrl of
[ `SelectGraphicRendition of sgr list]
]

let is_param_byte c =
let c = Char.code c in
c land 0xf0 = 0x30

let is_im_byte c =
let c = Char.code c in
c land 0xf0 = 0x40

let is_final_byte c =
let c = Char.code c in
c >= 0x40 && c <= 0x7e

exception Unknown_escape

let intp ~default = function
| [] -> default
| [p] ->
begin match Astring.String.to_int p with
| None -> raise Unknown_escape
| Some x -> x
end
| _ -> raise Unknown_escape

let colour = function
| 0 -> `Black
| 1 -> `Red
| 2 -> `Green
| 3 -> `Yellow
| 4 -> `Blue
| 5 -> `Magenta
| 6 -> `Cyan
| 7 -> `White
| _ -> raise Unknown_escape

let sgr = function
| 0 -> `Reset
| 1 -> `Bold
| 3 -> `Italic
| 4 -> `Underline
| 7 -> `Reverse
| 22 -> `NoBold
| 23 -> `NoItalic
| 24 -> `NoUnderline
| 27 -> `NoReverse
| x when x >= 30 && x <= 37 -> `FgCol (colour (x - 30))
| 39 -> `FgCol `Default
| x when x >= 40 && x <= 47 -> `BgCol (colour (x - 40))
| 49 -> `BgCol `Default
| _ -> raise Unknown_escape

let parse_ctrl ~params = function
| "m" ->
begin match List.map int_of_string params with
| exception _ -> raise Unknown_escape
| [] -> `SelectGraphicRendition [`Reset]
| params -> `SelectGraphicRendition (List.map sgr params)
end
| _ -> raise Unknown_escape

let read_intermediates ~params start =
let rec aux s =
match Stream.next s with
| None -> `Incomplete (* No final byte *)
| Some (x, s) when is_im_byte x -> aux s
| Some (x, s2) when is_final_byte x ->
let func = Stream.(start -- s2 |> string_of_span) in
let params = Astring.String.cuts ~sep:";" params in
begin
try `Escape (`Ctrl (parse_ctrl ~params func), s2)
with Unknown_escape -> `Invalid s2
end
| Some _ -> `Invalid s
in
aux start

let read_params start =
let rec aux s =
match Stream.next s with
| None -> `Incomplete (* No final byte *)
| Some (x, s) when is_param_byte x -> aux s
| Some _ ->
let params = Stream.(start -- s |> string_of_span) in
read_intermediates ~params s
in
aux start

(* Parse [esc], an escape sequence. *)
let parse_escape esc =
match Stream.(next (Stream.skip esc)) with
| Some ('[', s) -> read_params s (* [esc] is a control sequence *)
| Some (']', s) -> `Invalid s (* [esc] is a operating system command sequence (todo) *)
| Some ('c', s) -> `Escape (`Reset, s)
| Some (_, s) -> `Invalid s (* TODO: other types of escape *)
| None -> `Incomplete

let parse input =
(* In theory, we could also get the 8-bit escape character encoded as two
UTF-8 bytes, but for now we just process the "<ESC>[" sequence, which
seems to be what everyone is using. *)
match Stream.find input '\x1b' with
| None -> `Literal (Stream.skip_all input)
| Some i when Stream.equal input i -> parse_escape input
| Some i -> `Literal i
39 changes: 39 additions & 0 deletions ci/src/cI_escape_parser.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
type colour =
[ `Black
| `Blue
| `Cyan
| `Green
| `Magenta
| `Red
| `White
| `Yellow ]

type sgr =
[ `BgCol of [`Default | colour]
| `Bold
| `FgCol of [`Default | colour]
| `Italic
| `NoBold
| `NoItalic
| `NoReverse
| `NoUnderline
| `Reset
| `Reverse
| `Underline ]

type escape =
[ `Reset
| `Ctrl of [ `SelectGraphicRendition of sgr list] ]

val parse : CI_char_stream.t ->
[ `Literal of CI_char_stream.t
| `Escape of escape * CI_char_stream.t
| `Invalid of CI_char_stream.t
| `Incomplete ]
(** [parse stream] returns the first token in [stream] and the stream directly after it,
or [`Incomplete] if more data is required to parse the first token.
[`Literal s2] indicates that everything between [stream] and [s2] should be output as literal text.
[`Escape (e, s2)] indicates that the first token was escape sequence [e].
[`Invalid s2] indicates that the first token was malformed or not understood and processing should continue
from [s2].
*)
Loading

0 comments on commit 4514ec9

Please sign in to comment.