Skip to content

Commit

Permalink
Merge pull request #466 from talex5/colour-logs
Browse files Browse the repository at this point in the history
Add ANSI escape sequence parser for coloured logs
  • Loading branch information
talex5 committed Feb 1, 2017
2 parents f12f08b + 38aae50 commit dc64751
Show file tree
Hide file tree
Showing 10 changed files with 413 additions and 11 deletions.
26 changes: 26 additions & 0 deletions ci/src/cI_char_stream.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
type t = string * int
type span = string * int * int

let of_string s = (s, 0)
let to_string (s, i) = String.sub s i (String.length s - i)

let skip (s, a) = (s, a + 1)
let skip_all (s, _) = (s, String.length s)

let string_of_span (s, a, b) = String.sub s a (b - a)

let (--) (s, a) (_, b) = assert (b >= a); (s, a, b)

let find (base, off) c =
try Some (base, String.index_from base off c)
with Not_found -> None

let avail (base, off) = String.length base - off

let is_empty (base, off) = String.length base = off

let next (base, off) =
if String.length base = off then None
else Some (base.[off], (base, off + 1))

let equal (a:t) (b:t) = a = b
40 changes: 40 additions & 0 deletions ci/src/cI_char_stream.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
type t
(** A base string with an index into it, representing the rest of the string from that point. *)

val of_string : string -> t
(** [of_string s] is a cursor at the start of [s]. *)

val to_string : t -> string
(** [to_string t] is the substring from [t] to the end of the input. *)

val skip : t -> t
(** [skip t] is the stream without its first character. [t] must be non-empty. *)

val skip_all : t -> t
(** [skip_all t] is the empty stream at the end of [t]. *)

val find : t -> char -> t option
(** [find t c] is a stream from the first occurance of [c] in [t], if any. *)

val avail : t -> int
(** [avail t] is the number of remaining characters in the stream. *)

val is_empty : t -> bool
(** [is_empty t] is [avail t = 0]. *)

val next : t -> (char * t) option
(** [next t] is [Some (c, t2)], where [c] is the next character in the stream and [t2] is [skip t],
or [None] if [is_empty t]. *)

val equal : t -> t -> bool
(** [equal a b] is [true] iff the streams [a] and [b] are at the same offset in the same base string. *)

type span = string * int * int
(** [(s, a, b)] represents the span of [s] from index [a] up to but excluding [b]. *)

val (--) : t -> t -> span
(** [a -- b] is the span from [a] (inclusive) to [b] (exclusive).
[a] must not have a higher offset than [b]. *)

val string_of_span : span -> string
(** [string_of_span (s, a, b)] is the sub-string of [s] from [a] to [b]. *)
125 changes: 125 additions & 0 deletions ci/src/cI_escape_parser.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
module Stream = CI_char_stream

type colour =
[ `Black
| `Blue
| `Cyan
| `Green
| `Magenta
| `Red
| `White
| `Yellow ]

type sgr =
[ `BgCol of [`Default | colour]
| `Bold
| `FgCol of [`Default | colour]
| `Italic
| `NoBold
| `NoItalic
| `NoReverse
| `NoUnderline
| `Reset
| `Reverse
| `Underline ]

type escape =
[ `Reset
| `Ctrl of
[ `SelectGraphicRendition of sgr list]
]

let is_param_byte c =
let c = Char.code c in
c land 0xf0 = 0x30

let is_im_byte c =
let c = Char.code c in
c land 0xf0 = 0x40

let is_final_byte c =
let c = Char.code c in
c >= 0x40 && c <= 0x7e

exception Unknown_escape

let colour = function
| 0 -> `Black
| 1 -> `Red
| 2 -> `Green
| 3 -> `Yellow
| 4 -> `Blue
| 5 -> `Magenta
| 6 -> `Cyan
| 7 -> `White
| _ -> raise Unknown_escape

let sgr = function
| "" -> `Reset
| x ->
match int_of_string x with
| exception _ -> raise Unknown_escape
| 0 -> `Reset
| 1 -> `Bold
| 3 -> `Italic
| 4 -> `Underline
| 7 -> `Reverse
| 22 -> `NoBold
| 23 -> `NoItalic
| 24 -> `NoUnderline
| 27 -> `NoReverse
| x when x >= 30 && x <= 37 -> `FgCol (colour (x - 30))
| 39 -> `FgCol `Default
| x when x >= 40 && x <= 47 -> `BgCol (colour (x - 40))
| 49 -> `BgCol `Default
| _ -> raise Unknown_escape


let parse_ctrl ~params = function
| "m" -> `SelectGraphicRendition (List.map sgr params)
| _ -> raise Unknown_escape

let read_intermediates ~params start =
let rec aux s =
match Stream.next s with
| None -> `Incomplete (* No final byte *)
| Some (x, s) when is_im_byte x -> aux s
| Some (x, s2) when is_final_byte x ->
let func = Stream.(start -- s2 |> string_of_span) in
let params = Astring.String.cuts ~sep:";" params in
begin
try `Escape (`Ctrl (parse_ctrl ~params func), s2)
with Unknown_escape -> `Invalid s2
end
| Some _ -> `Invalid s
in
aux start

let read_params start =
let rec aux s =
match Stream.next s with
| None -> `Incomplete (* No final byte *)
| Some (x, s) when is_param_byte x -> aux s
| Some _ ->
let params = Stream.(start -- s |> string_of_span) in
read_intermediates ~params s
in
aux start

(* Parse [esc], an escape sequence. *)
let parse_escape esc =
match Stream.(next (Stream.skip esc)) with
| Some ('[', s) -> read_params s (* [esc] is a control sequence *)
| Some (']', s) -> `Invalid s (* [esc] is a operating system command sequence (todo) *)
| Some ('c', s) -> `Escape (`Reset, s)
| Some (_, s) -> `Invalid s (* TODO: other types of escape *)
| None -> `Incomplete

let parse input =
(* In theory, we could also get the 8-bit escape character encoded as two
UTF-8 bytes, but for now we just process the "<ESC>[" sequence, which
seems to be what everyone is using. *)
match Stream.find input '\x1b' with
| None -> `Literal (Stream.skip_all input)
| Some i when Stream.equal input i -> parse_escape input
| Some i -> `Literal i
39 changes: 39 additions & 0 deletions ci/src/cI_escape_parser.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
type colour =
[ `Black
| `Blue
| `Cyan
| `Green
| `Magenta
| `Red
| `White
| `Yellow ]

type sgr =
[ `BgCol of [`Default | colour]
| `Bold
| `FgCol of [`Default | colour]
| `Italic
| `NoBold
| `NoItalic
| `NoReverse
| `NoUnderline
| `Reset
| `Reverse
| `Underline ]

type escape =
[ `Reset
| `Ctrl of [ `SelectGraphicRendition of sgr list] ]

val parse : CI_char_stream.t ->
[ `Literal of CI_char_stream.t
| `Escape of escape * CI_char_stream.t
| `Invalid of CI_char_stream.t
| `Incomplete ]
(** [parse stream] returns the first token in [stream] and the stream directly after it,
or [`Incomplete] if more data is required to parse the first token.
[`Literal s2] indicates that everything between [stream] and [s2] should be output as literal text.
[`Escape (e, s2)] indicates that the first token was escape sequence [e].
[`Invalid s2] indicates that the first token was malformed or not understood and processing should continue
from [s2].
*)
Loading

0 comments on commit dc64751

Please sign in to comment.