From ae506c9745d42d67a5e9c5f4ccc73f59e5e9ab71 Mon Sep 17 00:00:00 2001 From: Rudi Grinberg Date: Thu, 8 Jun 2023 18:51:09 +0100 Subject: [PATCH] chore: vendor cmarkit To avoid pointless recompilation when cmdliner is installed Signed-off-by: Rudi Grinberg --- dune-project | 1 - ocaml-lsp-server.opam | 1 - ocaml-lsp-server/vendor/cmarkit/LICENSE.md | 13 + ocaml-lsp-server/vendor/cmarkit/cmarkit.ml | 3194 +++++++++++++++++ ocaml-lsp-server/vendor/cmarkit/cmarkit.mli | 1891 ++++++++++ .../vendor/cmarkit/cmarkit_base.ml | 1360 +++++++ .../vendor/cmarkit/cmarkit_base.mli | 401 +++ .../vendor/cmarkit/cmarkit_commonmark.ml | 446 +++ .../vendor/cmarkit/cmarkit_commonmark.mli | 266 ++ .../vendor/cmarkit/cmarkit_data.ml | 59 + .../vendor/cmarkit/cmarkit_data.mli | 50 + .../vendor/cmarkit/cmarkit_data_html.ml | 2165 +++++++++++ .../vendor/cmarkit/cmarkit_data_uchar.ml | 658 ++++ .../vendor/cmarkit/cmarkit_html.ml | 518 +++ .../vendor/cmarkit/cmarkit_html.mli | 185 + .../vendor/cmarkit/cmarkit_latex.ml | 423 +++ .../vendor/cmarkit/cmarkit_latex.mli | 226 ++ .../vendor/cmarkit/cmarkit_renderer.ml | 104 + .../vendor/cmarkit/cmarkit_renderer.mli | 275 ++ ocaml-lsp-server/vendor/cmarkit/dune | 3 + 20 files changed, 12237 insertions(+), 2 deletions(-) create mode 100644 ocaml-lsp-server/vendor/cmarkit/LICENSE.md create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit.mli create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_base.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_base.mli create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.mli create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_data.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_data.mli create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_data_html.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_data_uchar.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_html.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_html.mli create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.mli create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.ml create mode 100644 ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.mli create mode 100644 ocaml-lsp-server/vendor/cmarkit/dune diff --git a/dune-project b/dune-project index b8a87a2e4..5697232ca 100644 --- a/dune-project +++ b/dune-project @@ -56,7 +56,6 @@ possible and does not make any assumptions about IO. ordering dune-build-info spawn - (cmarkit (>= 0.2.0)) (odoc-parser (>= 2.0.0)) (ppx_expect (and (>= v0.15.0) :with-test)) (ocamlformat (and :with-test (= 0.24.1))) diff --git a/ocaml-lsp-server.opam b/ocaml-lsp-server.opam index ecc301427..a9f5c2e90 100644 --- a/ocaml-lsp-server.opam +++ b/ocaml-lsp-server.opam @@ -32,7 +32,6 @@ depends: [ "ordering" "dune-build-info" "spawn" - "cmarkit" {>= "0.2.0"} "odoc-parser" {>= "2.0.0"} "ppx_expect" {>= "v0.15.0" & with-test} "ocamlformat" {with-test & = "0.24.1"} diff --git a/ocaml-lsp-server/vendor/cmarkit/LICENSE.md b/ocaml-lsp-server/vendor/cmarkit/LICENSE.md new file mode 100644 index 000000000..eecc26a39 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/LICENSE.md @@ -0,0 +1,13 @@ +Copyright (c) 2020 The cmarkit programmers + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit.ml new file mode 100644 index 000000000..553700f10 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit.ml @@ -0,0 +1,3194 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +module String_map = Map.Make (String) +module Ascii = Cmarkit_base.Ascii +module Text = Cmarkit_base.Text +module Match = Cmarkit_base +module Textloc = Cmarkit_base.Textloc +module Meta = Cmarkit_base.Meta +module Layout = struct + type blanks = string + type nonrec string = string + type nonrec char = char + type count = int + type indent = int + let string ?(meta = Meta.none) s = s, meta + let empty = string "" +end + +type byte_pos = Textloc.byte_pos +type line_span = Match.line_span = + (* Substring on a single line, hereafter abbreviated to span *) + { line_pos : Textloc.line_pos; first : byte_pos; last : byte_pos } + +type 'a node = 'a * Meta.t + +module Block_line = struct + let _list_of_string flush s = (* cuts [s] on newlines *) + let rec loop s acc max start k = + if k > max then List.rev (flush s start max acc) else + if not (s.[k] = '\n' || s.[k] = '\r') + then loop s acc max start (k + 1) else + let acc = flush s start (k - 1) acc in + let next = k + 1 in + let start = + if s.[k] = '\r' && next <= max && s.[next] = '\n' then next + 1 else + next + in + loop s acc max start start + in + loop s [] (String.length s - 1) 0 0 + + let flush ?(meta = Meta.none) s start last acc = + let sub = String.sub s start (last - start + 1) in + (sub, meta) :: acc + + let flush_tight ?(meta = Meta.none) s start last acc = + (* If [s] has newlines, blanks after newlines are layout *) + if start > last then ("", ("", meta)) :: acc else + match acc with + | [] (* On the first line the blanks are legit *) -> + ("", (String.sub s start (last - start + 1), meta)) :: acc + | acc -> + let nb = Match.first_non_blank s ~last ~start in + (String.sub s start (nb - 1 - start + 1), + (String.sub s nb (last - nb + 1), meta)) :: acc + + (* Block lines *) + + type t = string node + + let to_string = fst + let list_of_string ?meta s = _list_of_string (flush ?meta) s + let list_textloc = function + | [] -> Textloc.none | [(_, m)] -> Meta.textloc m + | (_, first) :: _ as l -> + let _, last = List.hd (List.rev l) in + Textloc.reloc ~first:(Meta.textloc first) ~last:(Meta.textloc last) + + (* Tight lines *) + + type tight = Layout.blanks * t + + let tight_to_string l = fst (snd l) + let tight_list_of_string ?meta s = _list_of_string (flush_tight ?meta) s + let tight_list_textloc = function + | [] -> Textloc.none | [_, (_, m)] -> Meta.textloc m + | (_, (_, first)) :: _ as l -> + let (_, (_, last)) = List.hd (List.rev l) in + Textloc.reloc ~first:(Meta.textloc first) ~last:(Meta.textloc last) + + (* Blank lines *) + + type blank = Layout.blanks node +end + +module Label = struct + type key = string + type t = { meta : Meta.t; key : key; text : Block_line.tight list } + let make ?(meta = Meta.none) ~key text = { key; text; meta } + let with_meta meta l = { l with meta } + let meta t = t.meta + let key t = t.key + let text t = t.text + let textloc t = Block_line.tight_list_textloc t.text + let text_to_string t = + String.concat " " (List.map Block_line.tight_to_string t.text) + + let compare l0 l1 = String.compare l0.key l1.key + + (* Definitions *) + + module Map = Map.Make (String) + type def = .. + type defs = def Map.t + + (* Resolvers *) + + type context = + [ `Def of t option * t + | `Ref of [ `Link | `Image ] * t * (t option) ] + + type resolver = context -> t option + let default_resolver = function + | `Def (None, k) -> Some k + | `Def (Some _, k) -> None + | `Ref (_, _, k) -> k +end + +module Link_definition = struct + type layout = + { indent : Layout.indent; + angled_dest : bool; + before_dest : Block_line.blank list; + after_dest : Block_line.blank list; + title_open_delim : Layout.char; + after_title : Block_line.blank list; } + + let layout_for_dest dest = + let needs_angles c = Ascii.is_control c || c = ' ' in + let angled_dest = String.exists needs_angles dest in + { indent = 0; angled_dest; before_dest = []; + after_dest = []; title_open_delim = '\"'; after_title = [] } + + let default_layout = + { indent = 0; angled_dest = false; before_dest = []; + after_dest = []; title_open_delim = '\"'; after_title = [] } + + type t = + { layout : layout; + label : Label.t option; + defined_label : Label.t option; + dest : string node option; + title : Block_line.tight list option; } + + let make ?layout ?defined_label ?label ?dest ?title () = + let layout = match dest with + | None -> default_layout | Some (d, _) -> layout_for_dest d + in + let defined_label = match defined_label with None -> label | Some d -> d in + { layout; label; defined_label; dest; title } + + let layout ld = ld.layout + let label ld = ld.label + let defined_label ld = ld.defined_label + let dest ld = ld.dest + let title ld = ld.title + + type Label.def += Def of t node +end + +module Inline = struct + type t = .. + + module Autolink = struct + type t = { is_email : bool; link : string node; } + let is_email a = a.is_email + let link a = a.link + let make link = + let is_email = + let l = String.concat "" ["<"; fst link; ">"] in + match Match.autolink_email l ~last:(String.length l - 1) ~start:0 with + | None -> false | Some _ -> true + in + { is_email; link } + end + + module Break = struct + type type' = [ `Hard | `Soft ] + type t = + { layout_before : Layout.blanks node; + type' : type'; + layout_after : Layout.blanks node; } + + let make + ?(layout_before = Layout.empty) ?(layout_after = Layout.empty) type' + = + { layout_before; type'; layout_after } + + let type' b = b.type' + let layout_before b = b.layout_before + let layout_after b = b.layout_after + end + + module Code_span = struct + type t = + { backtick_count : Layout.count; + code_layout : Block_line.tight list; } + + let make ~backtick_count code_layout = { backtick_count; code_layout } + + let min_backtick_count ~min counts = + let rec loop min = function + | c :: cs -> if min <> c then min else loop (c + 1) cs | [] -> min + in + loop min (List.sort Int.compare counts) + + let of_string ?(meta = Meta.none) = function + | "" -> { backtick_count = 1 ; code_layout = ["", ("", meta)] } + | s -> + (* This finds out the needed backtick count, whether spaces are needed, + and treats blanks after newline as layout *) + let max = String.length s - 1 in + let need_sp = s.[0] = '`' || s.[max] = '`' in + let s = if need_sp then String.concat "" [" "; s; " "] else s in + let backtick_counts, code_layout = + let rec loop bt_counts acc max btc start k = match k > max with + | true -> + (* assert (btc = 0) because of [need_sp] *) + bt_counts, + if acc = [] then ["", (s, meta)] else + List.rev (Block_line.flush_tight ~meta s start max acc) + | false -> + if s.[k] = '`' + then loop bt_counts acc max (btc + 1) start (k + 1) else + let bt_counts = if btc > 0 then btc :: bt_counts else bt_counts in + if not (s.[k] = '\n' || s.[k] = '\r') + then loop bt_counts acc max 0 start (k + 1) else + let acc = Block_line.flush_tight ~meta s start (k - 1) acc in + let start = + if k + 1 <= max && s.[k] = '\r' && s.[k + 1] = '\n' + then k + 2 else k + 1 + in + loop bt_counts acc max 0 start start + in + loop [] [] max 0 0 0 + in + let backtick_count = min_backtick_count ~min:1 backtick_counts in + { backtick_count; code_layout } + + let backtick_count cs = cs.backtick_count + let code_layout cs = cs.code_layout + let code cs = + (* Extract code, see https://spec.commonmark.org/0.30/#code-spans *) + let sp c = Char.equal c ' ' in + let s = List.map Block_line.tight_to_string cs.code_layout in + let s = String.concat " " s in + if s = "" then "" else + if s.[0] = ' ' && s.[String.length s - 1] = ' ' && + not (String.for_all sp s) + then String.sub s 1 (String.length s - 2) else s + end + + module Emphasis = struct + type inline = t + type t = { delim : Layout.char; inline : inline } + let make ?(delim = '*') inline = { delim; inline } + let inline e = e.inline + let delim e = e.delim + end + + module Link = struct + type inline = t + + type reference_layout = [ `Collapsed | `Full | `Shortcut ] + type reference = + [ `Inline of Link_definition.t node + | `Ref of reference_layout * Label.t * Label.t ] + + type t = { text : inline; reference : reference; } + + let make text reference = { text; reference } + let text l = l.text + let reference l = l.reference + let referenced_label l = match l.reference with + | `Inline _ -> None | `Ref (_, _, k) -> Some k + + let reference_definition defs l = match l.reference with + | `Inline ld -> Some (Link_definition.Def ld) + | `Ref (_, _, def) -> Label.Map.find_opt (Label.key def) defs + + let is_unsafe l = + let allowed_data_url l = + let allowed = ["image/gif"; "image/png"; "image/jpeg"; "image/webp"] in + (* Extract mediatype from data:[][;base64], *) + match String.index_from_opt l 4 ',' with + | None -> false + | Some j -> + let k = match String.index_from_opt l 4 ';' with + | None -> j | Some k -> k + in + let t = String.sub l 5 (min j k - 5) in + List.mem t allowed + in + Ascii.caseless_starts_with ~prefix:"javascript:" l || + Ascii.caseless_starts_with ~prefix:"vbscript:" l || + Ascii.caseless_starts_with ~prefix:"file:" l || + (Ascii.caseless_starts_with ~prefix:"data:" l && not (allowed_data_url l)) + end + + module Raw_html = struct + type t = Block_line.tight list + end + + module Text = struct + type t = string + end + + type t += + | Autolink of Autolink.t node + | Break of Break.t node + | Code_span of Code_span.t node + | Emphasis of Emphasis.t node + | Image of Link.t node + | Inlines of t list node + | Link of Link.t node + | Raw_html of Raw_html.t node + | Strong_emphasis of Emphasis.t node + | Text of Text.t node + + let empty = Inlines ([], Meta.none) + + let err_unknown = "Unknown Cmarkit.Inline.t type extension" + + (* Extensions *) + + module Strikethrough = struct + type nonrec t = t + let make = Fun.id + let inline = Fun.id + end + + module Math_span = struct + type t = { display : bool; tex_layout : Block_line.tight list; } + let make ~display tex_layout = { display; tex_layout } + let display ms = ms.display + let tex_layout ms = ms.tex_layout + let tex ms = + let s = List.map Block_line.tight_to_string ms.tex_layout in + String.concat " "s + end + + type t += + | Ext_strikethrough of Strikethrough.t node + | Ext_math_span of Math_span.t node + + (* Functions on inlines *) + + let is_empty = function + | Text ("", _) | Inlines ([], _) -> true | _ -> false + + let ext_none _ = invalid_arg err_unknown + let meta ?(ext = ext_none) = function + | Autolink (_, m) | Break (_, m) | Code_span (_, m) | Emphasis (_, m) + | Image (_, m) | Inlines (_, m) | Link (_, m) | Raw_html (_, m) + | Strong_emphasis (_, m) | Text (_, m) -> m + | Ext_strikethrough (_, m) -> m | Ext_math_span (_, m) -> m + | i -> ext i + + let rec normalize ?(ext = ext_none) = function + | Autolink _ | Break _ | Code_span _ | Raw_html _ | Text _ + | Inlines ([], _) | Ext_math_span _ as i -> i + | Image (l, m) -> Image ({ l with text = normalize ~ext l.text }, m) + | Link (l, m) -> Link ({ l with text = normalize ~ext l.text }, m) + | Inlines ([i], _) -> i + | Emphasis (e, m) -> + Emphasis ({ e with inline = normalize ~ext e.inline}, m) + | Strong_emphasis (e, m) -> + Strong_emphasis ({ e with inline = normalize ~ext e.inline}, m) + | Inlines (i :: is, m) -> + let rec loop acc = function + | Inlines (is', m) :: is -> loop acc (List.rev_append (List.rev is') is) + | Text (t', m') as i' :: is -> + begin match acc with + | Text (t, m) :: acc -> + let tl = Textloc.span (Meta.textloc m) (Meta.textloc m') in + let i = Text (t ^ t', Meta.with_textloc ~keep_id:true m tl) in + loop (i :: acc) is + | _ -> loop (normalize ~ext i' :: acc) is + end + | i :: is -> loop (normalize ~ext i :: acc) is + | [] -> List.rev acc + in + let is = loop [normalize ~ext i] is in + (match is with [i] -> i | _ -> Inlines (is, m)) + | Ext_strikethrough (i, m) -> Ext_strikethrough (normalize ~ext i, m) + | i -> ext i + + let ext_none ~break_on_soft = ext_none + let to_plain_text ?(ext = ext_none) ~break_on_soft i = + let push s acc = (s :: List.hd acc) :: List.tl acc in + let newline acc = [] :: (List.rev (List.hd acc)) :: List.tl acc in + let rec loop ~break_on_soft acc = function + | Autolink (a, _) :: is -> + let acc = push (String.concat "" ["<"; fst a.link; ">"]) acc in + loop ~break_on_soft acc is + | Break ({ type' = `Hard }, _) :: is -> + loop ~break_on_soft (newline acc) is + | Break ({ type' = `Soft }, _) :: is -> + let acc = if break_on_soft then newline acc else (push " " acc) in + loop ~break_on_soft acc is + | Code_span (cs, _) :: is -> + loop ~break_on_soft (push (Code_span.code cs) acc) is + | Emphasis ({ inline }, _) :: is | Strong_emphasis ({ inline }, _) :: is -> + loop ~break_on_soft acc (inline :: is) + | Inlines (is', _) :: is -> + loop ~break_on_soft acc (List.rev_append (List.rev is') is) + | Link (l, _) :: is | Image (l, _) :: is -> + loop ~break_on_soft acc (l.text :: is) + | Raw_html _ :: is -> + loop ~break_on_soft acc is + | Text (t, _) :: is -> + loop ~break_on_soft (push t acc) is + | Ext_strikethrough (i, _) :: is -> + loop ~break_on_soft acc (i :: is) + | Ext_math_span (m, _) :: is -> + loop ~break_on_soft (push (Math_span.tex m) acc) is + | i :: is -> + loop ~break_on_soft acc (ext ~break_on_soft i :: is) + | [] -> + List.rev ((List.rev (List.hd acc)) :: List.tl acc) + in + loop ~break_on_soft ([] :: []) [i] + + let id ?buf ?ext i = + let text = to_plain_text ?ext ~break_on_soft:false i in + let s = String.concat "\n" (List.map (String.concat "") text) in + let b = match buf with + | Some b -> Buffer.reset b; b | None -> Buffer.create 256 + in + let[@inline] collapse_blanks b ~prev_byte = + (* Collapses non initial white *) + if Ascii.is_blank prev_byte && Buffer.length b <> 0 + then Buffer.add_char b '-' + in + let rec loop b s max ~prev_byte k = + if k > max then Buffer.contents b else + match s.[k] with + | ' ' | '\t' as prev_byte -> loop b s max ~prev_byte (k + 1) + | '_' | '-' as c -> + collapse_blanks b ~prev_byte; + Buffer.add_char b c; + loop b s max ~prev_byte:c (k + 1) + | c -> + let () = collapse_blanks b ~prev_byte in + let d = String.get_utf_8_uchar s k in + let u = Uchar.utf_decode_uchar d in + let u = match Uchar.to_int u with 0x0000 -> Uchar.rep | _ -> u in + let k' = k + Uchar.utf_decode_length d in + if Cmarkit_data.is_unicode_punctuation u + then loop b s max ~prev_byte:'\x00' k' else + let () = match Cmarkit_data.unicode_case_fold u with + | None -> Buffer.add_utf_8_uchar b u + | Some fold -> Buffer.add_string b fold + in + let prev_byte = s.[k] in + loop b s max ~prev_byte k' + in + loop b s (String.length s - 1) ~prev_byte:'\x00' 0 +end + +(* Blocks *) + +module Block = struct + type t = .. + + module Blank_line = struct + type t = Layout.blanks + end + + module Block_quote = struct + type nonrec t = { indent : Layout.indent; block : t; } + let make ?(indent = 0) block = { indent; block } + let indent bq = bq.indent + let block bq = bq.block + end + + module Code_block = struct + type fenced_layout = + { indent : Layout.indent; + opening_fence : Layout.string node; + closing_fence : Layout.string node option; } + + let default_fenced_layout = + { indent = 0; + opening_fence = Layout.empty; + closing_fence = Some Layout.empty } + + type layout = [ `Indented | `Fenced of fenced_layout ] + type t = + { layout : layout; + info_string : string node option; + code : string node list; } + + let make ?(layout = `Fenced default_fenced_layout) ?info_string code = + let layout = match info_string, layout with + | Some _, `Indented -> `Fenced default_fenced_layout + | _, layout -> layout + in + { layout; info_string; code } + + let layout cb = cb.layout + let info_string cb = cb.info_string + let code cb = cb.code + + let make_fence cb = + let rec loop char counts = function + | [] -> counts + | (c, _) :: cs -> + let max = String.length c - 1 in + let k = ref 0 in + while (!k <= max && c.[!k] = char) do incr k done; + loop char (if !k <> 0 then !k :: counts else counts) cs + in + let char = match cb.info_string with + | Some (i, _) when String.exists (Char.equal '`') i -> '~' + | None | Some _ -> '`' + in + let counts = loop char [] cb.code in + char, + Inline.Code_span.min_backtick_count (* not char specific *) ~min:3 counts + + let language_of_info_string s = + let rec next_white s max i = + if i > max || Ascii.is_white s.[i] then i else + next_white s max (i + 1) + in + if s = "" then None else + let max = String.length s - 1 in + let white = next_white s max 0 in + let rem_first = Match.first_non_blank s ~last:max ~start:white in + let lang = String.sub s 0 white in + if lang = "" then None else + Some (lang, String.sub s rem_first (max - rem_first + 1)) + + let is_math_block = function + | None -> false | Some (i, _) -> match language_of_info_string i with + | Some ("math", _) -> true + | Some _ | None -> false + end + + module Heading = struct + type atx_layout = + { indent : Layout.indent; + after_opening : Layout.blanks; + closing : Layout.string; } + + let default_atx_layout = { indent = 0; after_opening = ""; closing = "" } + + type setext_layout = + { leading_indent : Layout.indent; + trailing_blanks : Layout.blanks; + underline_indent : Layout.indent; + underline_count : Layout.count node; + underline_blanks : Layout.blanks; } + + type layout = [ `Atx of atx_layout | `Setext of setext_layout ] + type id = [ `Auto of string | `Id of string ] + type t = { layout : layout; level : int; inline : Inline.t; id : id option } + + let make ?id ?(layout = `Atx default_atx_layout) ~level inline = + let max = match layout with `Atx _ -> 6 | `Setext _ -> 2 in + let level = Int.max 1 (Int.min level max) in + {layout; level; inline; id} + + let layout h = h.layout + let level h = h.level + let inline h = h.inline + let id h = h.id + end + + module Html_block = struct + type t = string node list + end + + module List_item = struct + type block = t + type t = + { before_marker : Layout.indent; + marker : Layout.string node; + after_marker : Layout.indent; + block : block; + ext_task_marker : Uchar.t node option } + + let make + ?(before_marker = 0) ?(marker = Layout.empty) ?(after_marker = 1) + ?ext_task_marker block + = + { before_marker; marker; after_marker; block; ext_task_marker } + + let block i = i.block + let before_marker i = i.before_marker + let marker i = i.marker + let after_marker i = i.after_marker + let ext_task_marker i = i.ext_task_marker + let task_status_of_task_marker u = match Uchar.to_int u with + | 0x0020 -> `Unchecked + | 0x0078 (* x *) | 0x0058 (* X *) | 0x2713 (* ✓ *) | 0x2714 (* ✔ *) + | 0x10102 (* 𐄂 *) | 0x1F5F8 (* 🗸*) -> `Checked + | 0x007E (* ~ *) -> `Cancelled + | _ -> `Other u + end + + module List' = struct + type type' = [ `Unordered of Layout.char | `Ordered of int * Layout.char ] + type t = + { type' : type'; + tight : bool; + items : List_item.t node list; } + + let make ?(tight = true) type' items = { type'; tight; items } + + let type' l = l.type' + let tight l = l.tight + let items l = l.items + end + + module Paragraph = struct + type t = + { leading_indent : Layout.indent; + inline : Inline.t; + trailing_blanks : Layout.blanks; } + + let make ?(leading_indent = 0) ?(trailing_blanks = "") inline = + { leading_indent; inline; trailing_blanks } + + let inline p = p.inline + let leading_indent p = p.leading_indent + let trailing_blanks p = p.trailing_blanks + end + + module Thematic_break = struct + type t = { indent : Layout.indent; layout : Layout.string } + let make ?(indent = 0) ?(layout = "---") () = { indent; layout } + let indent t = t.indent + let layout t = t.layout + end + + type t += + | Blank_line of Layout.blanks node + | Block_quote of Block_quote.t node + | Blocks of t list node + | Code_block of Code_block.t node + | Heading of Heading.t node + | Html_block of Html_block.t node + | Link_reference_definition of Link_definition.t node + | List of List'.t node + | Paragraph of Paragraph.t node + | Thematic_break of Thematic_break.t node + + let empty = Blocks ([], Meta.none) + + (* Extensions *) + + module Table = struct + type align = [ `Left | `Center | `Right ] + type sep = align option * Layout.count + type cell_layout = Layout.blanks * Layout.blanks + type row = + [ `Header of (Inline.t * cell_layout) list + | `Sep of sep node list + | `Data of (Inline.t * cell_layout) list ] + + type t = + { indent : Layout.indent; + col_count : int; + rows : (row node * Layout.blanks) list } + + let col_count rows = + let rec loop c = function + | (((`Header cols | `Data cols), _), _) :: rs -> + loop (Int.max (List.length cols) c) rs + | (((`Sep cols), _), _) :: rs -> + loop (Int.max (List.length cols) c) rs + | [] -> c + in + loop 0 rows + + let make ?(indent = 0) rows = { indent; col_count = col_count rows; rows } + let indent t = t.indent + let col_count t = t.col_count + let rows t = t.rows + + let parse_sep_row cs = + let rec loop acc = function + | [] -> Some (List.rev acc) + | (Inline.Text (s, meta), ("", "")) :: cs -> + if s = "" then None else + let max = String.length s - 1 in + let first_colon = s.[0] = ':' and last_colon = s.[max] = ':' in + let first = if first_colon then 1 else 0 in + let last = if last_colon then max - 1 else max in + begin + match + for i = first to last do if s.[i] <> '-' then raise Exit; done + with + | exception Exit -> None + | () -> + let count = last - first + 1 in + let sep = match first_colon, last_colon with + | false, false -> None + | true, true -> Some `Center + | true, false -> Some `Left + | false, true -> Some `Right + in + loop (((sep, count), meta) :: acc) cs + end + | _ -> None + in + loop [] cs + end + + module Footnote = struct + type nonrec t = + { indent : Layout.indent; + label : Label.t; + defined_label : Label.t option; + block : t } + + let make ?(indent = 0) ?defined_label:d label block = + let defined_label = match d with None -> Some label | Some d -> d in + { indent; label; defined_label; block } + + let indent fn = fn.indent + let label fn = fn.label + let defined_label fn = fn.defined_label + let block fn = fn.block + + type Label.def += Def of t node + let stub label defined_label = + Def ({ indent = 0; label; defined_label; block = empty}, Meta.none) + end + + type t += + | Ext_math_block of Code_block.t node + | Ext_table of Table.t node + | Ext_footnote_definition of Footnote.t node + + (* Functions on blocks *) + + let err_unknown = "Unknown Cmarkit.Block.t type extension" + + let ext_none _ = invalid_arg err_unknown + let meta ?(ext = ext_none) = function + | Blank_line (_, m) | Block_quote (_, m) | Blocks (_, m) | Code_block (_, m) + | Heading (_, m) | Html_block (_, m) | Link_reference_definition (_, m) + | List (_, m) | Paragraph (_, m) | Thematic_break (_, m) + | Ext_math_block (_, m) | Ext_table (_, m) + | Ext_footnote_definition (_, m) -> m + | b -> ext b + + let rec normalize ?(ext = ext_none) = function + | Blank_line _ | Code_block _ | Heading _ | Html_block _ + | Link_reference_definition _ | Paragraph _ | Thematic_break _ + | Blocks ([], _) | Ext_math_block _ | Ext_table _ as b -> b + | Block_quote (b, m) -> + let b = { b with block = normalize ~ext b.block } in + Block_quote (b, m) + | List (l, m) -> + let item (i, meta) = + let block = List_item.block i in + { i with List_item.block = normalize ~ext block }, meta + in + List ({ l with items = List.map item l.items }, m) + | Blocks (b :: bs, m) -> + let rec loop acc = function + | Blocks (bs', m) :: bs -> loop acc (List.rev_append (List.rev bs') bs) + | b :: bs -> loop (normalize ~ext b :: acc) bs + | [] -> List.rev acc + in + let bs = loop [normalize ~ext b] bs in + (match bs with [b] -> b | _ -> Blocks (bs, m)) + | Ext_footnote_definition (fn, m) -> + let fn = { fn with block = normalize ~ext fn.block } in + Ext_footnote_definition (fn, m) + | b -> ext b + + let rec defs + ?(ext = fun b defs -> invalid_arg err_unknown) ?(init = Label.Map.empty) + = function + | Blank_line _ | Code_block _ | Heading _ | Html_block _ + | Paragraph _ | Thematic_break _ + | Ext_math_block _ | Ext_table _ -> init + | Block_quote (b, _) -> defs ~ext ~init (Block_quote.block b) + | Blocks (bs, _) -> List.fold_left (fun init b -> defs ~ext ~init b) init bs + | List (l, _) -> + let add init (i, _) = defs ~ext ~init (List_item.block i) in + List.fold_left add init l.items + | Link_reference_definition ld -> + begin match Link_definition.defined_label (fst ld) with + | None -> init + | Some def -> + Label.Map.add (Label.key def) (Link_definition.Def ld) init + end + | Ext_footnote_definition fn -> + let init = match Footnote.defined_label (fst fn) with + | None -> init + | Some def -> Label.Map.add (Label.key def) (Footnote.Def fn) init + in + defs ~ext ~init (Footnote.block (fst fn)) + | b -> ext init b +end + +(* Parsing *) + +(* Closer indexes. + + They map closing delimiters to the position where they + start. Shortcuts forward searches in inline parsing. See + Inline_struct. *) + +module Pos_set = Set.Make (Int) (* Sets of positions. *) +module Closer = struct + type t = + | Backticks of int (* run length *) + | Right_brack + | Right_paren (* Only for ruling out pathological cases. *) + | Emphasis_marks of char + | Strikethrough_marks + | Math_span_marks of int (* run length *) + + let compare = Stdlib.compare +end + +module Closer_index = struct + include Map.Make (Closer) + type nonrec t = Pos_set.t t + + let add cl pos cidx = + let add = function + | None -> Some (Pos_set.singleton pos) + | Some occs -> Some (Pos_set.add pos occs) + in + update cl add cidx + + let closer_pos cl ~after cidx = match find_opt cl cidx with + | None -> None + | Some occs -> Pos_set.find_first_opt (fun pos -> pos > after) occs + + let closer_exists cl ~after cidx = match closer_pos cl ~after cidx with + | None -> false | Some _ -> true +end + +(* Columns. That notion is needed to handle tab stops. + See https://spec.commonmark.org/current/#tabs *) + +type col = int +let[@inline] next_tab_stop col = (col + 4) land (lnot 3) + +(* Parser abstraction *) + +type parser = + { file : Textloc.fpath (* input file name *); + i : string (* input string *); + buf : Buffer.t (* scratch buffer. *); + exts : bool; (* parse extensions if [true]. *) + nolocs : bool; (* do not compute locations if [true]. *) + nolayout : bool; (* do not compute layout fields if [true]. *) + heading_auto_ids : bool; (* compute heading ids. *) + nested_links : bool; + mutable defs : Label.defs; + resolver : Label.resolver; + mutable cidx : Closer_index.t; (* For inline parsing. *) + (* Current line (only used during block parsing) *) + mutable current_line_pos : Textloc.line_pos; + mutable current_line_last_char : + (* first char of line - 1 on empty lines *) Textloc.byte_pos; + mutable current_char : Textloc.byte_pos; + mutable current_char_col : col; + mutable next_non_blank : + (* current_line_last_char + 1 if none. *) Textloc.byte_pos; + mutable next_non_blank_col : col; + mutable tab_consumed_cols : + (* number of cols consumed from the tab if i.[current_char] is '\t' *) + col; } + +let parser + ?(defs = Label.Map.empty) ?(resolver = Label.default_resolver) + ?(nested_links = false) ?(heading_auto_ids = false) ?(layout = false) + ?(locs = false) ?(file = Textloc.file_none) ~strict i + = + let nolocs = not locs and nolayout = not layout and exts = not strict in + { file; i; buf = Buffer.create 512; exts; nolocs; nolayout; + heading_auto_ids; nested_links; defs; resolver; cidx = Closer_index.empty; + current_line_pos = 1, 0; current_line_last_char = -1; current_char = 0; + current_char_col = 0; next_non_blank = 0; next_non_blank_col = 0; + tab_consumed_cols = 0; } + +let find_label_defining_key p key = match Label.Map.find_opt key p.defs with +| Some (Link_definition.Def ld) -> Link_definition.defined_label (fst ld) +| Some (Block.Footnote.Def fn) -> Block.Footnote.defined_label (fst fn) +| None -> None +| _ -> assert false + +let set_label_def p l def = p.defs <- Label.Map.add (Label.key l) def p.defs +let def_label p l = + p.resolver (`Def (find_label_defining_key p (Label.key l), l)) + +let find_def_for_ref ~image p ref = + let kind = if image then `Image else `Link in + let def = find_label_defining_key p (Label.key ref) in + p.resolver (`Ref (kind, ref, def)) + +let debug_span p s = String.sub p.i s.first (s.last - s.first + 1) +let debug_line p = + let first = snd p.current_line_pos and last = p.current_line_last_char in + String.sub p.i first (last - first + 1) + +let current_line_span p ~first ~last = + { line_pos = p.current_line_pos; first; last } + +(* Making metas and text locations. This is centralized here to be able + to disable their creation which has a non-negligible impact on + performance. *) + +let meta p textloc = if p.nolocs then Meta.none else Meta.make ~textloc () + +let textloc_of_span p span = + if p.nolocs then Textloc.none else + let first_byte = span.first and last_byte = span.last in + let first_line = span.line_pos and last_line = span.line_pos in + Textloc.v ~file:p.file ~first_byte ~last_byte ~first_line ~last_line + +let textloc_of_lines p ~first ~last ~first_line ~last_line = + if p.nolocs then Textloc.none else + let first_byte = first and first_line = first_line.line_pos in + let last_byte = last and last_line = last_line.line_pos in + Textloc.v ~file:p.file ~first_byte ~last_byte ~first_line ~last_line + +let meta_of_spans p ~first:first_line ~last:last_line = + if p.nolocs then Meta.none else + let first = first_line.first and last = last_line.last in + meta p (textloc_of_lines p ~first ~last ~first_line ~last_line) + +let meta_of_metas p ~first ~last = + if p.nolocs then Meta.none else + meta p (Textloc.span (Meta.textloc first) (Meta.textloc last)) + +let clean_raw_span ?pad p span = + Text.utf_8_clean_raw ?pad p.buf p.i ~first:span.first ~last:span.last, + meta p (textloc_of_span p span) + +let clean_unref_span p span = + Text.utf_8_clean_unref p.buf p.i ~first:span.first ~last:span.last, + meta p (textloc_of_span p span) + +let clean_unesc_unref_span p span = + Text.utf_8_clean_unesc_unref p.buf p.i ~first:span.first ~last:span.last, + meta p (textloc_of_span p span) + +let layout_clean_raw_span ?pad p span = + if p.nolayout then Layout.empty else clean_raw_span ?pad p span + +let layout_clean_raw_span' ?pad p span = + (* Like [layout_raw_span] but no meta *) + if p.nolayout then "" else + Text.utf_8_clean_raw ?pad p.buf p.i ~first:span.first ~last:span.last + +let _tight_block_lines xxx_span p ~rev_spans = + let rec loop p acc = function + | [] -> acc + | [_, fst_line] -> ("", xxx_span p fst_line) :: acc + | (line_start, span) :: spans -> + let acc = + let layout = + if p.nolayout || span.first <= line_start then "" else + Text.utf_8_clean_raw p.buf p.i ~first:line_start + ~last:(span.first - 1) + in + (layout, xxx_span p span) :: acc + in + loop p acc spans + in + loop p [] rev_spans + +let tight_block_lines p ~rev_spans = + _tight_block_lines clean_unesc_unref_span p ~rev_spans + +let raw_tight_block_lines p ~rev_spans = + _tight_block_lines clean_raw_span p ~rev_spans + +let first_non_blank_in_span p s = Match.first_non_blank_in_span p.i s +let first_non_blank_over_nl ~next_line p lines line ~start = + match Match.first_non_blank_over_nl ~next_line p.i lines ~line ~start with + | `None -> None + | `This_line non_blank -> + let layout = + if non_blank = start then [] else + [clean_raw_span p { line with first = start ; last = non_blank - 1}] + in + Some (lines, line, layout, non_blank) + | `Next_line (lines, newline, non_blank) -> + let first_layout = clean_raw_span p { line with first = start } in + let next_layout = clean_raw_span p { newline with last = non_blank -1 } in + let layout = [first_layout; next_layout] in + Some (lines, newline, layout, non_blank) + +(* Inline structure parsing *) + +module Inline_struct = struct + + (* Tokens for parsing inlines. + + The list of tokens of a paragraph are the points to consider to + parse it into inlines. Tokens gradually become [Inline] tokens + containing parsed inlines. Between two tokens there is implicit + textual data. This data gradually becomes part of [Inline] tokens + or, at the end of of the parsing process, becomes [Text] inlines. + + The token list also represents newlines explicitly, either via + the [Newline] token or via the [Inline] token since inlines may + start on a line and up on another one. *) + + type emphasis_marks = + { start : byte_pos; + char : char; + count : int; + may_open : bool; + may_close : bool } + + type strikethrough_marks = + { start : byte_pos; + may_open : bool; + may_close : bool } + + type math_span_marks = + { start : byte_pos; + count : int; + may_open : bool; + may_close : bool; } + + type token = + | Autolink_or_html_start of { start : byte_pos } + | Backticks of + { start : byte_pos; + count : int; + escaped : bool } + | Emphasis_marks of emphasis_marks + | Inline of + { start : byte_pos; + inline : Inline.t; + endline : line_span; + next : byte_pos } + | Link_start of + { start : byte_pos; + image : bool } + | Newline of + { start : (* points on spaces or \ on the broken line *) byte_pos; + break_type : Inline.Break.type'; + newline : line_span; } + | Right_brack of { start : byte_pos } + | Right_paren of { start : byte_pos } (* Only used for closer index *) + | Strikethrough_marks of strikethrough_marks + | Math_span_marks of math_span_marks + + let token_start = function + | Autolink_or_html_start { start } | Backticks { start } + | Emphasis_marks { start } | Inline { start } -> start | Link_start { start } + | Newline { start } | Right_brack { start } -> start + | Right_paren { start } -> start + | Strikethrough_marks { start } -> start + | Math_span_marks { start } -> start + + let has_backticks ~count ~after cidx = + Closer_index.closer_exists (Closer.Backticks count) ~after cidx + + let has_right_brack ~after cidx = + Closer_index.closer_exists Closer.Right_brack ~after cidx + + let has_right_paren ~after cidx = + Closer_index.closer_exists Closer.Right_paren ~after cidx + + let emphasis_closer_pos ~char ~after cidx = + Closer_index.closer_pos (Closer.Emphasis_marks char) ~after cidx + + let has_emphasis_closer ~char ~after cidx = + Closer_index.closer_exists (Closer.Emphasis_marks char) ~after cidx + + let has_strikethrough_closer ~after cidx = + Closer_index.closer_exists Closer.Strikethrough_marks ~after cidx + + let has_math_span_closer ~count ~after cidx = + Closer_index.closer_exists (Closer.Math_span_marks count) ~after cidx + + let rev_token_list_and_make_closer_index toks = + let rec loop cidx acc = function + | Backticks { start; count; _ } as t :: toks -> + let cidx = Closer_index.add (Closer.Backticks count) start cidx in + loop cidx (t :: acc) toks + | Right_brack { start } as t :: toks -> + let cidx = Closer_index.add Closer.Right_brack start cidx in + loop cidx (t :: acc) toks + | Right_paren { start } :: toks -> + let cidx = Closer_index.add Closer.Right_paren start cidx in + loop cidx (* we don't use the token for parsing *) acc toks + | Emphasis_marks { start; char; may_close = true } as t :: toks -> + let cidx = Closer_index.add (Closer.Emphasis_marks char) start cidx in + loop cidx (t :: acc) toks + | Strikethrough_marks { start; may_close = true } as t :: toks -> + let cidx = Closer_index.add Closer.Strikethrough_marks start cidx in + loop cidx (t :: acc) toks + | Math_span_marks { start; count; may_close = true } as t :: toks -> + let cidx = Closer_index.add (Closer.Math_span_marks count) start cidx in + loop cidx (t :: acc) toks + | t :: toks -> loop cidx (t :: acc) toks + | [] -> cidx, acc + in + loop Closer_index.empty [] toks + + let rec rev_tokens_and_shorten_last_line ~to_last:last acc = function + (* Used to make the text delimitation precise for nested inlines *) + | Newline ({ newline; _ } as nl) :: toks -> + let t = Newline { nl with newline = { newline with last }} in + List.rev_append toks (t :: acc) + | Inline ({ endline; _ } as i) :: toks -> + let t = Inline { i with endline = { endline with last }} in + List.rev_append toks (t :: acc) + | t :: toks -> rev_tokens_and_shorten_last_line ~to_last:last (t :: acc) toks + | [] -> acc + + let rec drop_stop_after_right_brack = function + | Right_brack _ :: toks -> toks + | _ :: toks -> drop_stop_after_right_brack toks + | [] -> [] + + let rec drop_until ~start = function + | t :: toks when token_start t < start -> drop_until ~start toks + | toks -> toks + + let rec next_line = function + (* N.B. when we use this function considering Inline tokens is not needed. *) + | [] -> None + | Newline { newline; _ } :: toks -> Some (toks, newline) + | _ :: toks -> next_line toks + + (* Tokenization *) + + let newline_token s prev_line newline = + (* https://spec.commonmark.org/current/#softbreak *) + (* https://spec.commonmark.org/current/#hard-line-breaks *) + let start (* includes spaces or '\\' on prev line *), break_type = + let first = prev_line.first and last = prev_line.last in + let non_space = Match.rev_drop_spaces s ~first ~start:last in + if non_space = last && s.[non_space] = '\\' then (non_space, `Hard) else + let start = non_space + 1 in + (start, if last - start + 1 >= 2 then `Hard else `Soft) + in + Newline { start; break_type; newline } + + let add_backtick_token acc s line ~prev_bslash ~start = + let last = Match.run_of ~char:'`' s ~last:line.last ~start:(start + 1) in + let count = last - start + 1 and escaped = prev_bslash in + Backticks {start; count; escaped} :: acc, last + 1 + + let try_add_image_link_start_token acc s line ~start = + let next = start + 1 in + if next > line.last || s.[next] <> '[' then acc, next else + Link_start { start; image = true } :: acc, next + 1 + + let try_add_emphasis_token acc s line ~start = + let first = line.first and last = line.last and char = s.[start] in + let run_last = Match.run_of ~char ~last s ~start:(start + 1) in + let count = run_last - start + 1 in + let prev_uchar = Match.prev_uchar s ~first ~before:start in + let next_uchar = Match.next_uchar s ~last ~after:run_last in + let prev_white = Cmarkit_data.is_unicode_whitespace prev_uchar in + let next_white = Cmarkit_data.is_unicode_whitespace next_uchar in + let prev_punct = Cmarkit_data.is_unicode_punctuation prev_uchar in + let next_punct = Cmarkit_data.is_unicode_punctuation next_uchar in + let is_left_flanking = + not next_white && (not next_punct || (prev_white || prev_punct)) + in + let is_right_flanking = + not prev_white && (not prev_punct || (next_white || next_punct)) + in + let next = run_last + 1 in + if not is_left_flanking && not is_right_flanking then acc, next else + let may_open = + (char = '*' && is_left_flanking) || + (char = '_' && is_left_flanking && (not is_right_flanking || prev_punct)) + in + let may_close = + (char = '*' && is_right_flanking) || + (char = '_' && is_right_flanking && (not is_left_flanking || next_punct)) + in + if not may_open && not may_close then acc, next else + Emphasis_marks { start; char; count; may_open; may_close } :: acc, next + + let try_add_strikethrough_marks_token acc s line ~start = + let first = line.first and last = line.last and char = s.[start] in + let run_last = Match.run_of ~char ~last s ~start:(start + 1) in + let count = run_last - start + 1 in + let next = run_last + 1 in + if count <> 2 then acc, next else + let prev_uchar = Match.prev_uchar s ~first ~before:start in + let next_uchar = Match.next_uchar s ~last ~after:run_last in + let may_close = not (Cmarkit_data.is_unicode_whitespace prev_uchar) in + let may_open = not (Cmarkit_data.is_unicode_whitespace next_uchar) in + if not may_open && not may_close then acc, next else + Strikethrough_marks { start; may_open; may_close } :: acc, next + + let try_add_math_span_marks_token acc s line ~start = + let first = line.first and last = line.last and char = s.[start] in + let run_last = Match.run_of ~char ~last s ~start:(start + 1) in + let count = run_last - start + 1 in + let next = run_last + 1 in + if count > 2 then acc, next else + let may_open, may_close = + if count <> 1 then true, true else + let prev_uchar = Match.prev_uchar s ~first ~before:start in + let next_uchar = Match.next_uchar s ~last ~after:run_last in + let may_close = not (Cmarkit_data.is_unicode_whitespace prev_uchar) in + let may_open = not (Cmarkit_data.is_unicode_whitespace next_uchar) in + may_open, may_close + in + if not may_open && not may_close then acc, next else + Math_span_marks { start; count; may_open; may_close } :: acc, next + + let tokenize ~exts s lines = + (* For inlines this is where we conditionalize for extensions. All code + paths after that no longer check for p.exts: there just won't be + extension data to process if [exts] was not [true] here. *) + let rec loop ~exts s lines line ~prev_bslash acc k = + if k > line.last then match lines with + | [] -> rev_token_list_and_make_closer_index acc + | newline :: lines -> + let t = newline_token s line newline in + loop ~exts s lines newline ~prev_bslash:false (t :: acc) newline.first + else + if s.[k] = '\\' + then loop ~exts s lines line ~prev_bslash:(not prev_bslash) acc (k+1) else + let acc, next = match s.[k] with + | '`' -> add_backtick_token acc s line ~prev_bslash ~start:k + | c when prev_bslash -> acc, k + 1 + | '*' | '_' -> try_add_emphasis_token acc s line ~start:k + | ']' -> Right_brack { start = k } :: acc, k + 1 + | '[' -> Link_start { start = k; image = false } :: acc, k + 1 + | '!' -> try_add_image_link_start_token acc s line ~start:k + | '<' -> Autolink_or_html_start { start = k } :: acc, k + 1 + | ')' -> Right_paren { start = k } :: acc, k + 1 + | '~' when exts -> try_add_strikethrough_marks_token acc s line ~start:k + | '$' when exts -> try_add_math_span_marks_token acc s line ~start:k + | _ -> acc, k + 1 + in + loop ~exts s lines line ~prev_bslash:false acc next + in + let line = List.hd lines and lines = List.tl lines in + let cidx, toks = loop ~exts s lines line ~prev_bslash:false [] line.first in + cidx, toks, line + + (* Making inlines and inline tokens *) + + let break_inline p line ~start ~break_type:type' ~newline = + let layout_before = { line with first = start } in + let layout_after = + let non_blank = first_non_blank_in_span p newline in + { newline with last = non_blank - 1 } + in + let m = meta_of_spans p ~first:layout_before ~last:layout_after in + let layout_before = layout_clean_raw_span p layout_before in + let layout_after = layout_clean_raw_span p layout_after in + Inline.Break ({ layout_before; type'; layout_after }, m) + + let try_add_text_inline p line ~first ~last acc = + if first > last then acc else + let first = match first = line.first with + | true -> first_non_blank_in_span p line (* strip leading blanks *) + | false -> first + in + Inline.Text (clean_unesc_unref_span p { line with first; last }) :: acc + + let inlines_inline p ~first ~last ~first_line ~last_line = function + | [i] -> i + | is -> + let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in + Inline.Inlines (is, meta p textloc) + + let code_span_token p ~count ~first ~last ~first_line ~last_line rev_spans = + let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in + let code_layout = raw_tight_block_lines p ~rev_spans in + let meta = meta p textloc in + let cs = Inline.Code_span ({ backtick_count = count; code_layout }, meta) in + Inline { start = first; inline = cs; endline = last_line; next = last + 1 } + + let autolink_token p line ~first ~last ~is_email = + let meta = meta p (textloc_of_span p { line with first; last }) in + let link = { line with first = first + 1; last = last - 1 } in + let link = clean_unref_span p link in + let inline = Inline.Autolink ({ is_email; link }, meta) in + Inline { start = first; inline; endline = line; next = last + 1 } + + let raw_html_token p ~first ~last ~first_line ~last_line rev_spans = + let raw = raw_tight_block_lines p ~rev_spans in + let textloc = + let first = Meta.textloc (snd (snd (List.hd raw))) in + let last = snd (List.hd rev_spans) in + let last_byte = last.last and last_line = last.line_pos in + Textloc.set_last first ~last_byte ~last_line + in + let inline = Inline.Raw_html (raw, meta p textloc) in + Inline { start = first; inline; endline = last_line; next = last + 1 } + + let link_token p ~first ~last ~first_line ~last_line ~image link = + let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in + let link = link, meta p textloc in + let inline = if image then Inline.Image link else Inline.Link link in + Inline { start = first; inline; endline = last_line; next = last + 1 } + + let emphasis_token p ~first ~last ~first_line ~last_line ~strong emph = + let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in + let delim = p.i.[first] in + let e = { Inline.Emphasis.delim; inline = emph}, meta p textloc in + let i = if strong then Inline.Strong_emphasis e else Inline.Emphasis e in + Inline { start = first; inline = i ; endline = last_line; next = last + 1 } + + let ext_strikethrough_token p ~first ~last ~first_line ~last_line s = + let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in + let inline = Inline.Ext_strikethrough (s, meta p textloc) in + Inline { start = first; inline; endline = last_line; next = last + 1 } + + let ext_math_span_token p ~count ~first ~last ~first_line ~last_line rspans = + let textloc = textloc_of_lines p ~first ~last ~first_line ~last_line in + let tex_layout = raw_tight_block_lines p ~rev_spans:rspans in + let meta = meta p textloc in + let ms = Inline.Math_span.make ~display:(count = 2) tex_layout in + let inline = Inline.Ext_math_span (ms, meta) in + Inline { start = first; inline; endline = last_line; next = last + 1 } + + (* Parsers *) + + let try_code p toks start_line ~start:cstart ~count ~escaped = + (* https://spec.commonmark.org/current/#code-span *) + if escaped || not (has_backticks ~count ~after:cstart p.cidx) then None else + let rec match_backticks toks line ~count spans k = match toks with + | [] -> None + | Backticks { start; count = c; _ } :: toks -> + if c <> count then match_backticks toks line ~count spans k else + let span = line.first, { line with first = k; last = start - 1} in + let spans = span :: spans in + let first = cstart and last = start + count - 1 in + let first_line = start_line and last_line = line in + let t = + code_span_token p ~count ~first ~last ~first_line ~last_line spans + in + Some (toks, line, t) + | Newline { newline } :: toks -> + let spans = (line.first, { line with first = k }) :: spans in + let k = first_non_blank_in_span p newline in + match_backticks toks newline ~count spans k + | _ :: toks -> match_backticks toks line ~count spans k + in + let first = cstart + count in + match_backticks toks { start_line with first } ~count [] first + + let try_math_span p toks start_line ~start:cstart ~count = + if not (has_math_span_closer ~count ~after:cstart p.cidx) then None else + let rec match_math_marks toks line ~count spans k = match toks with + | [] -> None + | Math_span_marks { start; count = c; may_close; _ } :: toks -> + if c <> count || not may_close + then match_math_marks toks line ~count spans k else + let span = line.first, { line with first = k; last = start - 1 } in + let spans = span :: spans in + let first = cstart and last = start + count - 1 in + let first_line = start_line and last_line = line in + let t = + ext_math_span_token p ~count ~first ~last ~first_line ~last_line + spans + in + Some (toks, line, t) + | Newline { newline } :: toks -> + let spans = (line.first, { line with first = k }) :: spans in + let k = first_non_blank_in_span p newline in + match_math_marks toks newline ~count spans k + | _ :: toks -> match_math_marks toks line ~count spans k + in + let first = cstart + count in + match_math_marks toks { start_line with first } ~count [] first + + let try_autolink_or_html p toks line ~start = + match Match.autolink_uri p.i ~last:line.last ~start with + | Some last -> + let t = autolink_token p line ~first:start ~last ~is_email:false in + let toks = drop_until ~start:(last + 1) toks in + Some (toks, line, t) + | None -> + match Match.autolink_email p.i ~last:line.last ~start with + | Some last -> + let t = autolink_token p line ~first:start ~last ~is_email:true in + let toks = drop_until ~start:(last + 1) toks in + Some (toks, line, t) + | None -> + match Match.raw_html ~next_line p.i toks ~line ~start with + | None -> None + | Some (toks, last_line, spans, last) -> + let first = start and first_line = line in + let t = raw_html_token p ~first ~last ~first_line ~last_line spans in + let toks = drop_until ~start:(last + 1) toks in + Some (toks, last_line, t) + + let label_of_rev_spans p ~key rev_spans = + let meta = + if p.nolocs || rev_spans = [] then Meta.none else + let first = snd (List.hd (List.rev rev_spans)) in + let last = snd (List.hd rev_spans) in + meta_of_spans p ~first ~last + in + let text = tight_block_lines p ~rev_spans in + { Label.meta; key; text } + + let try_full_reflink_remainder p toks line ~image ~start (* is label's [ *) = + (* https://spec.commonmark.org/current/#full-reference-link *) + match Match.link_label p.buf ~next_line p.i toks ~line ~start with + | None -> None + | Some (toks, line, rev_spans, last, key) -> + let ref = label_of_rev_spans p ~key rev_spans in + let toks = drop_stop_after_right_brack toks in + match find_def_for_ref p ~image ref with + | None -> Some None + | Some def -> Some (Some (toks, line, `Ref (`Full, ref, def), last)) + + let try_shortcut_reflink p toks line ~image ~start (* is starting [ or ! *) = + (* https://spec.commonmark.org/current/#shortcut-reference-link *) + let start = if image then start + 1 (* [ *) else start in + match Match.link_label p.buf ~next_line p.i toks ~line ~start with + | None -> None + | Some (toks, line, rev_spans, last, key) -> + let ref = label_of_rev_spans p ~key rev_spans in + let toks = drop_stop_after_right_brack toks in + match find_def_for_ref p ~image ref with + | None -> None + | Some def -> Some (toks, line, `Ref (`Shortcut, ref, def), last) + + let try_collapsed_reflink p toks line ~image ~start (* is starting [ or ! *) = + (* https://spec.commonmark.org/current/#collapsed-reference-link *) + let start = if image then start + 1 (* [ *) else start in + match Match.link_label p.buf ~next_line p.i toks ~line ~start with + | None -> None + | Some (toks, line, rev_spans, last, key) -> + let ref = label_of_rev_spans p ~key rev_spans in + let last = last + 2 in (* adjust for ][] *) + let toks = drop_stop_after_right_brack toks in + let toks = drop_stop_after_right_brack toks in + match find_def_for_ref p ~image ref with + | None -> None + | Some def -> Some (toks, line, `Ref (`Collapsed, ref, def), last) + + let try_inline_link_remainder p toks start_line ~image ~start:st (* is ( *) = + (* https://spec.commonmark.org/current/#inline-link *) + if not (has_right_paren ~after:st p.cidx) then None else + let first_non_blank_over_nl = first_non_blank_over_nl ~next_line in + match first_non_blank_over_nl p toks start_line ~start:(st + 1) with + | None -> None + | Some (toks, line, before_dest, start) -> + let toks, line, angled_dest, dest, start = + match Match.link_destination p.i ~last:line.last ~start with + | None -> toks, line, false, None, start + | Some (angled, first, last) -> + let dest = clean_unesc_unref_span p { line with first; last } in + let next = if angled then last + 2 else last + 1 in + toks, line, angled, Some dest, next + in + let toks, line, after_dest, title_open_delim, title, start = + match first_non_blank_over_nl p toks line ~start with + | None -> + toks, line, [], '\"', None, start + | Some (_, _, _, start') when start' = start -> + toks, line, [], '\"', None, start + | Some (toks, line, after_destination, start) -> + match Match.link_title ~next_line p.i toks ~line ~start with + | None -> toks, line, after_destination, '\"', None, start + | Some (toks, line, rev_spans, last) -> + let title = tight_block_lines p ~rev_spans in + toks, line, after_destination, p.i.[start], + Some title, last + 1 + in + let toks, line, after_title, last = + match first_non_blank_over_nl p toks line ~start with + | None -> toks, line, [], start + | Some (toks, line, after_title, start as v) -> v + in + if last > line.last || p.i.[last] <> ')' then None else + let layout = + { Link_definition.indent = 0; angled_dest; before_dest; + after_dest; title_open_delim; after_title; } + in + let label = None and defined_label = None in + let ld = { Link_definition.layout; label; defined_label; dest; title }in + let textloc = + let first = st and last = start in + textloc_of_lines p ~first ~last ~first_line:start_line ~last_line:line + in + let ld = (ld, meta p textloc) in + let toks = drop_until ~start:(last + 1) toks in + Some (toks, line, `Inline ld, last) + + let find_link_text_tokens p toks start_line ~start = + (* XXX The repetition with first_pass is annoying here. + we should figure out something for that not to happen. *) + (* https://spec.commonmark.org/current/#link-text *) + let rec loop toks line nest acc = match toks with + | Right_brack { start = last } :: toks when nest = 0 -> + let acc = rev_tokens_and_shorten_last_line ~to_last:(last - 1) [] acc in + Some (toks, line, acc, last) + | Backticks { start; count; escaped } :: toks -> + begin match try_code p toks line ~start ~count ~escaped with + | None -> loop toks line nest acc + | Some (toks, line, t) -> loop toks line nest (t :: acc) + end + | Math_span_marks { start; count; may_open; } :: toks -> + if not may_open then loop toks line nest acc else + begin match try_math_span p toks line ~start ~count with + | None -> loop toks line nest acc + | Some (toks, line, t) -> loop toks line nest (t :: acc) + end + | Autolink_or_html_start { start } :: toks -> + begin match try_autolink_or_html p toks line ~start with + | None -> loop toks line nest acc + | Some (toks, line, t) -> loop toks line nest (t :: acc) + end + | Right_brack _ as t :: toks -> loop toks line (nest - 1) (t :: acc) + | Link_start _ as t :: toks -> loop toks line (nest + 1) (t :: acc) + | Newline { newline; _ } as t :: toks -> loop toks newline nest (t :: acc) + | Inline { endline; _ } as t :: toks -> loop toks endline nest (t :: acc) + | t :: toks -> loop toks line nest (t :: acc) + | [] -> None + in + loop toks start_line 0 [] + + let try_link_def + p ~start ~start_toks ~start_line ~toks ~line ~text_last ~image text + = + let next = text_last + 1 in + let link = + if next > line.last + then try_shortcut_reflink p start_toks start_line ~image ~start else + match p.i.[next] with + | '(' -> + (match try_inline_link_remainder p toks line ~image ~start:next with + | None -> try_shortcut_reflink p start_toks start_line ~image ~start + | Some _ as v -> v) + | '[' -> + let next' = next + 1 in + if next' <= line.last && p.i.[next'] = ']' + then try_collapsed_reflink p start_toks start_line ~image ~start else + let r = try_full_reflink_remainder p toks line ~image ~start:next in + begin match r with + | None -> try_shortcut_reflink p start_toks start_line ~image ~start + | Some None -> None (* Example 570 *) + | Some (Some _ as v) -> v + end + | c -> + try_shortcut_reflink p start_toks start_line ~image ~start + in + match link with + | None -> None + | Some (toks, endline, reference, last) -> + let first = start in + let text = + let first_line = start_line and last_line = line in + inlines_inline p text ~first ~last:text_last ~first_line ~last_line + in + let link = { Inline.Link.text; reference } in + let first_line = start_line and last_line = endline in + let t = link_token p ~image ~first ~last ~first_line ~last_line link in + let had_link = not image && not p.nested_links in + Some (toks, endline, t, had_link) + + (* The following sequence of mutually recursive functions define + inline parsing. We have three passes over a paragraph's token + list see the [parse_tokens] function below. *) + + let rec try_link p start_toks start_line ~image ~start = + if not (has_right_brack ~after:start p.cidx) then None else + match find_link_text_tokens p start_toks start_line ~start with + | None -> None + | Some (toks, line, text_toks, text_last (* with ] delim *)) -> + let text, had_link = + let text_start = + let first = start + (if image then 2 else 1) in + let last = + if start_line == line then text_last - 1 else start_line.last + in + { start_line with first; last } + in + parse_tokens p text_toks text_start + in + if had_link && not image + then None (* Could try to keep render *) else + try_link_def + p ~start ~start_toks ~start_line ~toks ~line ~text_last ~image text + + and first_pass p toks line = + (* Parse inline atoms and links. Links are parsed here otherwise + link reference data gets parsed as atoms. *) + let rec loop p toks line ~had_link acc = match toks with + | [] -> List.rev acc, had_link + | Backticks { start; count; escaped } :: toks -> + begin match try_code p toks line ~start ~count ~escaped with + | None -> loop p toks line ~had_link acc + | Some (toks, line, t) -> loop p toks line ~had_link (t :: acc) + end + | Math_span_marks { start; count; may_open; } :: toks -> + if not may_open then loop p toks line ~had_link acc else + begin match try_math_span p toks line ~start ~count with + | None -> loop p toks line ~had_link acc + | Some (toks, line, t) -> loop p toks line ~had_link (t :: acc) + end + | Autolink_or_html_start { start } :: toks -> + begin match try_autolink_or_html p toks line ~start with + | None -> loop p toks line ~had_link acc + | Some (toks, line, t) -> loop p toks line ~had_link (t :: acc) + end + | Link_start { start; image } :: toks -> + begin match try_link p toks line ~image ~start with + | None -> loop p toks line ~had_link acc + | Some (toks, line, t, had_link) -> + loop p toks line ~had_link (t :: acc) + end + | Right_brack start :: toks -> loop p toks line ~had_link acc + | Newline { newline = l } as t :: toks -> loop p toks l ~had_link (t :: acc) + | t :: toks -> loop p toks line ~had_link (t :: acc) + in + loop p toks line ~had_link:false [] + + (* Second pass *) + + and find_emphasis_text p toks line ~opener = + let marks_match ~marks ~opener = + (opener.char = marks.char) && + (not (marks.may_open || opener.may_close) || + marks.count mod 3 = 0 || (opener.count + marks.count) mod 3 != 0) + in + let marks_has_precedence p ~marks ~opener = + if marks.char = opener.char (* Rule 16 *) then true else (* Rule 15 *) + emphasis_closer_pos ~char:marks.char ~after:marks.start p.cidx < + emphasis_closer_pos ~char:opener.char ~after:marks.start p.cidx + in + let rec loop p toks line acc ~opener = match toks with + | [] -> Either.Left (List.rev acc) (* No match but keep nested work done *) + | Emphasis_marks marks as t :: toks -> + let after = marks.start in + if marks.may_close && marks_match ~marks ~opener then + let used = if marks.count >= 2 && opener.count >= 2 then 2 else 1 in + let to_last = marks.start - 1 in + let acc = rev_tokens_and_shorten_last_line ~to_last [] acc in + Either.Right (toks, line, used, acc, marks) + else if marks.may_open && marks_has_precedence p ~marks ~opener then + match try_emphasis p toks line ~opener:marks with + | Either.Left toks -> loop p toks line acc ~opener + | Either.Right (toks, line) -> loop p toks line acc ~opener + else if has_emphasis_closer ~char:opener.char ~after p.cidx then + loop p toks line (t :: acc) ~opener + else (Either.Left (List.rev_append (t :: acc) toks)) + | Newline { newline = l } as t :: toks -> loop p toks l (t :: acc) ~opener + | Inline { endline = l } as t :: toks -> loop p toks l (t :: acc) ~opener + | t :: toks -> loop p toks line (t :: acc) ~opener + in + loop p toks line [] ~opener + + and try_emphasis p start_toks start_line ~opener = + let start = opener.start in + if not (has_emphasis_closer ~char:opener.char ~after:start p.cidx) + then Either.Left start_toks else + match find_emphasis_text p start_toks start_line ~opener with + | Either.Left _ as r -> r + | Either.Right (toks, line, used, emph_toks, closer) -> + let text_first = start + opener.count in + let text_last = closer.start - 1 (* XXX prev line ? *) in + let first = text_first - used in + let last = closer.start + used - 1 in + let first_line = start_line and last_line = line in + let emph = + let text_start = + let last = + if start_line == line then text_last else start_line.last + in + { start_line with first = text_first; last } + in + (* No need to redo first pass *) + let emph_toks = second_pass p emph_toks text_start in + let text = last_pass p emph_toks text_start in + inlines_inline p text ~first ~last:text_last ~first_line ~last_line + in + let toks = + let count = closer.count - used in + if count = 0 then toks else + Emphasis_marks { closer with start = last + 1; count } :: toks + in + let toks = + let strong = used = 2 in + emphasis_token p ~first ~last ~first_line ~last_line ~strong emph :: + toks + in + let toks = + let count = opener.count - used in + if count = 0 then toks else + Emphasis_marks { opener with count } :: toks + in + Either.Right (toks, line) + + and find_strikethrough_text p toks start_line = + let rec loop p toks line acc = match toks with + | [] -> Either.Left (List.rev acc) (* No match but keep nested work done *) + | Strikethrough_marks marks :: toks -> + if marks.may_close then + let to_last = marks.start - 1 in + let acc = rev_tokens_and_shorten_last_line ~to_last [] acc in + Either.Right (toks, line, acc, marks) + else if marks.may_open then + match try_strikethrough p toks line ~opener:marks with + | Either.Left toks -> loop p toks line acc + | Either.Right (toks, line) -> loop p toks line acc + else assert false + | Newline { newline = l } as t :: toks -> loop p toks l (t :: acc) + | Inline { endline = l } as t :: toks -> loop p toks l (t :: acc) + | t :: toks -> loop p toks line (t :: acc) + in + loop p toks start_line [] + + and try_strikethrough p start_toks start_line ~opener = + let start = opener.start in + if not (has_strikethrough_closer ~after:start p.cidx) + then Either.Left start_toks else + match find_strikethrough_text p start_toks start_line with + | Either.Left _ as r -> r + | Either.Right (toks, line, stroken_toks, closer) -> + let first_line = start_line and last_line = line in + let text = + let first = start + 2 in + let last = closer.start - 1 in + let text_start = + let last = + if start_line == line then last else start_line.last + in + { start_line with first; last } + in + (* No need to redo first pass *) + let emph_toks = second_pass p stroken_toks text_start in + let text = last_pass p emph_toks text_start in + inlines_inline p text ~first ~last ~first_line ~last_line + in + let toks = + let first = opener.start and last = closer.start + 1 in + ext_strikethrough_token p ~first ~last ~first_line ~last_line text + :: toks + in + Either.Right (toks, line) + + and second_pass p toks line = + let rec loop p toks line acc = match toks with + | [] -> List.rev acc + | Emphasis_marks ({ may_open } as opener) :: toks -> + if not may_open then loop p toks line acc else + begin match try_emphasis p toks line ~opener with + | Either.Left toks -> loop p toks line acc + | Either.Right (toks, line) -> loop p toks line acc + end + | Strikethrough_marks ({ may_open } as opener) :: toks -> + if not may_open then loop p toks line acc else + begin match try_strikethrough p toks line ~opener with + | Either.Left toks -> loop p toks line acc + | Either.Right (toks, line) -> loop p toks line acc + end + | Newline { newline } as t :: toks -> loop p toks newline (t :: acc) + | Inline { endline } as t :: toks -> loop p toks endline (t :: acc) + | t :: toks -> loop p toks line (t :: acc) + in + loop p toks line [] + + (* Last pass *) + + and last_pass p toks line = + (* Only [Inline] and [Newline] tokens remain. We fold over them to + convert them to [inline] values and [Break]s. [Text] inlines + are created for data between them. *) + let rec loop toks line acc k = match toks with + | [] -> + List.rev (try_add_text_inline p line ~first:k ~last:line.last acc) + | Newline { start; break_type; newline } :: toks -> + let acc = try_add_text_inline p line ~first:k ~last:(start - 1) acc in + let break = break_inline p line ~start ~break_type ~newline in + loop toks newline (break :: acc) newline.first + | Inline { start; inline; endline; next } :: toks -> + let acc = try_add_text_inline p line ~first:k ~last:(start - 1) acc in + let acc = match inline with + | Inline.Inlines (is, _meta_stub) -> List.rev_append (List.rev is) acc + | i -> i :: acc + in + loop toks endline acc next + | (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _ + | Emphasis_marks _ | Right_paren _ | Strikethrough_marks _ + | Math_span_marks _) :: _ -> + assert false + in + loop toks line [] line.first + + and parse_tokens p toks first_line = + let toks, had_link = first_pass p toks first_line in + let toks = second_pass p toks first_line in + last_pass p toks first_line, had_link + + let strip_paragraph p lines = + (* Remove initial and final blanks. Initial blank removal on + other paragraph lines is done during the inline parsing + and integrated into the AST for layout preservation. *) + let last, trailing_blanks = + let line = List.hd lines in + let first = line.first and start = line.last in + let non_blank = Match.last_non_blank p.i ~first ~start in + { line with last = non_blank}, + layout_clean_raw_span' p { line with first = non_blank + 1; } + in + let lines = List.rev (last :: List.tl lines) in + let first, leading_indent = + let line = List.hd lines in + let non_blank = first_non_blank_in_span p line in + { line with first = non_blank }, + non_blank - line.first + in + let lines = first :: List.tl lines in + let meta = meta_of_spans p ~first ~last in + (leading_indent, trailing_blanks), meta, lines + + let parse p lines = + let layout, meta, lines = strip_paragraph p lines in + let cidx, toks, first_line = tokenize ~exts:p.exts p.i lines in + p.cidx <- cidx; + let is, _had_link = parse_tokens p toks first_line in + let inline = match is with [i] -> i | is -> Inline.Inlines (is, meta) in + layout, inline + + (* Parsing table rows *) + + let get_blanks p line ~before k = + let nb = Match.first_non_blank p.i ~last:(before - 1) ~start:k in + layout_clean_raw_span' p { line with first = k; last = nb - 1 }, nb + + let make_col p = function + | [] -> assert false + | [i] -> i + | is -> + let last = Inline.meta (List.hd is) in + let is = List.rev is in + let first = Inline.meta (List.hd is) in + let meta = meta_of_metas p ~first ~last in + Inline.Inlines (is, meta) + + let find_pipe p line ~before k = + let text p ~first ~last = + Inline.Text (clean_unesc_unref_span p { line with first; last }) + in + let n = Match.first_non_escaped_char '|' p.i ~last:(before - 1) ~start:k in + if n = before then `Not_found (text p ~first:k ~last:(n - 1)) else + let nb = Match.last_non_blank p.i ~first:k ~start:(n - 1) in + let after = + layout_clean_raw_span' p { line with first = nb + 1; last = n - 1 } + in + let text = if nb < k then None else Some (text p ~first:k ~last:nb) in + `Found (text, after, n + 1) + + let start_col p line ~before k = + let bbefore, k = get_blanks p line ~before k in + if k >= before then `Start (bbefore, []) else + match find_pipe p line ~before k with + | `Not_found text -> `Start (bbefore, [text]) + | `Found (text, bafter, k) -> + let text = match text with + | Some text -> text + | None -> + let l = textloc_of_span p { line with first = k; last = k - 1 }in + (Inline.Inlines ([], meta p l)) + in + `Col ((text, (bbefore, bafter)), k) + + let rec finish_col p line blanks_before is toks k = match toks with + | [] -> + begin match find_pipe p line ~before:(line.last + 1) k with + | `Found (text, after, k) -> + let is = match text with Some t -> t :: is | None -> is in + (make_col p is, (blanks_before, after)), [], k + | `Not_found _ -> assert false + end + | Inline { start; inline; next } :: toks when k >= start -> + finish_col p line blanks_before (inline :: is) toks next + | Inline { start; inline; next } :: toks as toks' -> + begin match find_pipe p line ~before:start k with + | `Not_found text -> + let is = inline :: text :: is in + finish_col p line blanks_before is toks next + | `Found (text, after, k) -> + let is = match text with Some t -> t :: is | None -> is in + (make_col p is, (blanks_before, after)), toks', k + end + | (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _ + | Emphasis_marks _ | Right_paren _ | Strikethrough_marks _ + | Math_span_marks _ | Newline _ ) :: _ -> + assert false + + let rec parse_cols p line acc toks k = match toks with + | [] -> + if k > line.last then (List.rev acc) else + begin match start_col p line ~before:(line.last + 1) k with + | `Col (col, k) -> parse_cols p line (col :: acc) [] k + | `Start _ -> assert false + end + | Inline { start; inline; next } :: toks as toks' -> + begin match start_col p line ~before:start k with + | `Col (col, k) -> parse_cols p line (col :: acc) toks' k + | `Start (before, is) -> + let is = inline :: is in + let col, toks, k = finish_col p line before is toks next in + parse_cols p line (col :: acc) toks k + end + | (Backticks _ | Autolink_or_html_start _ | Link_start _ | Right_brack _ + | Emphasis_marks _ | Right_paren _ | Strikethrough_marks _ + | Math_span_marks _ | Newline _ ) :: _ -> + assert false + + let parse_table_row p line = + let cidx, toks, first_line = tokenize ~exts:p.exts p.i [line] in + p.cidx <- cidx; + let toks, _had_link = first_pass p toks first_line in + let toks = second_pass p toks first_line in + (* We now have modified last pass, inner inlines will have gone through + the regular [last_pass] which is fine since we are only interested + in creating the toplevel text nodes further splited on (unescaped) + [\]. *) + parse_cols p line [] toks line.first +end + +(* Block structure parsing. *) + +module Block_struct = struct + + (* Moving on the line in the indentation space (columns) and over container + markers. *) + + let[@inline] current_col p = p.current_char_col + p.tab_consumed_cols + let[@inline] current_indent p = p.next_non_blank_col - current_col p + let[@inline] end_of_line p = p.current_char > p.current_line_last_char + let[@inline] only_blanks p = p.next_non_blank > p.current_line_last_char + let[@inline] has_next_non_blank p = + p.next_non_blank <= p.current_line_last_char + + let update_next_non_blank p = + let rec loop p s last k col = + if k > last then (p.next_non_blank <- k; p.next_non_blank_col <- col) else + match s.[k] with + | ' ' -> loop p s last (k + 1) (col + 1) + | '\t' -> loop p s last (k + 1) (next_tab_stop col) + | _ -> p.next_non_blank <- k; p.next_non_blank_col <- col; + in + loop p p.i p.current_line_last_char p.current_char p.current_char_col + + let accept_cols ~count p = + let rec loop p count k col = + if count = 0 then (p.current_char <- k; p.current_char_col <- col) else + if p.i.[k] <> '\t' then loop p (count - 1) (k + 1) (col + 1) else + let col' = next_tab_stop col in + let tab_cols = col' - (col + p.tab_consumed_cols) in + if tab_cols > count + then (p.tab_consumed_cols <- count; loop p 0 k col) + else (p.tab_consumed_cols <- 0; loop p (count - tab_cols) (k + 1) col') + in + loop p count p.current_char p.current_char_col; + update_next_non_blank p + + let match_and_accept_block_quote p = + (* https://spec.commonmark.org/current/#block-quote-marker *) + if end_of_line p || p.i.[p.current_char] <> '>' then false else + let next_is_blank = + let next = p.current_char + 1 in + next <= p.current_line_last_char && Ascii.is_blank p.i.[next] + in + let count = if next_is_blank then (* we eat a space *) 2 else 1 in + accept_cols ~count p; true + + let accept_list_marker_and_indent p ~marker_size ~last = + (* Returns min indent after marker for list item *) + accept_cols ~count:marker_size p; + let indent = current_indent p in + let min_indent = + if only_blanks p || indent > 4 (* indented code *) + then 1 + else min indent 4 + in + accept_cols ~count:min_indent p; + min_indent + + let accept_code_indent p ~count = + (* Returns padding for partially consumed tab and content first char *) + accept_cols p ~count; + if p.tab_consumed_cols = 0 then 0, p.current_char else + let col' = next_tab_stop p.current_char_col in + let pad = col' - (p.current_char_col + p.tab_consumed_cols) in + pad, p.current_char (* is '\t' *) + 1 + + (* These data types are only used during parsing, to find out the + block structure. All the lists (blocks, lines) are in reverse + order. We don't extract data from the input here. We just store + line spans. See: + https://spec.commonmark.org/current/#phase-1-block-structure *) + + type space_pad = int (* number of space characters to pad content with. *) + type indented_code_line = + { pad : space_pad; + code : line_span; + is_blank : bool } + + type fence = + { indent : Layout.indent; + opening_fence : line_span; + fence : Char.t * int (* fence length *); + info_string : line_span option (* we drop the trailing blanks *); + closing_fence : line_span option; } + + type fenced_code_block = + { fence : fence; + code : (space_pad * line_span) list } + + type code_block = + [ `Indented of indented_code_line list | `Fenced of fenced_code_block ] + + type atx = + { indent : Layout.indent; + level : Match.heading_level; + after_open : byte_pos; + heading : line_span; + layout_after : line_span } + + type setext = + { level : Match.heading_level; + heading_lines : line_span list; + underline : (* Indent, underline char count, blanks *) + Layout.indent * line_span * line_span; } + + type heading = [ `Atx of atx | `Setext of setext ] + + type html_block = + { end_cond : Match.html_block_end_cond option; + html : line_span list } + + type paragraph = { maybe_ref : bool; lines : line_span list } + + type t = + | Block_quote of Layout.indent * t list + | Blank_line of space_pad * line_span + | Code_block of code_block + | Heading of heading + | Html_block of html_block + | List of list' + | Linkref_def of Link_definition.t node + | Paragraph of paragraph + | Thematic_break of Layout.indent * line_span (* including trailing blanks *) + | Ext_table of Layout.indent * (line_span * line_span (* trail blanks *)) list + | Ext_footnote of Layout.indent * (Label.t * Label.t option) * t list + + and list_item = + { before_marker : Layout.indent; + marker : line_span; + after_marker : Layout.indent; + ext_task_marker : (Uchar.t * line_span) option; + blocks : t list } + + and list' = + { last_blank : bool; (* last added line was blank and not first line + of item *) + loose : bool; (* inter-item looseness, intra-item is computed later *) + item_min_indent : int; (* last item minimal indent *) + list_type : Block.List'.type'; + items : list_item list; } + + let block_is_blank_line = function Blank_line _ -> true | _ -> false + + (* Making blocks from the current line status *) + + let blank_line p = + let first = p.current_char and last = p.current_line_last_char in + Blank_line (0, current_line_span p ~first ~last) + + let thematic_break p ~indent ~last:_ = + let last = p.current_line_last_char (* let's keep everything *) in + let break = current_line_span p ~first:p.current_char ~last in + Thematic_break (indent, break) + + let atx_heading p ~indent ~level ~after_open ~first_content ~last_content = + let heading = current_line_span p ~first:first_content ~last:last_content in + let layout_after = + let first = last_content + 1 and last = p.current_line_last_char in + current_line_span p ~first ~last + in + Heading (`Atx { indent; level; after_open; heading; layout_after }) + + let setext_heading p ~indent ~level ~last_underline heading_lines = + let u = current_line_span p ~first:p.current_char ~last:last_underline in + let blanks = + let first = last_underline + 1 and last = p.current_line_last_char in + current_line_span p ~first ~last + in + let underline = indent, u, blanks in + Heading (`Setext {level; heading_lines; underline}) + + let indented_code_block p = (* Has a side-effect on [p] *) + let pad, first = accept_code_indent p ~count:4 in + let code = current_line_span p ~first ~last:p.current_line_last_char in + Code_block (`Indented [{pad; code; is_blank = false}]) + + let fenced_code_block p ~indent ~fence_first ~fence_last ~info = + let info_string, layout_last = match info with + | None -> None, p.current_line_last_char + | Some (first, last) -> Some (current_line_span p ~first ~last), first - 1 + in + let opening_fence = + current_line_span p ~first:fence_first ~last:layout_last + in + let fence = p.i.[fence_first], (fence_last - fence_first + 1) in + let closing_fence = None in + let fence = { indent; opening_fence; fence; info_string; closing_fence } in + Code_block (`Fenced {fence; code = []}) + + let html_block p ~end_cond ~indent_start = + let first = indent_start and last = p.current_line_last_char in + let end_cond = (* Check if the same line matches the end condition. *) + if Match.html_block_end p.i ~end_cond ~last ~start:p.current_char + then None (* We are already closed *) else Some end_cond + in + Html_block { end_cond; html = [current_line_span p ~first ~last] } + + let paragraph p ~start = + let last = p.current_line_last_char in + let maybe_ref = Match.could_be_link_reference_definition p.i ~last ~start in + Paragraph { maybe_ref; lines = [current_line_span p ~first:start ~last]} + + let add_paragraph_line p ~indent_start par bs = + let first = indent_start and last = p.current_line_last_char in + let lines = current_line_span p ~first ~last :: par.lines in + Paragraph { par with lines } :: bs + + let table_row p ~first ~last = + current_line_span p ~first ~last, + current_line_span p ~first:(last + 1) ~last:p.current_line_last_char + + let table p ~indent ~last = + let row = table_row p ~first:p.current_char ~last in + Ext_table (indent, [row]) + + (* Link reference definition parsing + + This is invoked when we close a paragraph and works on the paragraph + lines. *) + + let parse_link_reference_definition p lines = + (* Has no side effect on [p], parsing occurs on [lines] spans. *) + (* https://spec.commonmark.org/current/#link-reference-definitions *) + let none () = raise_notrace Exit in + let next_line = function line :: lines -> Some (lines, line) | [] -> None in + try + let lines, line = match next_line lines with + | None -> none () | Some v -> v + in + let start = first_non_blank_in_span p line in + let indent = start - line.first in + let meta_first = { line with first = start } in + let lines, line, label, start = + match Match.link_label p.buf ~next_line p.i lines ~line ~start with + | None -> none () + | Some (lines, line, rev_spans, last, key) -> + let colon = last + 1 in + if colon > line.last || p.i.[colon] <> ':' then none () else + let label = Inline_struct.label_of_rev_spans p ~key rev_spans in + lines, line, label, colon + 1 + in + let lines, line, before_dest, start = + match first_non_blank_over_nl ~next_line p lines line ~start with + | None -> none () | Some v -> v + in + let angled_dest, dest, start, meta_last = + match Match.link_destination p.i ~last:line.last ~start with + | None -> none () + | Some (angled, first, last) -> + let dest = clean_unesc_unref_span p { line with first; last } in + let next = if angled then last + 2 else last + 1 in + angled, Some dest, next, { line with last = last } + in + let lines, after_dest, title_open_delim, title, after_title, meta_last = + match first_non_blank_over_nl ~next_line p lines line ~start with + | None -> lines, [], '\"', None, [], meta_last + | Some (_, _, _, st) when st = start (* need some space *) -> none () + | Some (lines', line', after_dest, start') -> + let no_newline = line'.line_pos = line.line_pos in + let title = + Match.link_title ~next_line p.i lines' ~line:line' ~start:start' + in + match title with + | None -> + if no_newline then none () (* garbage after dest *) else + lines, [], '\"', None, [], meta_last + | Some (lines', line', rev_spans, last) -> + let after_title = + let last = line'.last and start = last + 1 in + let nb = Match.first_non_blank p.i ~last ~start in + if nb <= line'.last + then None + else + Some [layout_clean_raw_span p { line' with first = start; }] + in + match after_title with + | None when no_newline -> none () + | None -> (lines, [], '\"', None, [], meta_last) + | Some after_title -> + let t = tight_block_lines p ~rev_spans in + lines', after_dest, p.i.[start'], Some t, + after_title, + { line' with last } + in + let meta = meta_of_spans p ~first:meta_first ~last:meta_last in + let layout = + { Link_definition.indent; angled_dest; before_dest; + after_dest; title_open_delim; after_title } + in + let defined_label = def_label p label in + let label = Some label in + let ld = + { Link_definition.layout; label; defined_label; dest; title }, meta + in + begin match defined_label with + | None -> () | Some def -> set_label_def p def (Link_definition.Def ld) + end; + Some (ld, lines) + with + | Exit -> None + + let maybe_add_link_reference_definitions p lines prevs = + let rec loop p prevs = function + | [] -> prevs + | ls -> + match parse_link_reference_definition p ls with + | None -> + (* Link defs can't interrupt a paragraph so we are good now. *) + Paragraph { maybe_ref = false; lines = List.rev ls } :: prevs + | Some (ld, ls) -> loop p (Linkref_def ld :: prevs) ls + in + loop p prevs (List.rev lines) + + (* Closing blocks and finishing the document. *) + + let close_indented_code_block p lines bs = + (* Removes trailing blank lines and add them as blank lines *) + let rec loop blanks lines bs = match lines with + | { pad; code; is_blank = true} :: lines -> + loop (Blank_line (pad, code) :: blanks) lines bs + | [] -> (* likely assert (false) *) List.rev_append blanks bs + | ls -> List.rev_append blanks ((Code_block (`Indented ls)) :: bs) + in + loop [] lines bs + + let close_paragraph p par bs = + if not par.maybe_ref then Paragraph par :: bs else + maybe_add_link_reference_definitions p par.lines bs + + let rec close_last_block p = function + | Code_block (`Indented ls) :: bs -> close_indented_code_block p ls bs + | Paragraph par :: bs -> close_paragraph p par bs + | List l :: bs -> close_list p l bs + | Ext_footnote (i, l, blocks) :: bs -> close_footnote p i l blocks bs + | bs -> bs + + and close_list p l bs = + let i = List.hd l.items in + let blocks = close_last_block p i.blocks in + (* The final blank line extraction of the list item entails less blank + line churn for CommonMark rendering but we don't do it on empty list + items. *) + match blocks with + | Blank_line _ as bl :: (_ :: _ as blocks) -> + let items = { i with blocks } :: List.tl l.items in + bl :: List { l with items } :: bs + | blocks -> + let items = { i with blocks } :: List.tl l.items in + List { l with items } :: bs + + and close_footnote p indent label blocks bs = + let blocks = close_last_block p blocks in + (* Like for lists above we do blank line extraction (except if blocks + is only a blank line) *) + let blanks, blocks = + let rec loop acc = function + | Blank_line _ as bl :: (_ :: _ as blocks) -> loop (bl :: acc) blocks + | blocks -> acc, blocks + in + loop [] blocks + in + List.rev_append blanks (Ext_footnote (indent, label, blocks) :: bs) + + let close_last_list_item p l = + let item = List.hd l.items in + let item = { item with blocks = close_last_block p item.blocks } in + { l with items = item :: List.tl l.items } + + let end_doc_close_fenced_code_block p fenced bs = match fenced.code with + | (_, l) :: code when l.first > l.last (* empty line *) -> + Blank_line (0, l) :: Code_block (`Fenced { fenced with code }) :: bs + | _ -> Code_block (`Fenced fenced) :: bs + + let end_doc_close_html p h bs = match h.html with + | l :: html when l.first > l.last (* empty line *) -> + Blank_line (0, l) :: Html_block { end_cond = None; html } :: bs + | _ -> + Html_block { h with end_cond = None } :: bs + + let rec end_doc p = function + | Block_quote (indent, bq) :: bs -> Block_quote (indent, end_doc p bq) :: bs + | List list :: bs -> close_list p list bs + | Paragraph par :: bs -> close_paragraph p par bs + | Code_block (`Indented ls) :: bs -> close_indented_code_block p ls bs + | Code_block (`Fenced f) :: bs -> end_doc_close_fenced_code_block p f bs + | Html_block html :: bs -> end_doc_close_html p html bs + | Ext_footnote (i, l, blocks) :: bs -> close_footnote p i l blocks bs + | (Thematic_break _ | Heading _ | Blank_line _ | Linkref_def _ + | Ext_table _ ) :: _ | [] as bs -> bs + + (* Adding lines to blocks *) + + let match_line_type ~no_setext ~indent p = + (* Effects on [p]'s column advance *) + if only_blanks p then Match.Blank_line else + if indent >= 4 then Indented_code_block_line else begin + accept_cols ~count:indent p; + if end_of_line p then Match.Blank_line else + let start = p.current_char and last = p.current_line_last_char in + match p.i.[start] with + (* Early dispatch shaves a few ms but may not be worth doing vs + testing all the cases in sequences. *) + | '>' -> + if match_and_accept_block_quote p then Match.Block_quote_line else + Paragraph_line + | '=' when not no_setext -> + let r = Match.setext_heading_underline p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '-' -> + let r = + if no_setext then Match.Nomatch else + Match.setext_heading_underline p.i ~last ~start + in + if r <> Nomatch then r else + let r = Match.thematic_break p.i ~last ~start in + if r <> Nomatch then r else + let r = Match.list_marker p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '#' -> + let r = Match.atx_heading p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '+' | '*' | '0' .. '9' -> + let r = Match.thematic_break p.i ~last ~start in + if r <> Nomatch then r else + let r = Match.list_marker p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '_' -> + let r = Match.thematic_break p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '~' | '`' -> + let r = Match.fenced_code_block_start p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '<' -> + let r = Match.html_block_start p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '|' when p.exts -> + let r = Match.ext_table_row p.i ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | '[' when p.exts -> + let line_pos = p.current_line_pos in + let r = Match.ext_footnote_label p.buf p.i ~line_pos ~last ~start in + if r <> Nomatch then r else + Paragraph_line + | _ -> + Paragraph_line + end + + let list_marker_can_interrupt_paragraph p = function + | `Ordered (1, _), marker_last | `Unordered _, marker_last -> + let last = p.current_line_last_char and start = marker_last + 1 in + let non_blank = Match.first_non_blank p.i ~last ~start in + non_blank <= p.current_line_last_char (* line is not blank *) + | _ -> false + + let same_list_type t0 t1 = match t0, t1 with + | `Ordered (_, c0), `Ordered (_, c1) + | `Unordered c0, `Unordered c1 when Char.equal c0 c1 -> true + | _ -> false + + let rec add_open_blocks_with_line_class p ~indent_start ~indent bs = function + | Match.Blank_line -> blank_line p :: bs + | Indented_code_block_line -> indented_code_block p :: bs + | Block_quote_line -> Block_quote (indent, add_open_blocks p []) :: bs + | Thematic_break_line last -> thematic_break p ~indent ~last :: bs + | List_marker_line m -> list p ~indent m bs + | Atx_heading_line (level, after_open, first_content, last_content) -> + atx_heading p ~indent ~level ~after_open ~first_content ~last_content :: + bs + | Fenced_code_block_line (fence_first, fence_last, info) -> + fenced_code_block p ~indent ~fence_first ~fence_last ~info :: bs + | Html_block_line end_cond -> html_block p ~end_cond ~indent_start :: bs + | Paragraph_line -> paragraph p ~start:indent_start :: bs + | Ext_table_row last -> table p ~indent ~last :: bs + | Ext_footnote_label (rev_spans, last, key) -> + footnote p ~indent ~last rev_spans key :: bs + | Setext_underline_line _ | Nomatch -> + (* This function should be called with a line type that comes out + of match_line_type ~no_setext:true *) + assert false + + and add_open_blocks p bs = + let indent_start = p.current_char and indent = current_indent p in + let ltype = match_line_type ~no_setext:true ~indent p in + add_open_blocks_with_line_class p ~indent_start ~indent bs ltype + + and footnote p ~indent ~last rev_spans key = + let label = Inline_struct.label_of_rev_spans p ~key rev_spans in + let defined_label = match def_label p label with + | None -> None + | Some def as l -> set_label_def p def (Block.Footnote.stub label l); l + in + accept_cols p ~count:(last - p.current_char + 1); + Ext_footnote (indent, (label, defined_label), add_open_blocks p []) + + and list_item ~indent p (list_type, last) = + let before_marker = indent and marker_size = last - p.current_char + 1 in + let marker = current_line_span p ~first:p.current_char ~last in + let after_marker = accept_list_marker_and_indent p ~marker_size ~last in + let ext_task_marker, ext_task_marker_size = match p.exts with + | false -> None, 0 + | true -> + let start = p.current_char and last = p.current_line_last_char in + match Match.ext_task_marker p.i ~last ~start with + | None -> None, 0 + | Some (u, last) -> + accept_cols p ~count:(last - start + 1); + let last = match last = p.current_line_last_char with + | true -> (* empty line *) last + | false -> (* remove space for locs *) last - 1 + in + Some (u, current_line_span p ~first:start ~last), 4 + in + let min = indent + marker_size + after_marker + ext_task_marker_size in + min, { before_marker; marker; after_marker; ext_task_marker; + blocks = add_open_blocks p [] } + + and list ~indent p (list_type, _ as m) bs = + let item_min_indent, item = list_item ~indent p m in + List { last_blank = false; loose = false; + item_min_indent; list_type; items = [item] } :: bs + + let try_add_to_list ~indent p (lt, _ as m) l bs = + let item_min_indent, item = list_item ~indent p m in + if same_list_type lt l.list_type then + let l = close_last_list_item p l and last_blank = false in + let list_type = l.list_type in + List { last_blank; loose = l.last_blank; item_min_indent; list_type; + items = item :: l.items } :: bs + else + let bs = close_list p l bs and last_blank = false in + List { last_blank; loose = false; item_min_indent; list_type = lt; + items = [item] } :: bs + + let try_add_to_paragraph p par bs = + let indent_start = p.current_char and indent = current_indent p in + match match_line_type ~no_setext:false ~indent p with + (* These can't interrupt paragraphs *) + | Html_block_line `End_blank_7 + | Indented_code_block_line + | Ext_table_row _ | Ext_footnote_label _ + | Paragraph_line -> + add_paragraph_line p ~indent_start par bs + | List_marker_line m when not (list_marker_can_interrupt_paragraph p m) -> + add_paragraph_line p ~indent_start par bs + | Blank_line -> + blank_line p :: close_paragraph p par bs + | Block_quote_line -> + Block_quote (indent, add_open_blocks p []) :: (close_paragraph p par bs) + | Setext_underline_line (level, last_underline) -> + let bs = close_paragraph p par bs in + begin match bs with + | Paragraph { lines; _ } :: bs -> + setext_heading p ~indent ~level ~last_underline lines :: bs + | bs -> paragraph p ~start:indent_start :: bs + end + | Thematic_break_line last -> + thematic_break p ~indent ~last :: (close_paragraph p par bs) + | List_marker_line m -> + list p ~indent m (close_paragraph p par bs) + | Atx_heading_line (level, after_open, first_content, last_content) -> + let bs = close_paragraph p par bs in + atx_heading p ~indent ~level ~after_open ~first_content ~last_content :: + bs + | Fenced_code_block_line (fence_first, fence_last, info) -> + let bs = close_paragraph p par bs in + fenced_code_block p ~indent ~fence_first ~fence_last ~info :: bs + | Html_block_line end_cond -> + html_block p ~end_cond ~indent_start :: (close_paragraph p par bs) + | Nomatch -> assert false + + let try_add_to_indented_code_block p ls bs = + if current_indent p < 4 then + if has_next_non_blank p + then add_open_blocks p (close_indented_code_block p ls bs) else + (* Blank but white is not data, make an empty span *) + let first = p.current_line_last_char + 1 in + let last = p.current_line_last_char in + let code = current_line_span p ~first ~last in + let l = { pad = 0; code; is_blank = true } in + Code_block (`Indented (l :: ls)) :: bs + else + let pad, first = accept_code_indent p ~count:4 in + let last = p.current_line_last_char in + let is_blank = only_blanks p in + let l = { pad; code = current_line_span p ~first ~last; is_blank } in + Code_block (`Indented (l :: ls)) :: bs + + let try_add_to_fenced_code_block p f bs = match f with + | { fence = { closing_fence = Some _; _}; _ } -> (* block is closed *) + add_open_blocks p ((Code_block (`Fenced f)) :: bs) + | { fence = { indent; fence; _} ; code = ls} as b -> + let start = p.current_char and last = p.current_line_last_char in + match Match.fenced_code_block_continue ~fence p.i ~last ~start with + | `Code -> + let strip = Int.min indent (current_indent p) in + let pad, first = accept_code_indent p ~count:strip in + let code = (pad, current_line_span p ~first ~last) :: ls in + Code_block (`Fenced { b with code }) :: bs + | `Close (first, _fence_last) -> + let close = current_line_span p ~first ~last (* with layout *)in + let fence = { b.fence with closing_fence = Some close } in + Code_block (`Fenced { b with fence }) :: bs + + let try_add_to_html_block p b bs = match b.end_cond with + | None -> add_open_blocks p (Html_block { b with end_cond = None} :: bs) + | Some end_cond -> + let start = p.current_char and last = p.current_line_last_char in + let l = current_line_span p ~first:start ~last in + if not (Match.html_block_end p.i ~end_cond ~last ~start) + then Html_block { b with html = l :: b.html } :: bs else + match end_cond with + | `End_blank | `End_blank_7 -> + blank_line p :: Html_block { b with end_cond = None } :: bs + | _ -> + Html_block { end_cond = None; html = l :: b.html } :: bs + + let rec try_lazy_continuation p ~indent_start = function + | Paragraph par :: bs -> Some (add_paragraph_line p ~indent_start par bs) + | Block_quote (indent, bq) :: bs -> + begin match try_lazy_continuation p ~indent_start bq with + | None -> None + | Some bq -> Some (Block_quote (indent, bq) :: bs) + end + | List l :: bs -> + let i = List.hd l.items in + begin match try_lazy_continuation p ~indent_start i.blocks with + | None -> None + | Some blocks -> + let items = { i with blocks } :: (List.tl l.items) in + Some (List { l with items; last_blank = false } :: bs) + end + | _ -> None + + let try_add_to_table p ind rows bs = + let indent_start = p.current_char and indent = current_indent p in + match match_line_type ~indent ~no_setext:true p with + | Ext_table_row last -> + let row = table_row p ~first:p.current_char ~last in + Ext_table (ind, row :: rows) :: bs + | ltype -> + let bs = Ext_table (ind, rows) :: bs in + add_open_blocks_with_line_class p ~indent ~indent_start bs ltype + + let rec try_add_to_block_quote p indent_layout bq bs = + let indent_start = p.current_char and indent = current_indent p in + match match_line_type ~indent ~no_setext:true p with + | Block_quote_line -> Block_quote (indent_layout, add_line p bq) :: bs + | (Indented_code_block_line (* Looks like a *) | Paragraph_line) as ltype -> + begin match try_lazy_continuation p ~indent_start bq with + | Some bq -> Block_quote (indent_layout, bq) :: bs + | None -> + let bs = Block_quote (indent_layout, close_last_block p bq) :: bs in + add_open_blocks_with_line_class p ~indent ~indent_start bs ltype + end + | ltype -> + let bs = Block_quote (indent_layout, close_last_block p bq) :: bs in + add_open_blocks_with_line_class p ~indent ~indent_start bs ltype + + and try_add_to_footnote p fn_indent label blocks bs = + let indent_start = p.current_char and indent = current_indent p in + if indent < fn_indent + 1 (* position of ^ *) then begin + match match_line_type ~indent ~no_setext:true p with + | (Indented_code_block_line (* Looks like a *) | Paragraph_line) as lt -> + begin match try_lazy_continuation p ~indent_start blocks with + | Some blocks -> Ext_footnote (fn_indent, label, blocks) :: bs + | None -> + let blocks = close_last_block p blocks in + let bs = (close_footnote p fn_indent label blocks) bs in + add_open_blocks_with_line_class p ~indent ~indent_start bs lt + end + | Blank_line -> + Ext_footnote (fn_indent, label, add_line p blocks) :: bs + | ltype -> + let blocks = close_last_block p blocks in + let bs = close_footnote p fn_indent label blocks bs in + add_open_blocks_with_line_class p ~indent ~indent_start bs ltype + end else begin + accept_cols p ~count:(fn_indent + 1); + Ext_footnote (fn_indent, label, add_line p blocks) :: bs + end + + and try_add_to_list_item p list bs = + let indent_start = p.current_char and indent = current_indent p in + if indent >= list.item_min_indent then begin + let last_blank = only_blanks p in + let item = List.hd list.items and items = List.tl list.items in + if list.last_blank && not last_blank && + List.for_all block_is_blank_line item.blocks + then + (* Item can only start with a single blank line, if we are + here it's not a new item so the list ends *) + add_open_blocks p (List list :: bs) + else begin + accept_cols ~count:list.item_min_indent p; + let item = { item with blocks = add_line p item.blocks } in + List { list with items = item :: items; last_blank } :: bs + end + end else match match_line_type ~indent ~no_setext:true p with + | Blank_line -> + let item = List.hd list.items and items = List.tl list.items in + let item = { item with blocks = add_line p item.blocks } in + List { list with items = item :: items; last_blank = true } :: bs + | Indented_code_block_line | Paragraph_line as ltype -> + let item = List.hd list.items and items = List.tl list.items in + begin match try_lazy_continuation p ~indent_start item.blocks with + | Some blocks -> + let items = { item with blocks } :: items in + List { list with items; last_blank = false } :: bs + | None -> + let bs = close_list p list bs in + add_open_blocks_with_line_class p ~indent ~indent_start bs ltype + end + | List_marker_line m -> + try_add_to_list p ~indent m list bs + | ltype -> + let bs = close_list p list bs in + add_open_blocks_with_line_class p ~indent ~indent_start bs ltype + + and add_line p = function + | Paragraph par :: bs -> try_add_to_paragraph p par bs + | ((Thematic_break _ | Heading _ | Blank_line _ | Linkref_def _) :: _) + | [] as bs -> add_open_blocks p bs + | List list :: bs -> try_add_to_list_item p list bs + | Code_block (`Indented ls) :: bs -> try_add_to_indented_code_block p ls bs + | Code_block (`Fenced f) :: bs -> try_add_to_fenced_code_block p f bs + | Block_quote (ind, bq) :: bs -> try_add_to_block_quote p ind bq bs + | Html_block html :: bs -> try_add_to_html_block p html bs + | Ext_table (ind, rows) :: bs -> try_add_to_table p ind rows bs + | Ext_footnote (i, l, blocks) :: bs -> try_add_to_footnote p i l blocks bs + + (* Parsing *) + + let get_first_line p = + let max = String.length p.i - 1 in + let k = ref 0 in + let last_char = + while !k <= max && p.i.[!k] <> '\n' && p.i.[!k] <> '\r' do incr k done; + !k - 1 (* if the line is empty we have -1 *) + in + p.current_line_last_char <- last_char; + update_next_non_blank p; + (* Return first used newline (or "\n" if there is none) *) + if !k > max || p.i.[!k] = '\n' then "\n" else + let next = !k + 1 in + if next <= max && p.i.[next] = '\n' then "\r\n" else "\r" + + let get_next_line p = + let max = String.length p.i - 1 in + if p.current_line_last_char = max then false else + let first_char = + let nl = p.current_line_last_char + 1 in + if p.i.[nl] = '\n' then nl + 1 else (* assert (p.i.[nl] = '\r') *) + let next = nl + 1 in + if next <= max && p.i.[next] = '\n' then next + 1 else next + in + let last_char = + let k = ref first_char in + while !k <= max && p.i.[!k] <> '\n' && p.i.[!k] <> '\r' do incr k done; + !k - 1 (* if the line is empty we have last_char = first_char - 1 *) + in + p.current_line_pos <- (fst p.current_line_pos + 1), first_char; + p.current_line_last_char <- last_char; + p.current_char <- first_char; + p.current_char_col <- 0; + p.tab_consumed_cols <- 0; + update_next_non_blank p; + true + + let parse p = + let meta p = + let first_byte = 0 and last_byte = p.current_line_last_char in + let first_line = 1, first_byte and last_line = p.current_line_pos in + let file = p.file in + meta p (Textloc.v ~file ~first_byte ~last_byte ~first_line ~last_line) + in + let rec loop p bs = + let bs = add_line p bs in + if get_next_line p then loop p bs else (end_doc p bs), meta p + in + let nl = get_first_line p in + nl, loop p [] +end + +(* Building the final AST, invokes inline parsing. *) + +let block_struct_to_blank_line p pad span = + Block.Blank_line (clean_raw_span p ~pad span) + +let block_struct_to_code_block p = function +| `Indented (ls : Block_struct.indented_code_line list) (* non-empty *) -> + let line p { Block_struct.pad; code; _} = clean_raw_span ~pad p code in + let layout = `Indented and info_string = None in + let last = (List.hd ls).code in + let code = List.rev_map (line p) ls in + let meta = + let last_line = last.line_pos and last_byte = last.last in + let start = Meta.textloc (snd (List.hd code)) in + meta p (Textloc.set_last start ~last_byte ~last_line) + in + Block.Code_block ({layout; info_string; code}, meta) +| `Fenced { Block_struct.fence; code = ls } -> + let layout = + let opening_fence = layout_clean_raw_span p fence.opening_fence in + let closing_fence = + Option.map (layout_clean_raw_span p) fence.closing_fence + in + { Block.Code_block.indent = fence.indent; opening_fence; closing_fence } + in + let info_string = Option.map (clean_unesc_unref_span p) fence.info_string in + let code = List.rev_map (fun (pad, l) -> clean_raw_span p ~pad l) ls in + let meta = + let first = fence.opening_fence in + let last = match fence.closing_fence with + | Some last -> last + | None -> match ls with [] -> first | (_, last_line) :: _ -> last_line + in + meta_of_spans p ~first ~last + in + let cb = {Block.Code_block.layout = `Fenced layout; info_string; code} in + if p.exts && Block.Code_block.is_math_block info_string + then Block.Ext_math_block (cb, meta) + else Block.Code_block (cb, meta) + +let block_struct_to_heading p = function +| `Atx { Block_struct.indent; level; after_open; heading; layout_after } -> + let after_opening = + let first = after_open and last = heading.first - 1 in + layout_clean_raw_span' p { heading with first; last } + in + let closing = layout_clean_raw_span' p layout_after in + let layout = `Atx { Block.Heading.indent; after_opening; closing } in + let meta = + meta p (textloc_of_span p { heading with first = after_open - level }) + in + let _layout, inline = Inline_struct.parse p [heading] in + let id = match p.heading_auto_ids with + | false -> None + | true -> Some (`Auto (Inline.id ~buf:p.buf inline)) + in + Block.Heading ({layout; level; inline; id}, meta) +| `Setext { Block_struct.level; heading_lines; underline } -> + let (leading_indent, trailing_blanks), inline = + Inline_struct.parse p heading_lines + in + let underline_indent, u, blanks = underline in + let underline_blanks = layout_clean_raw_span' p blanks in + let underline_count = u.last - u.first + 1, meta p (textloc_of_span p u) in + let layout = + { Block.Heading.leading_indent; trailing_blanks; underline_indent; + underline_count; underline_blanks } + in + let meta = + let last_line = u.line_pos and last_byte = u.last in + let start = Meta.textloc (Inline.meta inline) in + meta p (Textloc.set_last start ~last_byte ~last_line) + in + let id = match p.heading_auto_ids with + | false -> None + | true -> Some (`Auto (Inline.id ~buf:p.buf inline)) + in + Block.Heading ({ layout = `Setext layout; level; inline; id }, meta) + +let block_struct_to_html_block p (b : Block_struct.html_block) = + let last = List.hd b.html in + let last_byte = last.last and last_line = last.line_pos in + let lines = List.rev_map (clean_raw_span p) b.html in + let start_loc = Meta.textloc (snd (List.hd lines)) in + let meta = meta p (Textloc.set_last start_loc ~last_byte ~last_line) in + Block.Html_block (lines, meta) + +let block_struct_to_paragraph p par = + let layout, inline = Inline_struct.parse p par.Block_struct.lines in + let leading_indent, trailing_blanks = layout in + let meta = Inline.meta inline in + Block.Paragraph ({ leading_indent; inline; trailing_blanks }, meta) + +let block_struct_to_thematic_break p indent span = + let layout, meta = (* not layout because of loc *) clean_raw_span p span in + Block.Thematic_break ({ indent; layout }, meta) + +let block_struct_to_table p indent rows = + let rec loop p col_count last_was_sep acc = function + | (row, blanks) :: rs -> + let meta = meta p (textloc_of_span p row) in + let row' = { row with first = row.first + 1; last = row.last } in + let cols = Inline_struct.parse_table_row p row' in + let col_count = Int.max col_count (List.length cols) in + let r, last_was_sep = match Block.Table.parse_sep_row cols with + | Some seps -> ((`Sep seps), meta), true + | None -> + ((if last_was_sep then `Header cols else `Data cols), meta), false + in + let acc = (r, layout_clean_raw_span' p blanks) :: acc in + if rs = [] then row, col_count, acc else + loop p col_count last_was_sep acc rs + | [] -> assert false + in + let last = fst (List.hd rows) in + let first, col_count, rows = loop p 0 false [] rows in + let meta = meta_of_spans p ~first ~last in + Block.Ext_table ({ indent; col_count; rows }, meta) + +let rec block_struct_to_block_quote p indent bs = + let add_block p acc b = block_struct_to_block p b :: acc in + let last = block_struct_to_block p (List.hd bs) in + let block = List.fold_left (add_block p) [last] (List.tl bs) in + let block = match block with + | [b] -> b + | quote -> + let first = Block.meta (List.hd quote) and last = Block.meta last in + Block.Blocks (quote, meta_of_metas p ~first ~last) + in + Block.Block_quote ({indent; block}, Block.meta block) + +and block_struct_to_footnote_definition p indent (label, defined_label) bs = + let add_block p acc b = block_struct_to_block p b :: acc in + let last = block_struct_to_block p (List.hd bs) in + let block = List.fold_left (add_block p) [last] (List.tl bs) in + let last = Block.meta last in + let block = match block with + | [b] -> b + | bs -> + let first = Block.meta (List.hd bs) in + Block.Blocks (bs, meta_of_metas p ~first ~last) + in + let loc = + let labelloc = Label.textloc label in + let lastloc = Meta.textloc last in + let loc = Textloc.span labelloc lastloc in + let first_byte = Textloc.first_byte loc - 1 in + Textloc.set_first loc ~first_byte ~first_line:(Textloc.first_line loc) + in + let fn = { Block.Footnote.indent; label; defined_label; block }, meta p loc in + begin match defined_label with + | None -> () | Some def -> set_label_def p def (Block.Footnote.Def fn) + end; + Block.Ext_footnote_definition fn + +and block_struct_to_list_item p (i : Block_struct.list_item) = + let rec loop bstate tight acc = function + | Block_struct.Blank_line _ as bl :: bs -> + let bstate = if bstate = `Trail_blank then `Trail_blank else `Blank in + loop bstate tight (block_struct_to_block p bl :: acc) bs + | Block_struct.List + { items = { blocks = Block_struct.Blank_line _ :: _ } :: _ } as l :: bs + -> + loop bstate false (block_struct_to_block p l :: acc) bs + | b :: bs -> + let tight = tight && not (bstate = `Blank) in + loop `Non_blank tight (block_struct_to_block p b :: acc) bs + | [] -> tight, acc + in + let last_meta, (tight, blocks) = match i.blocks with + | [Block_struct.Blank_line _ as blank] -> + let bl = block_struct_to_block p blank in + Block.meta bl, (true, [bl]) + | Block_struct.Blank_line _ as blank :: bs -> + let bl = block_struct_to_block p blank in + (Block.meta bl), loop `Trail_blank true [bl] bs + | b :: bs -> + let b = block_struct_to_block p b in + (Block.meta b), loop `Non_blank true [b] bs + | [] -> assert false + in + let block = match blocks with + | [i] -> i + | is -> + let first = Block.meta (List.hd is) in + Block.Blocks (is, meta_of_metas p ~first ~last:last_meta) + in + let before_marker = i.before_marker and after_marker = i.after_marker in + let marker = (* not layout to get loc *) clean_raw_span p i.marker in + let ext_task_marker = match i.ext_task_marker with + | None -> None + | Some (u, span) -> Some (u, meta p (textloc_of_span p span)) + in + let meta = meta_of_metas p ~first:(snd marker) ~last:last_meta in + let i = + { Block.List_item.before_marker; marker; after_marker; block; + ext_task_marker } + in + (i, meta), tight + +and block_struct_to_list p list = + let rec loop p tight acc = function + | [] -> tight, acc + | item :: items -> + let item, item_tight = block_struct_to_list_item p item in + loop p (tight && item_tight) (item :: acc) items + in + let items = list.Block_struct.items in + let last, tight = block_struct_to_list_item p (List.hd items) in + let tight, items = loop p (not list.loose && tight) [last] (List.tl items) in + let meta = meta_of_metas p ~first:(snd (List.hd items)) ~last:(snd last) in + Block.List ({ type' = list.Block_struct.list_type; tight; items }, meta) + +and block_struct_to_block p = function +| Block_struct.Block_quote (ind, bs) -> block_struct_to_block_quote p ind bs +| Block_struct.List list -> block_struct_to_list p list +| Block_struct.Paragraph par -> block_struct_to_paragraph p par +| Block_struct.Thematic_break (i, br) -> block_struct_to_thematic_break p i br +| Block_struct.Code_block cb -> block_struct_to_code_block p cb +| Block_struct.Heading h -> block_struct_to_heading p h +| Block_struct.Html_block html -> block_struct_to_html_block p html +| Block_struct.Blank_line (pad, span) -> block_struct_to_blank_line p pad span +| Block_struct.Linkref_def r -> Block.Link_reference_definition r +| Block_struct.Ext_table (i, rows) -> block_struct_to_table p i rows +| Block_struct.Ext_footnote (i, labels, bs) -> + block_struct_to_footnote_definition p i labels bs + +let block_struct_to_doc p (doc, meta) = + match List.rev_map (block_struct_to_block p) doc with + | [b] -> b | bs -> Block.Blocks (bs, meta) + +(* Documents *) + +module Doc = struct + type t = { nl : Layout.string; block : Block.t; defs : Label.defs } + let make ?(nl = "\n") ?(defs = Label.Map.empty) block = { nl; block; defs } + let empty = make (Block.Blocks ([], Meta.none)) + let nl d = d.nl + let block d = d.block + let defs d = d.defs + let of_string + ?defs ?resolver ?nested_links ?heading_auto_ids ?layout ?locs ?file + ?(strict = true) s + = + let p = + parser ?defs ?resolver ?nested_links ?heading_auto_ids ?layout ?locs + ?file ~strict s + in + let nl, doc = Block_struct.parse p in + let block = block_struct_to_doc p doc in + make ~nl block ~defs:p.defs + + let unicode_version = Cmarkit_data.unicode_version + let commonmark_version = "0.30" +end + +(* Maps and folds *) + +module Mapper = struct + type 'a filter_map = 'a option + type 'a result = [ `Default | `Map of 'a filter_map ] + let default = `Default + let delete = `Map None + let ret v = `Map (Some v) + + type t = + { inline_ext_default : Inline.t map; + block_ext_default : Block.t map; + inline : Inline.t mapper; + block : Block.t mapper } + and 'a map = t -> 'a -> 'a filter_map + and 'a mapper = t -> 'a -> 'a result + + let none _ _ = `Default + let ext_inline_none _ _ = invalid_arg Inline.err_unknown + let ext_block_none _ _ = invalid_arg Block.err_unknown + let make + ?(inline_ext_default = ext_inline_none) + ?(block_ext_default = ext_block_none) + ?(inline = none) ?(block = none) () + = + { inline_ext_default; block_ext_default; inline; block } + + let inline_mapper m = m.inline + let block_mapper m = m.block + let inline_ext_default m = m.inline_ext_default + let block_ext_default m = m.block_ext_default + + let ( let* ) = Option.bind + + let rec map_inline m i = match m.inline m i with + | `Map i -> i + | `Default -> + let open Inline in + match i with + | Autolink _ | Break _ | Code_span _ | Raw_html _ + | Text _ | Ext_math_span _ as i -> Some i + | Image (l, meta) -> + let text = Option.value ~default:Inline.empty (map_inline m l.text) in + Some (Image ({ l with text }, meta)) + | Link (l, meta) -> + let* text = map_inline m l.text in + Some (Link ({ l with text }, meta)) + | Emphasis (e, meta) -> + let* inline = map_inline m e.inline in + Some (Emphasis ({ e with inline }, meta)) + | Strong_emphasis (e, meta) -> + let* inline = map_inline m e.inline in + Some (Strong_emphasis ({ e with inline}, meta)) + | Inlines (is, meta) -> + (match List.filter_map (map_inline m) is with + | [] -> None | is -> Some (Inlines (is, meta))) + | Ext_strikethrough (s, meta) -> + let* inline = map_inline m s in + Some (Ext_strikethrough (inline, meta)) + | ext -> m.inline_ext_default m ext + + let rec map_block m b = match m.block m b with + | `Map b -> b + | `Default -> + let open Block in + match b with + | Blank_line _ | Code_block _ | Html_block _ + | Link_reference_definition _ | Thematic_break _ + | Ext_math_block _ as b -> Some b + | Heading (h, meta) -> + let inline = match map_inline m (Block.Heading.inline h) with + | None -> (* Can be empty *) Inline.Inlines ([], Meta.none) + | Some i -> i + in + Some (Heading ({ h with inline}, meta)) + | Block_quote (b, meta) -> + let block = match map_block m b.block with + | None -> (* Can be empty *) Blocks ([], Meta.none) | Some b -> b + in + Some (Block_quote ({ b with block}, meta)) + | Blocks (bs, meta) -> + (match List.filter_map (map_block m) bs with + | [] -> None | bs -> Some (Blocks (bs, meta))) + | List (l, meta) -> + let map_list_item m (i, meta) = + let* block = map_block m (List_item.block i) in + Some ({ i with block }, meta) + in + (match List.filter_map (map_list_item m) l.items with + | [] -> None | items -> Some (List ({ l with items }, meta))) + | Paragraph (p, meta) -> + let* inline = map_inline m (Paragraph.inline p) in + Some (Paragraph ({ p with inline }, meta)) + | Ext_table (t, meta) -> + let map_col m (i, layout) = match map_inline m i with + | None -> None | Some i -> Some (i, layout) + in + let map_row (((r, meta), blanks) as row) = match r with + | `Header is -> + (`Header (List.filter_map (map_col m) is), meta), blanks + | `Sep _ -> row + | `Data is -> + (`Data (List.filter_map (map_col m) is), meta), blanks + in + let rows = List.map map_row t.rows in + Some (Ext_table ({ t with Table.rows }, meta)) + | Ext_footnote_definition (fn, meta) -> + let block = match map_block m fn.block with + | None -> (* Can be empty *) Blocks ([], Meta.none) | Some b -> b + in + Some (Ext_footnote_definition ({ fn with block}, meta)) + | ext -> m.block_ext_default m ext + + let map_doc m d = + let map_block m b = Option.value ~default:Block.empty (map_block m b) in + (* XXX something better for defs should be devised here. *) + let map_def m = function + | Block.Footnote.Def (fn, meta) -> + let block = map_block m (Block.Footnote.block fn) in + Block.Footnote.Def ({ fn with block }, meta) + | def -> def + in + let block = map_block m (Doc.block d) in + let defs = Label.Map.map (map_def m) (Doc.defs d) in + { d with Doc.block; defs } +end + +module Folder = struct + type 'a result = [ `Default | `Fold of 'a ] + let default = `Default + let ret v = `Fold v + + type ('a, 'b) fold = 'b t -> 'b -> 'a -> 'b + and ('a, 'b) folder = 'b t -> 'b -> 'a -> 'b result + and 'a t = + { inline_ext_default : (Inline.t, 'a) fold; + block_ext_default : (Block.t, 'a) fold; + inline : (Inline.t, 'a) folder; + block : (Block.t, 'a) folder; } + + let none _ _ _ = `Default + let ext_inline_none _ _ _ = invalid_arg Inline.err_unknown + let ext_block_none _ _ _ = invalid_arg Block.err_unknown + let make + ?(inline_ext_default = ext_inline_none) + ?(block_ext_default = ext_block_none) + ?(inline = none) ?(block = none) () + = + { inline_ext_default; block_ext_default; inline; block } + + let inline_folder f = f.inline + let block_folder f = f.block + let inline_ext_default f = f.inline_ext_default + let block_ext_default f = f.block_ext_default + + let rec fold_inline f acc i = match f.inline f acc i with + | `Fold acc -> acc + | `Default -> + let open Inline in + match i with + | Autolink _ | Break _ | Code_span _ | Raw_html _ | Text _ + | Ext_math_span _ -> acc + | Image (l, _) | Link (l, _) -> fold_inline f acc l.text + | Emphasis ({ inline }, _) -> fold_inline f acc inline + | Strong_emphasis ({ inline }, _) -> fold_inline f acc inline + | Inlines (is, _) -> List.fold_left (fold_inline f) acc is + | Ext_strikethrough (inline, _) -> fold_inline f acc inline + | ext -> f.inline_ext_default f acc ext + + let rec fold_block f acc b = match f.block f acc b with + | `Fold acc -> acc + | `Default -> + let open Block in + match b with + | Blank_line _ | Code_block _ | Html_block _ + | Link_reference_definition _ | Thematic_break _ | Ext_math_block _ -> acc + | Heading (h, _) -> fold_inline f acc (Block.Heading.inline h) + | Block_quote (bq, _) -> fold_block f acc bq.block + | Blocks (bs, _) -> List.fold_left (fold_block f) acc bs + | List (l, _) -> + let fold_list_item m acc (i, _) = + fold_block m acc (Block.List_item.block i) + in + List.fold_left (fold_list_item f) acc l.items + | Paragraph (p, _) -> fold_inline f acc (Block.Paragraph.inline p) + | Ext_table (t, _) -> + let fold_row acc ((r, _), _) = match r with + | (`Header is | `Data is) -> + List.fold_left (fun acc (i, _) -> fold_inline f acc i) acc is + | `Sep _ -> acc + in + List.fold_left fold_row acc t.Table.rows + | Ext_footnote_definition (fn, _) -> fold_block f acc fn.block + | ext -> f.block_ext_default f acc ext + + let fold_doc f acc d = fold_block f acc (Doc.block d) +end + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit.mli b/ocaml-lsp-server/vendor/cmarkit/cmarkit.mli new file mode 100644 index 000000000..b0279b01b --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit.mli @@ -0,0 +1,1891 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(** CommonMark parser and abstract syntax tree. + + See {{!page-index.quick}examples}. + + {b References.} + {ul + {- John MacFarlane. + {e {{:https://spec.commonmark.org/0.30/} + CommonMark Spec}}. Version 0.30, 2021}} *) + +(** {1:ast Abstract syntax tree} *) + +(** Text locations. + + A text location identifies a text span in a given UTF-8 encoded file + by an inclusive range of absolute {{!Textloc.type-byte_pos}byte} positions + and the {{!Textloc.type-line_pos}line positions} on which those occur. *) +module Textloc : sig + + (** {1:fpath File paths} *) + + type fpath = string + (** The type for file paths. *) + + val file_none : fpath + (** [file_none] is ["-"]. A file path to use when there is none. *) + + (** {1:pos Positions} *) + + (** {2:byte_pos Byte positions} *) + + type byte_pos = int + (** The type for zero-based, absolute, byte positions in text. If + the text has [n] bytes, [0] is the first position and [n-1] is + the last position. *) + + val byte_pos_none : byte_pos + (** [byte_pos_none] is [-1]. A position to use when there is none. *) + + (** {2:lines Lines} *) + + type line_num = int + (** The type for one-based, line numbers in the text. Lines + increment after a {e newline} which is either a line feed ['\n'] + (U+000A), a carriage return ['\r'] (U+000D) or a carriage return + and a line feed ["\r\n"] (). *) + + val line_num_none : line_num + (** [line_num_none] is [-1]. A line number to use when there is none. *) + + (** {2:line_pos Line positions} *) + + type line_pos = line_num * byte_pos + (** The type for line positions. This identifies a line by its line + number and the absolute byte position following its newline + (or the start of text for the first line). That byte position: + {ul + {- Indexes the first byte of text of the line if the line is non-empty.} + {- Indexes the first byte of the next newline if the line is empty.} + {- Is out of bounds and equal to the text's length for a last empty + line (this includes when the text is empty).}} *) + + val line_pos_first : line_pos + (** [line_pos_first] is [1, 0]. Note that this is the only line position + of the empty text. *) + + val line_pos_none : line_pos + (** [line_pos_none] is [(line_none, pos_none)]. *) + + (** {1:tloc Text locations} *) + + type t + (** The type for text locations. A text location identifies a text + span in an UTF-8 encoded file by an inclusive range of absolute + {{!type-byte_pos}byte positions} and the {{!type-line_pos}line positions} + on which they occur. + + If the first byte equals the last byte the range contains + exactly that byte. If the first byte is greater than the last + byte this represents an insertion point before the first byte. In + this case information about the last position should be ignored: + it can contain anything. *) + + val none : t + (** [none] is a position to use when there is none. *) + + val v : + file:fpath -> first_byte:byte_pos -> last_byte:byte_pos -> + first_line:line_pos -> last_line:line_pos -> t + (** [v ~file ~first_byte ~last_byte ~first_line ~last_line] is a text + location with the given arguments, see corresponding accessors for + the semantics. If you don't have a file use {!file_none}. *) + + val file : t -> fpath + (** [file l] is [l]'s file. *) + + val first_byte : t -> byte_pos + (** [first_byte l] is [l]'s first byte. Irrelevant if {!is_none} is + [true]. *) + + val last_byte : t -> byte_pos + (** [last_byte l] is [l]'s last byte. Irrelevant if {!is_none} or {!is_empty} + is [true]. *) + + val first_line : t -> line_pos + (** [first_line l] is the line position on which [first_byte l] lies. + Irrelevant if {!is_none} is [true].*) + + val last_line : t -> line_pos + (** [last_line l] is the line position on which [last_byte l] lies. + Irrelevant if {!is_none} or {!is_empty} is [true].*) + + (** {2:preds Predicates and comparisons} *) + + val is_none : t -> bool + (** [is_none t] is [true] iff [first_byte < 0]. *) + + val is_empty : t -> bool + (** [is_empty t] is [true] iff [first_byte t > last_byte t]. *) + + val equal : t -> t -> bool + (** [equal t0 t1] is [true] iff [t0] and [t1] are equal. This checks + that {!file}, {!first_byte} and {!last_byte} are equal. Line information + is ignored. *) + + val compare : t -> t -> int + (** [compare t0 t1] orders [t0] and [t1]. The order is compatible + with {!equal}. Comparison starts with {!file}, follows with {!first_byte} + and ends, if needed, with {!last_byte}. Line information is ignored. *) + + (** {2:shrink_and_stretch Shrink and stretch} *) + + val set_first : t -> first_byte:byte_pos -> first_line:line_pos -> t + (** [set_first l ~first_byte ~first_line] sets the the first position of + [l] to given values. *) + + val set_last : t -> last_byte:byte_pos -> last_line:line_pos -> t + (** [set_last l ~last_byte ~last_line] sets the last position of [l] + to given values. *) + + val to_first : t -> t + (** [to_first l] has both first and last positions set to [l]'s first + position. The range spans {!first_byte}. See also {!before}. *) + + val to_last : t -> t + (** [to_last l] has both first and last positions set to [l]'s last + position. The range spans {!last_byte}. See also {!after}. *) + + val before : t -> t + (** [before t] is the {{!is_empty}empty} text location starting at + {!first_byte}. *) + + val after : t -> t + (** [after t] is the empty {{!is_empty}empty} location starting at + [last_byte t + 1]; note that at the end of input this may be an + invalid byte {e index}. The {!first_line} and {!last_line} of the + result is [last_line t]. *) + + val span : t -> t -> t + (** [span l0 l1] is the span from the smallest byte position of [l0] and + [l1] to the largest byte position of [l0] and [l1]. The file path is + taken from the greatest byte position. *) + + val reloc : first:t -> last:t -> t + (** [reloc ~first ~last] uses the first position of [first], the + last position of [last] and the file of [last]. *) + + (** {2:fmt Formatting} *) + + val pp_ocaml : Format.formatter -> t -> unit + (** [pp_ocaml] formats text locations like the OCaml compiler. *) + + val pp_gnu : Format.formatter -> t -> unit + (** [pp_gnu] formats text locations according to the + {{:https://www.gnu.org/prep/standards/standards.html#Errors}GNU + convention}. *) + + val pp : Format.formatter -> t -> unit + (** [pp] is {!pp_gnu}. *) + + val pp_dump : Format.formatter -> t -> unit + (** [pp_dump] formats raw data for debugging. *) +end + +(** Node metadata. + + Holds text locations and custom, client-defined metadata. *) +module Meta : sig + + type id = int + (** The type for non-negative metadata identifiers. *) + + type t + (** The type for abstract syntax tree node metadata. *) + + val none : t + (** [none] is metadata for when there is none, its {!textloc} is + {!Textloc.none}. *) + + val make : ?textloc:Textloc.t -> unit -> t + (** [make textloc] is metadata with text location [textloc] (defaults + to {!Textloc.none}) and a fresh identifier (see {!val-id}). *) + + val id : t -> id + (** [id m] is an identifier for the metadata. Depending on how you + process the abstract syntax tree this may become non-unique but + the metadata values in an abstract syntax tree returned by + {!Doc.of_string} with [locs:true] have distinct identifiers. *) + + val textloc : t -> Textloc.t + (** [textloc m] is the source location of the syntactic construct [m] + is attached to. *) + + val with_textloc : keep_id:bool -> t -> Textloc.t -> t + (** [with_textloc ~keep_id m textloc] is metadata [m] with text location + [textloc] and a fresh id, unless [keep_id] is [true]. *) + + (** {1:preds Predicates and comparisons} *) + + val equal : t -> t -> bool + (** [equal m0 m1] is [true] if [m0] and [m1] have the same {!val-id}. + Note that they may have different {{!custom}metadata.} *) + + val compare : t -> t -> int + (** [compare m0 m1] is a total order on metadata {!val-id}s compatible with + {!equal}. *) + + val is_none : t -> bool + (** [is_none m] is [equal none m]. *) + + (** {1:custom Custom metadata} + + {b Warning.} Operating on custom metadata never changes + {!val-id}. It is possible for two meta values to have the same + id and different metadata. *) + + type 'a key + (** The type for custom metadata keys. *) + + val key : unit -> 'a key + (** [key ()] is a new metadata key. *) + + val mem : 'a key -> t -> bool + (** [mem k m] is [true] iff [k] is bound in [m]. *) + + val add : 'a key -> 'a -> t -> t + (** [add k v m] is [m] with key [k] bound to [v]. *) + + val tag : unit key -> t -> t + (** [tag k m] is [add k () m]. *) + + val remove : 'a key -> t -> t + (** [remove k m] is [m] with key [k] unbound in [v]. *) + + val find : 'a key -> t -> 'a option + (** [find k m] the value of [k] in [m], if any. *) +end + +type 'a node = 'a * Meta.t +(** The type for abstract syntax tree nodes. The data of type ['a] and its + metadata. *) + +(** Types for layout information. + + Values of these types do not represent document data. They are + used to recover document source layout informations when the + abstract syntax tree cannot represent them. + See {{!Cmarkit_commonmark.layout}source layout preservation} + for more information. + + For programmatically generated nodes, values of these types can be + left empty or filled with a desired layout. Except for the + {{!Cmarkit_commonmark}CommonMark renderer} these values are usually + ignored. *) +module Layout : sig + + type blanks = string + (** The type for blanks layout. This is only made of spaces and tabs. *) + + type nonrec string = string + (** The type for string layout. For example the art of thematic breaks + or code fences. *) + + type nonrec char = char + (** The type for character layout. For example the character used for + an emphasis or an unordered list marker. *) + + type count = int + (** The type for some kind of layout count. Usually a character + count. *) + + type indent = int + (** The type for block indentation. Mostly between 0-3. *) + + val string : ?meta:Meta.t -> string -> string node + (** [string s] is a layout string with meta data [meta] + (defaults to {!Meta.none}). *) + + val empty : string node + (** [empty] is [string ""]. *) +end + +(** Block lines. + + In CommonMark blocks, a "line" does not necessarily correspond to + a line in the source plain text. For example the lines of a + paragraph in a block quote are the lines stripped from the block + quote markers. We call the line resulting from stripping the + block structure preceeding a given block a {e block line}. *) +module Block_line : sig + + (** {1:lines Lines} *) + + type t = string node + (** The type for block lines. *) + + val to_string : t -> string + (** [to_string l] is (fst l). *) + + val list_textloc : t list -> Textloc.t + (** [list_textloc ls] is a text location spanning the lines [ls] + This is {!Textloc.none} on [[]]. *) + + val list_of_string : ?meta:Meta.t -> string -> t list + (** [list_of_string s] cuts [s] on newlines. [meta] is used for + all nodes, default to [Meta.none]. *) + + (** {1:tight_lines Tight lines} *) + + type tight = Layout.blanks * t + (** The type for tight block lines. A block line with its + initial blanks trimmed but kept for layout. *) + + val tight_to_string : tight -> string + (** [tight_to_string l] is [(fst (snd l))]. *) + + val tight_list_textloc : tight list -> Textloc.t + (** [tigh_list_textloc ls] is a text location spanning the lines [ls] + This is {!Textloc.none} on [[]]. *) + + val tight_list_of_string : ?meta:Meta.t -> string -> tight list + (** [list_of_string s] cuts [s] on newlines and computes the blanks + (except on the first line where they are part of the + data). [meta] is used for all nodes, default to [Meta.none]. *) + + (** {1:blank_lines Blank lines} *) + + type blank = Layout.blanks node + (** The type for blank block lines. *) +end + +(** Labels. + + Labels are used by + {{:https://spec.commonmark.org/0.30/#reference-link}reference links} to + refer to the {{!Label.definitions}definitions} of + {{:https://spec.commonmark.org/0.30/#link-reference-definitions} + link reference definitions}, + {{!Cmarkit.ext_footnote_def}footnote definitions} and your own + {{!Label.resolvers}interpretations}. *) +module Label : sig + + (** {1:label Labels} *) + + type key = string + (** The type for label keys. These are + {{:https://spec.commonmark.org/0.30/#link-label}link labels} + normalized for {{:https://spec.commonmark.org/0.30/#matches}matching}. *) + + type t + (** The type for {{:https://spec.commonmark.org/0.30/#link-label}link + labels}. *) + + val make : ?meta:Meta.t -> key:string -> Block_line.tight list -> t + (** [make key text] is a label with key [id] and unormalized text [text]. *) + + val with_meta : Meta.t -> t -> t + (** [with_meta m l] is [l] with meta [m]. *) + + val meta : t -> Meta.t + (** [meta k] is metadata for [k]. *) + + val key : t -> key + (** [key_id l] is the label's key. If [l] comes out of a parse this + [l]'s normalized {!text}. *) + + val text : t -> Block_line.tight list + (** [text l] is the text of [l]. *) + + val text_to_string : t -> string + (** [text_to_string l] is the lines of {!text} separated + by spaces. In contrast to {!val-key} this has not gone + throught {{:https://spec.commonmark.org/0.30/#matches}normalization}. + *) + + val compare : t -> t -> int + (** [compare l0 l1] is [String.compare (key l0) (key l1)]. *) + + (** {1:definitions Definitions} + + A label definition is the content referenced by its {!val-key}. + + Labels are defined in documents via footnotes and link reference + definitions. Additional label definitions can be added before + parsing starts by using the [defs] argument of + {!Doc.of_string}. They can also be manipulated and + created on the fly during parsing by using a + {{!resolvers}resolver}. *) + + type def = .. + (** The type for label definitions. + See for example {!Link_definition.extension-Def} or + {!Block.Footnote.extension-Def}. *) + + (** Label key maps. *) + module Map : Map.S with type key := key + + type defs = def Map.t + (** The type for label definitions. Maps label keys to their definition. *) + + (** {1:resolvers Resolvers} + + To have more control over the label definitions used in a + document, the [defs] argument of {!Doc.of_string} can be + specified to pre-populate the label definitions used during parsing; + for example with those of a previously parsed document. + + In addition the [resolver] argument can be specified to: + {ol + {- Alter or suppress label definitions made by link reference definitions + and footnote definitions. It can also be used to warn, by + side effect, on multiple label definitions.} + {- Alter, or suppress label references on reference links and images – + which happen after all label definitions have been made. You can + define the actual label that will be used for resolving + the reference to its definition.}} + + In particular 2. can be used to create synthetic label definitions + on undefined label references. This provides the ability to treat + the very liberal + {{:https://spec.commonmark.org/0.30/#link-label}link label} + syntax as a domain specific language of yours (e.g. for data binding). + + Note that parsing is not finished when resolvers are invoked + this is the reason why you don't get access to the definition's + data during resolution. + + See {{!resolver_example}an example}. *) + + type context = + [ `Def of t option * t (** Label definitions *) + | `Ref of [ `Link | `Image ] * t * t option (** Label references *) ] + (** The type for resolver contexts. See {!type-resolver}. *) + + type resolver = context -> t option + (** The type for resolvers. [context] is: + {ul + {- [`Def (prev, current)] when we just hit a + {{:https://spec.commonmark.org/0.30/#link-reference-definitions} + link reference definition} or + {{!Cmarkit.ext_footnote_def}footnote definition} that defines + the label [current]. If there is already a definition for + [current]'s {!val-key} it is provided in [prev] (whose {!meta} has + the location of the definition if you parse with locations). + If [None] is returned the [current] definition is ignored, + and definition [prev] (if any) is kept for the document. If + [Some l] is returned [l]'s key will be bound to the parsed + definition for [current] in {!Doc.defs} at the end of parsing. + The result of the resolver is stored in the abstract syntax tree and + available via {!Link_definition.defined_label} and + {!Block.Footnote.defined_label}.} + {- [`Ref (kind, ref, def)] when we just hit a link or image + referencing label [ref]. [def] is the label defining [ref]'s {!val-key} + in the document (if any). The result of the resolver is the label + stored for resolving the reference to its definition in the resulting + {!Inline.module-Link} node; + [None] means that [label] is undefined and the inline becomes + {!Inline.extension-Text} like in CommonMark.}} + + See {{!resolver_example}an example} and the {!default_resolver}. *) + + val default_resolver : resolver + (** [default_resolver] is the default resolver. + + This resolves according to the CommonMark specification. + The first label definition always takes over subsequent + ones and resolution is left untouched (i.e. a label has to be + defined in the document to be used): +{[ +let default_resolver = function +| `Def (None, l) -> Some l +| `Def (Some _, _) -> None (* Previous takes over *) +| `Ref (_, _, def) -> def +]} *) + + (** {1:resolver_example Resolver example} + + In this example we assume references to undefined labels denote + links to pages or media in our wiki and want to process them + them later via a {{!Mapper}tree transformation} or in a + {{!Cmarkit_renderer.example}renderer extension}. + + We devise a resolver to create synthetic labels on any undefined + label so that the CommonMark parser does not turn them into text. +{[ +let wikilink = Cmarkit.Meta.key () (* A meta key to recognize them *) + +let make_wikilink label = (* Just a placeholder label definition *) + let meta = Cmarkit.Meta.tag wikilink (Cmarkit.Label.meta label) in + Cmarkit.Label.with_meta meta label + +let with_wikilinks = function +| `Def _ as ctx -> Cmarkit.Label.default_resolver ctx +| `Ref (_, _, (Some _ as def)) -> def (* As per doc definition *) +| `Ref (_, ref, None) -> Some (make_wikilink ref) +]} + *) +end + +(** Link definitions. *) +module Link_definition : sig + + (** {1:layout Layout} *) + + type layout = + { indent : Layout.indent; (** Amount of indentation, [0] on inline links. *) + angled_dest : bool; (** [true] if destination is between [<â€Ļ>]. *) + before_dest : Block_line.blank list; (** Blanks to destination. *) + after_dest : Block_line.blank list; (** Blanks after destination. *) + title_open_delim : Layout.char; + (** Title open delimiter (['\"'], ['('], â€Ļ) *) + after_title : Block_line.blank list; + (** Blanks after title (inline links). *) } + (** The type for link reference layout. *) + + val layout_for_dest : string -> layout + (** [layout_for_dest d] computes a layout value for destination [d]. This + just determines if [angled_dest] needs to be [true]. *) + + (** {1:link_defs Link definitions} *) + + type t + (** The type for representing + {{:https://spec.commonmark.org/0.30/#link-reference-definitions} + link references definitions} and + {{:https://spec.commonmark.org/0.30/#inline-link}inline links}. *) + + val make : + ?layout:layout -> ?defined_label:Label.t option -> ?label:Label.t -> + ?dest:string node -> ?title:Block_line.tight list -> unit -> t + (** [make ()] is a link reference with given parameters. If [dest] is + given and [layout] is not, the latter is computed with + {!layout_for_dest}. [label] is a label if the link is defined + via a link reference definition. [defined_label] defaults to + [label]. *) + + val layout : t -> layout + (** [layout ld] is the layout of [ld]. *) + + val label : t -> Label.t option + (** [label ld] is [None] if this is a link definition for an inline + link. It is [Some l], if [ld] is a link reference + definition. [l] is the label as found in the text. The result + of the resolver is in {!defined_label}. *) + + val defined_label : t -> Label.t option + (** [defined_label ld] is the label determined by the {!Label.type-resolver} + for the link definition reference. The label as found + in the source text is in {!label}. If this is [None] either + it's a link definition for an inline link or the resolver deleted + the label definition. *) + + val dest : t -> string node option + (** [dest ld] is the link destination of [ld]. [None] means + there was no destination. CommonMark renders that as an empty + [href] in HTML. *) + + val title : t -> Block_line.tight list option + (** [title ld] is the title of the reference, if any. *) + + (** {1:labeldef As label definitions} *) + + type Label.def += Def of t node (** *) + (** A label definition for links. *) +end + +(** Inlines. + + {b Note.} Document data in inline nodes is always stored + {{:https://spec.commonmark.org/0.30/#backslash-escapes}unescaped} and + with {{:https://spec.commonmark.org/0.30/#entity-and-numeric-character-references}entity and character references} resolved. *) +module Inline : sig + + (** {1:inlines Inlines} *) + + type t = .. + (** The type for inlines. *) + + (** Autolinks. *) + module Autolink : sig + type t + (** The type for + {{:https://spec.commonmark.org/0.30/#autolink}autolinks}. *) + + val make : string node -> t + (** [autolink link] is an autolink for [link] + which must be a CommonMark + {{:https://spec.commonmark.org/0.30/#absolute-uri}absolute URI} + or a CommonMark + {{:https://spec.commonmark.org/0.30/#email-address}email + address}. *) + + val is_email : t -> bool + (** [is_email a] is [true] iff {!link}[ a] is + a CommonMark + {{:https://spec.commonmark.org/0.30/#email-address}email + address}. *) + + val link : t -> string node + (** [link a] is the CommonMark + {{:https://spec.commonmark.org/0.30/#absolute-uri}absolute URI} or + {{:https://spec.commonmark.org/0.30/#email-address}email address}. *) + end + + (** Hard and soft breaks *) + module Break : sig + + type type' = + [ `Hard (** {{:https://spec.commonmark.org/0.30/#hard-line-breaks} + Hard line break.} *) + | `Soft (** {{:https://spec.commonmark.org/0.30/#soft-line-breaks} + Soft line break.} *) ] + (** The type for types of line breaks. *) + + type t + (** The type for + {{:https://spec.commonmark.org/0.30/#hard-line-breaks}hard} + and + {{:https://spec.commonmark.org/0.30/#soft-line-breaks}soft} + line breaks. *) + + val make : + ?layout_before:Layout.string node -> ?layout_after:Layout.blanks node -> + type' -> t + (** [make type'] is a new break of type [type']. Layout values default + to {!Layout.empty}. *) + + val type' : t -> type' + (** [type' b] is the type of [b]. *) + + val layout_before : t -> Layout.string node + (** [layout_before b] is the layout before the newline, spaces + or possibly ['\'] for hard breaks. *) + + val layout_after : t -> Layout.blanks node + (** [layout_after] are blanks on the new {e block line}. *) + end + + (** Code spans. *) + module Code_span : sig + + type t + (** The type for + {{:https://spec.commonmark.org/0.30/#code-spans}code spans}. *) + + val make : backtick_count:Layout.count -> Block_line.tight list -> t + (** [make ~backtick_count code_layout] is a code span with given + parameters. + + {b Warning.} Nothing is made to ensure correctness of the + data, use {!of_string} to compute the right amount of + backticks. *) + + val of_string : ?meta:Meta.t -> string -> t + (** [of_string s] is a code span for [s]. [s] can start with or + include backticks; the appropriate minimal backtick count and + possible needed leading and trailing space are computed + accordingly. If [s] contains newlines, blanks after newlines + are treated as layout like during parsing. [meta] is used for + the lines of the resulting code layout (see {!code_layout}). *) + + val backtick_count : t -> Layout.count + (** [backtick_count cs] is the number of delimiting backticks. *) + + val code : t -> string + (** [code cs] computes from {!code_layout} the code in the span [cs]. *) + + val code_layout : t -> Block_line.tight list + (** [code_layout cs] is the code data in a form that allows layout + preservation. + + The actual code data is the tight block lines concatenated and + separated by space and if the result starts and ends with a + space and is not only made of spaces, these should be + dropped. The {!code} function does all that for you. *) + end + + (** Emphasis and strong emphasis. *) + module Emphasis : sig + + type inline := t + + type t + (** The type for + {{:https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis} + emphasis and strong emphasis}. *) + + val make : ?delim:Layout.char -> inline -> t + (** [make i] is an emphasis on [i]. [delim] is the delimiter + used it should be either ['*'] or ['_']. *) + + val inline : t -> inline + (** [inline e] is the emphasised inline. *) + + val delim : t -> Layout.char + (** [delim e] is the delimiter used for emphasis, should be + either ['*'] or ['_']. *) + end + + (** Links. *) + module Link : sig + + type inline := t + + type reference_layout = + [ `Collapsed + (** {{:https://spec.commonmark.org/0.30/#collapsed-reference-link} + Collapsed reference link} *) + | `Full + (** {{:https://spec.commonmark.org/0.30/#full-reference-link} + Full reference link} *) + | `Shortcut + (** {{:https://spec.commonmark.org/0.30/#shortcut-reference-link} + Shortcut reference link} *) ] + (** The type for reference link layouts. *) + + type reference = + [ `Inline of Link_definition.t node + (** {{:https://spec.commonmark.org/0.30/#inline-link}Inline link} *) + | `Ref of reference_layout * Label.t * Label.t + (** {{:https://spec.commonmark.org/0.30/#reference-link}Reference + links}. First label is the label of the reference, second + label is the label of the referenced definition. *) ] + (** The type for references. *) + + type t + (** The type for {{:https://spec.commonmark.org/0.30/#links}links} + and {{:https://spec.commonmark.org/0.30/#images}images}. *) + + val make : inline -> reference -> t + (** [make i ref] is a link for text [i] and link reference [ref]. + + If you plan to render to CommonMark and this is not an inline + reference you should include a + {!Block.extension-Link_reference_definition} (or + {!Block.extension-Ext_footnote_definition}) for [ref] + somewhere in the document, otherwise the reference will not + parse back. *) + + val text : t -> inline + (** [text l] is the text of the link. *) + + val reference : t -> reference + (** [reference l] is the reference of the link. *) + + val referenced_label : t -> Label.t option + (** [referenced_label l] is the label referenced by the label of [l]. + This is the second label of [`Ref _] or [None] on inline + references.*) + + val reference_definition : Label.defs -> t -> Label.def option + (** [reference_definition defs l] is the definition of [l]'s + reference. If [l] is an [`Inline] reference this returns its + link definition wrapped in a {!Link_definition.Def}. If [l] is + [`Ref] this looks up the {!referenced_label} in [defs]. *) + + val is_unsafe : string -> bool + (** [is_unsafe url] is [true] if [url] is deemed unsafe. This is + the case if [url] starts with a caseless match of + [javascript:], [vbscript:], [file:] or [data:] except if + [data:image/{gif,png,jpeg,webp}]. These rules were taken from + {{:https://github.com/commonmark/cmark}[cmark]}, the C + reference implementation of CommonMark and are likely + incomplete. If you are trying to prevent XSS you should + post-process rendering outputs with a dedicated HTML sanitizer. *) + end + + (** Raw HTML. *) + module Raw_html : sig + + type t = Block_line.tight list + (** The type for {{:https://spec.commonmark.org/0.30/#raw-html}inline raw + HTML} (can span multiple lines). + + {b Warning.} If you create HTML blocks using + {!Block_line.tight_list_of_string} you should make sure the + resulting lines satisfy the contraints of CommonMark raw HTML + (one way is to parse them instead). *) + end + + (** Text. *) + module Text : sig + type t = string + (** The type for + {{:https://spec.commonmark.org/0.30/#textual-content}textual content}. + + Normally these strings should not contain newlines. This can + however happen if the source had newlines as + {{:https://spec.commonmark.org/0.30/#entity-and-numeric-character-references}character references}. *) + end + + type t += + | Autolink of Autolink.t node + | Break of Break.t node + | Code_span of Code_span.t node + | Emphasis of Emphasis.t node + | Image of Link.t node + | Inlines of t list node (** Splicing *) + | Link of Link.t node + | Raw_html of Raw_html.t node + | Strong_emphasis of Emphasis.t node + | Text of Text.t node (** *) + (** The + CommonMark {{:https://spec.commonmark.org/0.30/#inlines}inlines}. *) + + val empty : t + (** [empty] is [Inlines ([], Meta.none)]. *) + + (** {1:exts Extensions} + + See the description of {{!Cmarkit.extensions}extensions}. *) + + (** Strikethrough. *) + module Strikethrough : sig + type inline := t + + type t + (** The type for {{!Cmarkit.ext_strikethrough}strikethrough}. *) + + val make : inline -> t + (** [make i] is [i] with a strikethrough. *) + + val inline : t -> inline + (** [inline s] is the inline with a strikethrough. *) + end + + (** Math span. *) + module Math_span : sig + type t + (** The type for {{!Cmarkit.ext_math_inline}math spans}. *) + + val make : display:bool -> Block_line.tight list -> t + (** [make tex_layout] is an inline or display math span with given + T{_E}X code. *) + + val display : t -> bool + (** [display ms] is [true] if the span should be on its own line. *) + + val tex : t -> string + (** [tex ms] is the inline math T{_E}X code of [ms] *) + + val tex_layout : t -> Block_line.tight list + (** [tex_layout ms] is inline math T{_E}X code in a form that + allows layout preservation. + + The acual code data is the tight block lines concatenated and + separated by space. The {!tex} function does that for you. *) + end + + type t += + | Ext_strikethrough of Strikethrough.t node + | Ext_math_span of Math_span.t node (** *) + (** The supported inline extensions. These inlines are only parsed when + {!Doc.of_string} is called with [strict:false]. *) + + (** {1:funs Functions} *) + + val is_empty : t -> bool + (** [is_empty i] is [true] if [i] is [Inline ([], _)] or [Text ("", _)]. *) + + val meta : ?ext:(t -> Meta.t) -> t -> Meta.t + (** [meta ~ext i] is the metadata of [i]. + + [ext] is called on cases not defined in this module. The default + raises [Invalid_argument]. *) + + val normalize : ?ext:(t -> t) -> t -> t + (** [normalize i] has the same content as [i] but is such that for any + occurence of [Inlines (is, _)] in [i] the list of inlines [is]: + {ol + {- [is] is not a singleton list.} + {- Has no two consecutive [Text _] cases. If that occurs the texts are + concatenated, the meta of the first one is kept and its text + location extended to include the second one.} + {- Has no [Inlines _] case. The meta is dropped and the nested + inlines are spliced in [is] where the case occurs.}} + + [ext] is called on cases not defined in this module. The default + raises [Invalid_argument]. *) + + val to_plain_text : + ?ext:(break_on_soft:bool -> t -> t) -> break_on_soft:bool -> + t -> string list list + (** [to_plain_text ~ext ~break_on_soft i] has the plain text of [i] + as a sequence of lines represented by a list of strings to be + concatenated. If [break_on_soft] is [true] soft line breaks + are turned into hard line breaks. To turn the result [r] + in a single string apply: + + {[ String.concat "\n" (List.map (String.concat "") r) ]} + + [ext] is called on cases not defined in this module, it should + compile extensions to one of these cases. The default raises + [Invalid_argument]. *) + + val id : ?buf:Buffer.t -> ?ext:(break_on_soft:bool -> t -> t) -> t -> string + (** [id ?buf i] derives an identifier for inline [i] using [buf] as + scratch space (one is created if unspecified). + + This converts [i] to plain text using {!Inline.to_plain_text}, + then applies the same + {{:https://spec.commonmark.org/0.30/#matches}normalization} + performed on labels, maps spaces to character [-] (U+002D), + drops {{:https://spec.commonmark.org/0.30/#unicode-punctuation-character} + Unicode punctuation characters} except [-] (U+002D) and [_] ([U+005F]). + + [ext] is given to {!Inline.to_plain_text}. *) +end + +(** Blocks. *) +module Block : sig + + (** {1:blocks Blocks} *) + + type t = .. + (** The type for blocks. *) + + (** Blank lines. *) + module Blank_line : sig + type t = Layout.blanks + (** The type for + {{:https://spec.commonmark.org/0.30/#blank-lines}blank lines}. + These can be ignored during rendering, they are kept for layout. *) + end + + (** Block quotes. *) + module Block_quote : sig + + type block := t + + type t + (** The type for {{:https://spec.commonmark.org/0.30/#block-quotes} + block quotes}. *) + + val make : ?indent:Layout.indent -> block -> t + (** [make b] quotes block [b]. *) + + val indent : t -> Layout.indent + (** [indent bq] is the indentation to the block quote + marker found on the first line. *) + + val block : t -> block + (** [block bq] is the quoted block. *) + end + + (** Code blocks. *) + module Code_block : sig + + type fenced_layout = + { indent : Layout.indent; (** Indent to opening fence *) + opening_fence : Layout.string node; + (** Opening fence (before info string). *) + closing_fence : Layout.string node option; + (** Closing fence (if any). *) } + (** The type for fenced code block layouts. *) + + type layout = [ `Indented | `Fenced of fenced_layout ] + (** The type for code block layouts. *) + + type t + (** The type for + {{:https://spec.commonmark.org/0.30/#indented-code-block} + indented} and + {{:https://spec.commonmark.org/0.30/#fenced-code-blocks}fenced} + code blocks. *) + + val make : + ?layout:layout -> ?info_string:string node -> Block_line.t list -> t + (** [make ?layout ?info_string code] is a code block with given + parameters. [layout] defaults to a fenced layout. If [layout] + is [`Indented] and an [info_string] is provided, the layout is + switched to [`Fenced]. *) + + val layout : t -> layout + (** [layout cb] is the layout of [cb]. *) + + val info_string : t -> string node option + (** [info_string cb] is the + {{:https://spec.commonmark.org/0.30/#info-string}info string} + of [cb], if any. *) + + val code : t -> Block_line.t list + (** [code cb] are the code lines of [cb]. *) + + val make_fence : t -> Layout.char * Layout.count + (** [make_fence cb] is a fence character and count suitable for [cb]. *) + + val language_of_info_string : string -> (string * string) option + (** [language_of_info_string s] extracts a (non-empty) language, + the first word of [s] and a trimmed remainder. Assumes [s] is + {!String.trim}ed which is what {!info_string} gives you. *) + end + + (** Headings. *) + module Heading : sig + + type atx_layout = + { indent : Layout.indent; (** Indent to ['#']. *) + after_opening : Layout.blanks; (** Blanks after ['#']. *) + closing : Layout.string; (** Closing sequence of ['#'] and blanks. *) } + (** The type for ATX heading layout. *) + + type setext_layout = + { leading_indent : Layout.indent; (** Of heading first line. *) + trailing_blanks : Layout.blanks; (** Of heading last line. *) + underline_indent : Layout.indent; (** Indentation of underline. *) + underline_count : Layout.count node; (** Underline char count. *) + underline_blanks : Layout.blanks; (** Underline trailing blanks. *) } + (** The type for setext heading layout. *) + + type layout = [ `Atx of atx_layout | `Setext of setext_layout ] + (** The type for heading layouts. *) + + type id = + [ `Auto of string (** Automatically derived. *) + | `Id of string (** Explicitely specified in another way. *) ] + (** The type for heading identifiers. This notion does not + exist in CommonMark. *) + + type t + (** The type for {{:https://spec.commonmark.org/0.30/#atx-headings} + ATX} and {{:https://spec.commonmark.org/0.30/#setext-headings}Setext} + headings. *) + + val make : ?id:id -> ?layout:layout -> level:int -> Inline.t -> t + (** [make ~level text] is a heading with given + parameters. [layout] defaults to [`Atx] so you should make + sure [text] has no breaks. [level] is clamped to 1-6 or 1-2 + depending on [layout]. [id] is an identifier for the heading. *) + + val layout : t -> layout + (** [layout h] is the layout of [h]. *) + + val level : t -> int + (** [level h] is the level of [h], from [1] to [6]. *) + + val inline : t -> Inline.t + (** [inline h] is the contents of the heading. *) + + val id : t -> id option + (** [id h] is the heading identifier (if any). Can be automatically + derived at parse time from the heading {!inline} if + {{!Doc.of_string}[heading_auto_ids:true]}. + Can also be derived later via {!Inline.id}. *) + end + + (** HTML blocks. *) + module Html_block : sig + type t = Block_line.t list + (** The type for {{:https://spec.commonmark.org/0.30/#html-blocks}HTML + blocks}. + + {b Warning.} If you create HTML blocks using + {!Block_line.list_of_string} you should make sure the resulting + lines satisfy the contraints of CommonMark HTML blocks. + (one way is to parse them instead). *) + end + + (** List items. *) + module List_item : sig + + type block := t + + type t + (** The type for {{:https://spec.commonmark.org/0.30/#list-items}list + items}. *) + + val make : + ?before_marker:Layout.indent -> ?marker:Layout.string node -> + ?after_marker:Layout.indent -> ?ext_task_marker:Uchar.t node -> + block -> t + (** [make b] is a list item for block [b] with given parameters, see + corresponding accessors for semantics. *) + + val block : t -> block + (** [block i] is the contents of [i]. *) + + val before_marker : t -> Layout.indent + (** [before_marker i] is the indentation before the list marker. *) + + val marker : t -> Layout.string node + (** [marker i] is the item marker layout of [i]. *) + + val after_marker : t -> Layout.indent + (** [after_marker i] is the indentation after the marker. *) + + (** {1:ext_task_item Task items} *) + + val ext_task_marker : t -> Uchar.t node option + (** [ext_task_marker i] is a task marker, only occurs in non-strict + parsing mode, see see extension + {{!Cmarkit.ext_list_task_items}list task items}.*) + + val task_status_of_task_marker : + Uchar.t -> [ `Unchecked | `Checked | `Cancelled | `Other of Uchar.t ] + (** [task_status_of_task_marker u] is a status for marker [u], see extension + {{!ext_list_task_items}list task item}. *) + end + + (** Lists. *) + module List' : sig + + type type' = + [ `Unordered of Layout.char (** with given marker. *) + | `Ordered of int * Layout.char + (** starting at given integer, markers ending with given character + ([')'] or ['.']). *) ] + (** The type for list types. *) + + type t + (** The type for {{:https://spec.commonmark.org/0.30/#lists}lists}. *) + + val make : ?tight:bool -> type' -> List_item.t node list -> t + (** [make ?tight t items] is a list with given parameters. + tight default to [true], but should be computed from [items] + in practice. *) + + val type' : t -> type' + (** [type' l] is the list type of [l]. *) + + val tight : t -> bool + (** [tight l] is [true] iff the list is + {{:https://spec.commonmark.org/0.30/#tight}tight}. *) + + val items : t -> List_item.t node list + (** [items l] are the items of [l]. *) + end + + (** Paragraphs. *) + module Paragraph : sig + + type t + (** The type for + {{:https://spec.commonmark.org/0.30/#paragraphs}paragraphs}. *) + + val make : + ?leading_indent:Layout.indent -> ?trailing_blanks:Layout.blanks -> + Inline.t -> t + (** [make inline] is a paragraph with given parameters. *) + + val inline : t -> Inline.t + (** [inline p] is the paragraph content. *) + + val leading_indent : t -> Layout.indent + (** [leading_indent p] is the indent on the first line (0-3). *) + + val trailing_blanks : t -> Layout.blanks + (** [trailing_blanks] are trailing blanks on the last line. *) + end + + (** Thematic breaks. *) + module Thematic_break : sig + + type t + (** The type for {{:https://spec.commonmark.org/0.30/#thematic-break} + thematic breaks}. *) + + val make : ?indent:Layout.indent -> ?layout:Layout.string -> unit -> t + (** [make ()] is a thematic break with given parameters. [layout] + defaults to ["---"]. *) + + val indent : t -> Layout.indent + (** [indent t] is the thematic break indent (0-3). *) + + val layout : t -> Layout.string + (** [layout t] is the thematic break art, including trailing blanks. *) + end + + type t += + | Blank_line of Blank_line.t node + | Block_quote of Block_quote.t node + | Blocks of t list node (** Splicing *) + | Code_block of Code_block.t node + | Heading of Heading.t node + | Html_block of Html_block.t node + | Link_reference_definition of Link_definition.t node + (** {{:https://spec.commonmark.org/0.30/#link-reference-definitions} + Link reference definitions}, kept for layout *) + | List of List'.t node + | Paragraph of Paragraph.t node + | Thematic_break of Thematic_break.t node + (** {{:https://spec.commonmark.org/0.30/#paragraphs}Thematic break} *) + (** The CommonMark {{:https://spec.commonmark.org/0.30/#leaf-blocks}leaf} + and {{:https://spec.commonmark.org/0.30/#container-blocks}container} + blocks. *) + + val empty : t + (** [empty] is [Blocks ([], Meta.none)]. *) + + (** {1:exts Extensions} + + See the description of {{!Cmarkit.extensions}extensions}. *) + + (** Tables. *) + module Table : sig + + type align = [ `Left | `Center | `Right ] + (** The type for column alignments. *) + + type sep = align option * Layout.count + (** The type for separators. The column aligment and the number of + [-] for layout preservation. *) + + type cell_layout = Layout.blanks * Layout.blanks + (** The type for cell layout, initial and trailing blanks. *) + + type row = + [ `Header of (Inline.t * cell_layout) list + | `Sep of sep node list + | `Data of (Inline.t * cell_layout) list ] + (** The type for rows. The lists only have entries for columns as + found in rows in the document. You need to pad them on the + right with more columns to reach the table's {!col_count}. *) + + type t + (** The type for {{!Cmarkit.ext_tables}tables}. *) + + val make : ?indent:Layout.indent -> (row node * Layout.blanks) list -> t + (** [make rows] is a table row [rows]. *) + + val indent : t -> Layout.indent + (** [indent t] is the indentation to the first pipe found on the + first row. *) + + val col_count : t -> int + (** [col_count t] is the number of columns in the table. *) + + val rows : t -> (row node * Layout.blanks) list + (** [rows t] are the table's rows. *) + end + + (** Footnotes. *) + module Footnote : sig + + (** {1:footnotes Footnotes} *) + + type block := t + + type t + (** The type for {{!Cmarkit.ext_footnotes}footnotes}. *) + + val make : + ?indent:Layout.indent -> ?defined_label:Label.t option -> Label.t -> + block -> t + (** [make label b] is a footnote for label [label] with content [b]. + [defined_label] defaults to [label]. *) + + val indent : t -> Layout.indent + (** [indent fn] is the indentation to the label found on the first line. *) + + val label : t -> Label.t + (** [label fn] is the footnote definition label as found in the + source text. It includes the [^]. See also {!defined_label}. *) + + val defined_label : t -> Label.t option + (** [defined_label fn] is the label determined by the {!Label.type-resolver} + for the footnote. The label as found in the source text is in {!label}. + If this is [None] the resolver deleted the label definition. *) + + val block : t -> block + (** [block fn] is the footnote content. *) + + (** {1:labeldef As label definitions} *) + + type Label.def += Def of t node (** *) + (** A label definition for footnotes. *) + end + + type t += + | Ext_math_block of Code_block.t node + (** {{!Cmarkit.ext_math_display}display math}*) + | Ext_table of Table.t node (** *) + | Ext_footnote_definition of Footnote.t node (** *) + (** The supported block extensions. These blocks are only parsed when + {!Doc.of_string} is called with [strict:false]. *) + + (** {1:funs Functions on blocks} *) + + val meta : ?ext:(t -> Meta.t) -> t -> Meta.t + (** [meta ~ext b] is the metadata of [b]. + + [ext] is called on cases not defined in this module. The default + raies [Invalid_argument]. *) + + val normalize : ?ext:(t -> t) -> t -> t + (** [normalize b] has the same content as [b] but is such that for any + occurence of [Blocks (bs, _)] in [b] the list of blocks [bs]: + + {ol + {- [bs] is not a singleton list.} + {- Has no [Blocks _] case. The meta is dropped and the nested + blocks are spliced in [bs] where the case occurs.}} + + [ext] is called on cases not defined in this module. The default raises + [Invalid_argument]. *) + + val defs : + ?ext:(Label.defs -> t -> Label.defs) -> ?init:Label.defs -> t -> + Label.defs + (** [defs b] collects [b]'s {!Link_reference_definition} and + {!Ext_footnote_definition} and for those that have a label + definition (see {!Link_definition.defined_label} and + {!Footnote.defined_label}) + adds them to [init] (defaults to {!Label.Map.empty}). + + [ext] is called on cases not defined in this module. The default + raises [Invalid_argument]. *) +end + +(** Documents (and parser). *) +module Doc : sig + + (** {1:docs Documents} *) + + type t + (** The type for CommonMark documents. *) + + val nl : t -> Layout.string + (** [nl d] is the first newline found in the text during parsing + or ["\n"] if there was none. *) + + val block : t -> Block.t + (** [block d] is the document's contents as a block. *) + + val defs : t -> Label.defs + (** [defs d] are the label definitions resulting from parsing [d]. + The result depends on the label definitions found in the + source and the [defs] and [resolver] values specified on + {!Doc.of_string}. *) + + val make : ?nl:Layout.string -> ?defs:Label.defs -> Block.t -> t + (** [make ~nl ~defs b] is a document for block [b] with newline + [nl] (defaults to ["\n"]), label definition [defs] + (defaults to {!Label.Map.empty}). *) + + val empty : t + (** [empty] is an empty document. *) + + (** {1:parsing Parsing} *) + + val of_string : + ?defs:Label.defs -> ?resolver:Label.resolver -> ?nested_links:bool -> + ?heading_auto_ids:bool -> ?layout:bool -> ?locs:bool -> + ?file:Textloc.fpath -> ?strict:bool -> string -> t + (** [of_string md] is a document from the UTF-8 encoded CommonMark + document [md]. + + {ul + {- If [strict] is [true] (default) the CommonMark specification is + followed. If [false] these {{!extensions}extensions} are enabled.} + {- [file] is the file path from which [s] is assumed to have been read + (defaults to {!Textloc.file_none}), used in the {!Textloc.t} + values iff [locs] is [true].} + {- If [locs] is [true] locations are stored in nodes of the abstract + syntax tree in individually {{!Meta.val-id}identified} {!Meta.t} + values. If [false] (default) node meta values are all {!Meta.none} + whose text location is {!Textloc.none}.} + {- If [layout] is [false] (default) layout values cannot be relied + upon and do not in general represent source layout, some fields + may be normalized. The {!Block.extension-Blank_line}, + {!Block.extension-Link_reference_definition}, + {!Block.extension-Ext_footnote_definition}, + layout block values are present in the result regardless of this + parameter.} + {- If [heading_auto_ids] is [true] (defaults to [false]) then [`Auto] + {{!Block.Heading.type-id}heading identifiers} are generated + during parsing from the header text + with {!Inline.id} (at that point no [ext] argument is needed) + and made accessible in {!Block.Heading.val-id}. Note that the identifiers + may not be unique, we leave it to the backends to handle this + problem.} + {- If [nested_links] is [true] (defaults to [false]) there is no + restriction on having links in link text, which is forbidden by + CommonMark and HTML. This can be useful for + embedding DSLs in link labels or destinations. Note that image + links already allow link nesting as per CommonMark + specification.} + {- If [resolver] is provided this is used resolve label definitions + and references. See {{!Label.resolvers}here} for details. Defaults to + {!Label.default_resolver}.} + {- If [defs] adds these label definitions to the document + (defaults to {!Label.Map.empty}). Think of them + as being prepended to [md]. If [resolver] is + {!Label.default_resolver}, these take over the same labels + defined in [md] (first definition takes over in CommonMark).}} + + UTF-8 decoding errors and U+0000 are turned into {!Uchar.rep} + characters. Inlines of the result should be {!Inline.normalize}d. + Blocks of the result should be {!Block.normalize}d. + + {b Note.} For simple renders parsing with [layout:false] and + [locs:false] is generally faster; having these to [true] + allocates quite a bit. *) + + (** {1:versions Versions} *) + + val unicode_version : string + (** [unicode_version] is the Unicode version known to {!of_string}. *) + + val commonmark_version : string + (** [commonmark_version] is the CommonMark version known to {!of_string}. *) +end + +(** {1:maps_and_folds Maps and folds} *) + +(** Abstract syntax tree mappers. + + Mappers help with pushing abstract syntax tree transformations in every + node with a minimal amount of code by defaulting the cases you + don't handle. The default map maps leaves to themselves and + otherwise propagates the map to all childrens. + + This map has the form of {!List.filter_map}, however it is akin + to {!List.concat_map} as it allows: + + {ul + {- Node deletion by mapping to [None]} + {- Node transformation by mapping to [Some _]} + {- Node expansion by mapping to [Some (Inlines _)] or [Some (Blocks _)]}} + + See an {{!Mapper.example}example}. *) +module Mapper : sig + + (** {1:results Map results} *) + + type 'a filter_map = 'a option + (** The type for maps. [None] is for node deletion. [Some n] is a map + to [n]. *) + + type 'a result = + [ `Default (** Do the default map. *) | `Map of 'a filter_map ] + (** The type for mapper results. *) + + val default : 'a result + (** [default] is [`Default]. *) + + val delete : 'a result + (** [delete] is [`Map None]. *) + + val ret : 'a -> 'a result + (** [ret v] is [`Map (Some v)]. *) + + (** {1:mappers Mappers} *) + + type t + (** The type for abstract syntax tree mappers. *) + + type 'a map = t -> 'a -> 'a filter_map + (** The type for maps on values of type ['a]. *) + + type 'a mapper = t -> 'a -> 'a result + (** The type for mappers on values of type ['a]. + + This is what you specify. Return [`Default] if you are not + interested in handling the given case. Use {!map_inline} or + {!map_block} with the given mapper if you need to call the + mapper recursively. *) + + val make : + ?inline_ext_default:Inline.t map -> ?block_ext_default:Block.t map -> + ?inline:Inline.t mapper -> ?block:Block.t mapper -> unit -> t + (** [make ?inline ?block ()] is a mapper using [inline] and [block] + to map the abstract syntax tree. Both default to [fun _ _ -> `Default]. + + The mapper knows how to default the built-in abstract syntax + tree and the built-in {{!extensions}extensions}. It maps + them in document and depth-first order. + + If you extend the abstract syntax tree you need to indicate how to default + these new cases by providing [inline_ext_default] or + [block_ext_default] functions. By default these functions raise + [Invalid_argument]. *) + + (** {1:mapping Mapping} *) + + val map_inline : Inline.t map + (** [map_inline m i] maps [i] with [m]. *) + + val map_block : Block.t map + (** [map_block m b] maps [b] with [m]. *) + + val map_doc : t -> Doc.t -> Doc.t + (** [map_doc m d] maps [Doc.block d] with [m]. If the document + block maps to [None] is replaced by {!Block.empty}. + + {b Warning unstable.} The following may change in the future. + This function also maps the blocks present {!Block.Footnote.Def} + label definitions but will not map inline or block data in + {!Label.def} cases unknown to [Cmarkit]. If the block maps to + [None] for the footnote it is replaced by {!Block.empty}. + + Also note that if these label definitions were defined in [d]'s + abstract syntax tree, they will also already be + mapped in {!Block.Link_reference_definition} and + {!Block.Ext_footnote_definition} cases. It is possible to collect + these mapped definitions via {!Block.defs} on the resulting + document's block. *) + + (** {1:accessors Accessors} *) + + val inline_mapper : t -> Inline.t mapper + (** [inline m] is the inline mapper of [m]. *) + + val block_mapper : t -> Block.t mapper + (** [block m] is the block mapper of [m]. *) + + val inline_ext_default : t -> Inline.t map + (** [inline_ext_default m] is the inline extensions defaulter of [m] *) + + val block_ext_default : t -> Block.t map + (** [block_ext_default m] is the block extensions defaulter of [m]. *) + + (** {1:example Example} + + This example sets all code blocks of document [doc] without info string + to [lang]. + {[ +let set_unknown_code_block_lang ~lang doc = + let open Cmarkit in + let default = lang, Meta.none in + let block m = function + | Block.Code_block (cb, meta) + when Option.is_none (Block.Code_block.info_string cb) -> + let layout = Block.Code_block.layout cb in + let code = Block.Code_block.code cb in + let cb = Block.Code_block.make ~layout ~info_string:default code in + Mapper.ret (Block.Code_block (cb, meta)) + | _ -> + Mapper.default (* let the mapper thread the map *) + in + let mapper = Mapper.make ~block () in + Mapper.map_doc mapper doc +]} + *) +end + +(** Abstract syntax tree folders. + + Folders help with pushing abstract syntax tree folds in every node + with a minimal amount of code by defaulting the cases you don't handle. + The default fold returns the accumulator unchanged on leaves and otherwise + propagates the fold to all children. + + See an {{!Folder.example}example}. *) +module Folder : sig + + (** {1:results Fold results} *) + + type 'a result = [ `Default (** Do the default fold *) | `Fold of 'a ] + (** The type for folder results. The [`Default] case indicates the folder + to perform the default fold. *) + + val default : 'a result + (** [default] is [`Default]. *) + + val ret : 'a -> 'a result + (** [ret v] is [`Fold v]. *) + + (** {1:folders Folders} *) + + type 'a t + (** The type for abstract syntax tree folders with result values of type + ['a]. *) + + type ('a, 'b) fold = 'b t -> 'b -> 'a -> 'b + (** The type for folds on values of type ['a]. *) + + type ('a, 'b) folder = 'b t -> 'b -> 'a -> 'b result + (** The type for folders on value of type ['a]. + + This is what you specify. Return [`Default] if you are not + interested in handling the given case. Use {!fold_inline} or + {!fold_block} with the given folder if you need to call the + folder recursively. *) + + val make : + ?inline_ext_default:(Inline.t, 'a) fold -> + ?block_ext_default:(Block.t, 'a) fold -> + ?inline:(Inline.t, 'a) folder -> ?block:(Block.t, 'a) folder -> unit -> 'a t + (** [make ?inline ?block ()] is a folder using [inline] and [block] + to fold the abstract syntax tree. Both default to + [fun _ _ _ -> `Default]. + + The folder knows how to default the built-in abstract syntax tree + and the built-in {{!extensions}extensions}. It folds + them in document and depth-first order. + + If you extend the abstract syntax tree you need to indicate how + to default these new cases by providing [inline_ext_default] or + [block_ext_default] functions. By default these functions raise + [Invalid_argument]. *) + + (** {1:folding Folding} *) + + val fold_inline : 'a t -> 'a -> Inline.t -> 'a + (** [fold_inline f acc i] folds [i] with [f] starting with [acc]. *) + + val fold_block : 'a t -> 'a -> Block.t -> 'a + (** [fold_block f acc b] folds [b] with [f] starting with [acc]. *) + + val fold_doc : 'a t -> 'a -> Doc.t -> 'a + (** [fold_doc f acc d] folds [Doc.block d] with [f] starting with [acc]. + + {b Warning.} Blocks present in [d]'s {!Doc.defs} is not folded + over. Note however that if these definitions were defined by + [d]'s abstract syntax tree, they will already have been folded + over on {!Block.Link_reference_definition} and + {!Block.Ext_footnote_definition} cases. *) + + (** {1:acesssors Accessors} *) + + val inline_folder : 'a t -> (Inline.t, 'a) folder + (** [inline_folder f] is the inline folder of [f]. *) + + val block_folder : 'a t -> (Block.t, 'a) folder + (** [block_folder f] is the block folder of [f]. *) + + val inline_ext_default : 'a t -> (Inline.t, 'a) fold + (** [inline_ext_default f] is the inline extension defaulter of [f]. *) + + val block_ext_default : 'a t -> (Block.t, 'a) fold + (** [block_ext_default f] is the block extension defaulter of [f]. *) + + (** {1:example Example} + + This example collects the languages present in the code blocks + of a document. +{[ +let code_block_langs doc = + let open Cmarkit in + let module String_set = Set.Make (String) in + let block m acc = function + | Block.Code_block (cb, _) -> + let acc = match Block.Code_block.info_string cb with + | None -> acc + | Some (info, _) -> + match Block.Code_block.language_of_info_string info with + | None -> acc + | Some (lang, _) -> String_set.add lang acc + in + Folder.ret acc + | _ -> + Folder.default (* let the folder thread the fold *) + in + let folder = Folder.make ~block () in + let langs = Folder.fold_doc folder String_set.empty doc in + String_set.elements langs +]} *) +end + +(** {1:extensions Extensions} + + For some documents, bare CommonMark just misses it. The extensions + are here to make it hit the mark. To enable them use + {!Doc.of_string} with [strict:false]. + + Please note the following: + {ol + {- There is no plan to provide an extension mechanism at the + parsing level. A lot can already be achieved by using + {{!Label.resolvers}reference resolvers}, + abusing code fences, post-processing the abstract syntax tree, or + {{!Cmarkit_renderer.example}extending} the renderers.} + {- In order to minimize dialects and extension interaction + oddities, there is no plan to allow to selectively + enable extensions.} + {- If one day the CommonMark specification standardizes a set + of extensions. [Cmarkit] will support those.} + {- In the short term, there is no plan to support more extensions than + those that are listed here.}} + + {2:ext_strikethrough Strikethrough} + + According to {{:https://pandoc.org/MANUAL.html#strikeout}[pandoc]}. + + {v Strikethrough your ~~perfect~~ imperfect thoughts. v} + + Inline text delimited between two [~~] gets into an + {!Inline.extension-Ext_strikethrough} node. + + The text delimited by [~~] cannot start or end with + {{:https://spec.commonmark.org/0.30/#unicode-whitespace-character} + Unicode whitespace}. When a closer can close multiple openers, the + neareast opener is closed. Strikethrough inlines can be nested. + + {2:ext_math Math} + + According to a mix of + {{:https://pandoc.org/MANUAL.html#extension-tex_math_dollars} + [pandoc]}, {{:https://docs.gitlab.com/ee/user/markdown.html#math}GLFM}, + {{:https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/writing-mathematical-expressions}GFM}. + + {3:ext_math_inline Inline math} + + {v This is an inline $\sqrt(x - 1)$ math expression. v} + + Inline text delimited between [$] gets into an + {!Inline.extension-Ext_math_span} node. + + The text delimited by [$] cannot start and end with + {{:https://spec.commonmark.org/0.30/#unicode-whitespace-character} + Unicode whitespace}. Inline math cannot be nested, after an opener + the nearest (non-escaped) closing delimiter matches. Otherwise it + is parsed in essence like a + {{:https://spec.commonmark.org/0.30/#code-spans}code span}. + + {3:ext_math_display Display math} + + {v +It's better to get that $$ \left( \sum_{k=1}^n a_k b_k \right)^2 $$ +on its own line. A math block may also be more convenient: + +```math +\left( \sum_{k=1}^n a_k b_k \right)^2 < \Phi +``` +v} + + Inline text delimited by [$$] gets into a + {!Inline.extension-Ext_math_span} with the + {!Inline.Math_span.display} property set to [true]. Alternatively + code blocks whose + {{!Block.Code_block.language_of_info_string}language} is [math] + get into in {!Block.Ext_math_block} blocks. + + In contrast to [$], the text delimited by [$$] can start and end + with whitespace, however it can't contain a blank line. Display + math cannot be nested, after an opener the nearest (non-escaped) + closing delimiter matches. Otherwise it's parsed in essence like + a {{:https://spec.commonmark.org/0.30/#code-spans}code span}. + + {2:ext_list_task_items List task items} + + According to a mix of + {{:https://github.com/mity/md4c/blob/master/test/tasklists.txt}md4c}, + {{:https://docs.gitlab.com/ee/user/markdown.html#task-lists}GLFM}, + {{:https://github.github.com/gfm/#task-list-item}GFM} and personal + ad-hoc brewery. + +{v +* [ ] That's unchecked. +* [x] That's checked. +* [~] That's cancelled. +v} + + If a list item starts with up to three space, followed by followed + by [\[], a single Unicode character, [\]] and a space (the space + can be omitted if the line is empty, but subsequent indentation + considers there was one). The Unicode character gets stored in + {!Block.List_item.ext_task_marker} and counts as one column + regardless of the character's render width. The task marker + including the final space is considered part of the list marker as + far as subsequent indentation is concerned. + + The Unicode character indicates the status of the task. That's up + to the client but the function + {!Block.List_item.task_status_of_task_marker} which is used by the + built-in renderers makes the following choices: + + {ul + {- Unchecked: [' '] (U+0020).} + {- Checked: ['x'] (U+0078), ['X'] (U+0058), ['✓'] (U+2713, CHECK MARK), + ['✔'] (U+2714, HEAVY CHECK MARK), ['𐄂'] (U+10102, AEGEAN CHECK MARK), + ['🗸'] (U+1F5F8, LIGHT CHECK MARK).} + {- Cancelled: ['~'] (U+007E).} + {- Other: any other character, interpretation left to clients or + renderers (built-in ones equate it with done).}} + + {2:ext_tables Tables} + + According to {{:https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#pipe-table}djot}. + +{v +| # | Name | Description | Link | +|:-:|----------:|:----------------------|------------------------:| +| 1 | OCaml | The OCaml website | | +| 2 | Haskell | The Haskell website | | +| 3 | MDN | Web dev docs | | +| 4 | Wikipedia | The Free Encyclopedia | | +v} + + A table is a sequence of rows, each row starts and ends with a + (non-escaped) pipe [|] character. The first row can't be indented + by more than three spaces of indentation, subsequent rows can be + arbitrarily indented. Blanks after the final pipe are allowed. + + Each row of the table contains cells separated by (non-escaped) + pipe [|] characters. Pipes embedded in inlines constructs do not + count as separators (the parsing strategy is to parse the row as + an inline, split the result on the [|] present in {e toplevel} + text nodes and strip initial and trailing blanks in cells). The + number of [|] separators plus 1 determines the number of columns + of a row. The number of columns of a table is the greatest number + of columns of its rows. + + A separator line is a row in which every cell content is made only + of one or more [-] optionally prefixed and suffixed by [:]. These + rows are not data, they indicate alignment of data in their cell + for subsequent rows (multiple separator lines in a single table + are allowed) and that the previous line (if any) was a row of + column headers. [:-] is left aligned [-:] is right aligned, [:-:] + is centered. If there's no alignement specified it's left + aligned. + + Tables are stored in {!Block.extension-Ext_table} nodes. + + {2:ext_footnotes Footnotes} + + According to {{:https://htmlpreview.github.io/?https://github.com/jgm/djot/blob/master/doc/syntax.html#footnotes}djot} for the footnote contents. + +{v +This is a footnote in history[^1] with mutiple references[^1]. +Footnotes are not [very special][^1] references. + + [^1]: Footnotes can have +lazy continuation lines and multiple paragraphs. + + If you start one column after the left bracket, blocks still get + into the footnote. + + But this is no longer the footnote. +v} + + Footnotes go through the label resolution mecanism and share the + same namespace as link references (including the [^]). They end up + being defined in the {!Doc.defs} as {!Block.Footnote.Def} + definitions. Footnote references are simply made by using + {!Inline.extension-Link} with the corresponding labels. + + {3:ext_footnote_def Definition} + + A footnote definition starts with a (single line) + {{:https://spec.commonmark.org/0.30/#link-label}link label} + followed by [:]. The label must start with a [^]. Footnote labels + go through the label {{!Label.resolvers}resolution} mechanism. + + All subsequent lines indented one column further than the start of + the label (i.e. starting on the [^]) get into the footnote. Lazy + continuation lines are supported. + + The result is stored in the document's {!Doc.defs} in + {!Block.Footnote.Def} cases and it's position in the documentation + witnessed by a {!Block.extension-Ext_footnote_definition} node which + is kept for layout. + + {3:ext_footnote_ref References} + + Footnote references are simply reference links with the footnote + label. Linking text on footnotes is allowed. Shortcut and + collapsed references to footnotes are rendered specially by + {!Cmarkit_html}. *) + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.ml new file mode 100644 index 000000000..81b79557d --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.ml @@ -0,0 +1,1360 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(* N.B. The doc strings of the .mli can help understanding these internal + functions. *) + +let sub_includes ~affix s ~first ~last = + let get = String.get in + let len_a = String.length affix in + let len_s = last - first + 1 in + if len_a > len_s then false else + let max_idx_a = len_a - 1 in + let max_idx_s = first + (len_s - len_a) in + let rec loop i k = + if i > max_idx_s then false else + if k > max_idx_a then true else + if k > 0 + then if get affix k = get s (i + k) then loop i (k + 1) else loop (i + 1) 0 + else if get affix 0 = get s i then loop i 1 else loop (i + 1) 0 + in + loop first 0 + +let unsafe_get = String.unsafe_get + +module String_set = Set.Make (String) + +(* Heterogeneous dictionaries *) + +module Dict = struct + (* Type identifiers, can be deleted once we require 5.1 *) + module Type = struct + type (_, _) eq = Equal : ('a, 'a) eq + module Id = struct + type _ id = .. + module type ID = sig type t type _ id += Id : t id end + type 'a t = (module ID with type t = 'a) + + let make (type a) () : a t = + (module struct type t = a type _ id += Id : t id end) + + let provably_equal + (type a b) ((module A) : a t) ((module B) : b t) : (a, b) eq option + = + match A.Id with B.Id -> Some Equal | _ -> None + + let uid (type a) ((module A) : a t) = + Obj.Extension_constructor.id (Obj.Extension_constructor.of_val A.Id) + end + end + + module M = Map.Make (Int) + type 'a key = 'a Type.Id.t + type binding = B : 'a key * 'a -> binding + type t = binding M.t + + let key = Type.Id.make + let empty = M.empty + let mem k m = M.mem (Type.Id.uid k) m + let add k v m = M.add (Type.Id.uid k) (B (k, v)) m + let tag k m = add k () m + let remove k m = M.remove (Type.Id.uid k) m + let find : type a. a key -> t -> a option = + fun k m -> match M.find_opt (Type.Id.uid k) m with + | None -> None + | Some B (k', v) -> + match Type.Id.provably_equal k k' with + | None -> assert false | Some Type.Equal -> Some v +end + +(* Text locations *) + +module Textloc = struct + + (* File paths *) + + type fpath = string + let file_none = "-" + let pp_path = Format.pp_print_string + + (* Byte positions *) + + type byte_pos = int (* zero-based *) + let byte_pos_none = -1 + + (* Lines *) + + type line_num = int (* one-based *) + let line_num_none = -1 + + (* Line positions + + We keep the byte position of the first element on the line. This + first element may not exist and be equal to the text length if + the input ends with a newline. Editors expect tools to compute + visual columns (not a very good idea). By keeping these byte + positions we can approximate columns by subtracting the line byte + position data byte location. This will only be correct on + US-ASCII data. *) + + type line_pos = line_num * byte_pos + let line_pos_first = 1, 0 + let line_pos_none = line_num_none, byte_pos_none + + (* Text locations *) + + type t = + { file : fpath; + first_byte : byte_pos; last_byte : byte_pos; + first_line : line_pos; last_line : line_pos } + + let v ~file ~first_byte ~last_byte ~first_line ~last_line = + { file; first_byte; last_byte; first_line; last_line } + + let file l = l.file + let first_byte l = l.first_byte + let last_byte l = l.last_byte + let first_line l = l.first_line + let last_line l = l.last_line + let none = + let first_byte = byte_pos_none and last_byte = byte_pos_none in + let first_line = line_pos_none and last_line = line_pos_none in + v ~file:file_none ~first_byte ~last_byte ~first_line ~last_line + + (* Predicates and comparisons *) + + let is_none l = l.first_byte < 0 + let is_empty l = l.first_byte > l.last_byte + let equal l0 l1 = + String.equal l0.file l1.file && + Int.equal l0.first_byte l1.first_byte && + Int.equal l0.last_byte l1.last_byte + + let compare l0 l1 = + let c = String.compare l0.file l1.file in + if c <> 0 then c else + let c = Int.compare l0.first_byte l1.first_byte in + if c <> 0 then c else + Int.compare l0.last_byte l1.last_byte + + (* Shrink and stretch *) + + let set_first l ~first_byte ~first_line = { l with first_byte; first_line } + let set_last l ~last_byte ~last_line = { l with last_byte; last_line } + + [@@@warning "-6"] + let to_first l = v l.file l.first_byte l.first_byte l.first_line l.first_line + let to_last l = v l.file l.last_byte l.last_byte l.last_line l.last_line + let before l = v l.file l.first_byte byte_pos_none l.first_line line_pos_none + let after l = + v l.file (l.first_byte + 1) byte_pos_none l.last_line line_pos_none + [@@@warning "+6"] + + let span l0 l1 = + let first_byte, first_line = + if l0.first_byte < l1.first_byte + then l0.first_byte, l0.first_line + else l1.first_byte, l1.first_line + in + let last_byte, last_line, file = + if l0.last_byte < l1.last_byte + then l1.last_byte, l1.last_line, l1.file + else l0.last_byte, l0.last_line, l0.file + in + v ~file ~first_byte ~first_line ~last_byte ~last_line + + [@@@warning "-6"] + let reloc ~first ~last = + v last.file first.first_byte last.last_byte first.first_line last.last_line + [@@@warning "+6"] + + (* Formatters *) + + let pf = Format.fprintf + let pp_ocaml ppf l = match is_none l with + | true -> pf ppf "File \"%a\"" pp_path l.file + | false -> + let pp_lines ppf l = match fst l.first_line = fst l.last_line with + | true -> pf ppf "line %d" (fst l.first_line) + | false -> pf ppf "lines %d-%d" (fst l.first_line) (fst l.last_line) + in + (* "characters" represent positions (insertion points) not columns *) + let pos_s = l.first_byte - snd l.first_line in + let pos_e = l.last_byte - snd l.last_line + 1 in + if pos_s = 0 && pos_e = 0 + then pf ppf "File \"%a\", %a" pp_path l.file pp_lines l + else pf ppf "File \"%a\", %a, characters %d-%d" + pp_path l.file pp_lines l pos_s pos_e + + let pp_gnu ppf l = match is_none l with + | true -> pf ppf "%a:" pp_path l.file + | false -> + let pp_lines ppf l = + let col_s = l.first_byte - snd l.first_line + 1 in + let col_e = l.last_byte - snd l.last_line + 1 in + match fst l.first_line = fst l.last_line with + | true -> pf ppf "%d.%d-%d" (fst l.first_line) col_s col_e + | false -> + pf ppf "%d.%d-%d.%d" + (fst l.first_line) col_s (fst l.last_line) col_e + in + pf ppf "%a:%a" pp_path l.file pp_lines l + + let pp = pp_gnu + + let pp_dump ppf l = + pf ppf "file:%s bytes:%d-%d lines:%d-%d lines-bytes:%d-%d]" + l.file l.first_byte l.last_byte (fst l.first_line) (fst l.last_line) + (snd l.first_line) (snd l.last_line) +end + +type line_span = + { line_pos : Textloc.line_pos; + first : Textloc.byte_pos; + last : Textloc.byte_pos } + +type line_start = Textloc.byte_pos +type rev_spans = (line_start * line_span) list +type 'a next_line = 'a -> ('a * line_span) option + +(* Node meta data *) + +module Meta = struct + type id = int + type t = { textloc : Textloc.t; id : id; dict : Dict.t } + + let new_id = let id = Atomic.make 0 in fun () -> Atomic.fetch_and_add id 1 + let make ?(textloc = Textloc.none) () = + { textloc; id = new_id (); dict = Dict.empty } + + let none = make () + let id m = m.id + let textloc m = m.textloc + let with_textloc ~keep_id m textloc = match keep_id with + | true -> { m with textloc } + | false -> { m with textloc; id = new_id () } + + let equal m0 m1 = Int.equal m0.id m1.id + let compare m0 m1 = Int.compare m0.id m1.id + let is_none m = equal none m + + type 'a key = 'a Dict.key + let key = Dict.key + let mem k m = Dict.mem k m.dict + let add k v m = { m with dict = Dict.add k v m.dict } + let tag k m = add k () m + let remove k m = { m with dict = Dict.remove k m.dict } + let find k m = Dict.find k m.dict +end + +(* US-ASCII processing *) + +module Ascii = struct + let is_control = function '\x00' .. '\x1F' | '\x7F' -> true | _ -> false + let is_letter = function 'a' .. 'z' | 'A' .. 'Z' -> true | _ -> false + let is_upper = function 'A' .. 'Z' -> true | _ -> false + let is_lower = function 'a' .. 'z' -> true | _ -> false + let is_digit = function '0' .. '9' -> true | _ -> false + let is_hex_digit = function + | '0' .. '9' | 'A' .. 'F' | 'a' .. 'f' -> true | _ -> false + + let hex_digit_to_int = function + | '0' .. '9' as c -> Char.code c - 0x30 + | 'A' .. 'F' as c -> Char.code c - 0x37 + | 'a' .. 'f' as c -> Char.code c - 0x57 + | _ -> assert false + + let is_alphanum = function + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true | _ -> false + + let is_white = function + | '\x20' | '\x09' | '\x0A' | '\x0B' | '\x0C' | '\x0D' -> true | _ -> false + + let is_punct = function + (* https://spec.commonmark.org/current/#ascii-punctuation-character *) + | '!' | '\"' | '#' | '$' | '%' | '&' | '\'' | '(' | ')' | '*' | '+' + | ',' | '-' | '.' | '/' | ':' | ';' | '<' | '=' | '>' | '?' | '@' + | '[' | '\\' | ']' | '^' | '_' | '`' | '{' | '|' | '}' | '~' -> true + | _ -> false + + let is_blank = function ' ' | '\t' -> true | _ -> false + + let caseless_starts_with ~prefix s = + let get = String.get in + let len_a = String.length prefix in + let len_s = String.length s in + if len_a > len_s then false else + let max_idx_a = len_a - 1 in + let rec loop s i max = + if i > max then true else + let c = match get s i with + | 'A' .. 'Z' as c -> Char.(unsafe_chr (code c + 32)) | c -> c + in + if get prefix i <> c then false else loop s (i + 1) max + in + loop s 0 max_idx_a + + let match' ~sub s ~start = + (* assert (start + String.length sub - 1 < String.length s) *) + try + for i = 0 to String.length sub - 1 + do if s.[start + i] <> sub.[i] then raise_notrace Exit done; true + with + | Exit -> false + + let caseless_match ~sub s ~start = + (* assert (start + String.length sub - 1 < String.length s) *) + try + for i = 0 to String.length sub - 1 do + let c = match s.[start + i] with + | 'A' .. 'Z' as c -> Char.(unsafe_chr (code c + 32)) | c -> c + in + if c <> sub.[i] then raise_notrace Exit + done; + true + with + | Exit -> false + + let lowercase_sub s first len = + let b = Bytes.create len in + for i = 0 to len - 1 do + let c = match s.[first + i] with + | 'A' .. 'Z' as c -> Char.(unsafe_chr (code c + 32)) | c -> c + in + Bytes.set b i c + done; + Bytes.unsafe_to_string b +end + +module Text = struct + let _utf_8_clean_unesc_unref ~do_unesc buf s ~first ~last = + (* This unescapes CommonMark escapes if [do_unesc] is true, + resolves entity and character references and replaces U+0000 or + UTF-8 decoding errors by U+FFFD *) + let get = String.get in + let flush buf s last start k = + if start <= last then Buffer.add_substring buf s start (k - start) + in + let rec try_entity_hex ~do_unesc buf s last start num_start k u = + (* https://spec.commonmark.org/current/\ + #hexadecimal-numeric-character-references *) + if k > last || k > num_start + 6 + then resolve ~do_unesc buf s last start k else + match get s k with + | ';' -> + let next = k + 1 in + if k = num_start then resolve ~do_unesc buf s last start next else + let u = + if Uchar.is_valid u && u <> 0 then Uchar.unsafe_of_int u else + Uchar.rep + in + flush buf s last start (num_start - 3 (* don't include &#(x|X) *)); + Buffer.add_utf_8_uchar buf u; + resolve ~do_unesc buf s last next next + | c when Ascii.is_hex_digit c -> + let u = u * 16 + (Ascii.hex_digit_to_int c) in + try_entity_hex ~do_unesc buf s last start num_start (k + 1) u + | _ -> + resolve ~do_unesc buf s last start k + and try_entity_dec ~do_unesc buf s last start num_start k u = + if k > last || k > num_start + 7 + then resolve ~do_unesc buf s last start k else + match get s k with + | ';' -> + let next = k + 1 in + if k = num_start then resolve ~do_unesc buf s last start next else + let u = + if Uchar.is_valid u && u <> 0 then Uchar.unsafe_of_int u else + Uchar.rep + in + flush buf s last start (num_start - 2 (* don't include &# *)); + Buffer.add_utf_8_uchar buf u; + resolve ~do_unesc buf s last next next + | c when Ascii.is_digit c -> + let u = u * 10 + (Char.code c - 0x30) in + try_entity_dec ~do_unesc buf s last start num_start (k + 1) u + | _ -> + resolve ~do_unesc buf s last start k + and try_entity_named ~do_unesc buf s last start name_start k = + (* https://spec.commonmark.org/current/\ + #entity-and-numeric-character-references *) + if k > last then resolve ~do_unesc buf s last start k else + match get s k with + | ';' -> + let name = String.sub s name_start (k - name_start) in + begin match Cmarkit_data.html_entity name with + | None -> resolve ~do_unesc buf s last start (k + 1) + | Some rep -> + let next = k + 1 in + flush buf s last start (name_start - 1 (* don't include & *)) ; + Buffer.add_string buf rep; + resolve ~do_unesc buf s last next next + end + | c when Ascii.is_alphanum c -> + try_entity_named ~do_unesc buf s last start name_start (k + 1) + | _ -> + resolve ~do_unesc buf s last start k + and resolve ~do_unesc buf s last start k = + if k > last then (flush buf s last start k; Buffer.contents buf) else + let next = k + 1 in + match get s k with + | '\x00' -> + flush buf s last start k; Buffer.add_utf_8_uchar buf Uchar.rep; + resolve ~do_unesc buf s last next next + | '\\' when do_unesc -> + if next > last then resolve ~do_unesc buf s last start next else + let nc = get s next in + if not (Ascii.is_punct nc) + then resolve ~do_unesc buf s last start next else + let next' = next + 1 in + (flush buf s last start k; Buffer.add_char buf nc; + resolve ~do_unesc buf s last next' next') + | '&' -> + if k + 2 > last then resolve ~do_unesc buf s last start next else + begin match get s next with + | c when Ascii.is_letter c -> + try_entity_named ~do_unesc buf s last start next next + | '#' -> + let next = next + 1 in + begin match get s next with + | c when Ascii.is_digit c -> + try_entity_dec ~do_unesc buf s last start next next 0 + | 'x' | 'X' -> + let next = next + 1 in + try_entity_hex ~do_unesc buf s last start next next 0 + | _ -> resolve ~do_unesc buf s last start next + end + | _ -> resolve ~do_unesc buf s last start next + end + | '\x01' .. '\x7F' -> resolve ~do_unesc buf s last start next + | b -> + let d = String.get_utf_8_uchar s k in + let next = k + Uchar.utf_decode_length d in + match Uchar.utf_decode_is_valid d with + | true -> resolve ~do_unesc buf s last start next + | false -> + flush buf s last start k; + Buffer.add_utf_8_uchar buf Uchar.rep; + resolve ~do_unesc buf s last next next + in + let rec check ~do_unesc buf s last start k = + if k > last then String.sub s first (last - start + 1) else + match unsafe_get s k with + | '\\' when do_unesc -> + Buffer.reset buf; resolve ~do_unesc buf s last start k + | '&' | '\x00' -> + Buffer.reset buf; resolve ~do_unesc buf s last start k + | '\x01' .. '\x7F' -> + check ~do_unesc buf s last start (k + 1) + | _ -> + let d = String.get_utf_8_uchar s k in + if Uchar.utf_decode_is_valid d + then check ~do_unesc buf s last start (k + Uchar.utf_decode_length d) + else (Buffer.reset buf; resolve ~do_unesc buf s last start k) + in + if first > last then "" else + let max = String.length s - 1 in + let last = if last > max then max else last in + let first = if first < 0 then 0 else first in + check ~do_unesc buf s last first first + + let utf_8_clean_unesc_unref buf s ~first ~last = + _utf_8_clean_unesc_unref ~do_unesc:true buf s ~first ~last + + let utf_8_clean_unref buf s ~first ~last = + _utf_8_clean_unesc_unref ~do_unesc:false buf s ~first ~last + + let utf_8_clean_raw ?(pad = 0) buf s ~first ~last = + let get = String.get in + let padit buf pad = for i = 1 to pad do Buffer.add_char buf ' ' done in + let clean buf s last first dirty = + let flush buf s last start k = + if start <= last then Buffer.add_substring buf s start (k - start); + in + let rec loop buf s last start k = + if k > last then (flush buf s last start k; Buffer.contents buf) else + match get s k with + | '\x01' .. '\x7F' (* US-ASCII *) -> loop buf s last start (k + 1) + | '\x00' -> + let next = k + 1 in + flush buf s last start k; Buffer.add_utf_8_uchar buf Uchar.rep; + loop buf s last next next + | _ -> + let d = String.get_utf_8_uchar s k in + let next = k + Uchar.utf_decode_length d in + match Uchar.utf_decode_is_valid d with + | true -> loop buf s last start next + | false -> + flush buf s last start k; Buffer.add_utf_8_uchar buf Uchar.rep; + loop buf s last next next + in + flush buf s last first dirty; loop buf s last dirty dirty + in + let rec check buf s last first k = + if k > last then String.sub s first (last - first + 1) else + match get s k with + | '\x01' .. '\x7F' (* US-ASCII *) -> check buf s last first (k + 1) + | '\x00' -> (Buffer.reset buf; clean buf s last first k) + | _ -> + let d = String.get_utf_8_uchar s k in + if Uchar.utf_decode_is_valid d + then check buf s last first (k + Uchar.utf_decode_length d) + else (Buffer.reset buf; clean buf s last first k) + in + if first > last then + if pad = 0 then "" else + (Buffer.reset buf; padit buf pad; Buffer.contents buf) + else + let max = String.length s - 1 in + let last = if last > max then max else last in + let first = if first < 0 then 0 else first in + if pad = 0 then check buf s last first first else + (Buffer.reset buf; padit buf pad; clean buf s last first first) +end + +(* Unicode matching *) + +let prev_uchar s ~first ~before = + let rec find_utf_8_starter s ~first ~start = + if start < first then first else match s.[start] with + | '\x00' .. '\x7F' | '\xC2' .. '\xDF' + | '\xE0' .. '\xEF' | '\xF0' .. '\xF4' -> start + | _ -> find_utf_8_starter s ~first ~start:(start - 1) + in + if before <= first then Uchar.of_int 0x0020 (* something white *) else + let k = find_utf_8_starter s ~first ~start:(before - 1) in + Uchar.utf_decode_uchar (String.get_utf_8_uchar s k) + +let next_uchar s ~last ~after = + if after >= last then Uchar.of_int 0x0020 (* something white *) else + Uchar.utf_decode_uchar (String.get_utf_8_uchar s (after + 1)) + +(* Result types *) + +type indent = int +type byte_pos = Textloc.byte_pos +type first = Textloc.byte_pos +type last = Textloc.byte_pos +type next = Textloc.byte_pos +type heading_level = int + +(* Runs, blanks and indents *) + +let rec run_of ~char s ~last ~start = + if start > last || s.[start] <> char then start - 1 else + run_of ~char s ~last ~start:(start + 1) + +let rec first_non_blank s ~last ~start = + if start > last then last + 1 else match s.[start] with + | ' ' | '\t' -> first_non_blank s ~last ~start:(start + 1) + | _ -> start + +let first_non_blank_in_span s sp = + first_non_blank s ~last:sp.last ~start:sp.first + +let rec last_non_blank s ~first ~start = + if start < first then first - 1 else match s.[start] with + | ' ' | '\t' -> last_non_blank s ~first ~start:(start - 1) + | _ -> start + +let rec rev_drop_spaces s ~first ~start = + if start < first then first - 1 else + if s.[start] = ' ' then rev_drop_spaces s ~first ~start:(start - 1) else start + +let push_span ~line first' last' = function +| (line_start, { line_pos; first; last }) :: acc + when (fst line_pos) = (fst line.line_pos) -> (* Merge if on same line *) + (line_start, { line with first; last = last' }) :: acc +| acc -> + (line.first, { line with first = first'; last = last' }) :: acc + +let accept_to ~char ~next_line s lines ~line spans ~after = + (* Includes final [char] in spans *) + let rec loop ~char ~next_line s lines line start acc k = + if k > line.last then match next_line lines with + | None -> None + | Some (lines, newline) -> + let acc = push_span ~line start line.last acc in + let start = first_non_blank_in_span s newline in + loop ~char ~next_line s lines newline start acc start + else + if s.[k] = char + then Some (lines, line, push_span ~line start k acc, k) + else loop ~char ~next_line s lines line start acc (k + 1) + in + loop ~char ~next_line s lines line after spans (after + 1) + +let accept_upto ~char ~next_line s lines ~line acc ~after = + (* Does not not include final [char] in spans and continues on + backslashed [char]. *) + let rec loop ~char ~next_line s lines line ~prev_bslash start acc k = + if k > line.last then match next_line lines with + | None -> None + | Some (lines, newline) -> + if newline.first > newline.last (* empty *) then None else + let acc = push_span ~line start line.last acc in + let start = first_non_blank_in_span s newline in + let prev_bslash = false in + loop ~char ~next_line s lines newline ~prev_bslash start acc start + else + if s.[k] = char && not prev_bslash + then Some (lines, line, push_span ~line start (k - 1) acc, k) else + let prev_bslash = s.[k] = '\\' && not prev_bslash (* \\ is not *) in + loop ~char ~next_line s lines line ~prev_bslash start acc (k + 1) + in + let start = after + 1 in + loop ~char ~next_line s lines line ~prev_bslash:false start acc start + +let first_non_blank_over_nl ~next_line s lines ~line ~start = + let nb = first_non_blank s ~last:line.last ~start in + if nb <= line.last then `This_line nb else + match next_line lines with + | None -> `None + | Some (lines, newline) -> + let nb = first_non_blank_in_span s newline in + if nb > newline.last then `None else `Next_line (lines, newline, nb) + +let first_non_blank_over_nl' ~next_line s lines ~line spans ~start = + (* Same as [first_non_blank_over_nl] but pushes skipped data on [spans]. *) + match first_non_blank_over_nl ~next_line s lines ~line ~start with + | `None -> None + | `This_line nb -> + let line = { line with first = start } (* no layout *) in + let spans = push_span ~line start (nb - 1) spans in + Some (lines, line, spans, nb - 1) + | `Next_line (lines, newline, nb) -> + let line = { line with first = start } (* no layout *) in + let spans = push_span ~line start line.last spans in + Some (lines, newline, spans, nb - 1) + +let first_non_escaped_char c s ~last ~start = + let rec loop c s ~last ~start k = + if k > last then k else + if s.[k] = c && (k = start || s.[k - 1] <> '\\') then k else + loop c s ~last ~start (k + 1) + in + loop c s ~last ~start start + +(* Autolinks *) + +let autolink_email s ~last ~start = + (* https://spec.commonmark.org/current/#email-address + Via the ABNF "<" email ">" with email defined by: + https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address *) + let is_atext_plus_dot = function + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' + | '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '/' | '=' | '?' + | '^' | '_' | '`' | '{' | '|' | '}' | '~' | '.' -> true + | _ -> false + in + let is_let_dig = Ascii.is_alphanum in + let is_let_dig_hyp c = Ascii.is_alphanum c || c = '-' in + let rec label_seq s last k = + let rec loop s last c k = + if k > last then None else + if is_let_dig_hyp s.[k] && c <= 63 then loop s last (c + 1) (k + 1) else + if c > 63 || not (is_let_dig s.[k - 1]) then None else + match s.[k] with + | '>' -> Some k + | '.' -> label_seq s last (k + 1) + | c -> None + in + if k > last || not (is_let_dig s.[k]) then None else + loop s last 1 (k + 1) + in + let rec atext_seq s last k = + if k > last then None else + if is_atext_plus_dot s.[k] then atext_seq s last (k + 1) else + if s.[k] = '@' && is_atext_plus_dot s.[k - 1] + then label_seq s last (k + 1) + else None + in + if start > last || s.[start] <> '<' then None else + atext_seq s last (start + 1) + +let autolink_uri s ~last ~start = + (* https://spec.commonmark.org/current/#uri-autolink *) + let is_scheme_letter = function + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '+' | '.' | '-' -> true | _ -> false + in + let is_uri_char = function + |'\x00' .. '\x1F' | '\x7F' | ' ' | '<' | '>' -> false | _ -> true + in + let rec rest s last k = + if k > last then None else + if is_uri_char s.[k] then rest s last (k + 1) else + if s.[k] = '>' then Some k else None + in + let rec scheme s last c k = + if k > last then None else + if is_scheme_letter s.[k] && c <= 32 then scheme s last (c + 1) (k + 1) else + if s.[k] = ':' && 2 <= c && c <= 32 then rest s last (k + 1) else None + in + let next = start + 1 in + if next > last || s.[start] <> '<' || not (Ascii.is_letter s.[next]) + then None else scheme s last 1 (next + 1) + +(* Raw HTML *) + +let tag_name s ~last ~start : last option = + (* https://spec.commonmark.org/current/#tag-name *) + let rec loop s last k = + if k > last || not (Ascii.is_alphanum s.[k] || s.[k] = '-') + then Some (k - 1) else loop s last (k + 1) + in + if start > last || not (Ascii.is_letter s.[start]) then None else + loop s last (start + 1) + +let attribute_name s ~last ~start : next option = + (* https://spec.commonmark.org/current/#attribute-name *) + let is_start = function + | c when Ascii.is_letter c -> true | '_' | ':' -> true | _ -> false + in + let is_cont = function + | c when Ascii.is_alphanum c -> true | '_' | '.' | ':' | '-' -> true + | _ -> false + in + let rec loop s last k = + if k > last || not (is_cont s.[k]) + then Some (k - 1) else loop s last (k + 1) + in + if start > last || not (is_start s.[start]) then None else + loop s last (start + 1) + +let attribute_value ~next_line s lines ~line spans ~start = + (* https://spec.commonmark.org/current/#attribute-value *) + if start > line.last then None else match s.[start] with + | '\'' | '\"' as char -> + (* https://spec.commonmark.org/current/#double-quoted-attribute-value + https://spec.commonmark.org/current/#unquoted-attribute-value *) + accept_to ~char ~next_line s lines ~line spans ~after:start + | c -> + (* https://spec.commonmark.org/current/#unquoted-attribute-value *) + let cont = function + | ' ' | '\t' | '\"' | '\'' | '=' | '<' | '>' | '`' -> false | _ -> true + in + let rec loop s last k = + if k > last || not (cont s.[k]) then + let last = k - 1 in + Some (lines, line, push_span ~line start last spans, last) + else loop s last (k + 1) + in + loop s line.last (start + 1) + +let attribute ~next_line s lines ~line spans ~start = + (* https://spec.commonmark.org/current/#attribute *) + (* https://spec.commonmark.org/current/#attribute-value-specification *) + match attribute_name s ~last:line.last ~start with + | None -> None + | Some end_name -> + let spans = push_span ~line start end_name spans in + let start = end_name + 1 in + match first_non_blank_over_nl' ~next_line s lines ~line spans ~start with + | None -> None + | Some (lines', line', spans', last_blank) -> + let nb = last_blank + 1 in + if s.[nb] <> '=' + then Some (lines, line, spans, end_name) (* no value *) else + let spans' = push_span ~line nb nb spans' in + let start = nb + 1 in + match + first_non_blank_over_nl' + ~next_line s lines' ~line:line' spans' ~start + with + | None -> None + | Some (lines, line, spans, last_blank) -> + let start = last_blank + 1 in + attribute_value ~next_line s lines ~line spans ~start + +let open_tag ~next_line s lines ~line ~start:tag_start = (* tag_start has < *) + (* https://spec.commonmark.org/current/#open-tag *) + match tag_name s ~last:line.last ~start:(tag_start + 1) with + | None -> None + | Some tag_end_name -> + let rec loop ~next_line s lines ~line spans ~start = + match + first_non_blank_over_nl' ~next_line s lines ~line spans ~start + with + | None -> None + | Some (lines, line, spans, last_blank) -> + let next = last_blank + 1 in + match s.[next] with + | '>' -> + Some (lines, line, push_span ~line next next spans, next) + | '/' -> + let last = next + 1 in + if last > line.last || s.[last] <> '>' then None else + Some (lines, line, push_span ~line next last spans, last) + | c -> + if next = start then None else + match attribute ~next_line s lines ~line spans ~start:next with + | None -> None + | Some (lines, line, spans, last) -> + loop ~next_line s lines ~line spans ~start:(last + 1) + in + let start = tag_end_name + 1 in + let span = { line with first = tag_start; last = tag_end_name} in + let spans = [tag_start, span] in + loop ~next_line s lines ~line spans ~start + +let closing_tag ~next_line s ls ~line ~start:tag_start = (* start is on None + | Some tag_name_end -> + let span = { line with first = tag_start; last = tag_name_end} in + let spans = [tag_start, span] in + let start = tag_name_end + 1 in + match first_non_blank_over_nl' ~next_line s ls ~line spans ~start with + | None -> None + | Some (lines, line, spans, last_blank) -> + let last = last_blank + 1 in + if s.[last] <> '>' then None else + Some (lines, line, push_span ~line last last spans, last) + +let declaration ~next_line s lines ~line ~start = (* start is on ' ~next_line s lines ~line [] ~after:start + +let processing_instruction ~next_line s lines ~line ~start = (* start is on line.last then match next_line lines with + | None -> None + | Some (lines, newline) -> + let acc = push_span ~line start line.last acc in + let start = first_non_blank_in_span s newline in + loop ~next_line s lines newline start acc start + else + if s.[k] <> '?' then loop ~next_line s lines line start acc (k + 1) else + let last = k + 1 in + if last <= line.last && s.[last] = '>' (* ?> *) + then Some (lines, line, push_span ~line start last acc, last) + else loop ~next_line s lines line start acc last + in + loop ~next_line s lines line start [] (start + 2) + +let html_comment ~next_line s lines ~line ~start = (* start is on line.last then match next_line lines with + | None -> None + | Some (lines, newline) -> + let acc = push_span ~line start line.last acc in + let start = first_non_blank_in_span s newline in + loop ~next_line s lines newline start acc start + else + if s.[k] = '-' && s.[k - 1] <> '-' then + let last = k + 2 in + if last <= line.last && s.[k + 1] = '-' then + if s.[last] = '>' (* --> and we do not end with - *) + then Some (lines, line, push_span ~line start last acc, last) + else None (* -- in the input *) + else loop ~next_line s lines line start acc (k + 1) + else loop ~next_line s lines line start acc (k + 1) + in + (* Check we have at least or *) + if (start + 3 > line.last) || not (s.[start + 3] = '-') || + (start + 4 <= line.last && s.[start + 4] = '>') || + (start + 5 <= line.last && s.[start + 4] = '-' && s.[start + 5] = '>') + then None else loop ~next_line s lines line start [] (start + 4) + +let cdata_section ~next_line s lines ~line ~start = (* start is on line.last then match next_line lines with + | None -> None + | Some (lines, newline) -> + let acc = push_span ~line start line.last acc in + let start = first_non_blank_in_span s newline in + loop ~next_line s lines newline start acc start + else + if s.[k] <> ']' then loop ~next_line s lines line start acc (k + 1) else + let last = k + 2 in + if last <= line.last && s.[k + 1] = ']' && s.[last] = '>' (* ]> *) + then Some (lines, line, push_span ~line start last acc, last) + else loop ~next_line s lines line start acc (k + 1) + in + if start + 8 > line.last || (* not CDATA[ *) + not (s.[start + 3] = 'C' && s.[start + 4] = 'D' && s.[start + 5] = 'A' && + s.[start + 6] = 'T' && s.[start + 7] = 'A' && s.[start + 8] = '[') + then None else loop ~next_line s lines line start [] (start + 9) + +let raw_html ~next_line s lines ~line ~start = + (* https://spec.commonmark.org/current/#html-tag *) + let next = start + 1 and last = line.last in + if next > last || s.[start] <> '<' then None else match s.[next] with + | '/' -> closing_tag ~next_line s lines ~line ~start + | '?' -> processing_instruction ~next_line s lines ~line ~start + | '!' -> + let next = next + 1 in + if next > last then None else + begin match s.[next] with + | '-' -> html_comment ~next_line s lines ~line ~start + | '[' -> cdata_section ~next_line s lines ~line ~start + | c when Ascii.is_letter c -> declaration ~next_line s lines ~line ~start + | _ -> None + end + | c -> open_tag ~next_line s lines ~line ~start + +(* Links *) + +let link_destination s ~last ~start = + let delimited s ~last ~start = (* start has '<' *) + (* https://spec.commonmark.org/current/#link-destination 1st *) + let rec loop s start last prev_byte k = + if k > last then None else match s.[k] with + | '\n' | '\r' -> None + | '\\' when prev_byte = '\\' -> loop s start last '\x00' (k + 1) + | '<' when prev_byte <> '\\' -> None + | '>' when prev_byte <> '\\' -> Some (true, (start + 1), k - 1) + | c -> loop s start last c (k + 1) + in + loop s start last '\x00' (start + 1) + in + let not_delimited s ~last ~start = + (* https://spec.commonmark.org/current/#link-destination 2nd *) + let rec loop s start last prev_byte bal k = + if k > last then (if bal = 0 then Some (false, start, k - 1) else None) + else match s.[k] with + | '\\' when prev_byte = '\\' -> loop s start last '\x00' bal (k + 1) + | '(' as c when prev_byte <> '\\' -> loop s start last c (bal + 1) (k + 1) + | ')' as c when prev_byte <> '\\' -> + let bal = bal - 1 in + if bal < 0 + then Some (false, start, k - 1) (* hit inline link closing ')' *) + else loop s start last c bal (k + 1) + | ' ' -> if k <> start && bal = 0 then Some (false, start, k-1) else None + | c when Ascii.is_control c -> + if k <> start && bal = 0 then Some (false, start, k - 1) else None + | c -> loop s start last c bal (k + 1) + in + loop s start last '\x00' 0 start + in + if start > last then None else + if s.[start] = '<' + then delimited s ~last ~start + else not_delimited s ~last ~start + +let link_title ~next_line s lines ~line ~start = + (* https://spec.commonmark.org/current/#link-title *) + let rec paren ~next_line s lines ~line ~prev_bslash start acc k = + if k > line.last then match next_line lines with + | None -> None + | Some (lines, newline) -> + if newline.first > newline.last (* empty *) then None else + let acc = push_span ~line start line.last acc in + let start = first_non_blank_in_span s newline in + let prev_bslash = false in + paren ~next_line s lines ~line:newline ~prev_bslash start acc start + else + if s.[k] = '(' && not prev_bslash then None else + if s.[k] = ')' && not prev_bslash + then Some (lines, line, push_span ~line start (k - 1) acc, k) else + let prev_bslash = s.[k] = '\\' && not prev_bslash in + paren ~next_line s lines ~line ~prev_bslash start acc (k + 1) + in + if start > line.last then None else match s.[start] with + | '\"' | '\'' as char -> + accept_upto ~char ~next_line s lines ~line [] ~after:start + | '(' -> + let start = start + 1 and prev_bslash = false in + paren ~next_line s lines ~line ~prev_bslash start [] start + | _ -> None + +let link_label b ~next_line s lines ~line ~start = + (* https://spec.commonmark.org/current/#link-label *) + let rec loop b ~next_line s lines ~line ~prev_byte start acc count k = + if k > line.last then match next_line lines with + | None -> None + | Some (lines, newline) -> + if newline.first > newline.last (* empty *) then None else + let acc = push_span ~line start line.last acc in + let start = first_non_blank_in_span s newline in + let () = if Buffer.length b <> 0 then Buffer.add_char b ' ' in + let prev_byte = '\x00' in + loop b ~next_line s lines ~line:newline ~prev_byte start acc count start + else + if count > 999 then None else match s.[k] with + | '\\' when prev_byte = '\\' -> + Buffer.add_char b '\\'; + let prev_byte = '\x00' in + loop b ~next_line s lines ~line ~prev_byte start acc (count + 1) (k + 1) + | ']' when prev_byte <> '\\' -> + let key = Buffer.contents b in + if String.for_all Ascii.is_blank key then None else + let acc = push_span ~line start (k - 1) acc in + Some (lines, line, acc, k, key) + | '[' when prev_byte <> '\\' -> None + | ' ' | '\t' as prev_byte -> + loop b ~next_line s lines ~line ~prev_byte start acc (count + 1) (k + 1) + | c -> + let () = + (* Collapses non initial white *) + if Ascii.is_blank prev_byte && Buffer.length b <> 0 + then Buffer.add_char b ' ' + in + let d = String.get_utf_8_uchar s k in + let u = Uchar.utf_decode_uchar d in + let u = match Uchar.to_int u with 0x0000 -> Uchar.rep | _ -> u in + let k' = k + Uchar.utf_decode_length d in + let () = match Cmarkit_data.unicode_case_fold u with + | None -> Buffer.add_utf_8_uchar b u + | Some fold -> Buffer.add_string b fold + in + let prev_byte = s.[k] in + loop b ~next_line s lines ~line ~prev_byte start acc (count + 1) k' + in + if start > line.last || s.[start] <> '[' then None else + let start = start + 1 in + (Buffer.reset b; + loop b ~next_line s lines ~line ~prev_byte:'\x00' start [] 0 start) + +(* Leaf blocks + + The matching functions assume the indentation has been stripped. *) + +type html_block_end_cond = + [ `End_str of string | `End_cond_1 | `End_blank | `End_blank_7 ] + +type line_type = +| Atx_heading_line of heading_level * byte_pos * first * last +| Blank_line +| Block_quote_line +| Fenced_code_block_line of first * last * (first * last) option +| Html_block_line of html_block_end_cond +| Indented_code_block_line +| List_marker_line of ([ `Ordered of int * char | `Unordered of char ] * last) +| Paragraph_line +| Setext_underline_line of heading_level * last +| Thematic_break_line of last +| Ext_table_row of last +| Ext_footnote_label of rev_spans * last * string +| Nomatch + +let thematic_break s ~last ~start = + (* https://spec.commonmark.org/current/#thematic-breaks *) + let rec loop s last count prev k = + if k > last + then (if count < 3 then Nomatch else Thematic_break_line prev) else + if s.[k] = s.[prev] then loop s last (count + 1) k (k + 1) else + if s.[k] = ' ' || s.[k] = '\t' then loop s last count prev (k + 1) else + Nomatch + in + if start > last then Nomatch else match s.[start] with + | '-' | '_' | '*' -> loop s last 1 start (start + 1) + | _ -> Nomatch + +let atx_heading s ~last ~start = + (* https://spec.commonmark.org/current/#atx-headings *) + let rec skip_hashes s last k = + if k > last then k else + if s.[k] = '#' then skip_hashes s last (k + 1) else k + in + let find_end s last k = (* blank on k, last + 1 if blank* [#+] blank* *) + let after_blank = first_non_blank s ~last ~start:(k + 1) in + if after_blank > last then after_blank else + if s.[after_blank] <> '#' then after_blank else + let after_hash = skip_hashes s last (after_blank + 1) in + let after_blank = first_non_blank s ~last ~start:after_hash in + if after_blank > last || after_blank = after_hash then after_blank else + after_blank - 1 (* this could be the beginning of the end, trigger again *) + in + let rec content s last k = + if k > last then k - 1 else + if not (s.[k] = ' ' || s.[k] = '\t') then content s last (k + 1) else + let end' = find_end s last k in + if end' > last then (k - 1) else content s last end' + in + let rec level s last acc k = + if k > last then Atx_heading_line (acc, k, k, last) else + if s.[k] = '#' then + if acc < 6 then level s last (acc + 1) (k + 1) else Nomatch + else + let first = first_non_blank s ~last ~start:k in + if first > last + then Atx_heading_line (acc, k, last + 1, last) (* empty cases *) else + if first = k then Nomatch (* need a blank *) else + let last = + if s.[first] <> '#' then content s last (first + 1) else + let end' = find_end s last (first - 1 (* start on blank *)) in + if end' > last then first - 1 else content s last end' + in + Atx_heading_line (acc, k, first, last) + in + if start > last || s.[start] <> '#' then Nomatch else + level s last 1 (start + 1) + +let setext_heading_underline s ~last ~start = + (* https://spec.commonmark.org/current/#setext-heading *) + let level c = if c = '=' then 1 else 2 in + let rec underline s last start k = + if k > last then Setext_underline_line (level s.[start], k - 1) else + if s.[k] = s.[start] then underline s last start (k + 1) else + if not (s.[k] = ' ' || s.[k] = '\t') then Nomatch else + let end_blank = first_non_blank s ~last ~start:(k + 1) in + if end_blank > last + then Setext_underline_line (level s.[start], k - 1) + else Nomatch + in + if start > last then Nomatch else + if not (s.[start] = '-' || s.[start] = '=') then Nomatch else + underline s last start (start + 1) + +let fenced_code_block_start s ~last ~start = + (* https://spec.commonmark.org/current/#code-fence *) + let rec info s last nobt info_first k = + if k > last then Some (info_first, last) else + if nobt && s.[k] = '`' then raise_notrace Exit else + if not (s.[k] = ' ' || s.[k] = '\t') + then info s last nobt info_first (k + 1) else + let after_blank = first_non_blank s ~last ~start:k in + if after_blank > last then Some (info_first, k - 1) else + info s last nobt info_first after_blank + in + let rec fence s last fence_first k = + if k <= last && s.[k] = s.[fence_first] + then fence s last fence_first (k + 1) else + let fence_last = k - 1 in + let fcount = fence_last - fence_first + 1 in + if fcount < 3 then Nomatch else + let info = + let after_blank = first_non_blank s ~last ~start:k in + if after_blank > last then None else + info s last (s.[fence_first] = '`') after_blank after_blank + in + Fenced_code_block_line (fence_first, fence_last, info) + in + let rec loop s first last k = + if k > last then Nomatch else + if k - first + 1 < 4 && s.[k] = ' ' then loop s first last (k + 1) else + if not (s.[k] = '~' || s.[k] = '`') then Nomatch else + try fence s last k (k + 1) with + | Exit (* backtick fence and info *) -> Nomatch + in + if start > last then Nomatch else loop s start last start + +let fenced_code_block_continue ~fence:(fc, fcount) s ~last ~start = + (* https://spec.commonmark.org/current/#code-fence *) + let rec fence s last fence_first k = + if k <= last && s.[k] = fc then fence s last fence_first (k + 1) else + let fence_last = k - 1 in + if fence_last - fence_first + 1 < fcount then raise Exit (* not closing *) + else + let after_blank = first_non_blank s ~last ~start:k in + if after_blank > last then `Close (fence_first, fence_last) else + raise Exit + in + let rec loop s first last k = + if k > last then `Code (* short blank line *) else + if k - first + 1 < 4 && s.[k] = ' ' then loop s first last (k + 1) else + if s.[k] <> fc then `Code else + try fence s last k (k + 1) with Exit -> `Code + in + if start > last then `Code else loop s start last start + +let html_start_cond_1_set = + String_set.of_list ["pre"; "script"; "style"; "textarea"] + +let html_start_cond_6_set = + String_set.of_list + [ "address"; "article"; "aside"; "base"; "basefont"; "blockquote"; "body"; + "caption"; "center"; "col"; "colgroup"; "dd"; "details"; "dialog"; "dir"; + "div"; "dl"; "dt"; "fieldset"; "figcaption"; "figure"; "footer"; "form"; + "frame"; "frameset"; "h1"; "h2"; "h3"; "h4"; "h5"; "h6"; "head"; "header"; + "hr"; "html"; "iframe"; "legend"; "li"; "link"; "main"; "menu"; + "menuitem"; "nav"; "noframes"; "ol"; "optgroup"; "option"; "p"; "param"; + "section"; "source"; "summary"; "table"; "tbody"; "td"; "tfoot"; "th"; + "thead"; "title"; "tr"; "track"; "ul" ] + +let html_block_start_5 s ~last ~start = (* 3 first chars checked *) + let next = start + 3 and sub = "CDATA[" in + if start + 8 > last || not (Ascii.match' ~sub s ~start:next) then Nomatch else + Html_block_line (`End_str "]]>") (* 5 *) + +let html_block_start_2 s ~last ~start = (* 3 first chars checked *) + let next = start + 3 in + if next > last || s.[next] <> '-' then Nomatch else + Html_block_line (`End_str "-->") (* 2 *) + +let html_block_start_7_open_tag s ~last ~start = + (* Has to be on the same line we fake one and use the inline parser *) + let line = { line_pos = Textloc.line_pos_none; first = start; last } in + let next_line () = None in + match open_tag ~next_line s () ~line ~start with + | None -> Nomatch + | Some (_, _, _, tag_end) -> + let next = first_non_blank s ~last ~start:(tag_end + 1) in + if next > last then Html_block_line `End_blank_7 else Nomatch + +let html_block_start_7_close_tag s ~last ~start = + (* Has to be on the same line we fake one and use the inline parser *) + let line = { line_pos = Textloc.line_pos_none; first = start; last } in + let next_line () = None in + match closing_tag ~next_line s () ~line ~start with + | None -> Nomatch + | Some (_, _, _, tag_end) -> + let next = first_non_blank s ~last ~start:(tag_end + 1) in + if next > last then Html_block_line `End_blank_7 else Nomatch + +let html_block_start s ~last ~start = + (* https://spec.commonmark.org/current/#html-blocks *) + let next = start + 1 in + if next > last || s.[start] <> '<' then Nomatch else + match s.[next] with + | '?' -> Html_block_line (`End_str "?>") (* 3 *) + | '!' -> + let next = next + 1 in + if next > last then Nomatch else + begin match s.[next] with + | '[' -> html_block_start_5 s ~last ~start + | '-' -> html_block_start_2 s ~last ~start + | c when Ascii.is_letter c -> Html_block_line (`End_str ">") (* 4 *) + | _ -> Nomatch + end + | c when Ascii.is_letter c || c = '/' -> + let tag_first = if c = '/' then next + 1 else next in + let tag_last = + let rec find_tag_end s last i = + if i > last || not (Ascii.is_letter s.[i]) then i - 1 else + find_tag_end s last (i + 1) + in + find_tag_end s last tag_first + in + let tag = Ascii.lowercase_sub s tag_first (tag_last - tag_first + 1) in + let is_open_end = + let n = tag_last + 1 in + n > last || s.[n] = ' ' || s.[n] = '\t' || s.[n] = '>' + in + let is_open_close_end = + is_open_end || + (tag_last + 2 <= last && s.[tag_last + 1] = '/' && + s.[tag_last + 2] = '>') + in + if c <> '/' then begin + if String_set.mem tag html_start_cond_1_set && is_open_end + then Html_block_line `End_cond_1 (* 1 *) else + if String_set.mem tag html_start_cond_6_set && is_open_close_end + then Html_block_line `End_blank (* 6 *) else + html_block_start_7_open_tag s ~last ~start + end else begin + if String_set.mem tag html_start_cond_6_set && is_open_close_end + then Html_block_line `End_blank (* 6 *) else + html_block_start_7_close_tag s ~last ~start + end + | _ -> Nomatch + +let html_block_end_cond_1 s ~last ~start = + (* https://spec.commonmark.org/current/#html-blocks end condition 1. *) + let rec loop s last k = + if k + 3 > last then false else + if s.[k] <> '<' || s.[k + 1] <> '/' then loop s last (k + 1) else + let next = k + 2 in + let is_end_tag = match s.[next] with + | 'p' -> Ascii.caseless_match ~sub:"pre>" s ~start:next + | 's' -> + if s.[k + 3] = 't' + then Ascii.caseless_match ~sub:"style>" s ~start:next + else Ascii.caseless_match ~sub:"script>" s ~start:next + | 't' -> Ascii.caseless_match ~sub:"textarea>" s ~start:next + | _ -> false + in + if is_end_tag then true else loop s last (k + 1) + in + loop s last start + +let html_block_end ~end_cond s ~last ~start = match end_cond with +| `End_str str -> sub_includes ~affix:str s ~first:start ~last +| `End_cond_1 -> html_block_end_cond_1 s ~last ~start +| `End_blank | `End_blank_7 -> first_non_blank s ~last ~start = last + 1 + +let ext_table_row s ~last ~start = + if start > last || s.[start] <> '|' then Nomatch else + let first = start + 1 in + let last_nb = last_non_blank s ~first ~start:last in + let before = last_nb - 1 in + if last_nb < first || s.[last_nb] <> '|' || + (before >= first && s.[before] = '\\') + then Nomatch else Ext_table_row last_nb + +let ext_footnote_label buf s ~line_pos ~last ~start = + if start + 1 > last || s.[start] <> '[' || s.[start + 1] <> '^' + then Nomatch else + let rbrack = first_non_escaped_char ']' s ~last ~start:(start + 2) in + let colon = rbrack + 1 in + if colon > last || s.[colon] <> ':' || colon - start + 1 < 5 then Nomatch else + (* Get the normalized label *) + let line = { line_pos; first = start; last } in + let next_line () = None in + match link_label buf ~next_line s () ~line ~start with + | None -> (* should not happen *) Nomatch + | Some (_, _, rev_spans, _, key) -> + Ext_footnote_label (rev_spans, colon, key) + +let could_be_link_reference_definition s ~last ~start = + (* https://spec.commonmark.org/current/#link-reference-definition *) + let rec loop s first last k = + if k > last then false else + if k - first + 1 < 4 && s.[k] = ' ' then loop s first last (k + 1) else + s.[k] = '[' + in + if start > last then false else loop s start last start + +(* Container blocks *) + +let list_marker s ~last ~start = + (* https://spec.commonmark.org/current/#list-marker *) + if start > last then Nomatch else match s.[start] with + | '-' | '+' | '*' as c -> + let next = start + 1 in + if next > last || Ascii.is_blank s.[next] + then List_marker_line (`Unordered c, start) + else Nomatch + | '0' .. '9' as c -> + let[@inline] digit c = Char.code c - 0x30 in + let rec loop s last count acc k = + if k > last || count > 9 then Nomatch else + match s.[k] with + | '0' .. '9' as c -> + loop s last (count + 1) (acc * 10 + digit c) (k + 1) + | '.' | ')' as c -> + let next = k + 1 in + if next > last || Ascii.is_blank s.[next] + then List_marker_line (`Ordered (acc, c), k) else Nomatch + | _ -> Nomatch + in + loop s last 1 (digit c) (start + 1) + | _ -> Nomatch + +let ext_task_marker s ~last ~start = + if start + 1 > last then None else + if s.[start] <> '[' then None else + let next = start + 1 in + let u = String.get_utf_8_uchar s next in + if not (Uchar.utf_decode_is_valid u) then None else + let next = next + Uchar.utf_decode_length u in + if next > last then None else + if s.[next] <> ']' then None else + let next = next + 1 in + if next > last + then Some (Uchar.utf_decode_uchar u, last) + else if s.[next] <> ' ' then None else + Some (Uchar.utf_decode_uchar u, next) + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.mli b/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.mli new file mode 100644 index 000000000..0400ecbd0 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_base.mli @@ -0,0 +1,401 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(** Low-level internal tools. *) + +(** Heterogeneous dictionaries. + + Used by {!Cmarkit.Meta}. *) +module Dict : sig + type 'a key + val key : unit -> 'a key + type t + val empty : t + val mem : 'a key -> t -> bool + val add : 'a key -> 'a -> t -> t + val tag : unit key -> t -> t + val remove : 'a key -> t -> t + val find : 'a key -> t -> 'a option +end + +(** Text locations. + + See {!Cmarkit.Textloc} for documentation. *) +module Textloc : sig + type fpath = string + val file_none : fpath + + type byte_pos = int + val byte_pos_none : byte_pos + + type line_num = int + val line_num_none : line_num + + type line_pos = line_num * byte_pos + val line_pos_first : line_pos + val line_pos_none : line_pos + + type t + val none : t + val v : + file:fpath -> first_byte:byte_pos -> last_byte:byte_pos -> + first_line:line_pos -> last_line:line_pos -> t + + val file : t -> fpath + val first_byte : t -> byte_pos + val last_byte : t -> byte_pos + val first_line : t -> line_pos + val last_line : t -> line_pos + val is_none : t -> bool + val is_empty : t -> bool + val equal : t -> t -> bool + val compare : t -> t -> int + val set_first : t -> first_byte:byte_pos -> first_line:line_pos -> t + val set_last : t -> last_byte:byte_pos -> last_line:line_pos -> t + val to_first : t -> t + val to_last : t -> t + val before : t -> t + val after : t -> t + val span : t -> t -> t + val reloc : first:t -> last:t -> t + val pp_ocaml : Format.formatter -> t -> unit + val pp_gnu : Format.formatter -> t -> unit + val pp : Format.formatter -> t -> unit + val pp_dump : Format.formatter -> t -> unit +end + +(** Node metadata. + + See {!Cmarkit.Meta} for documentation. *) +module Meta : sig + type id = int + type t + + val none : t + val make : ?textloc:Textloc.t -> unit -> t + val id : t -> id + + val textloc : t -> Textloc.t + val with_textloc : keep_id:bool -> t -> Textloc.t -> t + + val equal : t -> t -> bool + val compare : t -> t -> int + val is_none : t -> bool + + type 'a key + val key : unit -> 'a key + val mem : 'a key -> t -> bool + val add : 'a key -> 'a -> t -> t + val tag : unit key -> t -> t + val remove : 'a key -> t -> t + val find : 'a key -> t -> 'a option +end + +type line_span = + { line_pos : Textloc.line_pos; + first : Textloc.byte_pos; + last : Textloc.byte_pos } +(** The type for line spans. A line position, the first and last + bytes of the span. If the former is greater than the latter, + the span is empty. *) + +type line_start = Textloc.byte_pos +(** The type for denoting a line start inside + a CommonMark container (i.e. may not match the text line's first + character). *) + +type rev_spans = (line_start * line_span) list +(** A reversed list of line spans, tupled with the byte position on + where the line starts (inside a CommonMark container). The + [line_start] is the start of line in the container, the + [line_span] has the actual data. The characters in the + \[[line_start];[line_span.first - 1]\] are blanks. *) + +type 'a next_line = 'a -> ('a * line_span) option +(** The type for getting a new line of input. This is used by certain + multi-line matchers (e.g. raw HTML). *) + +(** {1:ascii US-ASCII matching} *) + +(** US-ASCII matching. *) +module Ascii : sig + val is_control : char -> bool + val is_letter : char -> bool + val is_upper : char -> bool + val is_lower : char -> bool + val is_digit : char -> bool + val is_hex_digit : char -> bool + val hex_digit_to_int : char -> int + val is_alphanum : char -> bool + val is_white : char -> bool + val is_punct : char -> bool + val is_blank : char -> bool + val caseless_starts_with : prefix:string -> string -> bool +end + +(** {1:uchar Unicode matching} *) + +val prev_uchar : string -> first:int -> before:int -> Uchar.t +(** [prev_uchar s ~first ~before] is the first Unicode character before + byte position [before] in the range \[[first];[before-1]\]. If + [before <= first], U+0020 is returned (a Unicode whitespace character). *) + +val next_uchar : string -> last:int -> after:int -> Uchar.t +(** [next_uchar s ~last ~after] is the next Unicode character after + byte position [after] in the range \[[after+1];[last]\]. If [after + >= last], U+0020 is returned (a Unicode whitespace character). *) + +(** {1:content Textual content} *) + +(** Textual content. + + Ensures UTF-8 validity, unescapes, resolves numeric and named character + references. *) +module Text : sig + val utf_8_clean_unesc_unref : + Buffer.t -> string -> first:int -> last:int -> string + (** [utf_8_clean_unesc_unref b s ~first ~last] unescapes CommonMark + escapes, resolves HTML entity and character references in the + given span and replaces U+0000 and UTF-8 decoding errors by + {!Uchar.rep}. [b] is used as scratch space. If [last > first] + or [first] and [last] are not valid indices of [s] is [""]. *) + + val utf_8_clean_unref : + Buffer.t -> string -> first:int -> last:int -> string + (** [utf_8_clean_unref b s ~first ~last] is like + {!utf_8_clean_unesc_unref} but does not unsescape. *) + + val utf_8_clean_raw : + ?pad:int -> Buffer.t -> string -> first:int -> last:int -> string + (** [utf_8_clean_raw b s ~first ~last] replaces U+0000 and UTF-8 + decoding errors by {!Uchar.rep}. [b] is used as scratch space. + [pad] (defaults to [0]) specifies a number of U+0020 spaces to prepend. + If [last > first] or [first] and [last] are not valid indices of + [s] is either [""] or the padded string. *) +end + +(** {1:result Result types} *) + +type indent = int +(** The type for indentation magnitude. *) + +type byte_pos = Textloc.byte_pos +(** The type for positions. *) + +type first = Textloc.byte_pos +(** The type for the first first byte position of a parsed construct. *) + +type last = Textloc.byte_pos +(** The type for the last byte position of a parsed construct. *) + +type next = Textloc.byte_pos +(** The type for a byte position after a parsed construct. The byte position + may be invalid (end of input range) or on the newline. *) + +type heading_level = int +(** The type for heading levels. *) + +(** {1:blanks Newlines, runs, blanks and indents} *) + +val run_of : char:char -> string -> last:byte_pos -> start:byte_pos -> last +(** [run_of ~char s ~last ~start] is the last byte position of a + consecutive run of [char] in the range \[[start];[last]\] or + [start - 1] if [start] is not [char]. *) + +val first_non_blank : string -> last:byte_pos -> start:byte_pos -> byte_pos +(** [first_non_blank s ~last ~start] is the first byte position in the + range \[[start];[last]\] that is not blank and [last + 1] if there + is none. *) + +val first_non_blank_in_span : string -> line_span -> byte_pos +(** [first_non_blank_in_span s span] is + [first_non_blank s ~last:span.last ~start:span.first]. *) + +val last_non_blank : string -> first:byte_pos -> start:byte_pos -> byte_pos +(** [last_non_blank s ~first ~start] is the last position in the + range \[[first];[start]\] that is non blank and [first - 1] if + there is none. *) + +val rev_drop_spaces : string -> first:byte_pos -> start:byte_pos -> byte_pos +(** [rev_drop_spaces s ~first ~start] is the last position in the + range \[[first];[start]\] that is not U+0020 and [first - 1] if + there is none. *) + +val first_non_blank_over_nl : + next_line: 'a next_line -> string -> 'a -> line:line_span -> start:int -> + [ `None + | `This_line of byte_pos + | `Next_line of 'a * line_span * byte_pos ] +(** [first_non_blank_over_nl ~next_line s ~line ~start] is the first + byte position starting with [start] that is not blank in [line] or + on the next line as determined by [next_line]. Returns [`None] if + there is no such position. *) + +val first_non_escaped_char : + char -> string -> last:byte_pos -> start:byte_pos -> byte_pos +(** [first_non_escaped_char c s ~last ~start] is the first byte position + in the range \[[start];[last]\] that has [c] unescaped and [last + 1] + if there is none. *) + +(** {1:autolinks Autolinks} *) + +val autolink_email : string -> last:byte_pos -> start:byte_pos -> last option +(** [autolink_email s ~last ~start] matches an email autolink starting at + [start] in the range \[[start];[last]\] (assumed on the same line). *) + +val autolink_uri : string -> last:byte_pos -> start:byte_pos -> last option +(** [autolink_uri s ~last ~start] matches an URI autolink starting at + [start] in the range \[[start];[last]\] (assumed on the same line). *) + +(** {1:raw_html Raw HTML} *) + +val raw_html : + next_line:'a next_line -> string -> 'a -> line:line_span -> start:byte_pos -> + ('a * line_span * rev_spans * last) option +(** [raw_html ~next_line s lines ~line ~start] matches raw HTML on + line [line] starting at [start]. [next_line] is used to get new + lines on [lines]. Returns [Some (lines, last_line, spans, + last_byte)] with [lines] the lines after consuming the raw HTML, + [last_line] the line where it stops [spans] the byte ranges of [s] + that make up the raw HTML in reverse order and [last_byte] the + last byte included in it (guaranteed to be on [last_line]). *) + +(** {1:link Links} *) + +val link_destination : + string -> last:byte_pos -> start:byte_pos -> (bool * first * last) option +(** [link_destination s ~last ~start] matches a link destination + starting at [start] in the range \[[start];[last]\] (assumed on + the same line). This is [Some (delimited, first, last)] with the + data in \[[first];[last]\] the destination data. [delimited] is + [true] if [first-1] is '<' and [last + 1] is '>'. *) + +val link_title : + next_line:'a next_line -> string -> 'a -> line:line_span -> start:byte_pos -> + ('a * line_span * rev_spans * last) option +(** [link_title ~next_line s lines ~line ~last] is a link title on line [line] + starting at [start]. Returns [Some (lines, last_line, spans, last)] with + [lines] the lines after consuming the title, [last_line] the line where + it stops, [spans] the byte ranges of [s] that make up the title in reverse + order, [last] is on the closing delimiter and guaranteed to be on + [last_line]. *) + +val link_label : + Buffer.t -> next_line:'a next_line -> string -> 'a -> line:line_span -> + start:byte_pos -> ('a * line_span * rev_spans * last * string) option +(** [link_label buf ~next_line s lines ~line ~start] matches a link label + on line [line] starting at [start]. The byte ranges have the label's + content, the string is the normalized label. [buf] is used as scratch + space. *) + +(** {1:leaf_block Leaf blocks} + + Unless otherwise noted [start] is always after leading blanks. *) + +type html_block_end_cond = + [ `End_str of string | `End_cond_1 | `End_blank | `End_blank_7 ] +(** The type for HTML block end conditions. *) + +type line_type = +| Atx_heading_line of heading_level * byte_pos (* after # *) * first * last +| Blank_line +| Block_quote_line +| Fenced_code_block_line of first * last * (first * last) option +| Html_block_line of html_block_end_cond +| Indented_code_block_line +| List_marker_line of ([ `Ordered of int * char | `Unordered of char ] * last) +| Paragraph_line +| Setext_underline_line of heading_level * last +| Thematic_break_line of last +| Ext_table_row of last +| Ext_footnote_label of rev_spans * last * string +| Nomatch (* built-in [None] to avoid option allocs *) + +val thematic_break : string -> last:byte_pos -> start:byte_pos -> line_type +(** [thematic_break s ~last ~start] matches a thematic break in the range + in the range \[[start];[last]\]. The returned position is the last + non-blank. *) + +val atx_heading : + string -> last:byte_pos -> start:byte_pos -> line_type +(** [atx_heading s ~first ~last] is an ATX heading in the range + \[[start];[last]\]. *) + +val setext_heading_underline : + string -> last:byte_pos -> start:byte_pos -> line_type +(** [setext_heading_underline s ~last ~start] is a setext heading + underline in the range \[[start];[last]\]. The returned position + is the last underline char. *) + +val fenced_code_block_start : + string -> last:byte_pos -> start:byte_pos -> line_type +(** [fenced_code_block_start s ~last ~start] is the start of a fenced + code block line in the range \[[start];[last]\]. The first span is + the fence and the second one is the info string (if any). *) + +val fenced_code_block_continue : + fence:char * int -> string -> last:byte_pos -> start:byte_pos -> + [ `Close of first * last | `Code ] +(** [fenced_code_block_continue ~fence s ~last ~start] indicates + whether the fence code continues or closes in the the range + \[[start];[last]\] given the opening [open] which indicates the + indent, fence char and number of fence chars. *) + +val html_block_start : + string -> last:byte_pos -> start:byte_pos -> line_type +(** [html_block_start s ~last ~start] matches the start of an HTML + block starting at [start] in the range \[[start];[last]\] and on + success returns the condition to end it. *) + +val html_block_end : + end_cond:html_block_end_cond -> string -> last:byte_pos -> start:byte_pos -> + bool +(** [html_block ~end_code s ~last ~start] is [true] if the HTML block + end with [end_code] in the the range \[[start];[last]\] *) + +val ext_table_row : string -> last:byte_pos -> start:byte_pos -> line_type +(** [ext_table s ~last ~start] matches a table row in the range + \[[start];[last]\]. The returned position is the rightmost [|]. *) + +val ext_footnote_label : + Buffer.t -> string -> line_pos:Textloc.line_pos -> last:byte_pos -> + start:byte_pos -> line_type +(** [ext_footnote_label s ~last ~start] matches a footnote label the range + \[[start];[last]\]. The returned position is the rightmost [:]. + This remains on the same line. *) + +val could_be_link_reference_definition : + string -> last:byte_pos -> start:byte_pos -> bool +(** [could_be_link_reference_definition s ~last ~start] is [true] if + in the range \[[start];[last]\] could hold a link reference definition. *) + +(** {1:container Container blocks} *) + +val list_marker : + string -> last:byte_pos -> start:byte_pos -> line_type +(** [list_marker s ~last ~start] is a list marker in the range + \[[start];[last]\]. This checks there's at least one space + following unless the item is empty. *) + +val ext_task_marker : + string -> last:byte_pos -> start:byte_pos -> (Uchar.t * last) option +(** [ext_task_marker s ~last ~start] is a list task item marker in the + range \[[start];[last]\]. *) + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.ml new file mode 100644 index 000000000..94469e04b --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.ml @@ -0,0 +1,446 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2023 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +open Cmarkit +module C = Cmarkit_renderer.Context + +(* Renderer state *) + +type indent = +[ `I of int | `L of int * string * int * Uchar.t option | `Q of int +| `Fn of int * Label.t ] + +type state = + { nl : string; (* newline to output. *) + mutable sot : bool; (* start of text *) + mutable indents : indent list; (* indentation stack. *) } + +let state : state C.State.t = C.State.make () +let get_state c = C.State.get c state +let init_context c d = + C.State.set c state (Some { nl = Cmarkit.Doc.nl d; sot = true; indents = [] }) + +(* Escaping *) + +module Char_set = Set.Make (Char) + +let esc_angles = Char_set.of_list ['<'; '>'] +let esc_parens = Char_set.of_list ['(';')' ] +let esc_quote = Char_set.singleton '\'' +let esc_dquote = Char_set.singleton '\"' +let esc_link_label = Char_set.of_list ['['; ']'; '\\'] + +let buffer_add_dec_esc b c = + Buffer.add_string b "&#"; + Buffer.add_string b (Int.to_string (Char.code c)); + Buffer.add_char b ';' + +let buffer_add_bslash_esc b c = + Buffer.add_char b '\\'; Buffer.add_char b c + +let buffer_add_escaped_string ?(esc_ctrl = true) b cs s = + let flush b max start i = + if start <= max then Buffer.add_substring b s start (i - start) + in + let rec loop b s max start i = + if i > max then flush b max start i else + let next = i + 1 in + let c = String.get s i in + if Char_set.mem c cs then + (flush b max start i; buffer_add_bslash_esc b c; loop b s max next next) + else if esc_ctrl && Cmarkit_base.Ascii.is_control c then + (flush b max start i; buffer_add_dec_esc b c; loop b s max next next) + else loop b s max start next + in + loop b s (String.length s - 1) 0 0 + +let escaped_string ?esc_ctrl c cs s = + buffer_add_escaped_string ?esc_ctrl (C.buffer c) cs s + +let buffer_add_escaped_text b s = + let esc_first b s = match s.[0] with + | '-' | '+' | '_' | '=' as c -> + Buffer.add_char b '\\'; Buffer.add_char b c; true + | _ -> false + in + let esc_amp s max next = + next <= max && (Cmarkit_base.Ascii.is_letter s.[next] || s.[next] = '#') + in + let esc_tilde s max prev next = + not (Char.equal prev '~') && next <= max && s.[next] = '~' + in + let esc_item_marker s i prev = + if prev <> '1' then false else + let k = ref (i - 2) in + while !k >= 0 && s.[!k] = '0' do decr k done; + !k < 0 + in + let flush b max start i = + if start <= max then Buffer.add_substring b s start (i - start) + in + let rec loop b s max start prev i = + if i > max then flush b max start i else + let next = i + 1 in + let c = String.get s i in + if Cmarkit_base.Ascii.is_control c then + (flush b max start i; buffer_add_dec_esc b c; loop b s max next c next) + else match c with + | '#' | '`' when not (Char.equal prev c) -> + flush b max start i; buffer_add_bslash_esc b c; loop b s max next c next + | '~' when esc_tilde s max prev next -> + flush b max start i; buffer_add_bslash_esc b c; loop b s max next c next + | '&' when esc_amp s max next -> + flush b max start i; buffer_add_bslash_esc b c; loop b s max next c next + | '!' when i = max -> + flush b max start i; buffer_add_bslash_esc b c; loop b s max next c next + | '.' | ')' when esc_item_marker s i prev -> + flush b max start i; buffer_add_bslash_esc b c; loop b s max next c next + | '\\' | '<' | '>' | '[' | ']' | '*' | '_' | '$' | '|' -> + flush b max start i; buffer_add_bslash_esc b c; loop b s max next c next + | _ -> + loop b s max start c next + in + let max = String.length s - 1 in + if max < 0 then () else + if esc_first b s then loop b s max 1 s.[0] 1 else loop b s max 0 '\x00' 0 + +let escaped_text c s = buffer_add_escaped_text (C.buffer c) s + +(* Newlines, indentation and multi-line layouts of raw data. *) + +let string_node_option c = function None -> () | Some (s, _) -> C.string c s +let nchars c n char = for i = 1 to n do C.byte c char done + +let newline c = + (* Block generally introduce newlines, except the first one. *) + let st = get_state c in if st.sot then st.sot <- false else C.string c st.nl + +let push_indent c n = let st = get_state c in st.indents <- n :: st.indents +let pop_indent c = + let st = get_state c in + match st.indents with [] -> () | ns -> st.indents <- (List.tl ns) + +let rec indent c = + let rec loop c acc = function + | [] -> acc + | `I n as i :: is -> + nchars c n ' '; loop c (i :: acc) is + | `Q n as i :: is -> + nchars c n ' '; C.byte c '>'; C.byte c ' '; loop c (i :: acc) is + | `L (before, m, after, task) :: is -> + nchars c before ' '; C.string c m; nchars c after ' '; + let after = match task with + | None -> after + | Some u -> C.byte c '['; C.utf_8_uchar c u; C.string c "] "; after + 4 + in + (* On the next call we'll just indent for the list item *) + loop c (`I (before + String.length m + after) :: acc) is + | `Fn (before, label) :: is -> + nchars c before ' '; + C.byte c '['; link_label_lines c (Label.text label); + C.string c "]:"; + (* On the next call we'll just indent to ^ for the footnote *) + loop c (`I (before + 1) :: acc) is + in + let st = get_state c in + st.indents <- loop c [] (List.rev st.indents) + +and link_label_lines c lines = + escaped_tight_block_lines c esc_link_label lines + +and escaped_tight_block_lines c cs = function +| [] -> () | l :: ls -> + let tight c (blanks, (l, _)) = C.string c blanks; escaped_string c cs l in + let line c l = newline c; indent c; tight c l in + tight c l; List.iter (line c) ls + +let block_lines c = function +| [] -> () | (l, _) :: ls -> + let line c (l, _) = newline c; indent c; C.string c l in + C.string c l; List.iter (line c) ls + +let tight_block_lines c = function +| [] -> () | l :: ls -> + let tight c (blanks, (l, _)) = C.string c blanks; C.string c l in + let line c l = newline c; indent c; tight c l in + tight c l; List.iter (line c) ls + +(* Inline rendering *) + +let autolink c a = + C.byte c '<'; C.string c (fst (Inline.Autolink.link a)); C.byte c '>' + +let break c b = + let layout_before = fst (Inline.Break.layout_before b) in + let layout_after = fst (Inline.Break.layout_after b) in + let before, after = match Inline.Break.type' b with + | `Soft -> layout_before, layout_after + | `Hard -> (if layout_before = "" then " " else layout_before), layout_after + in + C.string c before; newline c; indent c; C.string c after + +let code_span c cs = + nchars c (Inline.Code_span.backtick_count cs) '`'; + tight_block_lines c (Inline.Code_span.code_layout cs); + nchars c (Inline.Code_span.backtick_count cs) '`' + +let emphasis c e = + let delim = Inline.Emphasis.delim e and i = Inline.Emphasis.inline e in + let delim = if not (delim = '*' || delim = '_') then '*' else delim in + C.byte c delim; C.inline c i; C.byte c delim + +let strong_emphasis c e = + let delim = Inline.Emphasis.delim e and i = Inline.Emphasis.inline e in + let delim = if not (delim = '*' || delim = '_') then '*' else delim in + C.byte c delim; C.byte c delim; C.inline c i; C.byte c delim; C.byte c delim + +let link_title c open_delim title = match title with +| None -> () +| Some lines -> + let open', close, escapes = match open_delim with + | '\"' as delim -> delim, delim, esc_dquote + | '\'' as delim -> delim, delim, esc_quote + | '(' -> '(', ')', esc_parens + | _ -> '\"', '\"', esc_dquote + in + C.byte c open'; escaped_tight_block_lines c escapes lines; C.byte c close + +let link_definition c ld = + let layout = Link_definition.layout ld in + block_lines c layout.before_dest; + begin match Link_definition.dest ld with + | None -> () + | Some (dest, _) -> + if layout.angled_dest + then (C.byte c '<'; escaped_string c esc_angles dest; C.byte c '>') + else (escaped_string c esc_parens dest) + end; + if layout.after_dest = [] && + Option.is_some (Link_definition.dest ld) && + Option.is_some (Link_definition.title ld) + then C.byte c ' ' (* at least a space is needed *); + block_lines c layout.after_dest; + link_title c layout.title_open_delim (Link_definition.title ld); + block_lines c layout.after_title + +let link c l = match Inline.Link.reference l with +| `Inline (ld, _) -> + C.byte c '['; C.inline c (Inline.Link.text l); C.byte c ']'; + C.byte c '('; link_definition c ld; C.byte c ')' +| `Ref (`Shortcut, label, _) -> + C.byte c '['; link_label_lines c (Label.text label); C.byte c ']'; +| `Ref (`Collapsed, label, _) -> + C.byte c '['; link_label_lines c (Label.text label); C.byte c ']'; + C.string c "[]" +| `Ref (`Full, label, _) -> + C.byte c '['; C.inline c (Inline.Link.text l); C.byte c ']'; + C.byte c '['; link_label_lines c (Label.text label); C.byte c ']' + +let inlines c is = List.iter (C.inline c) is +let image c l = C.byte c '!'; link c l +let raw_html c h = tight_block_lines c h +let text c t = escaped_text c t + +let strikethrough c s = + let i = Inline.Strikethrough.inline s in + C.string c "~~"; C.inline c i; C.string c "~~" + +let math_span c ms = + let sep = if Inline.Math_span.display ms then "$$" else "$" in + C.string c sep; + tight_block_lines c (Inline.Math_span.tex_layout ms); + C.string c sep + +let inline c = function +| Inline.Autolink (a, _) -> autolink c a; true +| Inline.Break (b, _) -> break c b; true +| Inline.Code_span (cs, _) -> code_span c cs; true +| Inline.Emphasis (e, _) -> emphasis c e; true +| Inline.Image (i, _) -> image c i; true +| Inline.Inlines (is, _) -> inlines c is; true +| Inline.Link (l, _) -> link c l; true +| Inline.Raw_html (html, _) -> raw_html c html; true +| Inline.Strong_emphasis (e, _) -> strong_emphasis c e; true +| Inline.Text (t, _) -> text c t; true +| Inline.Ext_strikethrough (s, _) -> strikethrough c s; true +| Inline.Ext_math_span (m, _) -> math_span c m; true +| _ -> C.string c ""; true + +(* Block rendering *) + +let blank_line c l = newline c; indent c; C.string c l + +let block_quote c bq = + push_indent c (`Q (Block.Block_quote.indent bq)); + C.block c (Block.Block_quote.block bq); pop_indent c + +let code_block c cb = match Block.Code_block.layout cb with +| `Indented -> + newline c; push_indent c (`I 4); indent c; + block_lines c (Block.Code_block.code cb); + pop_indent c +| `Fenced f -> + let opening, closing = match fst f.opening_fence with + | "" -> + let char, len = Block.Code_block.make_fence cb in + let f = String.make len char in + f, Some f + | opening -> opening, Option.map fst f.closing_fence + in + let info_string = Block.Code_block.info_string cb in + let code = Block.Code_block.code cb in + newline c; push_indent c (`I f.indent); indent c; + C.string c opening; string_node_option c info_string; + if code <> [] then (newline c; indent c; block_lines c code); + (match closing with + | None -> () | Some close -> newline c; indent c; C.string c close); + pop_indent c + +let heading c h = + newline c; indent c; + match (Block.Heading.layout h) with + | `Atx { indent; after_opening; closing } -> + let inline = Block.Heading.inline h in + nchars c indent ' '; + nchars c (Block.Heading.level h) '#'; + (if after_opening = "" && not (Cmarkit.Inline.is_empty inline) + then C.byte c ' ' else C.string c after_opening); + C.inline c inline; + C.string c closing + | `Setext l -> + let u = match Block.Heading.level h with 1 -> '=' | 2 -> '-' | _ -> '-' in + nchars c l.leading_indent ' '; + C.inline c (Block.Heading.inline h); + C.string c l.trailing_blanks; + newline c; indent c; + nchars c l.underline_indent ' '; + nchars c (fst l.underline_count) u; + C.string c l.underline_blanks + +let html_block c h = newline c; indent c; block_lines c h + +let link_reference_definition c ld = + newline c; indent c; nchars c (Link_definition.layout ld).indent ' '; + C.byte c '['; + begin match Link_definition.label ld with + | None -> () + | Some label -> escaped_tight_block_lines c esc_link_label (Label.text label) + end; + C.string c "]:"; + link_definition c ld + +let unordered_item c marker (i, _) = + let before = Block.List_item.before_marker i in + let after = Block.List_item.after_marker i in + let task = Option.map fst (Block.List_item.ext_task_marker i) in + push_indent c (`L (before, marker, after, task)); + C.block c (Block.List_item.block i); + pop_indent c + +let ordered_item c sep num (i, _) = + let before = Block.List_item.before_marker i in + let marker = fst (Block.List_item.marker i) in + let marker = if marker = "" then Int.to_string num ^ sep else marker in + let after = Block.List_item.after_marker i in + let task = Option.map fst (Block.List_item.ext_task_marker i) in + push_indent c (`L (before, marker, after, task)); + C.block c (Block.List_item.block i); + pop_indent c; + num + 1 + +let list c l = match Block.List'.type' l with +| `Unordered marker -> + let marker = match marker with '*' | '-' | '+' -> marker | _ -> '*' in + let marker = String.make 1 marker in + List.iter (unordered_item c marker) (Block.List'.items l) +| `Ordered (start, sep) -> + let sep = if sep <> '.' && sep <> ')' then '.' else sep in + let sep = String.make 1 sep in + ignore (List.fold_left (ordered_item c sep) start (Block.List'.items l)) + +let paragraph c p = + newline c; indent c; + nchars c (Block.Paragraph.leading_indent p) ' '; + C.inline c (Block.Paragraph.inline p); + C.string c (Block.Paragraph.trailing_blanks p) + +let thematic_break c t = + let ind = Block.Thematic_break.indent t in + let break = Block.Thematic_break.layout t in + let break = if break = "" then "---" else break in + newline c; indent c; nchars c ind ' '; C.string c break + +let table c t = + let col c (i, (before, after)) = + C.byte c '|'; C.string c before; C.inline c i; C.string c after + in + let sep c ((align, len), _) = + C.byte c '|'; + match align with + | None -> nchars c len '-' + | Some `Left -> C.byte c ':'; nchars c len '-' + | Some `Center -> C.byte c ':'; nchars c len '-'; C.byte c ':' + | Some `Right -> nchars c len '-'; C.byte c ':' + in + let row c = function + | (`Header cols, _), blanks | (`Data cols, _), blanks -> + newline c; indent c; + (if cols = [] then C.byte c '|' else List.iter (col c) cols); + C.byte c '|'; C.string c blanks + | (`Sep seps, _), blanks -> + newline c; indent c; + (if seps = [] then C.byte c '|' else List.iter (sep c) seps); + C.byte c '|'; C.string c blanks + in + push_indent c (`I (Block.Table.indent t)); + List.iter (row c) (Block.Table.rows t); + pop_indent c + +let footnote c fn = + push_indent c (`Fn (Block.Footnote.indent fn, Block.Footnote.label fn)); + C.block c (Block.Footnote.block fn); + pop_indent c + +let block c = function +| Block.Blank_line (l, _) -> blank_line c l; true +| Block.Block_quote (b, _) -> block_quote c b; true +| Block.Blocks (bs, _) -> List.iter (C.block c) bs; true +| Block.Code_block (cb, _) -> code_block c cb; true +| Block.Heading (h, _) -> heading c h; true +| Block.Html_block (h, _) -> html_block c h; true +| Block.Link_reference_definition (ld, _) -> + link_reference_definition c ld; true +| Block.List (l, _) -> list c l; true +| Block.Paragraph (p, _) -> paragraph c p; true +| Block.Thematic_break (t, _) -> thematic_break c t; true +| Block.Ext_math_block (cb, _) -> code_block c cb; true +| Block.Ext_table (t, _) -> table c t; true +| Block.Ext_footnote_definition (t, _) -> footnote c t; true +| _ -> newline c; indent c; C.string c ""; true + +(* Document rendering *) + +let doc c d = C.block c (Doc.block d); true + +(* Renderer *) + +let renderer () = Cmarkit_renderer.make ~init_context ~inline ~block ~doc () +let of_doc d = Cmarkit_renderer.doc_to_string (renderer ()) d + +(*--------------------------------------------------------------------------- + Copyright (c) 2023 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.mli b/ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.mli new file mode 100644 index 000000000..4dd78d93a --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_commonmark.mli @@ -0,0 +1,266 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2023 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(** Rendering CommonMark to CommonMark. + + Generates CommonMark. If your document was parsed with + [layout:true], it preserves most of the source layout on output. + This won't be perfect, make sure you understand the + {{!layout}details} before reporting issues. + + See {{!page-index.quick}an example}. + + {b Warning.} Rendering outputs are unstable. They may be tweaked + even between minor versions of the library. *) + +(** {1:rendering Rendering} *) + +val of_doc : Cmarkit.Doc.t -> string +(** [of_doc d] is a CommonMark document for [d]. See {!val-renderer} for + more details. *) + +(** {1:renderer Renderer} *) + +val renderer : unit -> Cmarkit_renderer.t +(** [renderer ()] is the default CommonMark renderer. This renders + the strict CommonMark abstract syntax tree and the supported + Cmarkit {{!Cmarkit.extensions}extensions}. + + The inline, block and document renderers always return + [true]. Unknown block and inline values are rendered by an HTML + comment (as permitted by the CommonMark specification). + + See {{!Cmarkit_renderer.example}this example} to extend or + selectively override the renderer. *) + +(** {1:render Render functions} + + Only useful if you extend the renderer. *) + +(** {2:indents Newlines and indentation} *) + +val newline : Cmarkit_renderer.context -> unit +(** [newline c] starts a new line, except on the first call on [c] which is + a nop. *) + +type indent = +[ `I of int (** Identation by given amount. *) +| `L of int * string * int * Uchar.t option + (** Indent before, list marker, indent after, list item task extension *) +| `Q of int (** Identation followed by a block quote marker and a space *) +| `Fn of int * Cmarkit.Label.t (** Indent before, label (footnote extension)*)] +(** The type for specifying block indentation. *) + +val push_indent : Cmarkit_renderer.context -> indent -> unit +(** [push_indent c i] pushes [i] on the current indentation of [c]. This + does not render anything. *) + +val pop_indent : Cmarkit_renderer.context -> unit +(** [pop_indent c] pops the last indentation pushed on [c]. This + does not render anything. *) + +val indent : Cmarkit_renderer.context -> unit +(** [indent i c] outputs current indentation on [c]. Note that [`L] + and [`Fn] get replaced by an [`I] indent on subsequent lines, that + is the list or foonote marker is output only once. *) + +(** {2:bslash Backslash escaping} *) + +module Char_set : Set.S with type elt = char +(** Sets of US-ASCII characters. *) + +val escaped_string : + ?esc_ctrl:bool -> Cmarkit_renderer.context -> Char_set.t -> string -> unit +(** [escaped_string ?esc_ctrl c cs s] renders [s] on [c] with + characters in [cs] backslash escaped. If [esc_ctrl] is [true] + (default) {{:https://spec.commonmark.org/0.30/#ascii-control-character} + ASCII control characters} are escaped to decimal escapes. *) + +val buffer_add_escaped_string : + ?esc_ctrl:bool -> Buffer.t -> Char_set.t -> string -> unit +(** [buffer_add_escaped_string b cs s] is {!escaped_string} but + appends to a buffer value. *) + +val escaped_text : Cmarkit_renderer.context -> string -> unit +(** [escaped_text c s] renders [s] on [c] trying to be smart about escaping + Commonmark structural symbols for {!Cmarkit.Inline.extension-Text} inlines. + We assume text can be anywhere in a sequence of inlines and in particular + that it can start a line. This function also takes into account + the existence of the {{!Cmarkit.extensions}extensions}. + + As such we escape: + + {ul + {- These block markers: [-] [+] [_] [=] only if present at [s.[0]].} + {- Only the first of runs of them: [#] [`]} + {- Only the first of a run longer than 1: [~] + ({{!Cmarkit.ext_strikethrough}strikethrough extension}).} + {- [&] if followed by an US-ASCII letter or [#].} + {- [!] if it is the last character of [s].} + {- [.] or [)] only if preceeded by a single [1] and zero or more [0] to + the start of text.} + {- Everywhere, [*] [_] [\ ] [<] [>] [\[] [\]], + {{:https://spec.commonmark.org/0.30/#ascii-control-character} + ASCII control characters}, [$] ({{!Cmarkit.ext_math_inline}inline math + extension}), [|] ({{!Cmarkit.ext_tables}table extension}) }} *) + +val buffer_add_escaped_text : Buffer.t -> string -> unit +(** [buffer_add_escaped_text b s] is {!escaped_text} but appends to + a buffer value. *) + +(** {1:layout Source layout preservation} + + The abstract syntax tree has a few block cases and data fields to + represent the source document layout. This allows to update + CommonMark documents without normalizing them too much when they + are {{!Cmarkit.Doc.of_string}parsed} with [layout:true]. + + To keep things reasonably simple a few things are {b not} attempted like: + + {ol + {- Preserving entities and character references.} + {- Preserving the exact line by line indentation layout of container + blocks.} + {- Preserving lazy continuation lines.} + {- Keeping track of used newlines except for the first one.} + {- Preserving layout source location information when it can be + reconstructed from the document data source location.}} + + In general we try to keep the following desirable properties + for the abstract syntax tree definition: + + {ol + {- Layout information should not interfere with document data or + be affected by it. Otherwise data updates also needs to update + the layout data, which is error prone and unconvenient.} + {- Abstract syntax trees resulting from the source document, from + renders of the source document parsed with or without + [layout:tree] should all render to the same HTML.}} + + In practice CommonMark being not context free point 1. is not + always achieved. In particular in {!Cmarkit.Inline.extension-Code_span} the + number of delimiting backticks depends on the code content + ({!Cmarkit.Inline.Code_span.of_string}, computes that for you). + + The renderer performs almost no checks on the layout data. You + should be careful if you fill these yourself since you could + generate CommonMark that will be misinterpreted. Layout + data of pristine nodes coming out of {!Cmarkit.Doc.of_string}, created + with the {!Cmarkit.Inline} and {!Cmarkit.Block} constructors + should not need your attention (respect their input constraints + though). *) + +(** {2:rendering_class Classifying renderings} + + We say that a CommonMark render: + {ul + {- is {e correct}, if the result renders the same HTML + as the source document. This can be checked with the + [cmarkit] tool included in the distribution: + {[ + cmarkit commonmark --html-diff mydoc.md + ]} + If a difference shows up, the rendering is said to be {e incorrect}.} + {- {e round trips}, if the result is byte-for-byte equal to the + source document. This can be checked with the [cmarkit] tool + included in the distribution: + {[ + cmarkit commonmark --diff mydoc.md + ]} + If a difference shows up, the rendering does not round trip but + it may still be correct.}} *) + +(** {2:known_diffs Known correct differences} + + In general lack of round trip is due to: + + {ul + {- Loss of layout on input (see above).} + {- Eager escaping of CommonMark delimiters (the escape strategy + is {{!escaped_text}here}).} + {- Churn around blank lines which can be part of blocks without + adhering to their structural convention.}} + + Please do not report issues for differences that are due to the + following: + + {ol + {- Source US-ASCII control characters in textual data render as decimal + character references in the output.} + {- Source entity and character references are lost during parsing and + thus replaced by their definition in the output.} + {- Source tab stop advances may be replaced by spaces in the output.} + {- Source escaped characters may end up unescaped in the output.} + {- Source unescaped characters may end up escaped in the output.} + {- Source lazy continuation lines are made part of blocks in the output.} + {- Source indented blank lines following indented code blocks + lose four spaces of indentation (as per specification these are not + part of the block).} + {- Source unindented blank lines in indented code blocks are indented + in the output.} + {- Source fenced code block indentation is retained from the opening + fence and used for the following lines in the output.} + {- Source block quote indentation is retained from the first line + and used for the following lines in the output. The optional space + following the quotation mark ['>'] is made mandatory. } + {- Source list item indentation is regularized, in particular blank lines + will indent.} + {- Source list item that start with an empty line get a space after + their marker.} + {- The newline used in the output is the one found in the rendered + {!Cmarkit.Doc.t} value.}} + + {e Simple} and {e implemented} round trip improvements to the + renderer are welcome. + + {2:known_incorrect Known incorrect renderings} + + Please do not report issues incorrect renderings that are due to the + following (and unlikely to be fixed): + + {ol + {- Use of entities and character references around structural + CommonMark symbols can make things go wrong. These get resolved + after inline parsing because they can't be used to stand for + structural CommonMark symbols, however once they have been resolved they + can interact with parsing. Here is an example: + {[ + *emph * + ]} + It parses as emphasis. But if we render it to CommonMark + non-breaking space renders as is and we get: + {[ + *emph * + ]} + which no longer parses as emphasis. + + Note in this particular case it is possible to do something + about it by being smarter about the context when escaping. However + there's a trade-off between renderer complexity and the (conjectured) + paucity of these cases.} + } + + Otherwise, if you spot an incorrect rendering please report a minimal + reproduction case. + + {e Simple} and {e implemented} round trip improvements to the + renderer are welcome. + *) + +(*--------------------------------------------------------------------------- + Copyright (c) 2023 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_data.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data.ml new file mode 100644 index 000000000..e3bc3716d --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data.ml @@ -0,0 +1,59 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(* Unicode character data + + XXX. For now we kept that simple and use the Stdlib's Set and + Maps. Bring in Uucp's tmapbool and tmap if that turns out to be too + costly in space or time. *) + +module Uset = struct + include Set.Make (Uchar) + let of_array = + let add acc u = add (Uchar.unsafe_of_int u) acc in + Array.fold_left add empty +end + +module Umap = struct + include Map.Make (Uchar) + let of_array = + let add acc (u, f) = add (Uchar.unsafe_of_int u) f acc in + Array.fold_left add empty +end + +let whitespace_uset = Uset.of_array Cmarkit_data_uchar.whitespace +let punctuation_uset = Uset.of_array Cmarkit_data_uchar.punctuation +let case_fold_umap = Umap.of_array Cmarkit_data_uchar.case_fold + +let unicode_version = Cmarkit_data_uchar.unicode_version +let is_unicode_whitespace u = Uset.mem u whitespace_uset +let is_unicode_punctuation u = Uset.mem u punctuation_uset +let unicode_case_fold u = Umap.find_opt u case_fold_umap + +(* HTML entity data. *) + +module String_map = Map.Make (String) + +let html_entity_smap = + let add acc (entity, rep) = String_map.add entity rep acc in + Array.fold_left add String_map.empty Cmarkit_data_html.entities + +let html_entity e = String_map.find_opt e html_entity_smap + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_data.mli b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data.mli new file mode 100644 index 000000000..7cd98bcb9 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data.mli @@ -0,0 +1,50 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(** Data needed for CommonMark parsing. *) + +(** {1:unicode Unicode data} *) + +val unicode_version : string +(** [unicode_version] is the supported Unicode version. *) + +val is_unicode_whitespace : Uchar.t -> bool +(** [is_unicode_whitespace u] is [true] iff + [u] is a CommonMark + {{:https://spec.commonmark.org/current/#unicode-whitespace-character} + Unicode whitespace character}. *) + +val is_unicode_punctuation : Uchar.t -> bool +(** [is_unicode_punctuation u] is [true] iff + [u] is a CommonMark + {{:https://spec.commonmark.org/current/#unicode-punctuation-character} + Unicode punctuation character}. *) + +val unicode_case_fold : Uchar.t -> string option +(** [unicode_case_fold u] is the UTF-8 encoding of [u]'s Unicode + {{:http://www.unicode.org/reports/tr44/#Case_Folding}case fold} or + [None] if [u] case folds to itself. *) + +(** {1:html HTML data} *) + +val html_entity : string -> string option +(** [html_entity e] is the UTF-8 data for of the HTML entity {e name} + (without [&] and [;]) [e]. *) + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_data_html.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data_html.ml new file mode 100644 index 000000000..c53b62244 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data_html.ml @@ -0,0 +1,2165 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(* Munged from https://html.spec.whatwg.org/entities.json + Note that some entities map to sequences of characters. *) + +let entities = [| +(* Generated with: +cat entities.json | \ +# Extract entities and code points +jq -r \ +'map_values(.codepoints)|to_entries|.[]|[.key,(.value|join(" "))]|join (", ")' | \ +# Drop entities from the list that are not closed by a ';' +grep ';' | \ +# Keep only the name of entities +sed 's/[&;]//g' | +# Replace decimal code points by OCaml Unicode escapes +perl -pe 's/ \d{1,}/sprintf " \\u{%04X}", $&/ge' | +# Convert the lines to OCaml syntax +sed 's/ //g;s/^/"/g;s/$/";/g;s/;,/", "/g' +*) +"AElig", "\u{00C6}"; +"AMP", "\u{0026}"; +"Aacute", "\u{00C1}"; +"Abreve", "\u{0102}"; +"Acirc", "\u{00C2}"; +"Acy", "\u{0410}"; +"Afr", "\u{1D504}"; +"Agrave", "\u{00C0}"; +"Alpha", "\u{0391}"; +"Amacr", "\u{0100}"; +"And", "\u{2A53}"; +"Aogon", "\u{0104}"; +"Aopf", "\u{1D538}"; +"ApplyFunction", "\u{2061}"; +"Aring", "\u{00C5}"; +"Ascr", "\u{1D49C}"; +"Assign", "\u{2254}"; +"Atilde", "\u{00C3}"; +"Auml", "\u{00C4}"; +"Backslash", "\u{2216}"; +"Barv", "\u{2AE7}"; +"Barwed", "\u{2306}"; +"Bcy", "\u{0411}"; +"Because", "\u{2235}"; +"Bernoullis", "\u{212C}"; +"Beta", "\u{0392}"; +"Bfr", "\u{1D505}"; +"Bopf", "\u{1D539}"; +"Breve", "\u{02D8}"; +"Bscr", "\u{212C}"; +"Bumpeq", "\u{224E}"; +"CHcy", "\u{0427}"; +"COPY", "\u{00A9}"; +"Cacute", "\u{0106}"; +"Cap", "\u{22D2}"; +"CapitalDifferentialD", "\u{2145}"; +"Cayleys", "\u{212D}"; +"Ccaron", "\u{010C}"; +"Ccedil", "\u{00C7}"; +"Ccirc", "\u{0108}"; +"Cconint", "\u{2230}"; +"Cdot", "\u{010A}"; +"Cedilla", "\u{00B8}"; +"CenterDot", "\u{00B7}"; +"Cfr", "\u{212D}"; +"Chi", "\u{03A7}"; +"CircleDot", "\u{2299}"; +"CircleMinus", "\u{2296}"; +"CirclePlus", "\u{2295}"; +"CircleTimes", "\u{2297}"; +"ClockwiseContourIntegral", "\u{2232}"; +"CloseCurlyDoubleQuote", "\u{201D}"; +"CloseCurlyQuote", "\u{2019}"; +"Colon", "\u{2237}"; +"Colone", "\u{2A74}"; +"Congruent", "\u{2261}"; +"Conint", "\u{222F}"; +"ContourIntegral", "\u{222E}"; +"Copf", "\u{2102}"; +"Coproduct", "\u{2210}"; +"CounterClockwiseContourIntegral", "\u{2233}"; +"Cross", "\u{2A2F}"; +"Cscr", "\u{1D49E}"; +"Cup", "\u{22D3}"; +"CupCap", "\u{224D}"; +"DD", "\u{2145}"; +"DDotrahd", "\u{2911}"; +"DJcy", "\u{0402}"; +"DScy", "\u{0405}"; +"DZcy", "\u{040F}"; +"Dagger", "\u{2021}"; +"Darr", "\u{21A1}"; +"Dashv", "\u{2AE4}"; +"Dcaron", "\u{010E}"; +"Dcy", "\u{0414}"; +"Del", "\u{2207}"; +"Delta", "\u{0394}"; +"Dfr", "\u{1D507}"; +"DiacriticalAcute", "\u{00B4}"; +"DiacriticalDot", "\u{02D9}"; +"DiacriticalDoubleAcute", "\u{02DD}"; +"DiacriticalGrave", "\u{0060}"; +"DiacriticalTilde", "\u{02DC}"; +"Diamond", "\u{22C4}"; +"DifferentialD", "\u{2146}"; +"Dopf", "\u{1D53B}"; +"Dot", "\u{00A8}"; +"DotDot", "\u{20DC}"; +"DotEqual", "\u{2250}"; +"DoubleContourIntegral", "\u{222F}"; +"DoubleDot", "\u{00A8}"; +"DoubleDownArrow", "\u{21D3}"; +"DoubleLeftArrow", "\u{21D0}"; +"DoubleLeftRightArrow", "\u{21D4}"; +"DoubleLeftTee", "\u{2AE4}"; +"DoubleLongLeftArrow", "\u{27F8}"; +"DoubleLongLeftRightArrow", "\u{27FA}"; +"DoubleLongRightArrow", "\u{27F9}"; +"DoubleRightArrow", "\u{21D2}"; +"DoubleRightTee", "\u{22A8}"; +"DoubleUpArrow", "\u{21D1}"; +"DoubleUpDownArrow", "\u{21D5}"; +"DoubleVerticalBar", "\u{2225}"; +"DownArrow", "\u{2193}"; +"DownArrowBar", "\u{2913}"; +"DownArrowUpArrow", "\u{21F5}"; +"DownBreve", "\u{0311}"; +"DownLeftRightVector", "\u{2950}"; +"DownLeftTeeVector", "\u{295E}"; +"DownLeftVector", "\u{21BD}"; +"DownLeftVectorBar", "\u{2956}"; +"DownRightTeeVector", "\u{295F}"; +"DownRightVector", "\u{21C1}"; +"DownRightVectorBar", "\u{2957}"; +"DownTee", "\u{22A4}"; +"DownTeeArrow", "\u{21A7}"; +"Downarrow", "\u{21D3}"; +"Dscr", "\u{1D49F}"; +"Dstrok", "\u{0110}"; +"ENG", "\u{014A}"; +"ETH", "\u{00D0}"; +"Eacute", "\u{00C9}"; +"Ecaron", "\u{011A}"; +"Ecirc", "\u{00CA}"; +"Ecy", "\u{042D}"; +"Edot", "\u{0116}"; +"Efr", "\u{1D508}"; +"Egrave", "\u{00C8}"; +"Element", "\u{2208}"; +"Emacr", "\u{0112}"; +"EmptySmallSquare", "\u{25FB}"; +"EmptyVerySmallSquare", "\u{25AB}"; +"Eogon", "\u{0118}"; +"Eopf", "\u{1D53C}"; +"Epsilon", "\u{0395}"; +"Equal", "\u{2A75}"; +"EqualTilde", "\u{2242}"; +"Equilibrium", "\u{21CC}"; +"Escr", "\u{2130}"; +"Esim", "\u{2A73}"; +"Eta", "\u{0397}"; +"Euml", "\u{00CB}"; +"Exists", "\u{2203}"; +"ExponentialE", "\u{2147}"; +"Fcy", "\u{0424}"; +"Ffr", "\u{1D509}"; +"FilledSmallSquare", "\u{25FC}"; +"FilledVerySmallSquare", "\u{25AA}"; +"Fopf", "\u{1D53D}"; +"ForAll", "\u{2200}"; +"Fouriertrf", "\u{2131}"; +"Fscr", "\u{2131}"; +"GJcy", "\u{0403}"; +"GT", "\u{003E}"; +"Gamma", "\u{0393}"; +"Gammad", "\u{03DC}"; +"Gbreve", "\u{011E}"; +"Gcedil", "\u{0122}"; +"Gcirc", "\u{011C}"; +"Gcy", "\u{0413}"; +"Gdot", "\u{0120}"; +"Gfr", "\u{1D50A}"; +"Gg", "\u{22D9}"; +"Gopf", "\u{1D53E}"; +"GreaterEqual", "\u{2265}"; +"GreaterEqualLess", "\u{22DB}"; +"GreaterFullEqual", "\u{2267}"; +"GreaterGreater", "\u{2AA2}"; +"GreaterLess", "\u{2277}"; +"GreaterSlantEqual", "\u{2A7E}"; +"GreaterTilde", "\u{2273}"; +"Gscr", "\u{1D4A2}"; +"Gt", "\u{226B}"; +"HARDcy", "\u{042A}"; +"Hacek", "\u{02C7}"; +"Hat", "\u{005E}"; +"Hcirc", "\u{0124}"; +"Hfr", "\u{210C}"; +"HilbertSpace", "\u{210B}"; +"Hopf", "\u{210D}"; +"HorizontalLine", "\u{2500}"; +"Hscr", "\u{210B}"; +"Hstrok", "\u{0126}"; +"HumpDownHump", "\u{224E}"; +"HumpEqual", "\u{224F}"; +"IEcy", "\u{0415}"; +"IJlig", "\u{0132}"; +"IOcy", "\u{0401}"; +"Iacute", "\u{00CD}"; +"Icirc", "\u{00CE}"; +"Icy", "\u{0418}"; +"Idot", "\u{0130}"; +"Ifr", "\u{2111}"; +"Igrave", "\u{00CC}"; +"Im", "\u{2111}"; +"Imacr", "\u{012A}"; +"ImaginaryI", "\u{2148}"; +"Implies", "\u{21D2}"; +"Int", "\u{222C}"; +"Integral", "\u{222B}"; +"Intersection", "\u{22C2}"; +"InvisibleComma", "\u{2063}"; +"InvisibleTimes", "\u{2062}"; +"Iogon", "\u{012E}"; +"Iopf", "\u{1D540}"; +"Iota", "\u{0399}"; +"Iscr", "\u{2110}"; +"Itilde", "\u{0128}"; +"Iukcy", "\u{0406}"; +"Iuml", "\u{00CF}"; +"Jcirc", "\u{0134}"; +"Jcy", "\u{0419}"; +"Jfr", "\u{1D50D}"; +"Jopf", "\u{1D541}"; +"Jscr", "\u{1D4A5}"; +"Jsercy", "\u{0408}"; +"Jukcy", "\u{0404}"; +"KHcy", "\u{0425}"; +"KJcy", "\u{040C}"; +"Kappa", "\u{039A}"; +"Kcedil", "\u{0136}"; +"Kcy", "\u{041A}"; +"Kfr", "\u{1D50E}"; +"Kopf", "\u{1D542}"; +"Kscr", "\u{1D4A6}"; +"LJcy", "\u{0409}"; +"LT", "\u{003C}"; +"Lacute", "\u{0139}"; +"Lambda", "\u{039B}"; +"Lang", "\u{27EA}"; +"Laplacetrf", "\u{2112}"; +"Larr", "\u{219E}"; +"Lcaron", "\u{013D}"; +"Lcedil", "\u{013B}"; +"Lcy", "\u{041B}"; +"LeftAngleBracket", "\u{27E8}"; +"LeftArrow", "\u{2190}"; +"LeftArrowBar", "\u{21E4}"; +"LeftArrowRightArrow", "\u{21C6}"; +"LeftCeiling", "\u{2308}"; +"LeftDoubleBracket", "\u{27E6}"; +"LeftDownTeeVector", "\u{2961}"; +"LeftDownVector", "\u{21C3}"; +"LeftDownVectorBar", "\u{2959}"; +"LeftFloor", "\u{230A}"; +"LeftRightArrow", "\u{2194}"; +"LeftRightVector", "\u{294E}"; +"LeftTee", "\u{22A3}"; +"LeftTeeArrow", "\u{21A4}"; +"LeftTeeVector", "\u{295A}"; +"LeftTriangle", "\u{22B2}"; +"LeftTriangleBar", "\u{29CF}"; +"LeftTriangleEqual", "\u{22B4}"; +"LeftUpDownVector", "\u{2951}"; +"LeftUpTeeVector", "\u{2960}"; +"LeftUpVector", "\u{21BF}"; +"LeftUpVectorBar", "\u{2958}"; +"LeftVector", "\u{21BC}"; +"LeftVectorBar", "\u{2952}"; +"Leftarrow", "\u{21D0}"; +"Leftrightarrow", "\u{21D4}"; +"LessEqualGreater", "\u{22DA}"; +"LessFullEqual", "\u{2266}"; +"LessGreater", "\u{2276}"; +"LessLess", "\u{2AA1}"; +"LessSlantEqual", "\u{2A7D}"; +"LessTilde", "\u{2272}"; +"Lfr", "\u{1D50F}"; +"Ll", "\u{22D8}"; +"Lleftarrow", "\u{21DA}"; +"Lmidot", "\u{013F}"; +"LongLeftArrow", "\u{27F5}"; +"LongLeftRightArrow", "\u{27F7}"; +"LongRightArrow", "\u{27F6}"; +"Longleftarrow", "\u{27F8}"; +"Longleftrightarrow", "\u{27FA}"; +"Longrightarrow", "\u{27F9}"; +"Lopf", "\u{1D543}"; +"LowerLeftArrow", "\u{2199}"; +"LowerRightArrow", "\u{2198}"; +"Lscr", "\u{2112}"; +"Lsh", "\u{21B0}"; +"Lstrok", "\u{0141}"; +"Lt", "\u{226A}"; +"Map", "\u{2905}"; +"Mcy", "\u{041C}"; +"MediumSpace", "\u{205F}"; +"Mellintrf", "\u{2133}"; +"Mfr", "\u{1D510}"; +"MinusPlus", "\u{2213}"; +"Mopf", "\u{1D544}"; +"Mscr", "\u{2133}"; +"Mu", "\u{039C}"; +"NJcy", "\u{040A}"; +"Nacute", "\u{0143}"; +"Ncaron", "\u{0147}"; +"Ncedil", "\u{0145}"; +"Ncy", "\u{041D}"; +"NegativeMediumSpace", "\u{200B}"; +"NegativeThickSpace", "\u{200B}"; +"NegativeThinSpace", "\u{200B}"; +"NegativeVeryThinSpace", "\u{200B}"; +"NestedGreaterGreater", "\u{226B}"; +"NestedLessLess", "\u{226A}"; +"NewLine", "\u{000A}"; +"Nfr", "\u{1D511}"; +"NoBreak", "\u{2060}"; +"NonBreakingSpace", "\u{00A0}"; +"Nopf", "\u{2115}"; +"Not", "\u{2AEC}"; +"NotCongruent", "\u{2262}"; +"NotCupCap", "\u{226D}"; +"NotDoubleVerticalBar", "\u{2226}"; +"NotElement", "\u{2209}"; +"NotEqual", "\u{2260}"; +"NotEqualTilde", "\u{2242}\u{0338}"; +"NotExists", "\u{2204}"; +"NotGreater", "\u{226F}"; +"NotGreaterEqual", "\u{2271}"; +"NotGreaterFullEqual", "\u{2267}\u{0338}"; +"NotGreaterGreater", "\u{226B}\u{0338}"; +"NotGreaterLess", "\u{2279}"; +"NotGreaterSlantEqual", "\u{2A7E}\u{0338}"; +"NotGreaterTilde", "\u{2275}"; +"NotHumpDownHump", "\u{224E}\u{0338}"; +"NotHumpEqual", "\u{224F}\u{0338}"; +"NotLeftTriangle", "\u{22EA}"; +"NotLeftTriangleBar", "\u{29CF}\u{0338}"; +"NotLeftTriangleEqual", "\u{22EC}"; +"NotLess", "\u{226E}"; +"NotLessEqual", "\u{2270}"; +"NotLessGreater", "\u{2278}"; +"NotLessLess", "\u{226A}\u{0338}"; +"NotLessSlantEqual", "\u{2A7D}\u{0338}"; +"NotLessTilde", "\u{2274}"; +"NotNestedGreaterGreater", "\u{2AA2}\u{0338}"; +"NotNestedLessLess", "\u{2AA1}\u{0338}"; +"NotPrecedes", "\u{2280}"; +"NotPrecedesEqual", "\u{2AAF}\u{0338}"; +"NotPrecedesSlantEqual", "\u{22E0}"; +"NotReverseElement", "\u{220C}"; +"NotRightTriangle", "\u{22EB}"; +"NotRightTriangleBar", "\u{29D0}\u{0338}"; +"NotRightTriangleEqual", "\u{22ED}"; +"NotSquareSubset", "\u{228F}\u{0338}"; +"NotSquareSubsetEqual", "\u{22E2}"; +"NotSquareSuperset", "\u{2290}\u{0338}"; +"NotSquareSupersetEqual", "\u{22E3}"; +"NotSubset", "\u{2282}\u{20D2}"; +"NotSubsetEqual", "\u{2288}"; +"NotSucceeds", "\u{2281}"; +"NotSucceedsEqual", "\u{2AB0}\u{0338}"; +"NotSucceedsSlantEqual", "\u{22E1}"; +"NotSucceedsTilde", "\u{227F}\u{0338}"; +"NotSuperset", "\u{2283}\u{20D2}"; +"NotSupersetEqual", "\u{2289}"; +"NotTilde", "\u{2241}"; +"NotTildeEqual", "\u{2244}"; +"NotTildeFullEqual", "\u{2247}"; +"NotTildeTilde", "\u{2249}"; +"NotVerticalBar", "\u{2224}"; +"Nscr", "\u{1D4A9}"; +"Ntilde", "\u{00D1}"; +"Nu", "\u{039D}"; +"OElig", "\u{0152}"; +"Oacute", "\u{00D3}"; +"Ocirc", "\u{00D4}"; +"Ocy", "\u{041E}"; +"Odblac", "\u{0150}"; +"Ofr", "\u{1D512}"; +"Ograve", "\u{00D2}"; +"Omacr", "\u{014C}"; +"Omega", "\u{03A9}"; +"Omicron", "\u{039F}"; +"Oopf", "\u{1D546}"; +"OpenCurlyDoubleQuote", "\u{201C}"; +"OpenCurlyQuote", "\u{2018}"; +"Or", "\u{2A54}"; +"Oscr", "\u{1D4AA}"; +"Oslash", "\u{00D8}"; +"Otilde", "\u{00D5}"; +"Otimes", "\u{2A37}"; +"Ouml", "\u{00D6}"; +"OverBar", "\u{203E}"; +"OverBrace", "\u{23DE}"; +"OverBracket", "\u{23B4}"; +"OverParenthesis", "\u{23DC}"; +"PartialD", "\u{2202}"; +"Pcy", "\u{041F}"; +"Pfr", "\u{1D513}"; +"Phi", "\u{03A6}"; +"Pi", "\u{03A0}"; +"PlusMinus", "\u{00B1}"; +"Poincareplane", "\u{210C}"; +"Popf", "\u{2119}"; +"Pr", "\u{2ABB}"; +"Precedes", "\u{227A}"; +"PrecedesEqual", "\u{2AAF}"; +"PrecedesSlantEqual", "\u{227C}"; +"PrecedesTilde", "\u{227E}"; +"Prime", "\u{2033}"; +"Product", "\u{220F}"; +"Proportion", "\u{2237}"; +"Proportional", "\u{221D}"; +"Pscr", "\u{1D4AB}"; +"Psi", "\u{03A8}"; +"QUOT", "\u{0022}"; +"Qfr", "\u{1D514}"; +"Qopf", "\u{211A}"; +"Qscr", "\u{1D4AC}"; +"RBarr", "\u{2910}"; +"REG", "\u{00AE}"; +"Racute", "\u{0154}"; +"Rang", "\u{27EB}"; +"Rarr", "\u{21A0}"; +"Rarrtl", "\u{2916}"; +"Rcaron", "\u{0158}"; +"Rcedil", "\u{0156}"; +"Rcy", "\u{0420}"; +"Re", "\u{211C}"; +"ReverseElement", "\u{220B}"; +"ReverseEquilibrium", "\u{21CB}"; +"ReverseUpEquilibrium", "\u{296F}"; +"Rfr", "\u{211C}"; +"Rho", "\u{03A1}"; +"RightAngleBracket", "\u{27E9}"; +"RightArrow", "\u{2192}"; +"RightArrowBar", "\u{21E5}"; +"RightArrowLeftArrow", "\u{21C4}"; +"RightCeiling", "\u{2309}"; +"RightDoubleBracket", "\u{27E7}"; +"RightDownTeeVector", "\u{295D}"; +"RightDownVector", "\u{21C2}"; +"RightDownVectorBar", "\u{2955}"; +"RightFloor", "\u{230B}"; +"RightTee", "\u{22A2}"; +"RightTeeArrow", "\u{21A6}"; +"RightTeeVector", "\u{295B}"; +"RightTriangle", "\u{22B3}"; +"RightTriangleBar", "\u{29D0}"; +"RightTriangleEqual", "\u{22B5}"; +"RightUpDownVector", "\u{294F}"; +"RightUpTeeVector", "\u{295C}"; +"RightUpVector", "\u{21BE}"; +"RightUpVectorBar", "\u{2954}"; +"RightVector", "\u{21C0}"; +"RightVectorBar", "\u{2953}"; +"Rightarrow", "\u{21D2}"; +"Ropf", "\u{211D}"; +"RoundImplies", "\u{2970}"; +"Rrightarrow", "\u{21DB}"; +"Rscr", "\u{211B}"; +"Rsh", "\u{21B1}"; +"RuleDelayed", "\u{29F4}"; +"SHCHcy", "\u{0429}"; +"SHcy", "\u{0428}"; +"SOFTcy", "\u{042C}"; +"Sacute", "\u{015A}"; +"Sc", "\u{2ABC}"; +"Scaron", "\u{0160}"; +"Scedil", "\u{015E}"; +"Scirc", "\u{015C}"; +"Scy", "\u{0421}"; +"Sfr", "\u{1D516}"; +"ShortDownArrow", "\u{2193}"; +"ShortLeftArrow", "\u{2190}"; +"ShortRightArrow", "\u{2192}"; +"ShortUpArrow", "\u{2191}"; +"Sigma", "\u{03A3}"; +"SmallCircle", "\u{2218}"; +"Sopf", "\u{1D54A}"; +"Sqrt", "\u{221A}"; +"Square", "\u{25A1}"; +"SquareIntersection", "\u{2293}"; +"SquareSubset", "\u{228F}"; +"SquareSubsetEqual", "\u{2291}"; +"SquareSuperset", "\u{2290}"; +"SquareSupersetEqual", "\u{2292}"; +"SquareUnion", "\u{2294}"; +"Sscr", "\u{1D4AE}"; +"Star", "\u{22C6}"; +"Sub", "\u{22D0}"; +"Subset", "\u{22D0}"; +"SubsetEqual", "\u{2286}"; +"Succeeds", "\u{227B}"; +"SucceedsEqual", "\u{2AB0}"; +"SucceedsSlantEqual", "\u{227D}"; +"SucceedsTilde", "\u{227F}"; +"SuchThat", "\u{220B}"; +"Sum", "\u{2211}"; +"Sup", "\u{22D1}"; +"Superset", "\u{2283}"; +"SupersetEqual", "\u{2287}"; +"Supset", "\u{22D1}"; +"THORN", "\u{00DE}"; +"TRADE", "\u{2122}"; +"TSHcy", "\u{040B}"; +"TScy", "\u{0426}"; +"Tab", "\u{0009}"; +"Tau", "\u{03A4}"; +"Tcaron", "\u{0164}"; +"Tcedil", "\u{0162}"; +"Tcy", "\u{0422}"; +"Tfr", "\u{1D517}"; +"Therefore", "\u{2234}"; +"Theta", "\u{0398}"; +"ThickSpace", "\u{205F}\u{200A}"; +"ThinSpace", "\u{2009}"; +"Tilde", "\u{223C}"; +"TildeEqual", "\u{2243}"; +"TildeFullEqual", "\u{2245}"; +"TildeTilde", "\u{2248}"; +"Topf", "\u{1D54B}"; +"TripleDot", "\u{20DB}"; +"Tscr", "\u{1D4AF}"; +"Tstrok", "\u{0166}"; +"Uacute", "\u{00DA}"; +"Uarr", "\u{219F}"; +"Uarrocir", "\u{2949}"; +"Ubrcy", "\u{040E}"; +"Ubreve", "\u{016C}"; +"Ucirc", "\u{00DB}"; +"Ucy", "\u{0423}"; +"Udblac", "\u{0170}"; +"Ufr", "\u{1D518}"; +"Ugrave", "\u{00D9}"; +"Umacr", "\u{016A}"; +"UnderBar", "\u{005F}"; +"UnderBrace", "\u{23DF}"; +"UnderBracket", "\u{23B5}"; +"UnderParenthesis", "\u{23DD}"; +"Union", "\u{22C3}"; +"UnionPlus", "\u{228E}"; +"Uogon", "\u{0172}"; +"Uopf", "\u{1D54C}"; +"UpArrow", "\u{2191}"; +"UpArrowBar", "\u{2912}"; +"UpArrowDownArrow", "\u{21C5}"; +"UpDownArrow", "\u{2195}"; +"UpEquilibrium", "\u{296E}"; +"UpTee", "\u{22A5}"; +"UpTeeArrow", "\u{21A5}"; +"Uparrow", "\u{21D1}"; +"Updownarrow", "\u{21D5}"; +"UpperLeftArrow", "\u{2196}"; +"UpperRightArrow", "\u{2197}"; +"Upsi", "\u{03D2}"; +"Upsilon", "\u{03A5}"; +"Uring", "\u{016E}"; +"Uscr", "\u{1D4B0}"; +"Utilde", "\u{0168}"; +"Uuml", "\u{00DC}"; +"VDash", "\u{22AB}"; +"Vbar", "\u{2AEB}"; +"Vcy", "\u{0412}"; +"Vdash", "\u{22A9}"; +"Vdashl", "\u{2AE6}"; +"Vee", "\u{22C1}"; +"Verbar", "\u{2016}"; +"Vert", "\u{2016}"; +"VerticalBar", "\u{2223}"; +"VerticalLine", "\u{007C}"; +"VerticalSeparator", "\u{2758}"; +"VerticalTilde", "\u{2240}"; +"VeryThinSpace", "\u{200A}"; +"Vfr", "\u{1D519}"; +"Vopf", "\u{1D54D}"; +"Vscr", "\u{1D4B1}"; +"Vvdash", "\u{22AA}"; +"Wcirc", "\u{0174}"; +"Wedge", "\u{22C0}"; +"Wfr", "\u{1D51A}"; +"Wopf", "\u{1D54E}"; +"Wscr", "\u{1D4B2}"; +"Xfr", "\u{1D51B}"; +"Xi", "\u{039E}"; +"Xopf", "\u{1D54F}"; +"Xscr", "\u{1D4B3}"; +"YAcy", "\u{042F}"; +"YIcy", "\u{0407}"; +"YUcy", "\u{042E}"; +"Yacute", "\u{00DD}"; +"Ycirc", "\u{0176}"; +"Ycy", "\u{042B}"; +"Yfr", "\u{1D51C}"; +"Yopf", "\u{1D550}"; +"Yscr", "\u{1D4B4}"; +"Yuml", "\u{0178}"; +"ZHcy", "\u{0416}"; +"Zacute", "\u{0179}"; +"Zcaron", "\u{017D}"; +"Zcy", "\u{0417}"; +"Zdot", "\u{017B}"; +"ZeroWidthSpace", "\u{200B}"; +"Zeta", "\u{0396}"; +"Zfr", "\u{2128}"; +"Zopf", "\u{2124}"; +"Zscr", "\u{1D4B5}"; +"aacute", "\u{00E1}"; +"abreve", "\u{0103}"; +"ac", "\u{223E}"; +"acE", "\u{223E}\u{0333}"; +"acd", "\u{223F}"; +"acirc", "\u{00E2}"; +"acute", "\u{00B4}"; +"acy", "\u{0430}"; +"aelig", "\u{00E6}"; +"af", "\u{2061}"; +"afr", "\u{1D51E}"; +"agrave", "\u{00E0}"; +"alefsym", "\u{2135}"; +"aleph", "\u{2135}"; +"alpha", "\u{03B1}"; +"amacr", "\u{0101}"; +"amalg", "\u{2A3F}"; +"amp", "\u{0026}"; +"and", "\u{2227}"; +"andand", "\u{2A55}"; +"andd", "\u{2A5C}"; +"andslope", "\u{2A58}"; +"andv", "\u{2A5A}"; +"ang", "\u{2220}"; +"ange", "\u{29A4}"; +"angle", "\u{2220}"; +"angmsd", "\u{2221}"; +"angmsdaa", "\u{29A8}"; +"angmsdab", "\u{29A9}"; +"angmsdac", "\u{29AA}"; +"angmsdad", "\u{29AB}"; +"angmsdae", "\u{29AC}"; +"angmsdaf", "\u{29AD}"; +"angmsdag", "\u{29AE}"; +"angmsdah", "\u{29AF}"; +"angrt", "\u{221F}"; +"angrtvb", "\u{22BE}"; +"angrtvbd", "\u{299D}"; +"angsph", "\u{2222}"; +"angst", "\u{00C5}"; +"angzarr", "\u{237C}"; +"aogon", "\u{0105}"; +"aopf", "\u{1D552}"; +"ap", "\u{2248}"; +"apE", "\u{2A70}"; +"apacir", "\u{2A6F}"; +"ape", "\u{224A}"; +"apid", "\u{224B}"; +"apos", "\u{0027}"; +"approx", "\u{2248}"; +"approxeq", "\u{224A}"; +"aring", "\u{00E5}"; +"ascr", "\u{1D4B6}"; +"ast", "\u{002A}"; +"asymp", "\u{2248}"; +"asympeq", "\u{224D}"; +"atilde", "\u{00E3}"; +"auml", "\u{00E4}"; +"awconint", "\u{2233}"; +"awint", "\u{2A11}"; +"bNot", "\u{2AED}"; +"backcong", "\u{224C}"; +"backepsilon", "\u{03F6}"; +"backprime", "\u{2035}"; +"backsim", "\u{223D}"; +"backsimeq", "\u{22CD}"; +"barvee", "\u{22BD}"; +"barwed", "\u{2305}"; +"barwedge", "\u{2305}"; +"bbrk", "\u{23B5}"; +"bbrktbrk", "\u{23B6}"; +"bcong", "\u{224C}"; +"bcy", "\u{0431}"; +"bdquo", "\u{201E}"; +"becaus", "\u{2235}"; +"because", "\u{2235}"; +"bemptyv", "\u{29B0}"; +"bepsi", "\u{03F6}"; +"bernou", "\u{212C}"; +"beta", "\u{03B2}"; +"beth", "\u{2136}"; +"between", "\u{226C}"; +"bfr", "\u{1D51F}"; +"bigcap", "\u{22C2}"; +"bigcirc", "\u{25EF}"; +"bigcup", "\u{22C3}"; +"bigodot", "\u{2A00}"; +"bigoplus", "\u{2A01}"; +"bigotimes", "\u{2A02}"; +"bigsqcup", "\u{2A06}"; +"bigstar", "\u{2605}"; +"bigtriangledown", "\u{25BD}"; +"bigtriangleup", "\u{25B3}"; +"biguplus", "\u{2A04}"; +"bigvee", "\u{22C1}"; +"bigwedge", "\u{22C0}"; +"bkarow", "\u{290D}"; +"blacklozenge", "\u{29EB}"; +"blacksquare", "\u{25AA}"; +"blacktriangle", "\u{25B4}"; +"blacktriangledown", "\u{25BE}"; +"blacktriangleleft", "\u{25C2}"; +"blacktriangleright", "\u{25B8}"; +"blank", "\u{2423}"; +"blk12", "\u{2592}"; +"blk14", "\u{2591}"; +"blk34", "\u{2593}"; +"block", "\u{2588}"; +"bne", "\u{003D}\u{20E5}"; +"bnequiv", "\u{2261}\u{20E5}"; +"bnot", "\u{2310}"; +"bopf", "\u{1D553}"; +"bot", "\u{22A5}"; +"bottom", "\u{22A5}"; +"bowtie", "\u{22C8}"; +"boxDL", "\u{2557}"; +"boxDR", "\u{2554}"; +"boxDl", "\u{2556}"; +"boxDr", "\u{2553}"; +"boxH", "\u{2550}"; +"boxHD", "\u{2566}"; +"boxHU", "\u{2569}"; +"boxHd", "\u{2564}"; +"boxHu", "\u{2567}"; +"boxUL", "\u{255D}"; +"boxUR", "\u{255A}"; +"boxUl", "\u{255C}"; +"boxUr", "\u{2559}"; +"boxV", "\u{2551}"; +"boxVH", "\u{256C}"; +"boxVL", "\u{2563}"; +"boxVR", "\u{2560}"; +"boxVh", "\u{256B}"; +"boxVl", "\u{2562}"; +"boxVr", "\u{255F}"; +"boxbox", "\u{29C9}"; +"boxdL", "\u{2555}"; +"boxdR", "\u{2552}"; +"boxdl", "\u{2510}"; +"boxdr", "\u{250C}"; +"boxh", "\u{2500}"; +"boxhD", "\u{2565}"; +"boxhU", "\u{2568}"; +"boxhd", "\u{252C}"; +"boxhu", "\u{2534}"; +"boxminus", "\u{229F}"; +"boxplus", "\u{229E}"; +"boxtimes", "\u{22A0}"; +"boxuL", "\u{255B}"; +"boxuR", "\u{2558}"; +"boxul", "\u{2518}"; +"boxur", "\u{2514}"; +"boxv", "\u{2502}"; +"boxvH", "\u{256A}"; +"boxvL", "\u{2561}"; +"boxvR", "\u{255E}"; +"boxvh", "\u{253C}"; +"boxvl", "\u{2524}"; +"boxvr", "\u{251C}"; +"bprime", "\u{2035}"; +"breve", "\u{02D8}"; +"brvbar", "\u{00A6}"; +"bscr", "\u{1D4B7}"; +"bsemi", "\u{204F}"; +"bsim", "\u{223D}"; +"bsime", "\u{22CD}"; +"bsol", "\u{005C}"; +"bsolb", "\u{29C5}"; +"bsolhsub", "\u{27C8}"; +"bull", "\u{2022}"; +"bullet", "\u{2022}"; +"bump", "\u{224E}"; +"bumpE", "\u{2AAE}"; +"bumpe", "\u{224F}"; +"bumpeq", "\u{224F}"; +"cacute", "\u{0107}"; +"cap", "\u{2229}"; +"capand", "\u{2A44}"; +"capbrcup", "\u{2A49}"; +"capcap", "\u{2A4B}"; +"capcup", "\u{2A47}"; +"capdot", "\u{2A40}"; +"caps", "\u{2229}\u{FE00}"; +"caret", "\u{2041}"; +"caron", "\u{02C7}"; +"ccaps", "\u{2A4D}"; +"ccaron", "\u{010D}"; +"ccedil", "\u{00E7}"; +"ccirc", "\u{0109}"; +"ccups", "\u{2A4C}"; +"ccupssm", "\u{2A50}"; +"cdot", "\u{010B}"; +"cedil", "\u{00B8}"; +"cemptyv", "\u{29B2}"; +"cent", "\u{00A2}"; +"centerdot", "\u{00B7}"; +"cfr", "\u{1D520}"; +"chcy", "\u{0447}"; +"check", "\u{2713}"; +"checkmark", "\u{2713}"; +"chi", "\u{03C7}"; +"cir", "\u{25CB}"; +"cirE", "\u{29C3}"; +"circ", "\u{02C6}"; +"circeq", "\u{2257}"; +"circlearrowleft", "\u{21BA}"; +"circlearrowright", "\u{21BB}"; +"circledR", "\u{00AE}"; +"circledS", "\u{24C8}"; +"circledast", "\u{229B}"; +"circledcirc", "\u{229A}"; +"circleddash", "\u{229D}"; +"cire", "\u{2257}"; +"cirfnint", "\u{2A10}"; +"cirmid", "\u{2AEF}"; +"cirscir", "\u{29C2}"; +"clubs", "\u{2663}"; +"clubsuit", "\u{2663}"; +"colon", "\u{003A}"; +"colone", "\u{2254}"; +"coloneq", "\u{2254}"; +"comma", "\u{002C}"; +"commat", "\u{0040}"; +"comp", "\u{2201}"; +"compfn", "\u{2218}"; +"complement", "\u{2201}"; +"complexes", "\u{2102}"; +"cong", "\u{2245}"; +"congdot", "\u{2A6D}"; +"conint", "\u{222E}"; +"copf", "\u{1D554}"; +"coprod", "\u{2210}"; +"copy", "\u{00A9}"; +"copysr", "\u{2117}"; +"crarr", "\u{21B5}"; +"cross", "\u{2717}"; +"cscr", "\u{1D4B8}"; +"csub", "\u{2ACF}"; +"csube", "\u{2AD1}"; +"csup", "\u{2AD0}"; +"csupe", "\u{2AD2}"; +"ctdot", "\u{22EF}"; +"cudarrl", "\u{2938}"; +"cudarrr", "\u{2935}"; +"cuepr", "\u{22DE}"; +"cuesc", "\u{22DF}"; +"cularr", "\u{21B6}"; +"cularrp", "\u{293D}"; +"cup", "\u{222A}"; +"cupbrcap", "\u{2A48}"; +"cupcap", "\u{2A46}"; +"cupcup", "\u{2A4A}"; +"cupdot", "\u{228D}"; +"cupor", "\u{2A45}"; +"cups", "\u{222A}\u{FE00}"; +"curarr", "\u{21B7}"; +"curarrm", "\u{293C}"; +"curlyeqprec", "\u{22DE}"; +"curlyeqsucc", "\u{22DF}"; +"curlyvee", "\u{22CE}"; +"curlywedge", "\u{22CF}"; +"curren", "\u{00A4}"; +"curvearrowleft", "\u{21B6}"; +"curvearrowright", "\u{21B7}"; +"cuvee", "\u{22CE}"; +"cuwed", "\u{22CF}"; +"cwconint", "\u{2232}"; +"cwint", "\u{2231}"; +"cylcty", "\u{232D}"; +"dArr", "\u{21D3}"; +"dHar", "\u{2965}"; +"dagger", "\u{2020}"; +"daleth", "\u{2138}"; +"darr", "\u{2193}"; +"dash", "\u{2010}"; +"dashv", "\u{22A3}"; +"dbkarow", "\u{290F}"; +"dblac", "\u{02DD}"; +"dcaron", "\u{010F}"; +"dcy", "\u{0434}"; +"dd", "\u{2146}"; +"ddagger", "\u{2021}"; +"ddarr", "\u{21CA}"; +"ddotseq", "\u{2A77}"; +"deg", "\u{00B0}"; +"delta", "\u{03B4}"; +"demptyv", "\u{29B1}"; +"dfisht", "\u{297F}"; +"dfr", "\u{1D521}"; +"dharl", "\u{21C3}"; +"dharr", "\u{21C2}"; +"diam", "\u{22C4}"; +"diamond", "\u{22C4}"; +"diamondsuit", "\u{2666}"; +"diams", "\u{2666}"; +"die", "\u{00A8}"; +"digamma", "\u{03DD}"; +"disin", "\u{22F2}"; +"div", "\u{00F7}"; +"divide", "\u{00F7}"; +"divideontimes", "\u{22C7}"; +"divonx", "\u{22C7}"; +"djcy", "\u{0452}"; +"dlcorn", "\u{231E}"; +"dlcrop", "\u{230D}"; +"dollar", "\u{0024}"; +"dopf", "\u{1D555}"; +"dot", "\u{02D9}"; +"doteq", "\u{2250}"; +"doteqdot", "\u{2251}"; +"dotminus", "\u{2238}"; +"dotplus", "\u{2214}"; +"dotsquare", "\u{22A1}"; +"doublebarwedge", "\u{2306}"; +"downarrow", "\u{2193}"; +"downdownarrows", "\u{21CA}"; +"downharpoonleft", "\u{21C3}"; +"downharpoonright", "\u{21C2}"; +"drbkarow", "\u{2910}"; +"drcorn", "\u{231F}"; +"drcrop", "\u{230C}"; +"dscr", "\u{1D4B9}"; +"dscy", "\u{0455}"; +"dsol", "\u{29F6}"; +"dstrok", "\u{0111}"; +"dtdot", "\u{22F1}"; +"dtri", "\u{25BF}"; +"dtrif", "\u{25BE}"; +"duarr", "\u{21F5}"; +"duhar", "\u{296F}"; +"dwangle", "\u{29A6}"; +"dzcy", "\u{045F}"; +"dzigrarr", "\u{27FF}"; +"eDDot", "\u{2A77}"; +"eDot", "\u{2251}"; +"eacute", "\u{00E9}"; +"easter", "\u{2A6E}"; +"ecaron", "\u{011B}"; +"ecir", "\u{2256}"; +"ecirc", "\u{00EA}"; +"ecolon", "\u{2255}"; +"ecy", "\u{044D}"; +"edot", "\u{0117}"; +"ee", "\u{2147}"; +"efDot", "\u{2252}"; +"efr", "\u{1D522}"; +"eg", "\u{2A9A}"; +"egrave", "\u{00E8}"; +"egs", "\u{2A96}"; +"egsdot", "\u{2A98}"; +"el", "\u{2A99}"; +"elinters", "\u{23E7}"; +"ell", "\u{2113}"; +"els", "\u{2A95}"; +"elsdot", "\u{2A97}"; +"emacr", "\u{0113}"; +"empty", "\u{2205}"; +"emptyset", "\u{2205}"; +"emptyv", "\u{2205}"; +"emsp13", "\u{2004}"; +"emsp14", "\u{2005}"; +"emsp", "\u{2003}"; +"eng", "\u{014B}"; +"ensp", "\u{2002}"; +"eogon", "\u{0119}"; +"eopf", "\u{1D556}"; +"epar", "\u{22D5}"; +"eparsl", "\u{29E3}"; +"eplus", "\u{2A71}"; +"epsi", "\u{03B5}"; +"epsilon", "\u{03B5}"; +"epsiv", "\u{03F5}"; +"eqcirc", "\u{2256}"; +"eqcolon", "\u{2255}"; +"eqsim", "\u{2242}"; +"eqslantgtr", "\u{2A96}"; +"eqslantless", "\u{2A95}"; +"equals", "\u{003D}"; +"equest", "\u{225F}"; +"equiv", "\u{2261}"; +"equivDD", "\u{2A78}"; +"eqvparsl", "\u{29E5}"; +"erDot", "\u{2253}"; +"erarr", "\u{2971}"; +"escr", "\u{212F}"; +"esdot", "\u{2250}"; +"esim", "\u{2242}"; +"eta", "\u{03B7}"; +"eth", "\u{00F0}"; +"euml", "\u{00EB}"; +"euro", "\u{20AC}"; +"excl", "\u{0021}"; +"exist", "\u{2203}"; +"expectation", "\u{2130}"; +"exponentiale", "\u{2147}"; +"fallingdotseq", "\u{2252}"; +"fcy", "\u{0444}"; +"female", "\u{2640}"; +"ffilig", "\u{FB03}"; +"fflig", "\u{FB00}"; +"ffllig", "\u{FB04}"; +"ffr", "\u{1D523}"; +"filig", "\u{FB01}"; +"fjlig", "\u{0066}\u{006A}"; +"flat", "\u{266D}"; +"fllig", "\u{FB02}"; +"fltns", "\u{25B1}"; +"fnof", "\u{0192}"; +"fopf", "\u{1D557}"; +"forall", "\u{2200}"; +"fork", "\u{22D4}"; +"forkv", "\u{2AD9}"; +"fpartint", "\u{2A0D}"; +"frac12", "\u{00BD}"; +"frac13", "\u{2153}"; +"frac14", "\u{00BC}"; +"frac15", "\u{2155}"; +"frac16", "\u{2159}"; +"frac18", "\u{215B}"; +"frac23", "\u{2154}"; +"frac25", "\u{2156}"; +"frac34", "\u{00BE}"; +"frac35", "\u{2157}"; +"frac38", "\u{215C}"; +"frac45", "\u{2158}"; +"frac56", "\u{215A}"; +"frac58", "\u{215D}"; +"frac78", "\u{215E}"; +"frasl", "\u{2044}"; +"frown", "\u{2322}"; +"fscr", "\u{1D4BB}"; +"gE", "\u{2267}"; +"gEl", "\u{2A8C}"; +"gacute", "\u{01F5}"; +"gamma", "\u{03B3}"; +"gammad", "\u{03DD}"; +"gap", "\u{2A86}"; +"gbreve", "\u{011F}"; +"gcirc", "\u{011D}"; +"gcy", "\u{0433}"; +"gdot", "\u{0121}"; +"ge", "\u{2265}"; +"gel", "\u{22DB}"; +"geq", "\u{2265}"; +"geqq", "\u{2267}"; +"geqslant", "\u{2A7E}"; +"ges", "\u{2A7E}"; +"gescc", "\u{2AA9}"; +"gesdot", "\u{2A80}"; +"gesdoto", "\u{2A82}"; +"gesdotol", "\u{2A84}"; +"gesl", "\u{22DB}\u{FE00}"; +"gesles", "\u{2A94}"; +"gfr", "\u{1D524}"; +"gg", "\u{226B}"; +"ggg", "\u{22D9}"; +"gimel", "\u{2137}"; +"gjcy", "\u{0453}"; +"gl", "\u{2277}"; +"glE", "\u{2A92}"; +"gla", "\u{2AA5}"; +"glj", "\u{2AA4}"; +"gnE", "\u{2269}"; +"gnap", "\u{2A8A}"; +"gnapprox", "\u{2A8A}"; +"gne", "\u{2A88}"; +"gneq", "\u{2A88}"; +"gneqq", "\u{2269}"; +"gnsim", "\u{22E7}"; +"gopf", "\u{1D558}"; +"grave", "\u{0060}"; +"gscr", "\u{210A}"; +"gsim", "\u{2273}"; +"gsime", "\u{2A8E}"; +"gsiml", "\u{2A90}"; +"gt", "\u{003E}"; +"gtcc", "\u{2AA7}"; +"gtcir", "\u{2A7A}"; +"gtdot", "\u{22D7}"; +"gtlPar", "\u{2995}"; +"gtquest", "\u{2A7C}"; +"gtrapprox", "\u{2A86}"; +"gtrarr", "\u{2978}"; +"gtrdot", "\u{22D7}"; +"gtreqless", "\u{22DB}"; +"gtreqqless", "\u{2A8C}"; +"gtrless", "\u{2277}"; +"gtrsim", "\u{2273}"; +"gvertneqq", "\u{2269}\u{FE00}"; +"gvnE", "\u{2269}\u{FE00}"; +"hArr", "\u{21D4}"; +"hairsp", "\u{200A}"; +"half", "\u{00BD}"; +"hamilt", "\u{210B}"; +"hardcy", "\u{044A}"; +"harr", "\u{2194}"; +"harrcir", "\u{2948}"; +"harrw", "\u{21AD}"; +"hbar", "\u{210F}"; +"hcirc", "\u{0125}"; +"hearts", "\u{2665}"; +"heartsuit", "\u{2665}"; +"hellip", "\u{2026}"; +"hercon", "\u{22B9}"; +"hfr", "\u{1D525}"; +"hksearow", "\u{2925}"; +"hkswarow", "\u{2926}"; +"hoarr", "\u{21FF}"; +"homtht", "\u{223B}"; +"hookleftarrow", "\u{21A9}"; +"hookrightarrow", "\u{21AA}"; +"hopf", "\u{1D559}"; +"horbar", "\u{2015}"; +"hscr", "\u{1D4BD}"; +"hslash", "\u{210F}"; +"hstrok", "\u{0127}"; +"hybull", "\u{2043}"; +"hyphen", "\u{2010}"; +"iacute", "\u{00ED}"; +"ic", "\u{2063}"; +"icirc", "\u{00EE}"; +"icy", "\u{0438}"; +"iecy", "\u{0435}"; +"iexcl", "\u{00A1}"; +"iff", "\u{21D4}"; +"ifr", "\u{1D526}"; +"igrave", "\u{00EC}"; +"ii", "\u{2148}"; +"iiiint", "\u{2A0C}"; +"iiint", "\u{222D}"; +"iinfin", "\u{29DC}"; +"iiota", "\u{2129}"; +"ijlig", "\u{0133}"; +"imacr", "\u{012B}"; +"image", "\u{2111}"; +"imagline", "\u{2110}"; +"imagpart", "\u{2111}"; +"imath", "\u{0131}"; +"imof", "\u{22B7}"; +"imped", "\u{01B5}"; +"in", "\u{2208}"; +"incare", "\u{2105}"; +"infin", "\u{221E}"; +"infintie", "\u{29DD}"; +"inodot", "\u{0131}"; +"int", "\u{222B}"; +"intcal", "\u{22BA}"; +"integers", "\u{2124}"; +"intercal", "\u{22BA}"; +"intlarhk", "\u{2A17}"; +"intprod", "\u{2A3C}"; +"iocy", "\u{0451}"; +"iogon", "\u{012F}"; +"iopf", "\u{1D55A}"; +"iota", "\u{03B9}"; +"iprod", "\u{2A3C}"; +"iquest", "\u{00BF}"; +"iscr", "\u{1D4BE}"; +"isin", "\u{2208}"; +"isinE", "\u{22F9}"; +"isindot", "\u{22F5}"; +"isins", "\u{22F4}"; +"isinsv", "\u{22F3}"; +"isinv", "\u{2208}"; +"it", "\u{2062}"; +"itilde", "\u{0129}"; +"iukcy", "\u{0456}"; +"iuml", "\u{00EF}"; +"jcirc", "\u{0135}"; +"jcy", "\u{0439}"; +"jfr", "\u{1D527}"; +"jmath", "\u{0237}"; +"jopf", "\u{1D55B}"; +"jscr", "\u{1D4BF}"; +"jsercy", "\u{0458}"; +"jukcy", "\u{0454}"; +"kappa", "\u{03BA}"; +"kappav", "\u{03F0}"; +"kcedil", "\u{0137}"; +"kcy", "\u{043A}"; +"kfr", "\u{1D528}"; +"kgreen", "\u{0138}"; +"khcy", "\u{0445}"; +"kjcy", "\u{045C}"; +"kopf", "\u{1D55C}"; +"kscr", "\u{1D4C0}"; +"lAarr", "\u{21DA}"; +"lArr", "\u{21D0}"; +"lAtail", "\u{291B}"; +"lBarr", "\u{290E}"; +"lE", "\u{2266}"; +"lEg", "\u{2A8B}"; +"lHar", "\u{2962}"; +"lacute", "\u{013A}"; +"laemptyv", "\u{29B4}"; +"lagran", "\u{2112}"; +"lambda", "\u{03BB}"; +"lang", "\u{27E8}"; +"langd", "\u{2991}"; +"langle", "\u{27E8}"; +"lap", "\u{2A85}"; +"laquo", "\u{00AB}"; +"larr", "\u{2190}"; +"larrb", "\u{21E4}"; +"larrbfs", "\u{291F}"; +"larrfs", "\u{291D}"; +"larrhk", "\u{21A9}"; +"larrlp", "\u{21AB}"; +"larrpl", "\u{2939}"; +"larrsim", "\u{2973}"; +"larrtl", "\u{21A2}"; +"lat", "\u{2AAB}"; +"latail", "\u{2919}"; +"late", "\u{2AAD}"; +"lates", "\u{2AAD}\u{FE00}"; +"lbarr", "\u{290C}"; +"lbbrk", "\u{2772}"; +"lbrace", "\u{007B}"; +"lbrack", "\u{005B}"; +"lbrke", "\u{298B}"; +"lbrksld", "\u{298F}"; +"lbrkslu", "\u{298D}"; +"lcaron", "\u{013E}"; +"lcedil", "\u{013C}"; +"lceil", "\u{2308}"; +"lcub", "\u{007B}"; +"lcy", "\u{043B}"; +"ldca", "\u{2936}"; +"ldquo", "\u{201C}"; +"ldquor", "\u{201E}"; +"ldrdhar", "\u{2967}"; +"ldrushar", "\u{294B}"; +"ldsh", "\u{21B2}"; +"le", "\u{2264}"; +"leftarrow", "\u{2190}"; +"leftarrowtail", "\u{21A2}"; +"leftharpoondown", "\u{21BD}"; +"leftharpoonup", "\u{21BC}"; +"leftleftarrows", "\u{21C7}"; +"leftrightarrow", "\u{2194}"; +"leftrightarrows", "\u{21C6}"; +"leftrightharpoons", "\u{21CB}"; +"leftrightsquigarrow", "\u{21AD}"; +"leftthreetimes", "\u{22CB}"; +"leg", "\u{22DA}"; +"leq", "\u{2264}"; +"leqq", "\u{2266}"; +"leqslant", "\u{2A7D}"; +"les", "\u{2A7D}"; +"lescc", "\u{2AA8}"; +"lesdot", "\u{2A7F}"; +"lesdoto", "\u{2A81}"; +"lesdotor", "\u{2A83}"; +"lesg", "\u{22DA}\u{FE00}"; +"lesges", "\u{2A93}"; +"lessapprox", "\u{2A85}"; +"lessdot", "\u{22D6}"; +"lesseqgtr", "\u{22DA}"; +"lesseqqgtr", "\u{2A8B}"; +"lessgtr", "\u{2276}"; +"lesssim", "\u{2272}"; +"lfisht", "\u{297C}"; +"lfloor", "\u{230A}"; +"lfr", "\u{1D529}"; +"lg", "\u{2276}"; +"lgE", "\u{2A91}"; +"lhard", "\u{21BD}"; +"lharu", "\u{21BC}"; +"lharul", "\u{296A}"; +"lhblk", "\u{2584}"; +"ljcy", "\u{0459}"; +"ll", "\u{226A}"; +"llarr", "\u{21C7}"; +"llcorner", "\u{231E}"; +"llhard", "\u{296B}"; +"lltri", "\u{25FA}"; +"lmidot", "\u{0140}"; +"lmoust", "\u{23B0}"; +"lmoustache", "\u{23B0}"; +"lnE", "\u{2268}"; +"lnap", "\u{2A89}"; +"lnapprox", "\u{2A89}"; +"lne", "\u{2A87}"; +"lneq", "\u{2A87}"; +"lneqq", "\u{2268}"; +"lnsim", "\u{22E6}"; +"loang", "\u{27EC}"; +"loarr", "\u{21FD}"; +"lobrk", "\u{27E6}"; +"longleftarrow", "\u{27F5}"; +"longleftrightarrow", "\u{27F7}"; +"longmapsto", "\u{27FC}"; +"longrightarrow", "\u{27F6}"; +"looparrowleft", "\u{21AB}"; +"looparrowright", "\u{21AC}"; +"lopar", "\u{2985}"; +"lopf", "\u{1D55D}"; +"loplus", "\u{2A2D}"; +"lotimes", "\u{2A34}"; +"lowast", "\u{2217}"; +"lowbar", "\u{005F}"; +"loz", "\u{25CA}"; +"lozenge", "\u{25CA}"; +"lozf", "\u{29EB}"; +"lpar", "\u{0028}"; +"lparlt", "\u{2993}"; +"lrarr", "\u{21C6}"; +"lrcorner", "\u{231F}"; +"lrhar", "\u{21CB}"; +"lrhard", "\u{296D}"; +"lrm", "\u{200E}"; +"lrtri", "\u{22BF}"; +"lsaquo", "\u{2039}"; +"lscr", "\u{1D4C1}"; +"lsh", "\u{21B0}"; +"lsim", "\u{2272}"; +"lsime", "\u{2A8D}"; +"lsimg", "\u{2A8F}"; +"lsqb", "\u{005B}"; +"lsquo", "\u{2018}"; +"lsquor", "\u{201A}"; +"lstrok", "\u{0142}"; +"lt", "\u{003C}"; +"ltcc", "\u{2AA6}"; +"ltcir", "\u{2A79}"; +"ltdot", "\u{22D6}"; +"lthree", "\u{22CB}"; +"ltimes", "\u{22C9}"; +"ltlarr", "\u{2976}"; +"ltquest", "\u{2A7B}"; +"ltrPar", "\u{2996}"; +"ltri", "\u{25C3}"; +"ltrie", "\u{22B4}"; +"ltrif", "\u{25C2}"; +"lurdshar", "\u{294A}"; +"luruhar", "\u{2966}"; +"lvertneqq", "\u{2268}\u{FE00}"; +"lvnE", "\u{2268}\u{FE00}"; +"mDDot", "\u{223A}"; +"macr", "\u{00AF}"; +"male", "\u{2642}"; +"malt", "\u{2720}"; +"maltese", "\u{2720}"; +"map", "\u{21A6}"; +"mapsto", "\u{21A6}"; +"mapstodown", "\u{21A7}"; +"mapstoleft", "\u{21A4}"; +"mapstoup", "\u{21A5}"; +"marker", "\u{25AE}"; +"mcomma", "\u{2A29}"; +"mcy", "\u{043C}"; +"mdash", "\u{2014}"; +"measuredangle", "\u{2221}"; +"mfr", "\u{1D52A}"; +"mho", "\u{2127}"; +"micro", "\u{00B5}"; +"mid", "\u{2223}"; +"midast", "\u{002A}"; +"midcir", "\u{2AF0}"; +"middot", "\u{00B7}"; +"minus", "\u{2212}"; +"minusb", "\u{229F}"; +"minusd", "\u{2238}"; +"minusdu", "\u{2A2A}"; +"mlcp", "\u{2ADB}"; +"mldr", "\u{2026}"; +"mnplus", "\u{2213}"; +"models", "\u{22A7}"; +"mopf", "\u{1D55E}"; +"mp", "\u{2213}"; +"mscr", "\u{1D4C2}"; +"mstpos", "\u{223E}"; +"mu", "\u{03BC}"; +"multimap", "\u{22B8}"; +"mumap", "\u{22B8}"; +"nGg", "\u{22D9}\u{0338}"; +"nGt", "\u{226B}\u{20D2}"; +"nGtv", "\u{226B}\u{0338}"; +"nLeftarrow", "\u{21CD}"; +"nLeftrightarrow", "\u{21CE}"; +"nLl", "\u{22D8}\u{0338}"; +"nLt", "\u{226A}\u{20D2}"; +"nLtv", "\u{226A}\u{0338}"; +"nRightarrow", "\u{21CF}"; +"nVDash", "\u{22AF}"; +"nVdash", "\u{22AE}"; +"nabla", "\u{2207}"; +"nacute", "\u{0144}"; +"nang", "\u{2220}\u{20D2}"; +"nap", "\u{2249}"; +"napE", "\u{2A70}\u{0338}"; +"napid", "\u{224B}\u{0338}"; +"napos", "\u{0149}"; +"napprox", "\u{2249}"; +"natur", "\u{266E}"; +"natural", "\u{266E}"; +"naturals", "\u{2115}"; +"nbsp", "\u{00A0}"; +"nbump", "\u{224E}\u{0338}"; +"nbumpe", "\u{224F}\u{0338}"; +"ncap", "\u{2A43}"; +"ncaron", "\u{0148}"; +"ncedil", "\u{0146}"; +"ncong", "\u{2247}"; +"ncongdot", "\u{2A6D}\u{0338}"; +"ncup", "\u{2A42}"; +"ncy", "\u{043D}"; +"ndash", "\u{2013}"; +"ne", "\u{2260}"; +"neArr", "\u{21D7}"; +"nearhk", "\u{2924}"; +"nearr", "\u{2197}"; +"nearrow", "\u{2197}"; +"nedot", "\u{2250}\u{0338}"; +"nequiv", "\u{2262}"; +"nesear", "\u{2928}"; +"nesim", "\u{2242}\u{0338}"; +"nexist", "\u{2204}"; +"nexists", "\u{2204}"; +"nfr", "\u{1D52B}"; +"ngE", "\u{2267}\u{0338}"; +"nge", "\u{2271}"; +"ngeq", "\u{2271}"; +"ngeqq", "\u{2267}\u{0338}"; +"ngeqslant", "\u{2A7E}\u{0338}"; +"nges", "\u{2A7E}\u{0338}"; +"ngsim", "\u{2275}"; +"ngt", "\u{226F}"; +"ngtr", "\u{226F}"; +"nhArr", "\u{21CE}"; +"nharr", "\u{21AE}"; +"nhpar", "\u{2AF2}"; +"ni", "\u{220B}"; +"nis", "\u{22FC}"; +"nisd", "\u{22FA}"; +"niv", "\u{220B}"; +"njcy", "\u{045A}"; +"nlArr", "\u{21CD}"; +"nlE", "\u{2266}\u{0338}"; +"nlarr", "\u{219A}"; +"nldr", "\u{2025}"; +"nle", "\u{2270}"; +"nleftarrow", "\u{219A}"; +"nleftrightarrow", "\u{21AE}"; +"nleq", "\u{2270}"; +"nleqq", "\u{2266}\u{0338}"; +"nleqslant", "\u{2A7D}\u{0338}"; +"nles", "\u{2A7D}\u{0338}"; +"nless", "\u{226E}"; +"nlsim", "\u{2274}"; +"nlt", "\u{226E}"; +"nltri", "\u{22EA}"; +"nltrie", "\u{22EC}"; +"nmid", "\u{2224}"; +"nopf", "\u{1D55F}"; +"not", "\u{00AC}"; +"notin", "\u{2209}"; +"notinE", "\u{22F9}\u{0338}"; +"notindot", "\u{22F5}\u{0338}"; +"notinva", "\u{2209}"; +"notinvb", "\u{22F7}"; +"notinvc", "\u{22F6}"; +"notni", "\u{220C}"; +"notniva", "\u{220C}"; +"notnivb", "\u{22FE}"; +"notnivc", "\u{22FD}"; +"npar", "\u{2226}"; +"nparallel", "\u{2226}"; +"nparsl", "\u{2AFD}\u{20E5}"; +"npart", "\u{2202}\u{0338}"; +"npolint", "\u{2A14}"; +"npr", "\u{2280}"; +"nprcue", "\u{22E0}"; +"npre", "\u{2AAF}\u{0338}"; +"nprec", "\u{2280}"; +"npreceq", "\u{2AAF}\u{0338}"; +"nrArr", "\u{21CF}"; +"nrarr", "\u{219B}"; +"nrarrc", "\u{2933}\u{0338}"; +"nrarrw", "\u{219D}\u{0338}"; +"nrightarrow", "\u{219B}"; +"nrtri", "\u{22EB}"; +"nrtrie", "\u{22ED}"; +"nsc", "\u{2281}"; +"nsccue", "\u{22E1}"; +"nsce", "\u{2AB0}\u{0338}"; +"nscr", "\u{1D4C3}"; +"nshortmid", "\u{2224}"; +"nshortparallel", "\u{2226}"; +"nsim", "\u{2241}"; +"nsime", "\u{2244}"; +"nsimeq", "\u{2244}"; +"nsmid", "\u{2224}"; +"nspar", "\u{2226}"; +"nsqsube", "\u{22E2}"; +"nsqsupe", "\u{22E3}"; +"nsub", "\u{2284}"; +"nsubE", "\u{2AC5}\u{0338}"; +"nsube", "\u{2288}"; +"nsubset", "\u{2282}\u{20D2}"; +"nsubseteq", "\u{2288}"; +"nsubseteqq", "\u{2AC5}\u{0338}"; +"nsucc", "\u{2281}"; +"nsucceq", "\u{2AB0}\u{0338}"; +"nsup", "\u{2285}"; +"nsupE", "\u{2AC6}\u{0338}"; +"nsupe", "\u{2289}"; +"nsupset", "\u{2283}\u{20D2}"; +"nsupseteq", "\u{2289}"; +"nsupseteqq", "\u{2AC6}\u{0338}"; +"ntgl", "\u{2279}"; +"ntilde", "\u{00F1}"; +"ntlg", "\u{2278}"; +"ntriangleleft", "\u{22EA}"; +"ntrianglelefteq", "\u{22EC}"; +"ntriangleright", "\u{22EB}"; +"ntrianglerighteq", "\u{22ED}"; +"nu", "\u{03BD}"; +"num", "\u{0023}"; +"numero", "\u{2116}"; +"numsp", "\u{2007}"; +"nvDash", "\u{22AD}"; +"nvHarr", "\u{2904}"; +"nvap", "\u{224D}\u{20D2}"; +"nvdash", "\u{22AC}"; +"nvge", "\u{2265}\u{20D2}"; +"nvgt", "\u{003E}\u{20D2}"; +"nvinfin", "\u{29DE}"; +"nvlArr", "\u{2902}"; +"nvle", "\u{2264}\u{20D2}"; +"nvlt", "\u{003C}\u{20D2}"; +"nvltrie", "\u{22B4}\u{20D2}"; +"nvrArr", "\u{2903}"; +"nvrtrie", "\u{22B5}\u{20D2}"; +"nvsim", "\u{223C}\u{20D2}"; +"nwArr", "\u{21D6}"; +"nwarhk", "\u{2923}"; +"nwarr", "\u{2196}"; +"nwarrow", "\u{2196}"; +"nwnear", "\u{2927}"; +"oS", "\u{24C8}"; +"oacute", "\u{00F3}"; +"oast", "\u{229B}"; +"ocir", "\u{229A}"; +"ocirc", "\u{00F4}"; +"ocy", "\u{043E}"; +"odash", "\u{229D}"; +"odblac", "\u{0151}"; +"odiv", "\u{2A38}"; +"odot", "\u{2299}"; +"odsold", "\u{29BC}"; +"oelig", "\u{0153}"; +"ofcir", "\u{29BF}"; +"ofr", "\u{1D52C}"; +"ogon", "\u{02DB}"; +"ograve", "\u{00F2}"; +"ogt", "\u{29C1}"; +"ohbar", "\u{29B5}"; +"ohm", "\u{03A9}"; +"oint", "\u{222E}"; +"olarr", "\u{21BA}"; +"olcir", "\u{29BE}"; +"olcross", "\u{29BB}"; +"oline", "\u{203E}"; +"olt", "\u{29C0}"; +"omacr", "\u{014D}"; +"omega", "\u{03C9}"; +"omicron", "\u{03BF}"; +"omid", "\u{29B6}"; +"ominus", "\u{2296}"; +"oopf", "\u{1D560}"; +"opar", "\u{29B7}"; +"operp", "\u{29B9}"; +"oplus", "\u{2295}"; +"or", "\u{2228}"; +"orarr", "\u{21BB}"; +"ord", "\u{2A5D}"; +"order", "\u{2134}"; +"orderof", "\u{2134}"; +"ordf", "\u{00AA}"; +"ordm", "\u{00BA}"; +"origof", "\u{22B6}"; +"oror", "\u{2A56}"; +"orslope", "\u{2A57}"; +"orv", "\u{2A5B}"; +"oscr", "\u{2134}"; +"oslash", "\u{00F8}"; +"osol", "\u{2298}"; +"otilde", "\u{00F5}"; +"otimes", "\u{2297}"; +"otimesas", "\u{2A36}"; +"ouml", "\u{00F6}"; +"ovbar", "\u{233D}"; +"par", "\u{2225}"; +"para", "\u{00B6}"; +"parallel", "\u{2225}"; +"parsim", "\u{2AF3}"; +"parsl", "\u{2AFD}"; +"part", "\u{2202}"; +"pcy", "\u{043F}"; +"percnt", "\u{0025}"; +"period", "\u{002E}"; +"permil", "\u{2030}"; +"perp", "\u{22A5}"; +"pertenk", "\u{2031}"; +"pfr", "\u{1D52D}"; +"phi", "\u{03C6}"; +"phiv", "\u{03D5}"; +"phmmat", "\u{2133}"; +"phone", "\u{260E}"; +"pi", "\u{03C0}"; +"pitchfork", "\u{22D4}"; +"piv", "\u{03D6}"; +"planck", "\u{210F}"; +"planckh", "\u{210E}"; +"plankv", "\u{210F}"; +"plus", "\u{002B}"; +"plusacir", "\u{2A23}"; +"plusb", "\u{229E}"; +"pluscir", "\u{2A22}"; +"plusdo", "\u{2214}"; +"plusdu", "\u{2A25}"; +"pluse", "\u{2A72}"; +"plusmn", "\u{00B1}"; +"plussim", "\u{2A26}"; +"plustwo", "\u{2A27}"; +"pm", "\u{00B1}"; +"pointint", "\u{2A15}"; +"popf", "\u{1D561}"; +"pound", "\u{00A3}"; +"pr", "\u{227A}"; +"prE", "\u{2AB3}"; +"prap", "\u{2AB7}"; +"prcue", "\u{227C}"; +"pre", "\u{2AAF}"; +"prec", "\u{227A}"; +"precapprox", "\u{2AB7}"; +"preccurlyeq", "\u{227C}"; +"preceq", "\u{2AAF}"; +"precnapprox", "\u{2AB9}"; +"precneqq", "\u{2AB5}"; +"precnsim", "\u{22E8}"; +"precsim", "\u{227E}"; +"prime", "\u{2032}"; +"primes", "\u{2119}"; +"prnE", "\u{2AB5}"; +"prnap", "\u{2AB9}"; +"prnsim", "\u{22E8}"; +"prod", "\u{220F}"; +"profalar", "\u{232E}"; +"profline", "\u{2312}"; +"profsurf", "\u{2313}"; +"prop", "\u{221D}"; +"propto", "\u{221D}"; +"prsim", "\u{227E}"; +"prurel", "\u{22B0}"; +"pscr", "\u{1D4C5}"; +"psi", "\u{03C8}"; +"puncsp", "\u{2008}"; +"qfr", "\u{1D52E}"; +"qint", "\u{2A0C}"; +"qopf", "\u{1D562}"; +"qprime", "\u{2057}"; +"qscr", "\u{1D4C6}"; +"quaternions", "\u{210D}"; +"quatint", "\u{2A16}"; +"quest", "\u{003F}"; +"questeq", "\u{225F}"; +"quot", "\u{0022}"; +"rAarr", "\u{21DB}"; +"rArr", "\u{21D2}"; +"rAtail", "\u{291C}"; +"rBarr", "\u{290F}"; +"rHar", "\u{2964}"; +"race", "\u{223D}\u{0331}"; +"racute", "\u{0155}"; +"radic", "\u{221A}"; +"raemptyv", "\u{29B3}"; +"rang", "\u{27E9}"; +"rangd", "\u{2992}"; +"range", "\u{29A5}"; +"rangle", "\u{27E9}"; +"raquo", "\u{00BB}"; +"rarr", "\u{2192}"; +"rarrap", "\u{2975}"; +"rarrb", "\u{21E5}"; +"rarrbfs", "\u{2920}"; +"rarrc", "\u{2933}"; +"rarrfs", "\u{291E}"; +"rarrhk", "\u{21AA}"; +"rarrlp", "\u{21AC}"; +"rarrpl", "\u{2945}"; +"rarrsim", "\u{2974}"; +"rarrtl", "\u{21A3}"; +"rarrw", "\u{219D}"; +"ratail", "\u{291A}"; +"ratio", "\u{2236}"; +"rationals", "\u{211A}"; +"rbarr", "\u{290D}"; +"rbbrk", "\u{2773}"; +"rbrace", "\u{007D}"; +"rbrack", "\u{005D}"; +"rbrke", "\u{298C}"; +"rbrksld", "\u{298E}"; +"rbrkslu", "\u{2990}"; +"rcaron", "\u{0159}"; +"rcedil", "\u{0157}"; +"rceil", "\u{2309}"; +"rcub", "\u{007D}"; +"rcy", "\u{0440}"; +"rdca", "\u{2937}"; +"rdldhar", "\u{2969}"; +"rdquo", "\u{201D}"; +"rdquor", "\u{201D}"; +"rdsh", "\u{21B3}"; +"real", "\u{211C}"; +"realine", "\u{211B}"; +"realpart", "\u{211C}"; +"reals", "\u{211D}"; +"rect", "\u{25AD}"; +"reg", "\u{00AE}"; +"rfisht", "\u{297D}"; +"rfloor", "\u{230B}"; +"rfr", "\u{1D52F}"; +"rhard", "\u{21C1}"; +"rharu", "\u{21C0}"; +"rharul", "\u{296C}"; +"rho", "\u{03C1}"; +"rhov", "\u{03F1}"; +"rightarrow", "\u{2192}"; +"rightarrowtail", "\u{21A3}"; +"rightharpoondown", "\u{21C1}"; +"rightharpoonup", "\u{21C0}"; +"rightleftarrows", "\u{21C4}"; +"rightleftharpoons", "\u{21CC}"; +"rightrightarrows", "\u{21C9}"; +"rightsquigarrow", "\u{219D}"; +"rightthreetimes", "\u{22CC}"; +"ring", "\u{02DA}"; +"risingdotseq", "\u{2253}"; +"rlarr", "\u{21C4}"; +"rlhar", "\u{21CC}"; +"rlm", "\u{200F}"; +"rmoust", "\u{23B1}"; +"rmoustache", "\u{23B1}"; +"rnmid", "\u{2AEE}"; +"roang", "\u{27ED}"; +"roarr", "\u{21FE}"; +"robrk", "\u{27E7}"; +"ropar", "\u{2986}"; +"ropf", "\u{1D563}"; +"roplus", "\u{2A2E}"; +"rotimes", "\u{2A35}"; +"rpar", "\u{0029}"; +"rpargt", "\u{2994}"; +"rppolint", "\u{2A12}"; +"rrarr", "\u{21C9}"; +"rsaquo", "\u{203A}"; +"rscr", "\u{1D4C7}"; +"rsh", "\u{21B1}"; +"rsqb", "\u{005D}"; +"rsquo", "\u{2019}"; +"rsquor", "\u{2019}"; +"rthree", "\u{22CC}"; +"rtimes", "\u{22CA}"; +"rtri", "\u{25B9}"; +"rtrie", "\u{22B5}"; +"rtrif", "\u{25B8}"; +"rtriltri", "\u{29CE}"; +"ruluhar", "\u{2968}"; +"rx", "\u{211E}"; +"sacute", "\u{015B}"; +"sbquo", "\u{201A}"; +"sc", "\u{227B}"; +"scE", "\u{2AB4}"; +"scap", "\u{2AB8}"; +"scaron", "\u{0161}"; +"sccue", "\u{227D}"; +"sce", "\u{2AB0}"; +"scedil", "\u{015F}"; +"scirc", "\u{015D}"; +"scnE", "\u{2AB6}"; +"scnap", "\u{2ABA}"; +"scnsim", "\u{22E9}"; +"scpolint", "\u{2A13}"; +"scsim", "\u{227F}"; +"scy", "\u{0441}"; +"sdot", "\u{22C5}"; +"sdotb", "\u{22A1}"; +"sdote", "\u{2A66}"; +"seArr", "\u{21D8}"; +"searhk", "\u{2925}"; +"searr", "\u{2198}"; +"searrow", "\u{2198}"; +"sect", "\u{00A7}"; +"semi", "\u{003B}"; +"seswar", "\u{2929}"; +"setminus", "\u{2216}"; +"setmn", "\u{2216}"; +"sext", "\u{2736}"; +"sfr", "\u{1D530}"; +"sfrown", "\u{2322}"; +"sharp", "\u{266F}"; +"shchcy", "\u{0449}"; +"shcy", "\u{0448}"; +"shortmid", "\u{2223}"; +"shortparallel", "\u{2225}"; +"shy", "\u{00AD}"; +"sigma", "\u{03C3}"; +"sigmaf", "\u{03C2}"; +"sigmav", "\u{03C2}"; +"sim", "\u{223C}"; +"simdot", "\u{2A6A}"; +"sime", "\u{2243}"; +"simeq", "\u{2243}"; +"simg", "\u{2A9E}"; +"simgE", "\u{2AA0}"; +"siml", "\u{2A9D}"; +"simlE", "\u{2A9F}"; +"simne", "\u{2246}"; +"simplus", "\u{2A24}"; +"simrarr", "\u{2972}"; +"slarr", "\u{2190}"; +"smallsetminus", "\u{2216}"; +"smashp", "\u{2A33}"; +"smeparsl", "\u{29E4}"; +"smid", "\u{2223}"; +"smile", "\u{2323}"; +"smt", "\u{2AAA}"; +"smte", "\u{2AAC}"; +"smtes", "\u{2AAC}\u{FE00}"; +"softcy", "\u{044C}"; +"sol", "\u{002F}"; +"solb", "\u{29C4}"; +"solbar", "\u{233F}"; +"sopf", "\u{1D564}"; +"spades", "\u{2660}"; +"spadesuit", "\u{2660}"; +"spar", "\u{2225}"; +"sqcap", "\u{2293}"; +"sqcaps", "\u{2293}\u{FE00}"; +"sqcup", "\u{2294}"; +"sqcups", "\u{2294}\u{FE00}"; +"sqsub", "\u{228F}"; +"sqsube", "\u{2291}"; +"sqsubset", "\u{228F}"; +"sqsubseteq", "\u{2291}"; +"sqsup", "\u{2290}"; +"sqsupe", "\u{2292}"; +"sqsupset", "\u{2290}"; +"sqsupseteq", "\u{2292}"; +"squ", "\u{25A1}"; +"square", "\u{25A1}"; +"squarf", "\u{25AA}"; +"squf", "\u{25AA}"; +"srarr", "\u{2192}"; +"sscr", "\u{1D4C8}"; +"ssetmn", "\u{2216}"; +"ssmile", "\u{2323}"; +"sstarf", "\u{22C6}"; +"star", "\u{2606}"; +"starf", "\u{2605}"; +"straightepsilon", "\u{03F5}"; +"straightphi", "\u{03D5}"; +"strns", "\u{00AF}"; +"sub", "\u{2282}"; +"subE", "\u{2AC5}"; +"subdot", "\u{2ABD}"; +"sube", "\u{2286}"; +"subedot", "\u{2AC3}"; +"submult", "\u{2AC1}"; +"subnE", "\u{2ACB}"; +"subne", "\u{228A}"; +"subplus", "\u{2ABF}"; +"subrarr", "\u{2979}"; +"subset", "\u{2282}"; +"subseteq", "\u{2286}"; +"subseteqq", "\u{2AC5}"; +"subsetneq", "\u{228A}"; +"subsetneqq", "\u{2ACB}"; +"subsim", "\u{2AC7}"; +"subsub", "\u{2AD5}"; +"subsup", "\u{2AD3}"; +"succ", "\u{227B}"; +"succapprox", "\u{2AB8}"; +"succcurlyeq", "\u{227D}"; +"succeq", "\u{2AB0}"; +"succnapprox", "\u{2ABA}"; +"succneqq", "\u{2AB6}"; +"succnsim", "\u{22E9}"; +"succsim", "\u{227F}"; +"sum", "\u{2211}"; +"sung", "\u{266A}"; +"sup1", "\u{00B9}"; +"sup2", "\u{00B2}"; +"sup3", "\u{00B3}"; +"sup", "\u{2283}"; +"supE", "\u{2AC6}"; +"supdot", "\u{2ABE}"; +"supdsub", "\u{2AD8}"; +"supe", "\u{2287}"; +"supedot", "\u{2AC4}"; +"suphsol", "\u{27C9}"; +"suphsub", "\u{2AD7}"; +"suplarr", "\u{297B}"; +"supmult", "\u{2AC2}"; +"supnE", "\u{2ACC}"; +"supne", "\u{228B}"; +"supplus", "\u{2AC0}"; +"supset", "\u{2283}"; +"supseteq", "\u{2287}"; +"supseteqq", "\u{2AC6}"; +"supsetneq", "\u{228B}"; +"supsetneqq", "\u{2ACC}"; +"supsim", "\u{2AC8}"; +"supsub", "\u{2AD4}"; +"supsup", "\u{2AD6}"; +"swArr", "\u{21D9}"; +"swarhk", "\u{2926}"; +"swarr", "\u{2199}"; +"swarrow", "\u{2199}"; +"swnwar", "\u{292A}"; +"szlig", "\u{00DF}"; +"target", "\u{2316}"; +"tau", "\u{03C4}"; +"tbrk", "\u{23B4}"; +"tcaron", "\u{0165}"; +"tcedil", "\u{0163}"; +"tcy", "\u{0442}"; +"tdot", "\u{20DB}"; +"telrec", "\u{2315}"; +"tfr", "\u{1D531}"; +"there4", "\u{2234}"; +"therefore", "\u{2234}"; +"theta", "\u{03B8}"; +"thetasym", "\u{03D1}"; +"thetav", "\u{03D1}"; +"thickapprox", "\u{2248}"; +"thicksim", "\u{223C}"; +"thinsp", "\u{2009}"; +"thkap", "\u{2248}"; +"thksim", "\u{223C}"; +"thorn", "\u{00FE}"; +"tilde", "\u{02DC}"; +"times", "\u{00D7}"; +"timesb", "\u{22A0}"; +"timesbar", "\u{2A31}"; +"timesd", "\u{2A30}"; +"tint", "\u{222D}"; +"toea", "\u{2928}"; +"top", "\u{22A4}"; +"topbot", "\u{2336}"; +"topcir", "\u{2AF1}"; +"topf", "\u{1D565}"; +"topfork", "\u{2ADA}"; +"tosa", "\u{2929}"; +"tprime", "\u{2034}"; +"trade", "\u{2122}"; +"triangle", "\u{25B5}"; +"triangledown", "\u{25BF}"; +"triangleleft", "\u{25C3}"; +"trianglelefteq", "\u{22B4}"; +"triangleq", "\u{225C}"; +"triangleright", "\u{25B9}"; +"trianglerighteq", "\u{22B5}"; +"tridot", "\u{25EC}"; +"trie", "\u{225C}"; +"triminus", "\u{2A3A}"; +"triplus", "\u{2A39}"; +"trisb", "\u{29CD}"; +"tritime", "\u{2A3B}"; +"trpezium", "\u{23E2}"; +"tscr", "\u{1D4C9}"; +"tscy", "\u{0446}"; +"tshcy", "\u{045B}"; +"tstrok", "\u{0167}"; +"twixt", "\u{226C}"; +"twoheadleftarrow", "\u{219E}"; +"twoheadrightarrow", "\u{21A0}"; +"uArr", "\u{21D1}"; +"uHar", "\u{2963}"; +"uacute", "\u{00FA}"; +"uarr", "\u{2191}"; +"ubrcy", "\u{045E}"; +"ubreve", "\u{016D}"; +"ucirc", "\u{00FB}"; +"ucy", "\u{0443}"; +"udarr", "\u{21C5}"; +"udblac", "\u{0171}"; +"udhar", "\u{296E}"; +"ufisht", "\u{297E}"; +"ufr", "\u{1D532}"; +"ugrave", "\u{00F9}"; +"uharl", "\u{21BF}"; +"uharr", "\u{21BE}"; +"uhblk", "\u{2580}"; +"ulcorn", "\u{231C}"; +"ulcorner", "\u{231C}"; +"ulcrop", "\u{230F}"; +"ultri", "\u{25F8}"; +"umacr", "\u{016B}"; +"uml", "\u{00A8}"; +"uogon", "\u{0173}"; +"uopf", "\u{1D566}"; +"uparrow", "\u{2191}"; +"updownarrow", "\u{2195}"; +"upharpoonleft", "\u{21BF}"; +"upharpoonright", "\u{21BE}"; +"uplus", "\u{228E}"; +"upsi", "\u{03C5}"; +"upsih", "\u{03D2}"; +"upsilon", "\u{03C5}"; +"upuparrows", "\u{21C8}"; +"urcorn", "\u{231D}"; +"urcorner", "\u{231D}"; +"urcrop", "\u{230E}"; +"uring", "\u{016F}"; +"urtri", "\u{25F9}"; +"uscr", "\u{1D4CA}"; +"utdot", "\u{22F0}"; +"utilde", "\u{0169}"; +"utri", "\u{25B5}"; +"utrif", "\u{25B4}"; +"uuarr", "\u{21C8}"; +"uuml", "\u{00FC}"; +"uwangle", "\u{29A7}"; +"vArr", "\u{21D5}"; +"vBar", "\u{2AE8}"; +"vBarv", "\u{2AE9}"; +"vDash", "\u{22A8}"; +"vangrt", "\u{299C}"; +"varepsilon", "\u{03F5}"; +"varkappa", "\u{03F0}"; +"varnothing", "\u{2205}"; +"varphi", "\u{03D5}"; +"varpi", "\u{03D6}"; +"varpropto", "\u{221D}"; +"varr", "\u{2195}"; +"varrho", "\u{03F1}"; +"varsigma", "\u{03C2}"; +"varsubsetneq", "\u{228A}\u{FE00}"; +"varsubsetneqq", "\u{2ACB}\u{FE00}"; +"varsupsetneq", "\u{228B}\u{FE00}"; +"varsupsetneqq", "\u{2ACC}\u{FE00}"; +"vartheta", "\u{03D1}"; +"vartriangleleft", "\u{22B2}"; +"vartriangleright", "\u{22B3}"; +"vcy", "\u{0432}"; +"vdash", "\u{22A2}"; +"vee", "\u{2228}"; +"veebar", "\u{22BB}"; +"veeeq", "\u{225A}"; +"vellip", "\u{22EE}"; +"verbar", "\u{007C}"; +"vert", "\u{007C}"; +"vfr", "\u{1D533}"; +"vltri", "\u{22B2}"; +"vnsub", "\u{2282}\u{20D2}"; +"vnsup", "\u{2283}\u{20D2}"; +"vopf", "\u{1D567}"; +"vprop", "\u{221D}"; +"vrtri", "\u{22B3}"; +"vscr", "\u{1D4CB}"; +"vsubnE", "\u{2ACB}\u{FE00}"; +"vsubne", "\u{228A}\u{FE00}"; +"vsupnE", "\u{2ACC}\u{FE00}"; +"vsupne", "\u{228B}\u{FE00}"; +"vzigzag", "\u{299A}"; +"wcirc", "\u{0175}"; +"wedbar", "\u{2A5F}"; +"wedge", "\u{2227}"; +"wedgeq", "\u{2259}"; +"weierp", "\u{2118}"; +"wfr", "\u{1D534}"; +"wopf", "\u{1D568}"; +"wp", "\u{2118}"; +"wr", "\u{2240}"; +"wreath", "\u{2240}"; +"wscr", "\u{1D4CC}"; +"xcap", "\u{22C2}"; +"xcirc", "\u{25EF}"; +"xcup", "\u{22C3}"; +"xdtri", "\u{25BD}"; +"xfr", "\u{1D535}"; +"xhArr", "\u{27FA}"; +"xharr", "\u{27F7}"; +"xi", "\u{03BE}"; +"xlArr", "\u{27F8}"; +"xlarr", "\u{27F5}"; +"xmap", "\u{27FC}"; +"xnis", "\u{22FB}"; +"xodot", "\u{2A00}"; +"xopf", "\u{1D569}"; +"xoplus", "\u{2A01}"; +"xotime", "\u{2A02}"; +"xrArr", "\u{27F9}"; +"xrarr", "\u{27F6}"; +"xscr", "\u{1D4CD}"; +"xsqcup", "\u{2A06}"; +"xuplus", "\u{2A04}"; +"xutri", "\u{25B3}"; +"xvee", "\u{22C1}"; +"xwedge", "\u{22C0}"; +"yacute", "\u{00FD}"; +"yacy", "\u{044F}"; +"ycirc", "\u{0177}"; +"ycy", "\u{044B}"; +"yen", "\u{00A5}"; +"yfr", "\u{1D536}"; +"yicy", "\u{0457}"; +"yopf", "\u{1D56A}"; +"yscr", "\u{1D4CE}"; +"yucy", "\u{044E}"; +"yuml", "\u{00FF}"; +"zacute", "\u{017A}"; +"zcaron", "\u{017E}"; +"zcy", "\u{0437}"; +"zdot", "\u{017C}"; +"zeetrf", "\u{2128}"; +"zeta", "\u{03B6}"; +"zfr", "\u{1D537}"; +"zhcy", "\u{0436}"; +"zigrarr", "\u{21DD}"; +"zopf", "\u{1D56B}"; +"zscr", "\u{1D4CF}"; +"zwj", "\u{200D}"; +"zwnj", "\u{200C}"; +|] + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_data_uchar.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data_uchar.ml new file mode 100644 index 000000000..849a33464 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_data_uchar.ml @@ -0,0 +1,658 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(* Do not edit. Data generated by support/unicode_data.ml *) + +let unicode_version = "15.0.0" + +let whitespace = + [|0x0009; 0x000A; 0x000C; 0x000D; 0x0020; 0x00A0; 0x1680; 0x2000; 0x2001; + 0x2002; 0x2003; 0x2004; 0x2005; 0x2006; 0x2007; 0x2008; 0x2009; 0x200A; + 0x202F; 0x205F; 0x3000|] + +let punctuation = + [|0x0021; 0x0022; 0x0023; 0x0024; 0x0025; 0x0026; 0x0027; 0x0028; 0x0029; + 0x002A; 0x002B; 0x002C; 0x002D; 0x002E; 0x002F; 0x003A; 0x003B; 0x003C; + 0x003D; 0x003E; 0x003F; 0x0040; 0x005B; 0x005C; 0x005D; 0x005E; 0x005F; + 0x0060; 0x007B; 0x007C; 0x007D; 0x007E; 0x00A1; 0x00A7; 0x00AB; 0x00B6; + 0x00B7; 0x00BB; 0x00BF; 0x037E; 0x0387; 0x055A; 0x055B; 0x055C; 0x055D; + 0x055E; 0x055F; 0x0589; 0x058A; 0x05BE; 0x05C0; 0x05C3; 0x05C6; 0x05F3; + 0x05F4; 0x0609; 0x060A; 0x060C; 0x060D; 0x061B; 0x061D; 0x061E; 0x061F; + 0x066A; 0x066B; 0x066C; 0x066D; 0x06D4; 0x0700; 0x0701; 0x0702; 0x0703; + 0x0704; 0x0705; 0x0706; 0x0707; 0x0708; 0x0709; 0x070A; 0x070B; 0x070C; + 0x070D; 0x07F7; 0x07F8; 0x07F9; 0x0830; 0x0831; 0x0832; 0x0833; 0x0834; + 0x0835; 0x0836; 0x0837; 0x0838; 0x0839; 0x083A; 0x083B; 0x083C; 0x083D; + 0x083E; 0x085E; 0x0964; 0x0965; 0x0970; 0x09FD; 0x0A76; 0x0AF0; 0x0C77; + 0x0C84; 0x0DF4; 0x0E4F; 0x0E5A; 0x0E5B; 0x0F04; 0x0F05; 0x0F06; 0x0F07; + 0x0F08; 0x0F09; 0x0F0A; 0x0F0B; 0x0F0C; 0x0F0D; 0x0F0E; 0x0F0F; 0x0F10; + 0x0F11; 0x0F12; 0x0F14; 0x0F3A; 0x0F3B; 0x0F3C; 0x0F3D; 0x0F85; 0x0FD0; + 0x0FD1; 0x0FD2; 0x0FD3; 0x0FD4; 0x0FD9; 0x0FDA; 0x104A; 0x104B; 0x104C; + 0x104D; 0x104E; 0x104F; 0x10FB; 0x1360; 0x1361; 0x1362; 0x1363; 0x1364; + 0x1365; 0x1366; 0x1367; 0x1368; 0x1400; 0x166E; 0x169B; 0x169C; 0x16EB; + 0x16EC; 0x16ED; 0x1735; 0x1736; 0x17D4; 0x17D5; 0x17D6; 0x17D8; 0x17D9; + 0x17DA; 0x1800; 0x1801; 0x1802; 0x1803; 0x1804; 0x1805; 0x1806; 0x1807; + 0x1808; 0x1809; 0x180A; 0x1944; 0x1945; 0x1A1E; 0x1A1F; 0x1AA0; 0x1AA1; + 0x1AA2; 0x1AA3; 0x1AA4; 0x1AA5; 0x1AA6; 0x1AA8; 0x1AA9; 0x1AAA; 0x1AAB; + 0x1AAC; 0x1AAD; 0x1B5A; 0x1B5B; 0x1B5C; 0x1B5D; 0x1B5E; 0x1B5F; 0x1B60; + 0x1B7D; 0x1B7E; 0x1BFC; 0x1BFD; 0x1BFE; 0x1BFF; 0x1C3B; 0x1C3C; 0x1C3D; + 0x1C3E; 0x1C3F; 0x1C7E; 0x1C7F; 0x1CC0; 0x1CC1; 0x1CC2; 0x1CC3; 0x1CC4; + 0x1CC5; 0x1CC6; 0x1CC7; 0x1CD3; 0x2010; 0x2011; 0x2012; 0x2013; 0x2014; + 0x2015; 0x2016; 0x2017; 0x2018; 0x2019; 0x201A; 0x201B; 0x201C; 0x201D; + 0x201E; 0x201F; 0x2020; 0x2021; 0x2022; 0x2023; 0x2024; 0x2025; 0x2026; + 0x2027; 0x2030; 0x2031; 0x2032; 0x2033; 0x2034; 0x2035; 0x2036; 0x2037; + 0x2038; 0x2039; 0x203A; 0x203B; 0x203C; 0x203D; 0x203E; 0x203F; 0x2040; + 0x2041; 0x2042; 0x2043; 0x2045; 0x2046; 0x2047; 0x2048; 0x2049; 0x204A; + 0x204B; 0x204C; 0x204D; 0x204E; 0x204F; 0x2050; 0x2051; 0x2053; 0x2054; + 0x2055; 0x2056; 0x2057; 0x2058; 0x2059; 0x205A; 0x205B; 0x205C; 0x205D; + 0x205E; 0x207D; 0x207E; 0x208D; 0x208E; 0x2308; 0x2309; 0x230A; 0x230B; + 0x2329; 0x232A; 0x2768; 0x2769; 0x276A; 0x276B; 0x276C; 0x276D; 0x276E; + 0x276F; 0x2770; 0x2771; 0x2772; 0x2773; 0x2774; 0x2775; 0x27C5; 0x27C6; + 0x27E6; 0x27E7; 0x27E8; 0x27E9; 0x27EA; 0x27EB; 0x27EC; 0x27ED; 0x27EE; + 0x27EF; 0x2983; 0x2984; 0x2985; 0x2986; 0x2987; 0x2988; 0x2989; 0x298A; + 0x298B; 0x298C; 0x298D; 0x298E; 0x298F; 0x2990; 0x2991; 0x2992; 0x2993; + 0x2994; 0x2995; 0x2996; 0x2997; 0x2998; 0x29D8; 0x29D9; 0x29DA; 0x29DB; + 0x29FC; 0x29FD; 0x2CF9; 0x2CFA; 0x2CFB; 0x2CFC; 0x2CFE; 0x2CFF; 0x2D70; + 0x2E00; 0x2E01; 0x2E02; 0x2E03; 0x2E04; 0x2E05; 0x2E06; 0x2E07; 0x2E08; + 0x2E09; 0x2E0A; 0x2E0B; 0x2E0C; 0x2E0D; 0x2E0E; 0x2E0F; 0x2E10; 0x2E11; + 0x2E12; 0x2E13; 0x2E14; 0x2E15; 0x2E16; 0x2E17; 0x2E18; 0x2E19; 0x2E1A; + 0x2E1B; 0x2E1C; 0x2E1D; 0x2E1E; 0x2E1F; 0x2E20; 0x2E21; 0x2E22; 0x2E23; + 0x2E24; 0x2E25; 0x2E26; 0x2E27; 0x2E28; 0x2E29; 0x2E2A; 0x2E2B; 0x2E2C; + 0x2E2D; 0x2E2E; 0x2E30; 0x2E31; 0x2E32; 0x2E33; 0x2E34; 0x2E35; 0x2E36; + 0x2E37; 0x2E38; 0x2E39; 0x2E3A; 0x2E3B; 0x2E3C; 0x2E3D; 0x2E3E; 0x2E3F; + 0x2E40; 0x2E41; 0x2E42; 0x2E43; 0x2E44; 0x2E45; 0x2E46; 0x2E47; 0x2E48; + 0x2E49; 0x2E4A; 0x2E4B; 0x2E4C; 0x2E4D; 0x2E4E; 0x2E4F; 0x2E52; 0x2E53; + 0x2E54; 0x2E55; 0x2E56; 0x2E57; 0x2E58; 0x2E59; 0x2E5A; 0x2E5B; 0x2E5C; + 0x2E5D; 0x3001; 0x3002; 0x3003; 0x3008; 0x3009; 0x300A; 0x300B; 0x300C; + 0x300D; 0x300E; 0x300F; 0x3010; 0x3011; 0x3014; 0x3015; 0x3016; 0x3017; + 0x3018; 0x3019; 0x301A; 0x301B; 0x301C; 0x301D; 0x301E; 0x301F; 0x3030; + 0x303D; 0x30A0; 0x30FB; 0xA4FE; 0xA4FF; 0xA60D; 0xA60E; 0xA60F; 0xA673; + 0xA67E; 0xA6F2; 0xA6F3; 0xA6F4; 0xA6F5; 0xA6F6; 0xA6F7; 0xA874; 0xA875; + 0xA876; 0xA877; 0xA8CE; 0xA8CF; 0xA8F8; 0xA8F9; 0xA8FA; 0xA8FC; 0xA92E; + 0xA92F; 0xA95F; 0xA9C1; 0xA9C2; 0xA9C3; 0xA9C4; 0xA9C5; 0xA9C6; 0xA9C7; + 0xA9C8; 0xA9C9; 0xA9CA; 0xA9CB; 0xA9CC; 0xA9CD; 0xA9DE; 0xA9DF; 0xAA5C; + 0xAA5D; 0xAA5E; 0xAA5F; 0xAADE; 0xAADF; 0xAAF0; 0xAAF1; 0xABEB; 0xFD3E; + 0xFD3F; 0xFE10; 0xFE11; 0xFE12; 0xFE13; 0xFE14; 0xFE15; 0xFE16; 0xFE17; + 0xFE18; 0xFE19; 0xFE30; 0xFE31; 0xFE32; 0xFE33; 0xFE34; 0xFE35; 0xFE36; + 0xFE37; 0xFE38; 0xFE39; 0xFE3A; 0xFE3B; 0xFE3C; 0xFE3D; 0xFE3E; 0xFE3F; + 0xFE40; 0xFE41; 0xFE42; 0xFE43; 0xFE44; 0xFE45; 0xFE46; 0xFE47; 0xFE48; + 0xFE49; 0xFE4A; 0xFE4B; 0xFE4C; 0xFE4D; 0xFE4E; 0xFE4F; 0xFE50; 0xFE51; + 0xFE52; 0xFE54; 0xFE55; 0xFE56; 0xFE57; 0xFE58; 0xFE59; 0xFE5A; 0xFE5B; + 0xFE5C; 0xFE5D; 0xFE5E; 0xFE5F; 0xFE60; 0xFE61; 0xFE63; 0xFE68; 0xFE6A; + 0xFE6B; 0xFF01; 0xFF02; 0xFF03; 0xFF05; 0xFF06; 0xFF07; 0xFF08; 0xFF09; + 0xFF0A; 0xFF0C; 0xFF0D; 0xFF0E; 0xFF0F; 0xFF1A; 0xFF1B; 0xFF1F; 0xFF20; + 0xFF3B; 0xFF3C; 0xFF3D; 0xFF3F; 0xFF5B; 0xFF5D; 0xFF5F; 0xFF60; 0xFF61; + 0xFF62; 0xFF63; 0xFF64; 0xFF65; 0x10100; 0x10101; 0x10102; 0x1039F; + 0x103D0; 0x1056F; 0x10857; 0x1091F; 0x1093F; 0x10A50; 0x10A51; 0x10A52; + 0x10A53; 0x10A54; 0x10A55; 0x10A56; 0x10A57; 0x10A58; 0x10A7F; 0x10AF0; + 0x10AF1; 0x10AF2; 0x10AF3; 0x10AF4; 0x10AF5; 0x10AF6; 0x10B39; 0x10B3A; + 0x10B3B; 0x10B3C; 0x10B3D; 0x10B3E; 0x10B3F; 0x10B99; 0x10B9A; 0x10B9B; + 0x10B9C; 0x10EAD; 0x10F55; 0x10F56; 0x10F57; 0x10F58; 0x10F59; 0x10F86; + 0x10F87; 0x10F88; 0x10F89; 0x11047; 0x11048; 0x11049; 0x1104A; 0x1104B; + 0x1104C; 0x1104D; 0x110BB; 0x110BC; 0x110BE; 0x110BF; 0x110C0; 0x110C1; + 0x11140; 0x11141; 0x11142; 0x11143; 0x11174; 0x11175; 0x111C5; 0x111C6; + 0x111C7; 0x111C8; 0x111CD; 0x111DB; 0x111DD; 0x111DE; 0x111DF; 0x11238; + 0x11239; 0x1123A; 0x1123B; 0x1123C; 0x1123D; 0x112A9; 0x1144B; 0x1144C; + 0x1144D; 0x1144E; 0x1144F; 0x1145A; 0x1145B; 0x1145D; 0x114C6; 0x115C1; + 0x115C2; 0x115C3; 0x115C4; 0x115C5; 0x115C6; 0x115C7; 0x115C8; 0x115C9; + 0x115CA; 0x115CB; 0x115CC; 0x115CD; 0x115CE; 0x115CF; 0x115D0; 0x115D1; + 0x115D2; 0x115D3; 0x115D4; 0x115D5; 0x115D6; 0x115D7; 0x11641; 0x11642; + 0x11643; 0x11660; 0x11661; 0x11662; 0x11663; 0x11664; 0x11665; 0x11666; + 0x11667; 0x11668; 0x11669; 0x1166A; 0x1166B; 0x1166C; 0x116B9; 0x1173C; + 0x1173D; 0x1173E; 0x1183B; 0x11944; 0x11945; 0x11946; 0x119E2; 0x11A3F; + 0x11A40; 0x11A41; 0x11A42; 0x11A43; 0x11A44; 0x11A45; 0x11A46; 0x11A9A; + 0x11A9B; 0x11A9C; 0x11A9E; 0x11A9F; 0x11AA0; 0x11AA1; 0x11AA2; 0x11B00; + 0x11B01; 0x11B02; 0x11B03; 0x11B04; 0x11B05; 0x11B06; 0x11B07; 0x11B08; + 0x11B09; 0x11C41; 0x11C42; 0x11C43; 0x11C44; 0x11C45; 0x11C70; 0x11C71; + 0x11EF7; 0x11EF8; 0x11F43; 0x11F44; 0x11F45; 0x11F46; 0x11F47; 0x11F48; + 0x11F49; 0x11F4A; 0x11F4B; 0x11F4C; 0x11F4D; 0x11F4E; 0x11F4F; 0x11FFF; + 0x12470; 0x12471; 0x12472; 0x12473; 0x12474; 0x12FF1; 0x12FF2; 0x16A6E; + 0x16A6F; 0x16AF5; 0x16B37; 0x16B38; 0x16B39; 0x16B3A; 0x16B3B; 0x16B44; + 0x16E97; 0x16E98; 0x16E99; 0x16E9A; 0x16FE2; 0x1BC9F; 0x1DA87; 0x1DA88; + 0x1DA89; 0x1DA8A; 0x1DA8B; 0x1E95E; 0x1E95F|] + +let case_fold = + [|0x0041, "\u{0061}"; 0x0042, "\u{0062}"; 0x0043, "\u{0063}"; + 0x0044, "\u{0064}"; 0x0045, "\u{0065}"; 0x0046, "\u{0066}"; + 0x0047, "\u{0067}"; 0x0048, "\u{0068}"; 0x0049, "\u{0069}"; + 0x004A, "\u{006A}"; 0x004B, "\u{006B}"; 0x004C, "\u{006C}"; + 0x004D, "\u{006D}"; 0x004E, "\u{006E}"; 0x004F, "\u{006F}"; + 0x0050, "\u{0070}"; 0x0051, "\u{0071}"; 0x0052, "\u{0072}"; + 0x0053, "\u{0073}"; 0x0054, "\u{0074}"; 0x0055, "\u{0075}"; + 0x0056, "\u{0076}"; 0x0057, "\u{0077}"; 0x0058, "\u{0078}"; + 0x0059, "\u{0079}"; 0x005A, "\u{007A}"; 0x00B5, "\u{03BC}"; + 0x00C0, "\u{00E0}"; 0x00C1, "\u{00E1}"; 0x00C2, "\u{00E2}"; + 0x00C3, "\u{00E3}"; 0x00C4, "\u{00E4}"; 0x00C5, "\u{00E5}"; + 0x00C6, "\u{00E6}"; 0x00C7, "\u{00E7}"; 0x00C8, "\u{00E8}"; + 0x00C9, "\u{00E9}"; 0x00CA, "\u{00EA}"; 0x00CB, "\u{00EB}"; + 0x00CC, "\u{00EC}"; 0x00CD, "\u{00ED}"; 0x00CE, "\u{00EE}"; + 0x00CF, "\u{00EF}"; 0x00D0, "\u{00F0}"; 0x00D1, "\u{00F1}"; + 0x00D2, "\u{00F2}"; 0x00D3, "\u{00F3}"; 0x00D4, "\u{00F4}"; + 0x00D5, "\u{00F5}"; 0x00D6, "\u{00F6}"; 0x00D8, "\u{00F8}"; + 0x00D9, "\u{00F9}"; 0x00DA, "\u{00FA}"; 0x00DB, "\u{00FB}"; + 0x00DC, "\u{00FC}"; 0x00DD, "\u{00FD}"; 0x00DE, "\u{00FE}"; + 0x00DF, "\u{0073}\u{0073}"; 0x0100, "\u{0101}"; 0x0102, "\u{0103}"; + 0x0104, "\u{0105}"; 0x0106, "\u{0107}"; 0x0108, "\u{0109}"; + 0x010A, "\u{010B}"; 0x010C, "\u{010D}"; 0x010E, "\u{010F}"; + 0x0110, "\u{0111}"; 0x0112, "\u{0113}"; 0x0114, "\u{0115}"; + 0x0116, "\u{0117}"; 0x0118, "\u{0119}"; 0x011A, "\u{011B}"; + 0x011C, "\u{011D}"; 0x011E, "\u{011F}"; 0x0120, "\u{0121}"; + 0x0122, "\u{0123}"; 0x0124, "\u{0125}"; 0x0126, "\u{0127}"; + 0x0128, "\u{0129}"; 0x012A, "\u{012B}"; 0x012C, "\u{012D}"; + 0x012E, "\u{012F}"; 0x0130, "\u{0069}\u{0307}"; 0x0132, "\u{0133}"; + 0x0134, "\u{0135}"; 0x0136, "\u{0137}"; 0x0139, "\u{013A}"; + 0x013B, "\u{013C}"; 0x013D, "\u{013E}"; 0x013F, "\u{0140}"; + 0x0141, "\u{0142}"; 0x0143, "\u{0144}"; 0x0145, "\u{0146}"; + 0x0147, "\u{0148}"; 0x0149, "\u{02BC}\u{006E}"; 0x014A, "\u{014B}"; + 0x014C, "\u{014D}"; 0x014E, "\u{014F}"; 0x0150, "\u{0151}"; + 0x0152, "\u{0153}"; 0x0154, "\u{0155}"; 0x0156, "\u{0157}"; + 0x0158, "\u{0159}"; 0x015A, "\u{015B}"; 0x015C, "\u{015D}"; + 0x015E, "\u{015F}"; 0x0160, "\u{0161}"; 0x0162, "\u{0163}"; + 0x0164, "\u{0165}"; 0x0166, "\u{0167}"; 0x0168, "\u{0169}"; + 0x016A, "\u{016B}"; 0x016C, "\u{016D}"; 0x016E, "\u{016F}"; + 0x0170, "\u{0171}"; 0x0172, "\u{0173}"; 0x0174, "\u{0175}"; + 0x0176, "\u{0177}"; 0x0178, "\u{00FF}"; 0x0179, "\u{017A}"; + 0x017B, "\u{017C}"; 0x017D, "\u{017E}"; 0x017F, "\u{0073}"; + 0x0181, "\u{0253}"; 0x0182, "\u{0183}"; 0x0184, "\u{0185}"; + 0x0186, "\u{0254}"; 0x0187, "\u{0188}"; 0x0189, "\u{0256}"; + 0x018A, "\u{0257}"; 0x018B, "\u{018C}"; 0x018E, "\u{01DD}"; + 0x018F, "\u{0259}"; 0x0190, "\u{025B}"; 0x0191, "\u{0192}"; + 0x0193, "\u{0260}"; 0x0194, "\u{0263}"; 0x0196, "\u{0269}"; + 0x0197, "\u{0268}"; 0x0198, "\u{0199}"; 0x019C, "\u{026F}"; + 0x019D, "\u{0272}"; 0x019F, "\u{0275}"; 0x01A0, "\u{01A1}"; + 0x01A2, "\u{01A3}"; 0x01A4, "\u{01A5}"; 0x01A6, "\u{0280}"; + 0x01A7, "\u{01A8}"; 0x01A9, "\u{0283}"; 0x01AC, "\u{01AD}"; + 0x01AE, "\u{0288}"; 0x01AF, "\u{01B0}"; 0x01B1, "\u{028A}"; + 0x01B2, "\u{028B}"; 0x01B3, "\u{01B4}"; 0x01B5, "\u{01B6}"; + 0x01B7, "\u{0292}"; 0x01B8, "\u{01B9}"; 0x01BC, "\u{01BD}"; + 0x01C4, "\u{01C6}"; 0x01C5, "\u{01C6}"; 0x01C7, "\u{01C9}"; + 0x01C8, "\u{01C9}"; 0x01CA, "\u{01CC}"; 0x01CB, "\u{01CC}"; + 0x01CD, "\u{01CE}"; 0x01CF, "\u{01D0}"; 0x01D1, "\u{01D2}"; + 0x01D3, "\u{01D4}"; 0x01D5, "\u{01D6}"; 0x01D7, "\u{01D8}"; + 0x01D9, "\u{01DA}"; 0x01DB, "\u{01DC}"; 0x01DE, "\u{01DF}"; + 0x01E0, "\u{01E1}"; 0x01E2, "\u{01E3}"; 0x01E4, "\u{01E5}"; + 0x01E6, "\u{01E7}"; 0x01E8, "\u{01E9}"; 0x01EA, "\u{01EB}"; + 0x01EC, "\u{01ED}"; 0x01EE, "\u{01EF}"; 0x01F0, "\u{006A}\u{030C}"; + 0x01F1, "\u{01F3}"; 0x01F2, "\u{01F3}"; 0x01F4, "\u{01F5}"; + 0x01F6, "\u{0195}"; 0x01F7, "\u{01BF}"; 0x01F8, "\u{01F9}"; + 0x01FA, "\u{01FB}"; 0x01FC, "\u{01FD}"; 0x01FE, "\u{01FF}"; + 0x0200, "\u{0201}"; 0x0202, "\u{0203}"; 0x0204, "\u{0205}"; + 0x0206, "\u{0207}"; 0x0208, "\u{0209}"; 0x020A, "\u{020B}"; + 0x020C, "\u{020D}"; 0x020E, "\u{020F}"; 0x0210, "\u{0211}"; + 0x0212, "\u{0213}"; 0x0214, "\u{0215}"; 0x0216, "\u{0217}"; + 0x0218, "\u{0219}"; 0x021A, "\u{021B}"; 0x021C, "\u{021D}"; + 0x021E, "\u{021F}"; 0x0220, "\u{019E}"; 0x0222, "\u{0223}"; + 0x0224, "\u{0225}"; 0x0226, "\u{0227}"; 0x0228, "\u{0229}"; + 0x022A, "\u{022B}"; 0x022C, "\u{022D}"; 0x022E, "\u{022F}"; + 0x0230, "\u{0231}"; 0x0232, "\u{0233}"; 0x023A, "\u{2C65}"; + 0x023B, "\u{023C}"; 0x023D, "\u{019A}"; 0x023E, "\u{2C66}"; + 0x0241, "\u{0242}"; 0x0243, "\u{0180}"; 0x0244, "\u{0289}"; + 0x0245, "\u{028C}"; 0x0246, "\u{0247}"; 0x0248, "\u{0249}"; + 0x024A, "\u{024B}"; 0x024C, "\u{024D}"; 0x024E, "\u{024F}"; + 0x0345, "\u{03B9}"; 0x0370, "\u{0371}"; 0x0372, "\u{0373}"; + 0x0376, "\u{0377}"; 0x037F, "\u{03F3}"; 0x0386, "\u{03AC}"; + 0x0388, "\u{03AD}"; 0x0389, "\u{03AE}"; 0x038A, "\u{03AF}"; + 0x038C, "\u{03CC}"; 0x038E, "\u{03CD}"; 0x038F, "\u{03CE}"; + 0x0390, "\u{03B9}\u{0308}\u{0301}"; 0x0391, "\u{03B1}"; + 0x0392, "\u{03B2}"; 0x0393, "\u{03B3}"; 0x0394, "\u{03B4}"; + 0x0395, "\u{03B5}"; 0x0396, "\u{03B6}"; 0x0397, "\u{03B7}"; + 0x0398, "\u{03B8}"; 0x0399, "\u{03B9}"; 0x039A, "\u{03BA}"; + 0x039B, "\u{03BB}"; 0x039C, "\u{03BC}"; 0x039D, "\u{03BD}"; + 0x039E, "\u{03BE}"; 0x039F, "\u{03BF}"; 0x03A0, "\u{03C0}"; + 0x03A1, "\u{03C1}"; 0x03A3, "\u{03C3}"; 0x03A4, "\u{03C4}"; + 0x03A5, "\u{03C5}"; 0x03A6, "\u{03C6}"; 0x03A7, "\u{03C7}"; + 0x03A8, "\u{03C8}"; 0x03A9, "\u{03C9}"; 0x03AA, "\u{03CA}"; + 0x03AB, "\u{03CB}"; 0x03B0, "\u{03C5}\u{0308}\u{0301}"; + 0x03C2, "\u{03C3}"; 0x03CF, "\u{03D7}"; 0x03D0, "\u{03B2}"; + 0x03D1, "\u{03B8}"; 0x03D5, "\u{03C6}"; 0x03D6, "\u{03C0}"; + 0x03D8, "\u{03D9}"; 0x03DA, "\u{03DB}"; 0x03DC, "\u{03DD}"; + 0x03DE, "\u{03DF}"; 0x03E0, "\u{03E1}"; 0x03E2, "\u{03E3}"; + 0x03E4, "\u{03E5}"; 0x03E6, "\u{03E7}"; 0x03E8, "\u{03E9}"; + 0x03EA, "\u{03EB}"; 0x03EC, "\u{03ED}"; 0x03EE, "\u{03EF}"; + 0x03F0, "\u{03BA}"; 0x03F1, "\u{03C1}"; 0x03F4, "\u{03B8}"; + 0x03F5, "\u{03B5}"; 0x03F7, "\u{03F8}"; 0x03F9, "\u{03F2}"; + 0x03FA, "\u{03FB}"; 0x03FD, "\u{037B}"; 0x03FE, "\u{037C}"; + 0x03FF, "\u{037D}"; 0x0400, "\u{0450}"; 0x0401, "\u{0451}"; + 0x0402, "\u{0452}"; 0x0403, "\u{0453}"; 0x0404, "\u{0454}"; + 0x0405, "\u{0455}"; 0x0406, "\u{0456}"; 0x0407, "\u{0457}"; + 0x0408, "\u{0458}"; 0x0409, "\u{0459}"; 0x040A, "\u{045A}"; + 0x040B, "\u{045B}"; 0x040C, "\u{045C}"; 0x040D, "\u{045D}"; + 0x040E, "\u{045E}"; 0x040F, "\u{045F}"; 0x0410, "\u{0430}"; + 0x0411, "\u{0431}"; 0x0412, "\u{0432}"; 0x0413, "\u{0433}"; + 0x0414, "\u{0434}"; 0x0415, "\u{0435}"; 0x0416, "\u{0436}"; + 0x0417, "\u{0437}"; 0x0418, "\u{0438}"; 0x0419, "\u{0439}"; + 0x041A, "\u{043A}"; 0x041B, "\u{043B}"; 0x041C, "\u{043C}"; + 0x041D, "\u{043D}"; 0x041E, "\u{043E}"; 0x041F, "\u{043F}"; + 0x0420, "\u{0440}"; 0x0421, "\u{0441}"; 0x0422, "\u{0442}"; + 0x0423, "\u{0443}"; 0x0424, "\u{0444}"; 0x0425, "\u{0445}"; + 0x0426, "\u{0446}"; 0x0427, "\u{0447}"; 0x0428, "\u{0448}"; + 0x0429, "\u{0449}"; 0x042A, "\u{044A}"; 0x042B, "\u{044B}"; + 0x042C, "\u{044C}"; 0x042D, "\u{044D}"; 0x042E, "\u{044E}"; + 0x042F, "\u{044F}"; 0x0460, "\u{0461}"; 0x0462, "\u{0463}"; + 0x0464, "\u{0465}"; 0x0466, "\u{0467}"; 0x0468, "\u{0469}"; + 0x046A, "\u{046B}"; 0x046C, "\u{046D}"; 0x046E, "\u{046F}"; + 0x0470, "\u{0471}"; 0x0472, "\u{0473}"; 0x0474, "\u{0475}"; + 0x0476, "\u{0477}"; 0x0478, "\u{0479}"; 0x047A, "\u{047B}"; + 0x047C, "\u{047D}"; 0x047E, "\u{047F}"; 0x0480, "\u{0481}"; + 0x048A, "\u{048B}"; 0x048C, "\u{048D}"; 0x048E, "\u{048F}"; + 0x0490, "\u{0491}"; 0x0492, "\u{0493}"; 0x0494, "\u{0495}"; + 0x0496, "\u{0497}"; 0x0498, "\u{0499}"; 0x049A, "\u{049B}"; + 0x049C, "\u{049D}"; 0x049E, "\u{049F}"; 0x04A0, "\u{04A1}"; + 0x04A2, "\u{04A3}"; 0x04A4, "\u{04A5}"; 0x04A6, "\u{04A7}"; + 0x04A8, "\u{04A9}"; 0x04AA, "\u{04AB}"; 0x04AC, "\u{04AD}"; + 0x04AE, "\u{04AF}"; 0x04B0, "\u{04B1}"; 0x04B2, "\u{04B3}"; + 0x04B4, "\u{04B5}"; 0x04B6, "\u{04B7}"; 0x04B8, "\u{04B9}"; + 0x04BA, "\u{04BB}"; 0x04BC, "\u{04BD}"; 0x04BE, "\u{04BF}"; + 0x04C0, "\u{04CF}"; 0x04C1, "\u{04C2}"; 0x04C3, "\u{04C4}"; + 0x04C5, "\u{04C6}"; 0x04C7, "\u{04C8}"; 0x04C9, "\u{04CA}"; + 0x04CB, "\u{04CC}"; 0x04CD, "\u{04CE}"; 0x04D0, "\u{04D1}"; + 0x04D2, "\u{04D3}"; 0x04D4, "\u{04D5}"; 0x04D6, "\u{04D7}"; + 0x04D8, "\u{04D9}"; 0x04DA, "\u{04DB}"; 0x04DC, "\u{04DD}"; + 0x04DE, "\u{04DF}"; 0x04E0, "\u{04E1}"; 0x04E2, "\u{04E3}"; + 0x04E4, "\u{04E5}"; 0x04E6, "\u{04E7}"; 0x04E8, "\u{04E9}"; + 0x04EA, "\u{04EB}"; 0x04EC, "\u{04ED}"; 0x04EE, "\u{04EF}"; + 0x04F0, "\u{04F1}"; 0x04F2, "\u{04F3}"; 0x04F4, "\u{04F5}"; + 0x04F6, "\u{04F7}"; 0x04F8, "\u{04F9}"; 0x04FA, "\u{04FB}"; + 0x04FC, "\u{04FD}"; 0x04FE, "\u{04FF}"; 0x0500, "\u{0501}"; + 0x0502, "\u{0503}"; 0x0504, "\u{0505}"; 0x0506, "\u{0507}"; + 0x0508, "\u{0509}"; 0x050A, "\u{050B}"; 0x050C, "\u{050D}"; + 0x050E, "\u{050F}"; 0x0510, "\u{0511}"; 0x0512, "\u{0513}"; + 0x0514, "\u{0515}"; 0x0516, "\u{0517}"; 0x0518, "\u{0519}"; + 0x051A, "\u{051B}"; 0x051C, "\u{051D}"; 0x051E, "\u{051F}"; + 0x0520, "\u{0521}"; 0x0522, "\u{0523}"; 0x0524, "\u{0525}"; + 0x0526, "\u{0527}"; 0x0528, "\u{0529}"; 0x052A, "\u{052B}"; + 0x052C, "\u{052D}"; 0x052E, "\u{052F}"; 0x0531, "\u{0561}"; + 0x0532, "\u{0562}"; 0x0533, "\u{0563}"; 0x0534, "\u{0564}"; + 0x0535, "\u{0565}"; 0x0536, "\u{0566}"; 0x0537, "\u{0567}"; + 0x0538, "\u{0568}"; 0x0539, "\u{0569}"; 0x053A, "\u{056A}"; + 0x053B, "\u{056B}"; 0x053C, "\u{056C}"; 0x053D, "\u{056D}"; + 0x053E, "\u{056E}"; 0x053F, "\u{056F}"; 0x0540, "\u{0570}"; + 0x0541, "\u{0571}"; 0x0542, "\u{0572}"; 0x0543, "\u{0573}"; + 0x0544, "\u{0574}"; 0x0545, "\u{0575}"; 0x0546, "\u{0576}"; + 0x0547, "\u{0577}"; 0x0548, "\u{0578}"; 0x0549, "\u{0579}"; + 0x054A, "\u{057A}"; 0x054B, "\u{057B}"; 0x054C, "\u{057C}"; + 0x054D, "\u{057D}"; 0x054E, "\u{057E}"; 0x054F, "\u{057F}"; + 0x0550, "\u{0580}"; 0x0551, "\u{0581}"; 0x0552, "\u{0582}"; + 0x0553, "\u{0583}"; 0x0554, "\u{0584}"; 0x0555, "\u{0585}"; + 0x0556, "\u{0586}"; 0x0587, "\u{0565}\u{0582}"; 0x10A0, "\u{2D00}"; + 0x10A1, "\u{2D01}"; 0x10A2, "\u{2D02}"; 0x10A3, "\u{2D03}"; + 0x10A4, "\u{2D04}"; 0x10A5, "\u{2D05}"; 0x10A6, "\u{2D06}"; + 0x10A7, "\u{2D07}"; 0x10A8, "\u{2D08}"; 0x10A9, "\u{2D09}"; + 0x10AA, "\u{2D0A}"; 0x10AB, "\u{2D0B}"; 0x10AC, "\u{2D0C}"; + 0x10AD, "\u{2D0D}"; 0x10AE, "\u{2D0E}"; 0x10AF, "\u{2D0F}"; + 0x10B0, "\u{2D10}"; 0x10B1, "\u{2D11}"; 0x10B2, "\u{2D12}"; + 0x10B3, "\u{2D13}"; 0x10B4, "\u{2D14}"; 0x10B5, "\u{2D15}"; + 0x10B6, "\u{2D16}"; 0x10B7, "\u{2D17}"; 0x10B8, "\u{2D18}"; + 0x10B9, "\u{2D19}"; 0x10BA, "\u{2D1A}"; 0x10BB, "\u{2D1B}"; + 0x10BC, "\u{2D1C}"; 0x10BD, "\u{2D1D}"; 0x10BE, "\u{2D1E}"; + 0x10BF, "\u{2D1F}"; 0x10C0, "\u{2D20}"; 0x10C1, "\u{2D21}"; + 0x10C2, "\u{2D22}"; 0x10C3, "\u{2D23}"; 0x10C4, "\u{2D24}"; + 0x10C5, "\u{2D25}"; 0x10C7, "\u{2D27}"; 0x10CD, "\u{2D2D}"; + 0x13F8, "\u{13F0}"; 0x13F9, "\u{13F1}"; 0x13FA, "\u{13F2}"; + 0x13FB, "\u{13F3}"; 0x13FC, "\u{13F4}"; 0x13FD, "\u{13F5}"; + 0x1C80, "\u{0432}"; 0x1C81, "\u{0434}"; 0x1C82, "\u{043E}"; + 0x1C83, "\u{0441}"; 0x1C84, "\u{0442}"; 0x1C85, "\u{0442}"; + 0x1C86, "\u{044A}"; 0x1C87, "\u{0463}"; 0x1C88, "\u{A64B}"; + 0x1C90, "\u{10D0}"; 0x1C91, "\u{10D1}"; 0x1C92, "\u{10D2}"; + 0x1C93, "\u{10D3}"; 0x1C94, "\u{10D4}"; 0x1C95, "\u{10D5}"; + 0x1C96, "\u{10D6}"; 0x1C97, "\u{10D7}"; 0x1C98, "\u{10D8}"; + 0x1C99, "\u{10D9}"; 0x1C9A, "\u{10DA}"; 0x1C9B, "\u{10DB}"; + 0x1C9C, "\u{10DC}"; 0x1C9D, "\u{10DD}"; 0x1C9E, "\u{10DE}"; + 0x1C9F, "\u{10DF}"; 0x1CA0, "\u{10E0}"; 0x1CA1, "\u{10E1}"; + 0x1CA2, "\u{10E2}"; 0x1CA3, "\u{10E3}"; 0x1CA4, "\u{10E4}"; + 0x1CA5, "\u{10E5}"; 0x1CA6, "\u{10E6}"; 0x1CA7, "\u{10E7}"; + 0x1CA8, "\u{10E8}"; 0x1CA9, "\u{10E9}"; 0x1CAA, "\u{10EA}"; + 0x1CAB, "\u{10EB}"; 0x1CAC, "\u{10EC}"; 0x1CAD, "\u{10ED}"; + 0x1CAE, "\u{10EE}"; 0x1CAF, "\u{10EF}"; 0x1CB0, "\u{10F0}"; + 0x1CB1, "\u{10F1}"; 0x1CB2, "\u{10F2}"; 0x1CB3, "\u{10F3}"; + 0x1CB4, "\u{10F4}"; 0x1CB5, "\u{10F5}"; 0x1CB6, "\u{10F6}"; + 0x1CB7, "\u{10F7}"; 0x1CB8, "\u{10F8}"; 0x1CB9, "\u{10F9}"; + 0x1CBA, "\u{10FA}"; 0x1CBD, "\u{10FD}"; 0x1CBE, "\u{10FE}"; + 0x1CBF, "\u{10FF}"; 0x1E00, "\u{1E01}"; 0x1E02, "\u{1E03}"; + 0x1E04, "\u{1E05}"; 0x1E06, "\u{1E07}"; 0x1E08, "\u{1E09}"; + 0x1E0A, "\u{1E0B}"; 0x1E0C, "\u{1E0D}"; 0x1E0E, "\u{1E0F}"; + 0x1E10, "\u{1E11}"; 0x1E12, "\u{1E13}"; 0x1E14, "\u{1E15}"; + 0x1E16, "\u{1E17}"; 0x1E18, "\u{1E19}"; 0x1E1A, "\u{1E1B}"; + 0x1E1C, "\u{1E1D}"; 0x1E1E, "\u{1E1F}"; 0x1E20, "\u{1E21}"; + 0x1E22, "\u{1E23}"; 0x1E24, "\u{1E25}"; 0x1E26, "\u{1E27}"; + 0x1E28, "\u{1E29}"; 0x1E2A, "\u{1E2B}"; 0x1E2C, "\u{1E2D}"; + 0x1E2E, "\u{1E2F}"; 0x1E30, "\u{1E31}"; 0x1E32, "\u{1E33}"; + 0x1E34, "\u{1E35}"; 0x1E36, "\u{1E37}"; 0x1E38, "\u{1E39}"; + 0x1E3A, "\u{1E3B}"; 0x1E3C, "\u{1E3D}"; 0x1E3E, "\u{1E3F}"; + 0x1E40, "\u{1E41}"; 0x1E42, "\u{1E43}"; 0x1E44, "\u{1E45}"; + 0x1E46, "\u{1E47}"; 0x1E48, "\u{1E49}"; 0x1E4A, "\u{1E4B}"; + 0x1E4C, "\u{1E4D}"; 0x1E4E, "\u{1E4F}"; 0x1E50, "\u{1E51}"; + 0x1E52, "\u{1E53}"; 0x1E54, "\u{1E55}"; 0x1E56, "\u{1E57}"; + 0x1E58, "\u{1E59}"; 0x1E5A, "\u{1E5B}"; 0x1E5C, "\u{1E5D}"; + 0x1E5E, "\u{1E5F}"; 0x1E60, "\u{1E61}"; 0x1E62, "\u{1E63}"; + 0x1E64, "\u{1E65}"; 0x1E66, "\u{1E67}"; 0x1E68, "\u{1E69}"; + 0x1E6A, "\u{1E6B}"; 0x1E6C, "\u{1E6D}"; 0x1E6E, "\u{1E6F}"; + 0x1E70, "\u{1E71}"; 0x1E72, "\u{1E73}"; 0x1E74, "\u{1E75}"; + 0x1E76, "\u{1E77}"; 0x1E78, "\u{1E79}"; 0x1E7A, "\u{1E7B}"; + 0x1E7C, "\u{1E7D}"; 0x1E7E, "\u{1E7F}"; 0x1E80, "\u{1E81}"; + 0x1E82, "\u{1E83}"; 0x1E84, "\u{1E85}"; 0x1E86, "\u{1E87}"; + 0x1E88, "\u{1E89}"; 0x1E8A, "\u{1E8B}"; 0x1E8C, "\u{1E8D}"; + 0x1E8E, "\u{1E8F}"; 0x1E90, "\u{1E91}"; 0x1E92, "\u{1E93}"; + 0x1E94, "\u{1E95}"; 0x1E96, "\u{0068}\u{0331}"; + 0x1E97, "\u{0074}\u{0308}"; 0x1E98, "\u{0077}\u{030A}"; + 0x1E99, "\u{0079}\u{030A}"; 0x1E9A, "\u{0061}\u{02BE}"; + 0x1E9B, "\u{1E61}"; 0x1E9E, "\u{0073}\u{0073}"; 0x1EA0, "\u{1EA1}"; + 0x1EA2, "\u{1EA3}"; 0x1EA4, "\u{1EA5}"; 0x1EA6, "\u{1EA7}"; + 0x1EA8, "\u{1EA9}"; 0x1EAA, "\u{1EAB}"; 0x1EAC, "\u{1EAD}"; + 0x1EAE, "\u{1EAF}"; 0x1EB0, "\u{1EB1}"; 0x1EB2, "\u{1EB3}"; + 0x1EB4, "\u{1EB5}"; 0x1EB6, "\u{1EB7}"; 0x1EB8, "\u{1EB9}"; + 0x1EBA, "\u{1EBB}"; 0x1EBC, "\u{1EBD}"; 0x1EBE, "\u{1EBF}"; + 0x1EC0, "\u{1EC1}"; 0x1EC2, "\u{1EC3}"; 0x1EC4, "\u{1EC5}"; + 0x1EC6, "\u{1EC7}"; 0x1EC8, "\u{1EC9}"; 0x1ECA, "\u{1ECB}"; + 0x1ECC, "\u{1ECD}"; 0x1ECE, "\u{1ECF}"; 0x1ED0, "\u{1ED1}"; + 0x1ED2, "\u{1ED3}"; 0x1ED4, "\u{1ED5}"; 0x1ED6, "\u{1ED7}"; + 0x1ED8, "\u{1ED9}"; 0x1EDA, "\u{1EDB}"; 0x1EDC, "\u{1EDD}"; + 0x1EDE, "\u{1EDF}"; 0x1EE0, "\u{1EE1}"; 0x1EE2, "\u{1EE3}"; + 0x1EE4, "\u{1EE5}"; 0x1EE6, "\u{1EE7}"; 0x1EE8, "\u{1EE9}"; + 0x1EEA, "\u{1EEB}"; 0x1EEC, "\u{1EED}"; 0x1EEE, "\u{1EEF}"; + 0x1EF0, "\u{1EF1}"; 0x1EF2, "\u{1EF3}"; 0x1EF4, "\u{1EF5}"; + 0x1EF6, "\u{1EF7}"; 0x1EF8, "\u{1EF9}"; 0x1EFA, "\u{1EFB}"; + 0x1EFC, "\u{1EFD}"; 0x1EFE, "\u{1EFF}"; 0x1F08, "\u{1F00}"; + 0x1F09, "\u{1F01}"; 0x1F0A, "\u{1F02}"; 0x1F0B, "\u{1F03}"; + 0x1F0C, "\u{1F04}"; 0x1F0D, "\u{1F05}"; 0x1F0E, "\u{1F06}"; + 0x1F0F, "\u{1F07}"; 0x1F18, "\u{1F10}"; 0x1F19, "\u{1F11}"; + 0x1F1A, "\u{1F12}"; 0x1F1B, "\u{1F13}"; 0x1F1C, "\u{1F14}"; + 0x1F1D, "\u{1F15}"; 0x1F28, "\u{1F20}"; 0x1F29, "\u{1F21}"; + 0x1F2A, "\u{1F22}"; 0x1F2B, "\u{1F23}"; 0x1F2C, "\u{1F24}"; + 0x1F2D, "\u{1F25}"; 0x1F2E, "\u{1F26}"; 0x1F2F, "\u{1F27}"; + 0x1F38, "\u{1F30}"; 0x1F39, "\u{1F31}"; 0x1F3A, "\u{1F32}"; + 0x1F3B, "\u{1F33}"; 0x1F3C, "\u{1F34}"; 0x1F3D, "\u{1F35}"; + 0x1F3E, "\u{1F36}"; 0x1F3F, "\u{1F37}"; 0x1F48, "\u{1F40}"; + 0x1F49, "\u{1F41}"; 0x1F4A, "\u{1F42}"; 0x1F4B, "\u{1F43}"; + 0x1F4C, "\u{1F44}"; 0x1F4D, "\u{1F45}"; 0x1F50, "\u{03C5}\u{0313}"; + 0x1F52, "\u{03C5}\u{0313}\u{0300}"; 0x1F54, "\u{03C5}\u{0313}\u{0301}"; + 0x1F56, "\u{03C5}\u{0313}\u{0342}"; 0x1F59, "\u{1F51}"; + 0x1F5B, "\u{1F53}"; 0x1F5D, "\u{1F55}"; 0x1F5F, "\u{1F57}"; + 0x1F68, "\u{1F60}"; 0x1F69, "\u{1F61}"; 0x1F6A, "\u{1F62}"; + 0x1F6B, "\u{1F63}"; 0x1F6C, "\u{1F64}"; 0x1F6D, "\u{1F65}"; + 0x1F6E, "\u{1F66}"; 0x1F6F, "\u{1F67}"; 0x1F80, "\u{1F00}\u{03B9}"; + 0x1F81, "\u{1F01}\u{03B9}"; 0x1F82, "\u{1F02}\u{03B9}"; + 0x1F83, "\u{1F03}\u{03B9}"; 0x1F84, "\u{1F04}\u{03B9}"; + 0x1F85, "\u{1F05}\u{03B9}"; 0x1F86, "\u{1F06}\u{03B9}"; + 0x1F87, "\u{1F07}\u{03B9}"; 0x1F88, "\u{1F00}\u{03B9}"; + 0x1F89, "\u{1F01}\u{03B9}"; 0x1F8A, "\u{1F02}\u{03B9}"; + 0x1F8B, "\u{1F03}\u{03B9}"; 0x1F8C, "\u{1F04}\u{03B9}"; + 0x1F8D, "\u{1F05}\u{03B9}"; 0x1F8E, "\u{1F06}\u{03B9}"; + 0x1F8F, "\u{1F07}\u{03B9}"; 0x1F90, "\u{1F20}\u{03B9}"; + 0x1F91, "\u{1F21}\u{03B9}"; 0x1F92, "\u{1F22}\u{03B9}"; + 0x1F93, "\u{1F23}\u{03B9}"; 0x1F94, "\u{1F24}\u{03B9}"; + 0x1F95, "\u{1F25}\u{03B9}"; 0x1F96, "\u{1F26}\u{03B9}"; + 0x1F97, "\u{1F27}\u{03B9}"; 0x1F98, "\u{1F20}\u{03B9}"; + 0x1F99, "\u{1F21}\u{03B9}"; 0x1F9A, "\u{1F22}\u{03B9}"; + 0x1F9B, "\u{1F23}\u{03B9}"; 0x1F9C, "\u{1F24}\u{03B9}"; + 0x1F9D, "\u{1F25}\u{03B9}"; 0x1F9E, "\u{1F26}\u{03B9}"; + 0x1F9F, "\u{1F27}\u{03B9}"; 0x1FA0, "\u{1F60}\u{03B9}"; + 0x1FA1, "\u{1F61}\u{03B9}"; 0x1FA2, "\u{1F62}\u{03B9}"; + 0x1FA3, "\u{1F63}\u{03B9}"; 0x1FA4, "\u{1F64}\u{03B9}"; + 0x1FA5, "\u{1F65}\u{03B9}"; 0x1FA6, "\u{1F66}\u{03B9}"; + 0x1FA7, "\u{1F67}\u{03B9}"; 0x1FA8, "\u{1F60}\u{03B9}"; + 0x1FA9, "\u{1F61}\u{03B9}"; 0x1FAA, "\u{1F62}\u{03B9}"; + 0x1FAB, "\u{1F63}\u{03B9}"; 0x1FAC, "\u{1F64}\u{03B9}"; + 0x1FAD, "\u{1F65}\u{03B9}"; 0x1FAE, "\u{1F66}\u{03B9}"; + 0x1FAF, "\u{1F67}\u{03B9}"; 0x1FB2, "\u{1F70}\u{03B9}"; + 0x1FB3, "\u{03B1}\u{03B9}"; 0x1FB4, "\u{03AC}\u{03B9}"; + 0x1FB6, "\u{03B1}\u{0342}"; 0x1FB7, "\u{03B1}\u{0342}\u{03B9}"; + 0x1FB8, "\u{1FB0}"; 0x1FB9, "\u{1FB1}"; 0x1FBA, "\u{1F70}"; + 0x1FBB, "\u{1F71}"; 0x1FBC, "\u{03B1}\u{03B9}"; 0x1FBE, "\u{03B9}"; + 0x1FC2, "\u{1F74}\u{03B9}"; 0x1FC3, "\u{03B7}\u{03B9}"; + 0x1FC4, "\u{03AE}\u{03B9}"; 0x1FC6, "\u{03B7}\u{0342}"; + 0x1FC7, "\u{03B7}\u{0342}\u{03B9}"; 0x1FC8, "\u{1F72}"; + 0x1FC9, "\u{1F73}"; 0x1FCA, "\u{1F74}"; 0x1FCB, "\u{1F75}"; + 0x1FCC, "\u{03B7}\u{03B9}"; 0x1FD2, "\u{03B9}\u{0308}\u{0300}"; + 0x1FD3, "\u{03B9}\u{0308}\u{0301}"; 0x1FD6, "\u{03B9}\u{0342}"; + 0x1FD7, "\u{03B9}\u{0308}\u{0342}"; 0x1FD8, "\u{1FD0}"; + 0x1FD9, "\u{1FD1}"; 0x1FDA, "\u{1F76}"; 0x1FDB, "\u{1F77}"; + 0x1FE2, "\u{03C5}\u{0308}\u{0300}"; 0x1FE3, "\u{03C5}\u{0308}\u{0301}"; + 0x1FE4, "\u{03C1}\u{0313}"; 0x1FE6, "\u{03C5}\u{0342}"; + 0x1FE7, "\u{03C5}\u{0308}\u{0342}"; 0x1FE8, "\u{1FE0}"; + 0x1FE9, "\u{1FE1}"; 0x1FEA, "\u{1F7A}"; 0x1FEB, "\u{1F7B}"; + 0x1FEC, "\u{1FE5}"; 0x1FF2, "\u{1F7C}\u{03B9}"; + 0x1FF3, "\u{03C9}\u{03B9}"; 0x1FF4, "\u{03CE}\u{03B9}"; + 0x1FF6, "\u{03C9}\u{0342}"; 0x1FF7, "\u{03C9}\u{0342}\u{03B9}"; + 0x1FF8, "\u{1F78}"; 0x1FF9, "\u{1F79}"; 0x1FFA, "\u{1F7C}"; + 0x1FFB, "\u{1F7D}"; 0x1FFC, "\u{03C9}\u{03B9}"; 0x2126, "\u{03C9}"; + 0x212A, "\u{006B}"; 0x212B, "\u{00E5}"; 0x2132, "\u{214E}"; + 0x2160, "\u{2170}"; 0x2161, "\u{2171}"; 0x2162, "\u{2172}"; + 0x2163, "\u{2173}"; 0x2164, "\u{2174}"; 0x2165, "\u{2175}"; + 0x2166, "\u{2176}"; 0x2167, "\u{2177}"; 0x2168, "\u{2178}"; + 0x2169, "\u{2179}"; 0x216A, "\u{217A}"; 0x216B, "\u{217B}"; + 0x216C, "\u{217C}"; 0x216D, "\u{217D}"; 0x216E, "\u{217E}"; + 0x216F, "\u{217F}"; 0x2183, "\u{2184}"; 0x24B6, "\u{24D0}"; + 0x24B7, "\u{24D1}"; 0x24B8, "\u{24D2}"; 0x24B9, "\u{24D3}"; + 0x24BA, "\u{24D4}"; 0x24BB, "\u{24D5}"; 0x24BC, "\u{24D6}"; + 0x24BD, "\u{24D7}"; 0x24BE, "\u{24D8}"; 0x24BF, "\u{24D9}"; + 0x24C0, "\u{24DA}"; 0x24C1, "\u{24DB}"; 0x24C2, "\u{24DC}"; + 0x24C3, "\u{24DD}"; 0x24C4, "\u{24DE}"; 0x24C5, "\u{24DF}"; + 0x24C6, "\u{24E0}"; 0x24C7, "\u{24E1}"; 0x24C8, "\u{24E2}"; + 0x24C9, "\u{24E3}"; 0x24CA, "\u{24E4}"; 0x24CB, "\u{24E5}"; + 0x24CC, "\u{24E6}"; 0x24CD, "\u{24E7}"; 0x24CE, "\u{24E8}"; + 0x24CF, "\u{24E9}"; 0x2C00, "\u{2C30}"; 0x2C01, "\u{2C31}"; + 0x2C02, "\u{2C32}"; 0x2C03, "\u{2C33}"; 0x2C04, "\u{2C34}"; + 0x2C05, "\u{2C35}"; 0x2C06, "\u{2C36}"; 0x2C07, "\u{2C37}"; + 0x2C08, "\u{2C38}"; 0x2C09, "\u{2C39}"; 0x2C0A, "\u{2C3A}"; + 0x2C0B, "\u{2C3B}"; 0x2C0C, "\u{2C3C}"; 0x2C0D, "\u{2C3D}"; + 0x2C0E, "\u{2C3E}"; 0x2C0F, "\u{2C3F}"; 0x2C10, "\u{2C40}"; + 0x2C11, "\u{2C41}"; 0x2C12, "\u{2C42}"; 0x2C13, "\u{2C43}"; + 0x2C14, "\u{2C44}"; 0x2C15, "\u{2C45}"; 0x2C16, "\u{2C46}"; + 0x2C17, "\u{2C47}"; 0x2C18, "\u{2C48}"; 0x2C19, "\u{2C49}"; + 0x2C1A, "\u{2C4A}"; 0x2C1B, "\u{2C4B}"; 0x2C1C, "\u{2C4C}"; + 0x2C1D, "\u{2C4D}"; 0x2C1E, "\u{2C4E}"; 0x2C1F, "\u{2C4F}"; + 0x2C20, "\u{2C50}"; 0x2C21, "\u{2C51}"; 0x2C22, "\u{2C52}"; + 0x2C23, "\u{2C53}"; 0x2C24, "\u{2C54}"; 0x2C25, "\u{2C55}"; + 0x2C26, "\u{2C56}"; 0x2C27, "\u{2C57}"; 0x2C28, "\u{2C58}"; + 0x2C29, "\u{2C59}"; 0x2C2A, "\u{2C5A}"; 0x2C2B, "\u{2C5B}"; + 0x2C2C, "\u{2C5C}"; 0x2C2D, "\u{2C5D}"; 0x2C2E, "\u{2C5E}"; + 0x2C2F, "\u{2C5F}"; 0x2C60, "\u{2C61}"; 0x2C62, "\u{026B}"; + 0x2C63, "\u{1D7D}"; 0x2C64, "\u{027D}"; 0x2C67, "\u{2C68}"; + 0x2C69, "\u{2C6A}"; 0x2C6B, "\u{2C6C}"; 0x2C6D, "\u{0251}"; + 0x2C6E, "\u{0271}"; 0x2C6F, "\u{0250}"; 0x2C70, "\u{0252}"; + 0x2C72, "\u{2C73}"; 0x2C75, "\u{2C76}"; 0x2C7E, "\u{023F}"; + 0x2C7F, "\u{0240}"; 0x2C80, "\u{2C81}"; 0x2C82, "\u{2C83}"; + 0x2C84, "\u{2C85}"; 0x2C86, "\u{2C87}"; 0x2C88, "\u{2C89}"; + 0x2C8A, "\u{2C8B}"; 0x2C8C, "\u{2C8D}"; 0x2C8E, "\u{2C8F}"; + 0x2C90, "\u{2C91}"; 0x2C92, "\u{2C93}"; 0x2C94, "\u{2C95}"; + 0x2C96, "\u{2C97}"; 0x2C98, "\u{2C99}"; 0x2C9A, "\u{2C9B}"; + 0x2C9C, "\u{2C9D}"; 0x2C9E, "\u{2C9F}"; 0x2CA0, "\u{2CA1}"; + 0x2CA2, "\u{2CA3}"; 0x2CA4, "\u{2CA5}"; 0x2CA6, "\u{2CA7}"; + 0x2CA8, "\u{2CA9}"; 0x2CAA, "\u{2CAB}"; 0x2CAC, "\u{2CAD}"; + 0x2CAE, "\u{2CAF}"; 0x2CB0, "\u{2CB1}"; 0x2CB2, "\u{2CB3}"; + 0x2CB4, "\u{2CB5}"; 0x2CB6, "\u{2CB7}"; 0x2CB8, "\u{2CB9}"; + 0x2CBA, "\u{2CBB}"; 0x2CBC, "\u{2CBD}"; 0x2CBE, "\u{2CBF}"; + 0x2CC0, "\u{2CC1}"; 0x2CC2, "\u{2CC3}"; 0x2CC4, "\u{2CC5}"; + 0x2CC6, "\u{2CC7}"; 0x2CC8, "\u{2CC9}"; 0x2CCA, "\u{2CCB}"; + 0x2CCC, "\u{2CCD}"; 0x2CCE, "\u{2CCF}"; 0x2CD0, "\u{2CD1}"; + 0x2CD2, "\u{2CD3}"; 0x2CD4, "\u{2CD5}"; 0x2CD6, "\u{2CD7}"; + 0x2CD8, "\u{2CD9}"; 0x2CDA, "\u{2CDB}"; 0x2CDC, "\u{2CDD}"; + 0x2CDE, "\u{2CDF}"; 0x2CE0, "\u{2CE1}"; 0x2CE2, "\u{2CE3}"; + 0x2CEB, "\u{2CEC}"; 0x2CED, "\u{2CEE}"; 0x2CF2, "\u{2CF3}"; + 0xA640, "\u{A641}"; 0xA642, "\u{A643}"; 0xA644, "\u{A645}"; + 0xA646, "\u{A647}"; 0xA648, "\u{A649}"; 0xA64A, "\u{A64B}"; + 0xA64C, "\u{A64D}"; 0xA64E, "\u{A64F}"; 0xA650, "\u{A651}"; + 0xA652, "\u{A653}"; 0xA654, "\u{A655}"; 0xA656, "\u{A657}"; + 0xA658, "\u{A659}"; 0xA65A, "\u{A65B}"; 0xA65C, "\u{A65D}"; + 0xA65E, "\u{A65F}"; 0xA660, "\u{A661}"; 0xA662, "\u{A663}"; + 0xA664, "\u{A665}"; 0xA666, "\u{A667}"; 0xA668, "\u{A669}"; + 0xA66A, "\u{A66B}"; 0xA66C, "\u{A66D}"; 0xA680, "\u{A681}"; + 0xA682, "\u{A683}"; 0xA684, "\u{A685}"; 0xA686, "\u{A687}"; + 0xA688, "\u{A689}"; 0xA68A, "\u{A68B}"; 0xA68C, "\u{A68D}"; + 0xA68E, "\u{A68F}"; 0xA690, "\u{A691}"; 0xA692, "\u{A693}"; + 0xA694, "\u{A695}"; 0xA696, "\u{A697}"; 0xA698, "\u{A699}"; + 0xA69A, "\u{A69B}"; 0xA722, "\u{A723}"; 0xA724, "\u{A725}"; + 0xA726, "\u{A727}"; 0xA728, "\u{A729}"; 0xA72A, "\u{A72B}"; + 0xA72C, "\u{A72D}"; 0xA72E, "\u{A72F}"; 0xA732, "\u{A733}"; + 0xA734, "\u{A735}"; 0xA736, "\u{A737}"; 0xA738, "\u{A739}"; + 0xA73A, "\u{A73B}"; 0xA73C, "\u{A73D}"; 0xA73E, "\u{A73F}"; + 0xA740, "\u{A741}"; 0xA742, "\u{A743}"; 0xA744, "\u{A745}"; + 0xA746, "\u{A747}"; 0xA748, "\u{A749}"; 0xA74A, "\u{A74B}"; + 0xA74C, "\u{A74D}"; 0xA74E, "\u{A74F}"; 0xA750, "\u{A751}"; + 0xA752, "\u{A753}"; 0xA754, "\u{A755}"; 0xA756, "\u{A757}"; + 0xA758, "\u{A759}"; 0xA75A, "\u{A75B}"; 0xA75C, "\u{A75D}"; + 0xA75E, "\u{A75F}"; 0xA760, "\u{A761}"; 0xA762, "\u{A763}"; + 0xA764, "\u{A765}"; 0xA766, "\u{A767}"; 0xA768, "\u{A769}"; + 0xA76A, "\u{A76B}"; 0xA76C, "\u{A76D}"; 0xA76E, "\u{A76F}"; + 0xA779, "\u{A77A}"; 0xA77B, "\u{A77C}"; 0xA77D, "\u{1D79}"; + 0xA77E, "\u{A77F}"; 0xA780, "\u{A781}"; 0xA782, "\u{A783}"; + 0xA784, "\u{A785}"; 0xA786, "\u{A787}"; 0xA78B, "\u{A78C}"; + 0xA78D, "\u{0265}"; 0xA790, "\u{A791}"; 0xA792, "\u{A793}"; + 0xA796, "\u{A797}"; 0xA798, "\u{A799}"; 0xA79A, "\u{A79B}"; + 0xA79C, "\u{A79D}"; 0xA79E, "\u{A79F}"; 0xA7A0, "\u{A7A1}"; + 0xA7A2, "\u{A7A3}"; 0xA7A4, "\u{A7A5}"; 0xA7A6, "\u{A7A7}"; + 0xA7A8, "\u{A7A9}"; 0xA7AA, "\u{0266}"; 0xA7AB, "\u{025C}"; + 0xA7AC, "\u{0261}"; 0xA7AD, "\u{026C}"; 0xA7AE, "\u{026A}"; + 0xA7B0, "\u{029E}"; 0xA7B1, "\u{0287}"; 0xA7B2, "\u{029D}"; + 0xA7B3, "\u{AB53}"; 0xA7B4, "\u{A7B5}"; 0xA7B6, "\u{A7B7}"; + 0xA7B8, "\u{A7B9}"; 0xA7BA, "\u{A7BB}"; 0xA7BC, "\u{A7BD}"; + 0xA7BE, "\u{A7BF}"; 0xA7C0, "\u{A7C1}"; 0xA7C2, "\u{A7C3}"; + 0xA7C4, "\u{A794}"; 0xA7C5, "\u{0282}"; 0xA7C6, "\u{1D8E}"; + 0xA7C7, "\u{A7C8}"; 0xA7C9, "\u{A7CA}"; 0xA7D0, "\u{A7D1}"; + 0xA7D6, "\u{A7D7}"; 0xA7D8, "\u{A7D9}"; 0xA7F5, "\u{A7F6}"; + 0xAB70, "\u{13A0}"; 0xAB71, "\u{13A1}"; 0xAB72, "\u{13A2}"; + 0xAB73, "\u{13A3}"; 0xAB74, "\u{13A4}"; 0xAB75, "\u{13A5}"; + 0xAB76, "\u{13A6}"; 0xAB77, "\u{13A7}"; 0xAB78, "\u{13A8}"; + 0xAB79, "\u{13A9}"; 0xAB7A, "\u{13AA}"; 0xAB7B, "\u{13AB}"; + 0xAB7C, "\u{13AC}"; 0xAB7D, "\u{13AD}"; 0xAB7E, "\u{13AE}"; + 0xAB7F, "\u{13AF}"; 0xAB80, "\u{13B0}"; 0xAB81, "\u{13B1}"; + 0xAB82, "\u{13B2}"; 0xAB83, "\u{13B3}"; 0xAB84, "\u{13B4}"; + 0xAB85, "\u{13B5}"; 0xAB86, "\u{13B6}"; 0xAB87, "\u{13B7}"; + 0xAB88, "\u{13B8}"; 0xAB89, "\u{13B9}"; 0xAB8A, "\u{13BA}"; + 0xAB8B, "\u{13BB}"; 0xAB8C, "\u{13BC}"; 0xAB8D, "\u{13BD}"; + 0xAB8E, "\u{13BE}"; 0xAB8F, "\u{13BF}"; 0xAB90, "\u{13C0}"; + 0xAB91, "\u{13C1}"; 0xAB92, "\u{13C2}"; 0xAB93, "\u{13C3}"; + 0xAB94, "\u{13C4}"; 0xAB95, "\u{13C5}"; 0xAB96, "\u{13C6}"; + 0xAB97, "\u{13C7}"; 0xAB98, "\u{13C8}"; 0xAB99, "\u{13C9}"; + 0xAB9A, "\u{13CA}"; 0xAB9B, "\u{13CB}"; 0xAB9C, "\u{13CC}"; + 0xAB9D, "\u{13CD}"; 0xAB9E, "\u{13CE}"; 0xAB9F, "\u{13CF}"; + 0xABA0, "\u{13D0}"; 0xABA1, "\u{13D1}"; 0xABA2, "\u{13D2}"; + 0xABA3, "\u{13D3}"; 0xABA4, "\u{13D4}"; 0xABA5, "\u{13D5}"; + 0xABA6, "\u{13D6}"; 0xABA7, "\u{13D7}"; 0xABA8, "\u{13D8}"; + 0xABA9, "\u{13D9}"; 0xABAA, "\u{13DA}"; 0xABAB, "\u{13DB}"; + 0xABAC, "\u{13DC}"; 0xABAD, "\u{13DD}"; 0xABAE, "\u{13DE}"; + 0xABAF, "\u{13DF}"; 0xABB0, "\u{13E0}"; 0xABB1, "\u{13E1}"; + 0xABB2, "\u{13E2}"; 0xABB3, "\u{13E3}"; 0xABB4, "\u{13E4}"; + 0xABB5, "\u{13E5}"; 0xABB6, "\u{13E6}"; 0xABB7, "\u{13E7}"; + 0xABB8, "\u{13E8}"; 0xABB9, "\u{13E9}"; 0xABBA, "\u{13EA}"; + 0xABBB, "\u{13EB}"; 0xABBC, "\u{13EC}"; 0xABBD, "\u{13ED}"; + 0xABBE, "\u{13EE}"; 0xABBF, "\u{13EF}"; 0xFB00, "\u{0066}\u{0066}"; + 0xFB01, "\u{0066}\u{0069}"; 0xFB02, "\u{0066}\u{006C}"; + 0xFB03, "\u{0066}\u{0066}\u{0069}"; 0xFB04, "\u{0066}\u{0066}\u{006C}"; + 0xFB05, "\u{0073}\u{0074}"; 0xFB06, "\u{0073}\u{0074}"; + 0xFB13, "\u{0574}\u{0576}"; 0xFB14, "\u{0574}\u{0565}"; + 0xFB15, "\u{0574}\u{056B}"; 0xFB16, "\u{057E}\u{0576}"; + 0xFB17, "\u{0574}\u{056D}"; 0xFF21, "\u{FF41}"; 0xFF22, "\u{FF42}"; + 0xFF23, "\u{FF43}"; 0xFF24, "\u{FF44}"; 0xFF25, "\u{FF45}"; + 0xFF26, "\u{FF46}"; 0xFF27, "\u{FF47}"; 0xFF28, "\u{FF48}"; + 0xFF29, "\u{FF49}"; 0xFF2A, "\u{FF4A}"; 0xFF2B, "\u{FF4B}"; + 0xFF2C, "\u{FF4C}"; 0xFF2D, "\u{FF4D}"; 0xFF2E, "\u{FF4E}"; + 0xFF2F, "\u{FF4F}"; 0xFF30, "\u{FF50}"; 0xFF31, "\u{FF51}"; + 0xFF32, "\u{FF52}"; 0xFF33, "\u{FF53}"; 0xFF34, "\u{FF54}"; + 0xFF35, "\u{FF55}"; 0xFF36, "\u{FF56}"; 0xFF37, "\u{FF57}"; + 0xFF38, "\u{FF58}"; 0xFF39, "\u{FF59}"; 0xFF3A, "\u{FF5A}"; + 0x10400, "\u{10428}"; 0x10401, "\u{10429}"; 0x10402, "\u{1042A}"; + 0x10403, "\u{1042B}"; 0x10404, "\u{1042C}"; 0x10405, "\u{1042D}"; + 0x10406, "\u{1042E}"; 0x10407, "\u{1042F}"; 0x10408, "\u{10430}"; + 0x10409, "\u{10431}"; 0x1040A, "\u{10432}"; 0x1040B, "\u{10433}"; + 0x1040C, "\u{10434}"; 0x1040D, "\u{10435}"; 0x1040E, "\u{10436}"; + 0x1040F, "\u{10437}"; 0x10410, "\u{10438}"; 0x10411, "\u{10439}"; + 0x10412, "\u{1043A}"; 0x10413, "\u{1043B}"; 0x10414, "\u{1043C}"; + 0x10415, "\u{1043D}"; 0x10416, "\u{1043E}"; 0x10417, "\u{1043F}"; + 0x10418, "\u{10440}"; 0x10419, "\u{10441}"; 0x1041A, "\u{10442}"; + 0x1041B, "\u{10443}"; 0x1041C, "\u{10444}"; 0x1041D, "\u{10445}"; + 0x1041E, "\u{10446}"; 0x1041F, "\u{10447}"; 0x10420, "\u{10448}"; + 0x10421, "\u{10449}"; 0x10422, "\u{1044A}"; 0x10423, "\u{1044B}"; + 0x10424, "\u{1044C}"; 0x10425, "\u{1044D}"; 0x10426, "\u{1044E}"; + 0x10427, "\u{1044F}"; 0x104B0, "\u{104D8}"; 0x104B1, "\u{104D9}"; + 0x104B2, "\u{104DA}"; 0x104B3, "\u{104DB}"; 0x104B4, "\u{104DC}"; + 0x104B5, "\u{104DD}"; 0x104B6, "\u{104DE}"; 0x104B7, "\u{104DF}"; + 0x104B8, "\u{104E0}"; 0x104B9, "\u{104E1}"; 0x104BA, "\u{104E2}"; + 0x104BB, "\u{104E3}"; 0x104BC, "\u{104E4}"; 0x104BD, "\u{104E5}"; + 0x104BE, "\u{104E6}"; 0x104BF, "\u{104E7}"; 0x104C0, "\u{104E8}"; + 0x104C1, "\u{104E9}"; 0x104C2, "\u{104EA}"; 0x104C3, "\u{104EB}"; + 0x104C4, "\u{104EC}"; 0x104C5, "\u{104ED}"; 0x104C6, "\u{104EE}"; + 0x104C7, "\u{104EF}"; 0x104C8, "\u{104F0}"; 0x104C9, "\u{104F1}"; + 0x104CA, "\u{104F2}"; 0x104CB, "\u{104F3}"; 0x104CC, "\u{104F4}"; + 0x104CD, "\u{104F5}"; 0x104CE, "\u{104F6}"; 0x104CF, "\u{104F7}"; + 0x104D0, "\u{104F8}"; 0x104D1, "\u{104F9}"; 0x104D2, "\u{104FA}"; + 0x104D3, "\u{104FB}"; 0x10570, "\u{10597}"; 0x10571, "\u{10598}"; + 0x10572, "\u{10599}"; 0x10573, "\u{1059A}"; 0x10574, "\u{1059B}"; + 0x10575, "\u{1059C}"; 0x10576, "\u{1059D}"; 0x10577, "\u{1059E}"; + 0x10578, "\u{1059F}"; 0x10579, "\u{105A0}"; 0x1057A, "\u{105A1}"; + 0x1057C, "\u{105A3}"; 0x1057D, "\u{105A4}"; 0x1057E, "\u{105A5}"; + 0x1057F, "\u{105A6}"; 0x10580, "\u{105A7}"; 0x10581, "\u{105A8}"; + 0x10582, "\u{105A9}"; 0x10583, "\u{105AA}"; 0x10584, "\u{105AB}"; + 0x10585, "\u{105AC}"; 0x10586, "\u{105AD}"; 0x10587, "\u{105AE}"; + 0x10588, "\u{105AF}"; 0x10589, "\u{105B0}"; 0x1058A, "\u{105B1}"; + 0x1058C, "\u{105B3}"; 0x1058D, "\u{105B4}"; 0x1058E, "\u{105B5}"; + 0x1058F, "\u{105B6}"; 0x10590, "\u{105B7}"; 0x10591, "\u{105B8}"; + 0x10592, "\u{105B9}"; 0x10594, "\u{105BB}"; 0x10595, "\u{105BC}"; + 0x10C80, "\u{10CC0}"; 0x10C81, "\u{10CC1}"; 0x10C82, "\u{10CC2}"; + 0x10C83, "\u{10CC3}"; 0x10C84, "\u{10CC4}"; 0x10C85, "\u{10CC5}"; + 0x10C86, "\u{10CC6}"; 0x10C87, "\u{10CC7}"; 0x10C88, "\u{10CC8}"; + 0x10C89, "\u{10CC9}"; 0x10C8A, "\u{10CCA}"; 0x10C8B, "\u{10CCB}"; + 0x10C8C, "\u{10CCC}"; 0x10C8D, "\u{10CCD}"; 0x10C8E, "\u{10CCE}"; + 0x10C8F, "\u{10CCF}"; 0x10C90, "\u{10CD0}"; 0x10C91, "\u{10CD1}"; + 0x10C92, "\u{10CD2}"; 0x10C93, "\u{10CD3}"; 0x10C94, "\u{10CD4}"; + 0x10C95, "\u{10CD5}"; 0x10C96, "\u{10CD6}"; 0x10C97, "\u{10CD7}"; + 0x10C98, "\u{10CD8}"; 0x10C99, "\u{10CD9}"; 0x10C9A, "\u{10CDA}"; + 0x10C9B, "\u{10CDB}"; 0x10C9C, "\u{10CDC}"; 0x10C9D, "\u{10CDD}"; + 0x10C9E, "\u{10CDE}"; 0x10C9F, "\u{10CDF}"; 0x10CA0, "\u{10CE0}"; + 0x10CA1, "\u{10CE1}"; 0x10CA2, "\u{10CE2}"; 0x10CA3, "\u{10CE3}"; + 0x10CA4, "\u{10CE4}"; 0x10CA5, "\u{10CE5}"; 0x10CA6, "\u{10CE6}"; + 0x10CA7, "\u{10CE7}"; 0x10CA8, "\u{10CE8}"; 0x10CA9, "\u{10CE9}"; + 0x10CAA, "\u{10CEA}"; 0x10CAB, "\u{10CEB}"; 0x10CAC, "\u{10CEC}"; + 0x10CAD, "\u{10CED}"; 0x10CAE, "\u{10CEE}"; 0x10CAF, "\u{10CEF}"; + 0x10CB0, "\u{10CF0}"; 0x10CB1, "\u{10CF1}"; 0x10CB2, "\u{10CF2}"; + 0x118A0, "\u{118C0}"; 0x118A1, "\u{118C1}"; 0x118A2, "\u{118C2}"; + 0x118A3, "\u{118C3}"; 0x118A4, "\u{118C4}"; 0x118A5, "\u{118C5}"; + 0x118A6, "\u{118C6}"; 0x118A7, "\u{118C7}"; 0x118A8, "\u{118C8}"; + 0x118A9, "\u{118C9}"; 0x118AA, "\u{118CA}"; 0x118AB, "\u{118CB}"; + 0x118AC, "\u{118CC}"; 0x118AD, "\u{118CD}"; 0x118AE, "\u{118CE}"; + 0x118AF, "\u{118CF}"; 0x118B0, "\u{118D0}"; 0x118B1, "\u{118D1}"; + 0x118B2, "\u{118D2}"; 0x118B3, "\u{118D3}"; 0x118B4, "\u{118D4}"; + 0x118B5, "\u{118D5}"; 0x118B6, "\u{118D6}"; 0x118B7, "\u{118D7}"; + 0x118B8, "\u{118D8}"; 0x118B9, "\u{118D9}"; 0x118BA, "\u{118DA}"; + 0x118BB, "\u{118DB}"; 0x118BC, "\u{118DC}"; 0x118BD, "\u{118DD}"; + 0x118BE, "\u{118DE}"; 0x118BF, "\u{118DF}"; 0x16E40, "\u{16E60}"; + 0x16E41, "\u{16E61}"; 0x16E42, "\u{16E62}"; 0x16E43, "\u{16E63}"; + 0x16E44, "\u{16E64}"; 0x16E45, "\u{16E65}"; 0x16E46, "\u{16E66}"; + 0x16E47, "\u{16E67}"; 0x16E48, "\u{16E68}"; 0x16E49, "\u{16E69}"; + 0x16E4A, "\u{16E6A}"; 0x16E4B, "\u{16E6B}"; 0x16E4C, "\u{16E6C}"; + 0x16E4D, "\u{16E6D}"; 0x16E4E, "\u{16E6E}"; 0x16E4F, "\u{16E6F}"; + 0x16E50, "\u{16E70}"; 0x16E51, "\u{16E71}"; 0x16E52, "\u{16E72}"; + 0x16E53, "\u{16E73}"; 0x16E54, "\u{16E74}"; 0x16E55, "\u{16E75}"; + 0x16E56, "\u{16E76}"; 0x16E57, "\u{16E77}"; 0x16E58, "\u{16E78}"; + 0x16E59, "\u{16E79}"; 0x16E5A, "\u{16E7A}"; 0x16E5B, "\u{16E7B}"; + 0x16E5C, "\u{16E7C}"; 0x16E5D, "\u{16E7D}"; 0x16E5E, "\u{16E7E}"; + 0x16E5F, "\u{16E7F}"; 0x1E900, "\u{1E922}"; 0x1E901, "\u{1E923}"; + 0x1E902, "\u{1E924}"; 0x1E903, "\u{1E925}"; 0x1E904, "\u{1E926}"; + 0x1E905, "\u{1E927}"; 0x1E906, "\u{1E928}"; 0x1E907, "\u{1E929}"; + 0x1E908, "\u{1E92A}"; 0x1E909, "\u{1E92B}"; 0x1E90A, "\u{1E92C}"; + 0x1E90B, "\u{1E92D}"; 0x1E90C, "\u{1E92E}"; 0x1E90D, "\u{1E92F}"; + 0x1E90E, "\u{1E930}"; 0x1E90F, "\u{1E931}"; 0x1E910, "\u{1E932}"; + 0x1E911, "\u{1E933}"; 0x1E912, "\u{1E934}"; 0x1E913, "\u{1E935}"; + 0x1E914, "\u{1E936}"; 0x1E915, "\u{1E937}"; 0x1E916, "\u{1E938}"; + 0x1E917, "\u{1E939}"; 0x1E918, "\u{1E93A}"; 0x1E919, "\u{1E93B}"; + 0x1E91A, "\u{1E93C}"; 0x1E91B, "\u{1E93D}"; 0x1E91C, "\u{1E93E}"; + 0x1E91D, "\u{1E93F}"; 0x1E91E, "\u{1E940}"; 0x1E91F, "\u{1E941}"; + 0x1E920, "\u{1E942}"; 0x1E921, "\u{1E943}"|] + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_html.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_html.ml new file mode 100644 index 000000000..40c8d8227 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_html.ml @@ -0,0 +1,518 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +open Cmarkit +module C = Cmarkit_renderer.Context +module String_set = Set.Make (String) + +(* Renderer state *) + +type state = + { safe : bool; + backend_blocks : bool; + mutable ids : String_set.t; + mutable footnote_count : int; + mutable footnotes : + (* Text, id, ref count, footnote *) + (string * string * int ref * Block.Footnote.t) Label.Map.t } + +let state : state C.State.t = C.State.make () +let safe c = (C.State.get c state).safe +let backend_blocks c = (C.State.get c state).backend_blocks +let init_context ?(backend_blocks = false) ~safe c _ = + let ids = String_set.empty and footnotes = Label.Map.empty in + let st = { safe; backend_blocks; ids; footnote_count = 0; footnotes } in + C.State.set c state (Some st) + +let unique_id c id = + let st = C.State.get c state in + let rec loop ids id c = + let id' = if c = 0 then id else (String.concat "-" [id; Int.to_string c]) in + match String_set.mem id' ids with + | true -> loop ids id (c + 1) + | false -> st.ids <- String_set.add id' ids; id' + in + loop st.ids id 0 + +let footnote_id label = + let make_label l = String.map (function ' ' | '\t' -> '-' | c -> c) l in + "fn-" ^ (make_label (String.sub label 1 (String.length label - 1))) + +let footnote_ref_id fnid c = String.concat "-" ["ref"; Int.to_string c; fnid] + +let make_footnote_ref_ids c label fn = + let st = C.State.get c state in + match Label.Map.find_opt label st.footnotes with + | Some (text, id, refc, _) -> incr refc; (text, id, footnote_ref_id id !refc) + | None -> + st.footnote_count <- st.footnote_count + 1; + let text = String.concat "" ["["; Int.to_string st.footnote_count;"]"] in + let id = footnote_id label in + st.footnotes <- Label.Map.add label (text, id, ref 1, fn) st.footnotes; + text, id, footnote_ref_id id 1 + +(* Escaping *) + +let buffer_add_html_escaped_uchar b u = match Uchar.to_int u with +| 0x0000 -> Buffer.add_utf_8_uchar b Uchar.rep +| 0x0026 (* & *) -> Buffer.add_string b "&" +| 0x003C (* < *) -> Buffer.add_string b "<" +| 0x003E (* > *) -> Buffer.add_string b ">" +(* | 0x0027 (* ' *) -> Buffer.add_string b "'" *) +| 0x0022 (* '\"' *) -> Buffer.add_string b """ +| _ -> Buffer.add_utf_8_uchar b u + +let html_escaped_uchar c s = buffer_add_html_escaped_uchar (C.buffer c) s + +let buffer_add_html_escaped_string b s = + let string = Buffer.add_string in + let len = String.length s in + let max_idx = len - 1 in + let flush b start i = + if start < len then Buffer.add_substring b s start (i - start); + in + let rec loop start i = + if i > max_idx then flush b start i else + let next = i + 1 in + match String.get s i with + | '\x00' -> + flush b start i; Buffer.add_utf_8_uchar b Uchar.rep; loop next next + | '&' -> flush b start i; string b "&"; loop next next + | '<' -> flush b start i; string b "<"; loop next next + | '>' -> flush b start i; string b ">"; loop next next +(* | '\'' -> flush c start i; string c "'"; loop next next *) + | '\"' -> flush b start i; string b """; loop next next + | c -> loop start next + in + loop 0 0 + +let html_escaped_string c s = buffer_add_html_escaped_string (C.buffer c) s + +let buffer_add_pct_encoded_string b s = (* Percent encoded + HTML escaped *) + let byte = Buffer.add_char and string = Buffer.add_string in + let unsafe_hexdig_of_int i = match i < 10 with + | true -> Char.unsafe_chr (i + 0x30) + | false -> Char.unsafe_chr (i + 0x37) + in + let flush b max start i = + if start <= max then Buffer.add_substring b s start (i - start); + in + let rec loop b s max start i = + if i > max then flush b max start i else + let next = i + 1 in + match String.get s i with + | '%' (* In CommonMark destinations may have percent encoded chars *) + (* See https://tools.ietf.org/html/rfc3986 *) + (* unreserved *) + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '-' | '.' | '_' | '~' + (* sub-delims *) + | '!' | '$' | (*'&' | '\'' | *) '(' | ')' | '*' | '+' | ',' | ';' | '=' + (* gen-delims *) + | ':' | '/' | '?' | '#' | (* '[' | ']' cmark escapes them | *) '@' -> + loop b s max start next + | '&' -> flush b max start i; string b "&"; loop b s max next next + | '\'' -> flush b max start i; string b "'"; loop b s max next next + | c -> + flush b max start i; + let hi = (Char.code c lsr 4) land 0xF in + let lo = (Char.code c) land 0xF in + byte b '%'; + byte b (unsafe_hexdig_of_int hi); + byte b (unsafe_hexdig_of_int lo); + loop b s max next next + in + loop b s (String.length s - 1) 0 0 + +let pct_encoded_string c s = buffer_add_pct_encoded_string (C.buffer c) s + +(* Rendering functions *) + +let comment c s = + C.string c "" + +let comment_undefined_label c l = match Inline.Link.referenced_label l with +| None -> () | Some def -> comment c ("Undefined label " ^ (Label.key def)) + +let comment_unknown_def_type c l = match Inline.Link.referenced_label l with +| None -> () | Some def -> + comment c ("Unknown label definition type for " ^ (Label.key def)) + +let comment_foonote_image c l = match Inline.Link.referenced_label l with +| None -> () | Some def -> + comment c ("Footnote " ^ (Label.key def) ^ " referenced as image") + +let block_lines c = function (* newlines only between lines *) +| [] -> () | (l, _) :: ls -> + let line c (l, _) = C.byte c '\n'; C.string c l in + C.string c l; List.iter (line c) ls + +(* Inline rendering *) + +let autolink c a = + let pre = if Inline.Autolink.is_email a then "mailto:" else "" in + let url = pre ^ (fst (Inline.Autolink.link a)) in + let url = if Inline.Link.is_unsafe url then "" else url in + C.string c ""; + html_escaped_string c (fst (Inline.Autolink.link a)); + C.string c "" + +let break c b = match Inline.Break.type' b with +| `Hard -> C.string c "
\n" +| `Soft -> C.byte c '\n' + +let code_span c cs = + C.string c ""; + html_escaped_string c (Inline.Code_span.code cs); + C.string c "" + +let emphasis c e = + C.string c ""; C.inline c (Inline.Emphasis.inline e); C.string c "" + +let strong_emphasis c e = + C.string c ""; + C.inline c (Inline.Emphasis.inline e); + C.string c "" + +let link_dest_and_title c ld = + let dest = match Link_definition.dest ld with + | None -> "" + | Some (link, _) when safe c && Inline.Link.is_unsafe link -> "" + | Some (link, _) -> link + in + let title = match Link_definition.title ld with + | None -> "" + | Some title -> String.concat "\n" (List.map (fun (_, (t, _)) -> t) title) + in + dest, title + +let image ?(close = " >") c i = + match Inline.Link.reference_definition (C.get_defs c) i with + | Some (Link_definition.Def (ld, _)) -> + let plain_text c i = + let lines = Inline.to_plain_text ~break_on_soft:false i in + String.concat "\n" (List.map (String.concat "") lines) + in + let link, title = link_dest_and_title c ld in + C.string c "\""; "" + then (C.string c " title=\""; html_escaped_string c title; C.byte c '\"'); + C.string c close + | Some (Block.Footnote.Def _) -> comment_foonote_image c i + | None -> comment_undefined_label c i + | Some _ -> comment_unknown_def_type c i + +let link_footnote c l fn = + let key = Label.key (Option.get (Inline.Link.referenced_label l)) in + let text, label, ref = make_footnote_ref_ids c key fn in + let is_full_ref = match Inline.Link.reference l with + | `Ref (`Full, _, _) -> true | _ -> false + in + if is_full_ref then begin + C.string c ""; + C.inline c (Inline.Link.text l); C.string c "" + end else begin + C.string c ""; + C.string c text; C.string c "" + end + +let link c l = match Inline.Link.reference_definition (C.get_defs c) l with +| Some (Link_definition.Def (ld, _)) -> + let link, title = link_dest_and_title c ld in + C.string c " "" then (C.string c "\" title=\""; html_escaped_string c title); + C.string c "\">"; C.inline c (Inline.Link.text l); C.string c "" +| Some (Block.Footnote.Def (fn, _)) -> link_footnote c l fn +| None -> C.inline c (Inline.Link.text l); comment_undefined_label c l +| Some _ -> C.inline c (Inline.Link.text l); comment_unknown_def_type c l + +let raw_html c h = + if safe c then comment c "CommonMark raw HTML omitted" else + let line c (_, (h, _)) = C.byte c '\n'; C.string c h in + if h <> [] + then (C.string c (fst (snd (List.hd h))); List.iter (line c) (List.tl h)) + +let strikethrough c s = + C.string c ""; + C.inline c (Inline.Strikethrough.inline s); + C.string c "" + +let math_span c ms = + let tex_line c l = html_escaped_string c (Block_line.tight_to_string l) in + let tex_lines c = function (* newlines only between lines *) + | [] -> () | l :: ls -> + let line c l = C.byte c '\n'; tex_line c l in + tex_line c l; List.iter (line c) ls + in + let tex = Inline.Math_span.tex_layout ms in + if tex = [] then () else + (C.string c (if Inline.Math_span.display ms then "\\[" else "\\("); + tex_lines c tex; + C.string c (if Inline.Math_span.display ms then "\\]" else "\\)")) + +let inline c = function +| Inline.Autolink (a, _) -> autolink c a; true +| Inline.Break (b, _) -> break c b; true +| Inline.Code_span (cs, _) -> code_span c cs; true +| Inline.Emphasis (e, _) -> emphasis c e; true +| Inline.Image (i, _) -> image c i; true +| Inline.Inlines (is, _) -> List.iter (C.inline c) is; true +| Inline.Link (l, _) -> link c l; true +| Inline.Raw_html (html, _) -> raw_html c html; true +| Inline.Strong_emphasis (e, _) -> strong_emphasis c e; true +| Inline.Text (t, _) -> html_escaped_string c t; true +| Inline.Ext_strikethrough (s, _) -> strikethrough c s; true +| Inline.Ext_math_span (ms, _) -> math_span c ms; true +| _ -> comment c ""; true + +(* Block rendering *) + +let block_quote c bq = + C.string c "
\n"; + C.block c (Block.Block_quote.block bq); + C.string c "
\n" + +let code_block c cb = + let i = Option.map fst (Block.Code_block.info_string cb) in + let lang = Option.bind i Block.Code_block.language_of_info_string in + let line (l, _) = html_escaped_string c l; C.byte c '\n' in + match lang with + | Some (lang, _env) when backend_blocks c && lang.[0] = '=' -> + if lang = "=html" && not (safe c) + then block_lines c (Block.Code_block.code cb) else () + | _ -> + C.string c "
 ()
+      | Some (lang, _env) ->
+          C.string c " class=\"language-"; html_escaped_string c lang;
+          C.byte c '\"'
+      end;
+      C.byte c '>';
+      List.iter line (Block.Code_block.code cb);
+      C.string c "
\n" + +let heading c h = + let level = string_of_int (Block.Heading.level h) in + C.string c " C.byte c '>'; + | Some (`Auto id | `Id id) -> + let id = unique_id c id in + C.string c " id=\""; C.string c id; + C.string c "\">"; + end; + C.inline c (Block.Heading.inline h); + C.string c "\n" + +let paragraph c p = + C.string c "

"; C.inline c (Block.Paragraph.inline p); C.string c "

\n" + +let item_block ~tight c = function +| Block.Blank_line _ -> () +| Block.Paragraph (p, _) when tight -> C.inline c (Block.Paragraph.inline p) +| Block.Blocks (bs, _) -> + let rec loop c add_nl = function + | Block.Blank_line _ :: bs -> loop c add_nl bs + | Block.Paragraph (p,_) :: bs when tight -> + C.inline c (Block.Paragraph.inline p); loop c true bs + | b :: bs -> (if add_nl then C.byte c '\n'); C.block c b; loop c false bs + | [] -> () + in + loop c true bs +| b -> C.byte c '\n'; C.block c b + +let list_item ~tight c (i, _) = match Block.List_item.ext_task_marker i with +| None -> + C.string c "
  • "; + item_block ~tight c (Block.List_item.block i); + C.string c "
  • \n" +| Some (mark, _) -> + C.string c "
  • "; + let close = match Block.List_item.task_status_of_task_marker mark with + | `Unchecked -> + C.string c + "
    "; + "
  • \n" + | `Checked | `Other _ -> + C.string c + "
    "; + "
    \n" + | `Cancelled -> + C.string c + "
    "; + "
    \n" + in + item_block ~tight c (Block.List_item.block i); + C.string c close + +let list c l = + let tight = Block.List'.tight l in + match Block.List'.type' l with + | `Unordered _ -> + C.string c "
      \n"; + List.iter (list_item ~tight c) (Block.List'.items l); + C.string c "
    \n" + | `Ordered (start, _) -> + C.string c "\n" else + (C.string c " start=\""; C.string c (string_of_int start); + C.string c "\">\n"); + List.iter (list_item ~tight c) (Block.List'.items l); + C.string c "\n" + +let html_block c lines = + let line (l, _) = C.string c l; C.byte c '\n' in + if safe c then (comment c "CommonMark HTML block omitted"; C.byte c '\n') else + List.iter line lines + +let thematic_break c = C.string c "
    \n" + +let math_block c cb = + let line l = html_escaped_string c (Block_line.to_string l); C.byte c '\n' in + C.string c "\\[\n"; + List.iter line (Block.Code_block.code cb); + C.string c "\\]\n" + +let table c t = + let start c align tag = + C.byte c '<'; C.string c tag; + match align with + | None -> C.byte c '>'; + | Some `Left -> C.string c " class=\"left\">" + | Some `Center -> C.string c " class=\"center\">" + | Some `Right -> C.string c " class=\"right\">" + in + let close c tag = C.string c "\n" in + let rec cols c tag ~align count cs = match align, cs with + | ((a, _) :: align), (col, _) :: cs -> + start c (fst a) tag; C.inline c col; close c tag; + cols c tag ~align (count - 1) cs + | ((a, _) :: align), [] -> + start c (fst a) tag; close c tag; + cols c tag ~align (count - 1) [] + | [], (col, _) :: cs -> + start c None tag; C.inline c col; close c tag; + cols c tag ~align:[] (count - 1) cs + | [], [] -> + for i = count downto 1 do start c None tag; close c tag done; + in + let row c tag ~align count cs = + C.string c "\n"; cols c tag ~align count cs; C.string c "\n"; + in + let header c count ~align cols = row c "th" ~align count cols in + let data c count ~align cols = row c "td" ~align count cols in + let rec rows c col_count ~align = function + | ((`Header cols, _), _) :: rs -> + let align, rs = match rs with + | ((`Sep align, _), _) :: rs -> align, rs + | _ -> align, rs + in + header c col_count ~align cols; rows c col_count ~align rs + | ((`Data cols, _), _) :: rs -> + data c col_count ~align cols; rows c col_count ~align rs + | ((`Sep align, _), _) :: rs -> rows c col_count ~align rs + | [] -> () + in + C.string c "
    \n"; + rows c (Block.Table.col_count t) ~align:[] (Block.Table.rows t); + C.string c "
    " + +let block c = function +| Block.Block_quote (bq, _) -> block_quote c bq; true +| Block.Blocks (bs, _) -> List.iter (C.block c) bs; true +| Block.Code_block (cb, _) -> code_block c cb; true +| Block.Heading (h, _) -> heading c h; true +| Block.Html_block (h, _) -> html_block c h; true +| Block.List (l, _) -> list c l; true +| Block.Paragraph (p, _) -> paragraph c p; true +| Block.Thematic_break (_, _) -> thematic_break c; true +| Block.Ext_math_block (cb, _) -> math_block c cb; true +| Block.Ext_table (t, _) -> table c t; true +| Block.Blank_line _ +| Block.Link_reference_definition _ +| Block.Ext_footnote_definition _ -> true +| _ -> comment c "Unknown Cmarkit block"; C.byte c '\n'; true + +(* XHTML rendering *) + +let xhtml_block c = function +| Block.Thematic_break _ -> C.string c "
    \n"; true +| b -> block c b + +let xhtml_inline c = function +| Inline.Break (b, _) when Inline.Break.type' b = `Hard -> + C.string c "
    \n"; true +| Inline.Image (i, _) -> + image ~close:" />" c i; true +| i -> inline c i + +(* Document rendering *) + +let footnotes c fns = + (* XXX we could do something about recursive footnotes and footnotes in + footnotes here. *) + let fns = Label.Map.fold (fun _ fn acc -> fn :: acc) fns [] in + let fns = List.sort Stdlib.compare fns in + let footnote c (_, id, refc, fn) = + C.string c "
  • \n"; + C.block c (Block.Footnote.block fn); + C.string c ""; + for r = 1 to !refc do + C.string c "↩ī¸Žī¸Ž"; + if !refc > 1 then + (C.string c ""; C.string c (Int.to_string r); C.string c ""); + C.string c "" + done; + C.string c ""; + C.string c "
  • " + in + C.string c "
      \n"; + List.iter (footnote c) fns; + C.string c "
    \n" + +let doc c d = + C.block c (Doc.block d); + let st = C.State.get c state in + if Label.Map.is_empty st.footnotes then () else footnotes c st.footnotes; + true + +(* Renderer *) + +let renderer ?backend_blocks ~safe () = + let init_context = init_context ?backend_blocks ~safe in + Cmarkit_renderer.make ~init_context ~inline ~block ~doc () + +let xhtml_renderer ?backend_blocks ~safe () = + let init_context = init_context ?backend_blocks ~safe in + let inline = xhtml_inline and block = xhtml_block in + Cmarkit_renderer.make ~init_context ~inline ~block ~doc () + +let of_doc ?backend_blocks ~safe d = + Cmarkit_renderer.doc_to_string (renderer ~safe ()) d + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_html.mli b/ocaml-lsp-server/vendor/cmarkit/cmarkit_html.mli new file mode 100644 index 000000000..b5d05c135 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_html.mli @@ -0,0 +1,185 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(** Rendering CommonMark to HTML. + + Generates HTML fragments, consult the + {{!integration}integration notes} for requirements on the webpage. + + See {{!page-index.quick}a quick example} and + {{!page_frame}another one}. + + {b Warning.} Rendering outputs are unstable, they may be tweaked + even between minor versions of the library. *) + +(** {1:rendering Rendering} *) + +val of_doc : ?backend_blocks:bool -> safe:bool -> Cmarkit.Doc.t -> string +(** [of_doc ~safe d] is an HTML fragment for [d]. See {!renderer} + for more details and documentation about rendering options. *) + +(** {1:renderers Renderers} *) + +val renderer : ?backend_blocks:bool -> safe:bool -> unit -> Cmarkit_renderer.t +(** [renderer ~safe ()] is the default HTML renderer. This renders the + strict CommonMark abstract syntax tree and the supported Cmarkit + {{!Cmarkit.extensions}extensions}. + + The inline, block and document renderers always return + [true]. Unknown block and inline values are rendered by an HTML + comment. + + The following options are available: + + {ul + {- [safe], if [true] {{!Cmarkit.Block.extension-Html_block}HTML blocks} and + {{!Cmarkit.Inline.extension-Raw_html}raw HTML inlines} are discarded and + replaced by an HTML comment in the output. Besides the URLs of + autolinks, links and images that satisfy + {!Cmarkit.Inline.Link.is_unsafe} are replaced by the empty string. + + Using safe renderings is a good first step at preventing + {{:https://en.wikipedia.org/wiki/Cross-site_scripting}XSS} from + untrusted user inputs but you should rather post-process rendering + outputs with a dedicated HTML sanitizer.} + {- [backend_blocks], if [true], code blocks with language [=html] + are written verbatim in the output (iff [safe] is [true]) and + any other code block whose langage starts with [=] is + dropped. Defaults to [false].}} + + See {{!Cmarkit_renderer.example}this example} to extend or + selectively override the renderer. *) + +val xhtml_renderer : + ?backend_blocks:bool -> safe:bool -> unit -> Cmarkit_renderer.t +(** [xhtml_renderer] is like {!val-renderer} but explicitely closes + empty tags to possibly make the output valid XML. Note that it + still renders HTML blocks and inline raw HTML unless {!safe} is + [true] (which also suppresses some URLs). + + See {{!Cmarkit_renderer.example}this example} to extend or + selectively override the renderer. *) + +(** {1:render Render functions} + + Only useful if you extend the renderer. *) + +val safe : Cmarkit_renderer.context -> bool +(** [safe c] is [true] if a safe rendering is requested. + See {!renderer} for more information. *) + +val html_escaped_uchar : Cmarkit_renderer.context -> Uchar.t -> unit +(** [html_escaped_uchar c u] renders the UTF-8 encoding of [u] on [c] + with HTML markup delimiters [<] [>] [&] and ["] escaped + to HTML entities (Single quotes ['] are not escaped use ["] to delimit your + attributes). This also renders U+0000 to {!Uchar.rep}. *) + +val buffer_add_html_escaped_uchar : Buffer.t -> Uchar.t -> unit +(** [buffer_add_html_escaped_uchar] is {!html_escaped_uchar} but appends + to a buffer value. *) + +val html_escaped_string : Cmarkit_renderer.context -> string -> unit +(** [html_escaped_string c s] renders string [s] on [c] with HTML + markup delimiters [<], [>], [&], and ["] escaped to HTML + entities (Single quotes ['] are not escaped, use ["] to delimit your + attributes). *) + +val buffer_add_html_escaped_string : Buffer.t -> string -> unit +(** [buffer_add_html_escaped_string] is {!html_escaped_string} but appends + to a buffer value. *) + +val pct_encoded_string : Cmarkit_renderer.context -> string -> unit +(** [pct_encoded_string c s] renders string [s] on [c] with everything + percent encoded except [%] and the + {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.3} + [unreserved]}, + {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2} + [sub-delims]} + and the {{:https://datatracker.ietf.org/doc/html/rfc3986#section-2.2} + [gen-delims]} + URI characters except brackets [\[] and [\]] (to match the [cmark] tool). + + In other words only characters [%] [a-z] [A-Z] [0-9] [-] [.] [_] [~] [!] + [$] [&] ['] [(] [)] [*] [+] [,] [;] [=] [:] [/] [?] [#] [@] + are not percent-encoded. + + {b Warning.} The function also replaces both [&] and ['] by their + corresponding HTML entities, so you can't use this in a context + that doesn't allow entities. Besides this assumes [s] may already + have percent encoded bits so it doesn't percent encode [%], as such you + can't use this as a general percent encode function. *) + +val buffer_add_pct_encoded_string : Buffer.t -> string -> unit +(** [buffer_add_pct_encoded_string b s] is {!pct_encoded_string} but + appends to a buffer value. *) + +(** {1:integration HTML integration notes} + + {2:code_blocks Code blocks} + + If a language [lang] can be extracted from the info string of a + code block with + {!Cmarkit.Block.Code_block.language_of_info_string}, a + [language-lang] class is added to the corresponding [code] + element. If you want to highlight the syntax, adding + {{:https://highlightjs.org/}highlight.js} to your page is an + option. + + {2:ids Heading identifiers} + + Headings identifiers and anchors are added to the output whenever + {!Cmarkit.Block.Heading.val-id} holds a value. If the identifier + already exists it is made unique by appending ["-"] and the first + number starting from 1 that makes it unique. + + {2:math Maths} + + If your document has {!Cmarkit.Inline.extension-Ext_math_span} + inlines or {!Cmarkit.Block.extension-Ext_math_block} blocks, the + default renderer outputs them in [\(], [\)] and + [\\[], [\\]] delimiters. You should add + {{:https://katex.org/}K{^A}T{_E}X} or + {{:https://www.mathjax.org/}MathJax} in your page to let these + bits be rendered by the typography they deserve. + + {2:page_frame Page frame} + + The default renderers only generate HTML fragments. You may + want to add a page frame. For example: +{[ +let html_doc_of_md ?(lang = "en") ~title ~safe md = + let doc = Cmarkit.Doc.of_string md in + let r = Cmarkit_html.renderer ~safe () in + let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in + let buffer_add_title = Cmarkit_html.buffer_add_html_escaped_string in + Printf.kbprintf Buffer.contents (Buffer.create 1024) +{| + + + + %a + + +%a +|} + lang buffer_add_title title buffer_add_doc doc +]} +*) + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.ml new file mode 100644 index 000000000..2439be6f1 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.ml @@ -0,0 +1,423 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +open Cmarkit +module C = Cmarkit_renderer.Context +module String_set = Set.Make (String) + +(* State *) + +type state = + { backend_blocks : bool; + mutable sot : bool; (* start of text *) + mutable labels : String_set.t; + mutable footnote_labels : string Label.Map.t; } + +let state : state C.State.t = C.State.make () +let get_state c = C.State.get c state +let backend_blocks c = (get_state c).backend_blocks +let init_context ?(backend_blocks = false) c _ = + let labels = String_set.empty and footnote_labels = Label.Map.empty in + let st = { backend_blocks; sot = true; labels; footnote_labels } in + C.State.set c state (Some st) + +let unique_label c l = + let st = C.State.get c state in + let rec loop labels l c = + let l' = if c = 0 then l else (String.concat "-" [l; Int.to_string c]) in + match String_set.mem l' labels with + | true -> loop labels l (c + 1) + | false -> st.labels <- String_set.add l' labels; l' + in + loop st.labels l 0 + +let make_label l = (* latex seems to choke on these underscores in labels *) + String.map (function '_' | ' ' | '\t' -> '-' | c -> c) l + +let footnote_label c id = + let st = get_state c in + match Label.Map.find_opt id st.footnote_labels with + | Some l -> l, false + | None -> + let l = make_label (String.sub id 1 (String.length id - 1)) in + let l = "fn-" ^ l in + st.footnote_labels <- Label.Map.add id l st.footnote_labels; + l, true + +(* Escaping *) + +let buffer_add_latex_escaped_uchar b u = match Uchar.to_int u with +| 0x0000 -> Buffer.add_utf_8_uchar b Uchar.rep +| 0x0023 (* # *) -> Buffer.add_string b {|\#|} +| 0x0024 (* $ *) -> Buffer.add_string b {|\$|} +| 0x0025 (* % *) -> Buffer.add_string b {|\%|} +| 0x0026 (* & *) -> Buffer.add_string b {|\&|} +| 0x005C (* \ *) -> Buffer.add_string b {|\textbackslash{}|} +| 0x005E (* ^ *) -> Buffer.add_string b {|\textasciicircum{}|} +| 0x005F (* _ *) -> Buffer.add_string b {|\_|} +| 0x007B (* { *) -> Buffer.add_string b {|\{|} +| 0x007D (* } *) -> Buffer.add_string b {|\}|} +| 0x007E (* ~ *) -> Buffer.add_string b {|\textasciitilde{}|} +| _ -> Buffer.add_utf_8_uchar b u + +let latex_escaped_uchar c u = buffer_add_latex_escaped_uchar (C.buffer c) u + +let buffer_add_latex_escaped_string b s = + let string = Buffer.add_string in + let flush b max start i = + if start <= max then Buffer.add_substring b s start (i - start); + in + let rec loop b s max start i = + if i > max then flush b max start i else + let next = i + 1 in + match String.get s i with + | '\x00' -> + flush b max start i; Buffer.add_utf_8_uchar b Uchar.rep; + loop b s max next next + | '#' -> flush b max start i; string b {|\#|}; loop b s max next next + | '$' -> flush b max start i; string b {|\$|}; loop b s max next next + | '%' -> flush b max start i; string b {|\%|}; loop b s max next next + | '&' -> flush b max start i; string b {|\&|}; loop b s max next next + | '\\' -> + flush b max start i; string b {|\textbackslash{}|}; + loop b s max next next + | '^' -> + flush b max start i; string b {|\textasciicircum{}|}; + loop b s max next next + | '_' -> flush b max start i; string b {|\_|}; loop b s max next next + | '{' -> flush b max start i; string b {|\{|}; loop b s max next next + | '}' -> flush b max start i; string b {|\}|}; loop b s max next next + | '~' -> + flush b max start i; string b {|\textasciitilde{}|}; + loop b s max next next + | c -> loop b s max start next + in + loop b s (String.length s - 1) 0 0 + +let latex_escaped_string c s = buffer_add_latex_escaped_string (C.buffer c) s + +(* Rendering functions *) + +let newline c = + (* Block generally introduce newlines, except the first one. *) + let st = get_state c in if st.sot then st.sot <- false else C.byte c '\n' + +let comment c s = C.string c "% "; latex_escaped_string c s; newline c + +let comment_undefined_label c l = match Inline.Link.referenced_label l with +| None -> () | Some def -> comment c ("Undefined label " ^ (Label.key def)) + +let comment_unknown_def_type c l = match Inline.Link.referenced_label l with +| None -> () | Some def -> + comment c ("Unknown label definition type for " ^ (Label.key def)) + +let comment_foonote_image c l = match Inline.Link.referenced_label l with +| None -> () | Some def -> + comment c ("Footnote " ^ (Label.key def) ^ " referenced as image") + +let block_lines c = function (* newlines only between lines *) +| [] -> () | l :: ls -> + let line c l = newline c; C.string c (Block_line.to_string l) in + C.string c (Block_line.to_string l); List.iter (line c) ls + +let tight_block_lines c = function (* newlines only between lines *) +| [] -> () | l :: ls -> + let line c l = newline c; C.string c (Block_line.tight_to_string l) in + C.string c (Block_line.tight_to_string l); List.iter (line c) ls + +(* Inline rendering *) + +let autolink c a = + let pre = if Inline.Autolink.is_email a then "mailto:" else "" in + let link = pre ^ (fst (Inline.Autolink.link a)) in + C.string c "\\url{"; latex_escaped_string c link; C.byte c '}' + +let code_span c cs = + let code = Inline.Code_span.code cs in + C.string c "\\texttt{"; latex_escaped_string c code; C.byte c '}' + +let emphasis c e = + C.string c "\\emph{"; C.inline c (Inline.Emphasis.inline e); C.byte c '}' + +let link c l = match Inline.Link.reference_definition (C.get_defs c) l with +| Some (Link_definition.Def (ld, _)) -> + let d = match Link_definition.dest ld with None -> "" | Some (u, _) -> u in + let dlen = String.length d in + begin match dlen > 0 && d.[0] = '#' with + | true -> + let label = make_label (String.sub d 1 (dlen - 1)) in + C.string c "\\hyperref["; + latex_escaped_string c label; + C.string c "]{"; + C.inline c (Inline.Link.text l); C.byte c '}' + | false -> + C.string c "\\href{"; + latex_escaped_string c d; + C.string c "}{"; + C.inline c (Inline.Link.text l); C.byte c '}' + end +| Some (Block.Footnote.Def (fn, _)) -> + let key = Label.key (Option.get (Inline.Link.referenced_label l)) in + let l, new' = footnote_label c key in + begin match new' with + | false -> + C.string c "\\textsuperscript{\\ref{"; C.string c l; C.string c "}}" + | true -> + C.string c "\\footnote{\\label{"; C.string c l; C.string c "}"; + C.block c (Block.Footnote.block fn); + C.string c "}" + end +| None -> C.inline c (Inline.Link.text l); comment_undefined_label c l +| Some _ -> C.inline c (Inline.Link.text l); comment_unknown_def_type c l + +let image c i = match Inline.Link.reference_definition (C.get_defs c) i with +| Some (Link_definition.Def (ld, _)) -> + let d = match Link_definition.dest ld with + | None -> "" | Some (u, _) -> u + in + let is_external d = + String.starts_with ~prefix:"http:" d || + String.starts_with ~prefix:"https:" d + in + if is_external d then link c i else + begin + C.string c "\\protect\\includegraphics{"; + latex_escaped_string c d; + C.byte c '}' + end +| Some (Block.Footnote.Def _) -> comment_foonote_image c i +| None -> comment_undefined_label c i +| Some _ -> comment_unknown_def_type c i + +let strong_emphasis c e = + C.string c "\\textbf{"; C.inline c (Inline.Emphasis.inline e); C.byte c '}' + +let break c b = match Inline.Break.type' b with +| `Hard -> C.string c "\\\\"; newline c +| `Soft -> newline c + +let text c t = latex_escaped_string c t + +let strikethrough c s = + C.string c "\\sout{"; C.inline c (Inline.Strikethrough.inline s); C.byte c '}' + +let math_span c ms = + let tex = Inline.Math_span.tex_layout ms in + C.string c (if Inline.Math_span.display ms then "\\[" else "\\("); + tight_block_lines c tex; + C.string c (if Inline.Math_span.display ms then "\\]" else "\\)") + +let inline c = function +| Inline.Autolink (a, _) -> autolink c a; true +| Inline.Break (b, _) -> break c b; true +| Inline.Code_span (cs, _) -> code_span c cs; true +| Inline.Emphasis (e, _) -> emphasis c e; true +| Inline.Image (i, _) -> image c i; true +| Inline.Inlines (is, _) -> List.iter (C.inline c) is; true +| Inline.Link (l, _) -> link c l; true +| Inline.Raw_html (_, _) -> comment c "Raw CommonMark HTML omitted"; true +| Inline.Strong_emphasis (e, _) -> strong_emphasis c e; true +| Inline.Text (t, _) -> text c t; true +| Inline.Ext_strikethrough (s, _) -> strikethrough c s; true +| Inline.Ext_math_span (ms, _) -> math_span c ms; true +| _ -> comment c "Unknown Cmarkit inline"; true + +(* Block rendering *) + +let block_quote c bq = + newline c; + C.string c "\\begin{quote}"; + C.block c (Block.Block_quote.block bq); + C.string c "\\end{quote}"; + newline c + +let code_block c cb = + let info = Option.map fst (Block.Code_block.info_string cb) in + let lang = Option.bind info Block.Code_block.language_of_info_string in + let code = Block.Code_block.code cb in + let raw_line (l, _) = C.string c l; newline c in + let line = raw_line (* XXX: escape or not ? *) in + match lang with + | Some (lang, _env) when backend_blocks c && lang.[0] = '=' -> + if lang = "=latex" then block_lines c code else () + | _ -> + newline c; + begin match lang with + | None -> + C.string c "\\begin{verbatim}"; newline c; + List.iter line code; + C.string c "\\end{verbatim}" + | Some (lang, _env) -> + C.string c "\\begin{lstlisting}[language="; + C.string c lang; C.byte c ']'; newline c; + List.iter line code; + C.string c "\\end{lstlisting}" + end; + newline c + +let heading c h = + let cmd = match Block.Heading.level h with + | 1 -> "section{" | 2 -> "subsection{" | 3 -> "subsubsection{" + | 4 -> "paragraph{" | 5 -> "subparagraph{" | 6 -> "subparagraph{" + | _ -> "subparagraph{" + in + let i = Block.Heading.inline h in + newline c; + C.byte c '\\'; C.string c cmd; C.inline c i; C.byte c '}'; + begin match Block.Heading.id h with + | None -> () + | Some (`Auto id | `Id id) -> + let label = unique_label c (make_label id) in + C.string c "\\label{"; latex_escaped_string c label; C.byte c '}' + end; + newline c + +let list_item c (i, _meta) = + C.string c "\\item{}"; + begin match Block.List_item.ext_task_marker i with + | None -> () + | Some (u, _) -> (* Something better can likely be done *) + C.string c " \\lbrack"; + begin match Uchar.to_int u = 0x0020 with + | true -> C.string c "\\phantom{x}" + | false -> C.byte c ' '; C.utf_8_uchar c u + end; + C.string c "\\rbrack \\enspace" + end; + C.block c (Block.List_item.block i) + +let list c l = match Block.List'.type' l with +| `Unordered _ -> + newline c; + C.string c "\\begin{itemize}"; newline c; + List.iter (list_item c) (Block.List'.items l); + C.string c "\\end{itemize}"; + newline c +| `Ordered (start, _) -> + newline c; + C.string c "\\begin{enumerate}"; + if start <> 1 + then (C.string c "[start="; C.string c (Int.to_string start); C.byte c ']'); + newline c; + List.iter (list_item c) (Block.List'.items l); + C.string c "\\end{enumerate}"; + newline c + +let html_block c _ = newline c; comment c "CommonMark HTML block omitted" + +let paragraph c p = + newline c; C.inline c (Block.Paragraph.inline p); newline c + +let thematic_break c = + newline c; + C.string c "\\begin{center}\\rule{0.5\\linewidth}{.25pt}\\end{center}"; + newline c + +let math_block c cb = + let line l = C.string c (Block_line.to_string l); newline c in + C.string c "\\["; newline c; + List.iter line (Block.Code_block.code cb); + C.string c "\\]"; newline c + +let table c t = + let start c align op = + begin match align with + | None -> C.byte c '{'; + | Some `Left -> C.string c "\\multicolumn{1}{l}{" + | Some `Center -> C.string c "\\multicolumn{1}{c}{" + | Some `Right -> C.string c "\\multicolumn{1}{r}{" + end; + if op <> "" then C.string c op; + in + let close c = C.byte c '}'; newline c in + let rec cols c op ~align count cs = match align, cs with + | ((a, _) :: align), (col, _) :: cs -> + start c (fst a) op; C.inline c col; close c; + if count > 1 then (C.string c " &"; newline c); + cols c op ~align (count - 1) cs + | [], (col, _) :: cs -> + start c None op; C.inline c col; close c; + if count > 1 then (C.string c " &"; newline c); + cols c op ~align:[] (count - 1) cs + | (a :: align), [] -> + if count > 1 then (C.string c "&"; newline c); + cols c op ~align (count - 1) [] + | [], [] -> + for i = count downto 2 do C.string c "&"; newline c done; + C.string c "\\\\"; newline c + in + let header c count ~align cs = cols c "\\bfseries{}" ~align count cs in + let data c count ~align cs = cols c "" ~align count cs in + let rec rows c col_count ~align = function + | ((`Header cols, _), _) :: rs -> + let align, rs = match rs with + | ((`Sep align, _), _) :: rs -> align, rs + | _ -> align, rs + in + header c col_count ~align cols; + C.string c "\\hline"; newline c; + rows c col_count ~align rs + | ((`Data cols, _), _) :: rs -> + data c col_count ~align cols; rows c col_count ~align rs + | ((`Sep align, _), _) :: rs -> rows c col_count ~align rs + | [] -> () + in + newline c; C.string c "\\bigskip"; newline c; + C.string c "\\begin{tabular}{"; + for i = 1 to Block.Table.col_count t do C.byte c 'l' done; + C.byte c '}'; newline c; + begin match Block.Table.rows t with + | (((`Data _ | `Sep _), _), _) :: _ -> C.string c "\\hline"; newline c + | _ -> () + end; + rows c (Block.Table.col_count t) ~align:[] (Block.Table.rows t); + C.string c "\\hline"; newline c; + C.string c "\\end{tabular}"; + newline c; C.string c "\\bigskip"; newline c + +let block c = function +| Block.Block_quote (bq, _) -> block_quote c bq; true +| Block.Blocks (bs, _) -> List.iter (C.block c) bs; true +| Block.Code_block (cb, _) -> code_block c cb; true +| Block.Heading (h, _) -> heading c h; true +| Block.Html_block (html, _) -> html_block c html; true +| Block.List (l, _) -> list c l; true +| Block.Paragraph (p, _) -> paragraph c p; true +| Block.Thematic_break _ -> thematic_break c; true +| Block.Ext_math_block (cb, _)-> math_block c cb; true +| Block.Ext_table (t, _)-> table c t; true +| Block.Blank_line _ -> true +| Block.Link_reference_definition _ +| Block.Ext_footnote_definition _ -> true; +| _ -> comment c "Unknown Cmarkit block"; true + +(* Document rendering *) + +let doc c d = C.block c (Doc.block d); true + +(* Renderer *) + +let renderer ?backend_blocks () = + let init_context = init_context ?backend_blocks in + Cmarkit_renderer.make ~init_context ~inline ~block ~doc () + +let of_doc ?backend_blocks d = + Cmarkit_renderer.doc_to_string (renderer ?backend_blocks ()) d + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.mli b/ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.mli new file mode 100644 index 000000000..09ededa89 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_latex.mli @@ -0,0 +1,226 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(** Rendering CommonMark to L{^A}T{_E}X. + + Generates L{^A}T{_E}X fragments, consult the {{!integration} + integration notes} for requirements on the document. + + See {{!page-index.quick}a quick example} and {{!doc_frame}another one}. + + {b Warning.} Rendering outputs are unstable, they may be tweaked even + between minor versions of the library. *) + +(** {1:rendering Rendering} *) + +val of_doc : ?backend_blocks:bool -> Cmarkit.Doc.t -> string +(** [of_doc d] is a L{^A}T{_E}X fragment for [d]. See {!val-renderer} + for more details and documentation about rendering options. *) + +(** {1:renderer Renderer} *) + +val renderer : ?backend_blocks:bool -> unit -> Cmarkit_renderer.t +(** [renderer] is a default L{^A}T{_E}X renderer. This renders + the strict CommonMark abstract syntax tree and the supported + Cmarkit {{!Cmarkit.extensions}extensions}. + + The inline, block and document renderers always return + [true]. Unknown block and inline values are rendered by a + L{^A}T{_E}X comment. + + The following options are available: + + {ul + {- [backend_blocks], if [true], code blocks with language [=latex] + are written verbatim in the output and any other code block whose + langage starts with [=] is dropped. Defaults to [false].}} + + See {{!Cmarkit_renderer.example}this example} to extend or + selectively override the renderer. *) + +(** {1:render Render functions} + + Only useful if you extend the renderer. *) + +val newline : Cmarkit_renderer.context -> unit +(** [newline c] starts a new line. Except on the first call on [c] which is + a nop. *) + +val latex_escaped_uchar : Cmarkit_renderer.context -> Uchar.t -> unit +(** [latex_escaped_uchar c u] renders the UTF-8 encoding of [u] on [c] + propertly escaped for L{^A}T{_E}X. That is the characters + [&] [%] [$] [#] [_] [{] [}] [~] [^] [\ ] + are escaped. This also renders U+0000 to {!Uchar.rep}. *) + +val buffer_add_latex_escaped_uchar : Buffer.t -> Uchar.t -> unit +(** [buffer_add_latex_escaped_uchar] is {!latex_escaped_uchar} but appends + to a buffer value. *) + +val latex_escaped_string : Cmarkit_renderer.context -> string -> unit +(** [latex_escaped_string c s] renders string [s] on [c] with + characters [&] [%] [$] [#] [_] [{] [}] [~] [^] [\ ] escaped. This + also escapes U+0000 to {!Uchar.rep}. *) + +val buffer_add_latex_escaped_string : Buffer.t -> string -> unit +(** [buffer_add_latex_escaped_string] is {!latex_escaped_string} + but acts on a buffer value. *) + +(** {1:integration L{^A}T{_E}X integration notes} + + Along with the built-in [graphicx] package, the following + L{^A}T{_E}X packages are needed to use the outputs of the default + renderer: +{v +tlmgr install enumitem listings hyperref # Required +tlmgr install ulem # Strikethrough extension +tlmgr install bera fontspec # Optional +v} + This means you should have at least the following in your + document preamble: +{v +% Required +\usepackage{graphicx} +\usepackage{enumitem} +\usepackage{listings} +\usepackage{hyperref} +\usepackage[normalem]{ulem} % Strikethrough extension + +% Optional +\usepackage[scaled=0.8]{beramono} % A font for code blocks +\usepackage{fontspec} % Supports more Unicode characters +v} + + See the sections below for more details. + + {2:char_encoding Character encoding} + + The output is UTF-8 encoded. + {{:https://tug.org/TUGboat/tb39-1/tb121ltnews28.pdf}It became} the + the default encoding for L{^A}T{_E}X in 2018. But if you are using + an older version a [\usepackage[utf8]{inputenc}] may be needed. + + Using [xelatex] rather than [pdflatex] will not get stuck on missing + glyphs. + + {2:links Autolinks and links} + + The {{:https://www.ctan.org/pkg/hyperref}[hyperref]} package is + used to render links ([\href]) and autolink ([\url]). Link + destination starting with a [#] are assumed to refer to + {{!labels}section labels} and are rendered using the [\hyperref] + macro, with the [#] chopped. + + {2:images Images} + + Images are inserted using the + {{:https://ctan.org/pkg/graphicx}graphicx}'s package. Only + images with relative URLs are supported, those that point + to external ressources on the www are turned into links. + + {2:labels Section labels} + + Section labels are added to the output whenever + {!Cmarkit.Block.Heading.val-id} holds a value. If the identifier + already exists it is made unique by appending ["-"] and the first + number starting from 1 that makes it unique. Also the character + [_] seems problematic in labels even when escaped, we map it to [-] + (if you know any better get in touch). + + {2:lists Lists} + + To support the starting point of ordereded lists without having to + fiddle with [enumi] counters, the + {{:https://www.ctan.org/pkg/enumitem}[enumitem]} package is used. + + {2:code_blocks Code blocks} + + If a language [lang] can be + {{!Cmarkit.Block.Code_block.language_of_info_string}extracted} + from a code block info string, the + {{:https://www.ctan.org/pkg/listings}[listings]} package is used + with the corresponding language in a [lstlisting] environment. + Otherwise the built-in [verbatim] environment is used. + + Note that the [listings] package has no definition for the [ocaml] + language, the default renderings are a bit subpar and + break on character literals with double quotes. This improves things: +{v +\lstset{ + columns=[c]fixed, + basicstyle=\small\ttfamily, + keywordstyle=\bfseries, + upquote=true, + commentstyle=\slshape, + breaklines=true, + showstringspaces=false} + +\lstdefinelanguage{ocaml}{language=[objective]caml, + % Fixes double quotes in char literals + literate={'"'}{\textquotesingle "\textquotesingle}3 + {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, +} +v} + + {2:doc_frame Document frame} + + The default renderer only generates L{^A}T{_E}X fragments. You + may want to add a document frame. For example: +{[ +let latex_doc_of_md ?(title = "") md = + let doc = Cmarkit.Doc.of_string md in + let r = Cmarkit_latex.renderer () in + let buffer_add_doc = Cmarkit_renderer.buffer_add_doc r in + let buffer_add_title = Cmarkit_latex.buffer_add_latex_escaped_string in + let maketitle = if title = "" then "" else {|\maketitle|} in + Printf.kbprintf Buffer.contents (Buffer.create 1024) +{|\documentclass{article} + +\usepackage{graphicx} +\usepackage{enumitem} +\usepackage{listings} +\usepackage{hyperref} +\usepackage[normalem]{ulem} +\usepackage[scaled=0.8]{beramono} +\usepackage{fontspec} + +\lstset{ + columns=[c]fixed, + basicstyle=\small\ttfamily, + keywordstyle=\bfseries, + upquote=true, + commentstyle=\slshape, + breaklines=true, + showstringspaces=false} + +\lstdefinelanguage{ocaml}{language=[objective]caml, + literate={'"'}{\textquotesingle "\textquotesingle}3 + {'\\"'}{\textquotesingle \textbackslash"\textquotesingle}4, +} + +\title{%a} +\begin{document} +%s +%a +\end{document}|} buffer_add_title title maketitle buffer_add_doc doc +]} + +Ignore this: ". +*) + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.ml b/ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.ml new file mode 100644 index 000000000..e52648a6f --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.ml @@ -0,0 +1,104 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(* Renderers *) + +module Dict = Cmarkit_base.Dict + +type t = + { init_context : context -> Cmarkit.Doc.t -> unit; + inline : inline; + block : block; + doc : doc; } + +and context = + { renderer : t; + mutable state : Dict.t; + b : Buffer.t; + mutable doc : Cmarkit.Doc.t } + +and inline = context -> Cmarkit.Inline.t -> bool +and block = context -> Cmarkit.Block.t -> bool +and doc = context -> Cmarkit.Doc.t -> bool + +let nop _ _ = () +let none _ _ = false + +let make + ?(init_context = nop) ?(inline = none) ?(block = none) ?(doc = none) () + = + { init_context; inline; block; doc } + +let compose g f = + let init_context c d = g.init_context c d; f.init_context c d in + let block c b = f.block c b || g.block c b in + let inline c i = f.inline c i || g.inline c i in + let doc c d = f.doc c d || g.doc c d in + { init_context; inline; block; doc } + +let init_context r = r.init_context +let inline r = r.inline +let block r = r.block +let doc r = r.doc + +module Context = struct + type t = context + let make renderer b = + { renderer; b; state = Dict.empty; doc = Cmarkit.Doc.empty } + + let buffer c = c.b + let renderer c = c.renderer + let get_doc (c : context) = c.doc + let get_defs (c : context) = Cmarkit.Doc.defs c.doc + + module State = struct + type 'a t = 'a Dict.key + let make = Dict.key + let find c st = Dict.find st c.state + let get c st = Option.get (Dict.find st c.state) + let set c st = function + | None -> c.state <- Dict.remove st c.state + | Some s -> c.state <- Dict.add st s c.state + end + + let init c d = c.renderer.init_context c d + + let invalid_inline _ = invalid_arg "Unknown Cmarkit.Inline.t case" + let invalid_block _ = invalid_arg "Unknown Cmarkit.Block.t case" + let unhandled_doc _ = invalid_arg "Unhandled Cmarkit.Doc.t" + + let byte r c = Buffer.add_char r.b c + let utf_8_uchar r u = Buffer.add_utf_8_uchar r.b u + let string c s = Buffer.add_string c.b s + let inline c i = ignore (c.renderer.inline c i || invalid_inline i) + let block c b = ignore (c.renderer.block c b || invalid_block b) + let doc (c : context) d = + c.doc <- d; init c d; + ignore (c.renderer.doc c d || unhandled_doc d); + c.doc <- Cmarkit.Doc.empty +end + +let doc_to_string r d = + let b = Buffer.create 1024 in + let c = Context.make r b in + Context.doc c d; Buffer.contents b + +let buffer_add_doc r b d = Context.doc (Context.make r b) d + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.mli b/ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.mli new file mode 100644 index 000000000..1f14ec234 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/cmarkit_renderer.mli @@ -0,0 +1,275 @@ +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers. All rights reserved. + Distributed under the ISC license, see terms at the end of the file. + ---------------------------------------------------------------------------*) + +(** Renderer abstraction. + + Stateful renderer abstraction to render documents in {!Stdlib.Buffer.t} + values. + + {b Note.} This is a low-level interface. For quick and standard + renderings see {!Cmarkit_html.of_doc}, {!Cmarkit_latex.of_doc} and + {!Cmarkit_commonmark.of_doc}. If you want to extend them, + see {{!example}this example}. *) + +(** {1:rendering Rendering} *) + +type t +(** The type for renderers. *) + +val doc_to_string : t -> Cmarkit.Doc.t -> string +(** [doc_to_string r d] renders document [d] to a string using renderer [r]. *) + +val buffer_add_doc : t -> Buffer.t -> Cmarkit.Doc.t -> unit +(** [buffer_add_doc r b d] renders document [d] on buffer [b] using + renderer [r]. *) + +(** {1:renderers Renderers} *) + +type context +(** The type for rendering contexts, holds a renderer, a + {!Stdlib.Buffer.t} value to act on and rendering state. *) + +type inline = context -> Cmarkit.Inline.t -> bool +(** The type for inline renderers. + + Return [false] if you are not interested in rendering the given + inline. Use {!Context.inline} and {!Context.block} on the given + context if you need to invoke the renderer recursively. *) + +type block = context -> Cmarkit.Block.t -> bool +(** The type for block renderers. + + Return [false] if you are not interested in rendering the given + block. Use {!Context.inline} and {!Context.block} with the given + context if you need to invoke the renderer recursively. *) + +type doc = context -> Cmarkit.Doc.t -> bool +(** The type for document renderers. + + Return [false] if you are not interested in rendering the given + document. Use {!Context.inline}, {!Context.block} and {!Context.doc} + with the given context if you need to invoke the renderer recursively. *) + +val make : + ?init_context:(context -> Cmarkit.Doc.t -> unit) -> + ?inline:inline -> ?block:block -> ?doc:doc -> unit -> t +(** [make ?init_context ?inline ?block ?doc ()] is a renderer using + [inline], [block], [doc] to render documents. They all default to + [(fun _ _ -> false)], which means that by default they defer to + next renderer (see {!compose}). + + [init_context] is used to initialize the context for the renderer + before a document render. It defaults to [fun _ _ -> ()]. *) + +val compose : t -> t -> t +(** [compose g f] renders first with [f] and if a renderer returns [false], + falls back on its counterpart in [g]. + + The {!init_context} of the result calls [g]'s initialization + context function first, followed by the one of [f]. This means + [f]'s initialization function can assume the context is already + setup for [g]. *) + +(** {2:accessors Accessors} + + Normally you should not need these but you may want to peek + into other renderers. *) + +val init_context : t -> (context -> Cmarkit.Doc.t -> unit) +(** [init_context r] is the context initalization function for [r]. *) + +val inline : t -> inline +(** [inline r] is the inline renderer of [r]. *) + +val block : t -> block +(** [block_renderer r] is the block renderer of [r]. *) + +val doc : t -> doc +(** [doc_renderer r] is the documentation renderer of [r]. *) + +(** {1:context Rendering contexts} *) + +(** Rendering contexts. *) +module Context : sig + + (** {1:contexts Contexts} *) + + type renderer := t + + type t = context + (** The type for rendering contexts. *) + + val make : renderer -> Buffer.t -> t + (** [make r b] is a context using renderer [r] to render documents + on buffer [b]. + + The renderer [r] must be able to handle any inline, block and + document values (i.e. its renderers should always return [true]) + otherwise [Invalid_argument] may raise on renders. + + This means the last renderer you {{!compose}compose with} should + always have catch all cases returning [true]; after possibly + indicating in the output that something was missed. The built-in + renderers {!Cmarkit_commonmark.val-renderer}, + {!Cmarkit_html.val-renderer} and {!Cmarkit_latex.val-renderer} + do have these catch all cases. *) + + val renderer : t -> renderer + (** [renderer c] is the renderer of [c]. *) + + val buffer : t -> Buffer.t + (** [buffer c] is the buffer of [c]. *) + + val get_doc : t -> Cmarkit.Doc.t + (** [get_doc c] is the document being rendered. *) + + val get_defs : t -> Cmarkit.Label.defs + (** [get_defs c] is [Doc.defs (get_doc c)]. *) + + (** Custom context state. *) + module State : sig + + type 'a t + (** The type for custom state of type ['a]. *) + + val make : unit -> 'a t + (** [make ()] is a new bit of context state. *) + + val find : context -> 'a t -> 'a option + (** [find c state] is the state [state] of context [c], if any. *) + + val get : context -> 'a t -> 'a + (** [get c state] is the state [state] of context [c], raises + [Invalid_argument] if there is no state [state] in [c]. *) + + val set : context -> 'a t -> 'a option -> unit + (** [set c state s] sets the state [state] of [c] to [s]. [state] is + cleared in [c] if [s] is [None]. *) + end + + val init : t -> Cmarkit.Doc.t -> unit + (** [init c] calls the initialisation function of [c]'s + {!val-renderer}. Note, this is done automatically by {!val-doc}. *) + + (** {1:render Rendering functions} + + These function append data to the {!buffer} of the context. For more + specialized rendering functions, see the corresponding rendering + backends. *) + + val byte : t -> char -> unit + (** [byte c b] renders byte [b] verbatim on [c]. *) + + val utf_8_uchar : t -> Uchar.t -> unit + (** [utf_8_uchar c u] renders the UTF-8 encoding of [u] on [c]. *) + + val string : t -> string -> unit + (** [string c s] renders string [s] verbatim on [c]. *) + + val inline : t -> Cmarkit.Inline.t -> unit + (** [inline c i] renders inline [i] on [c]. This invokes the + {{!compose}composition} of inline renderers of [c]. *) + + val block : t -> Cmarkit.Block.t -> unit + (** [block c b] renders block [b] on [c]. This invokes the + {{!compose}composition} of block renderers of [c]. *) + + val doc : t -> Cmarkit.Doc.t -> unit + (** [doc c d] initializes [c] with {!init} and renders document [d] on [c]. + This invokes the {{!compose}composition} of document renderers of [c]. *) +end + +(** {1:example Extending renderers} + + This example extends the {!Cmarkit_html.val-renderer} but it + applies {e mutatis mutandis} to the other backend document + renderers. + + Let's assume you want to: + + {ul + {- Extend the abstract syntax tree with a [Doc] block which + allows to splice documents in another one (note that + splicing is already built-in via the {!Cmarkit.Block.extension-Blocks} + block case).} + {- Change the rendering of {!Cmarkit.Inline.extension-Image} inlines to + render HTML [video] or [audio] elements depending on the link's + destination suffix.} + {- For the rest use the built-in {!Cmarkit_html.renderer} renderer + as it exists.}} + + This boils down to: + + {ol + {- Add a new case to the abstract syntax tree.} + {- Define a [custom_html] renderer which treats + {!Cmarkit.Inline.Image} and the new [Doc] case the way we + see it fit and return [false] otherwise to use the built-in renderer. } + {- {!compose} [custom_html] with {!Cmarkit_html.val-renderer}}} + +{[ +type Cmarkit.Block.t += Doc of Cmarkit.Doc.t (* 1 *) + +let media_link c l = + let has_ext s ext = String.ends_with ~suffix:ext s in + let is_video s = List.exists (has_ext s) [".mp4"; ".webm"] in + let is_audio s = List.exists (has_ext s) [".mp3"; ".flac"] in + let defs = Cmarkit_renderer.Context.get_defs c in + match Cmarkit.Inline.Link.reference_definition defs l with + | Some Cmarkit.Link_definition.Def (ld, _) -> + let start_tag = match Cmarkit.Link_definition.dest ld with + | Some (src, _) when is_video src -> Some (" Some (" None + in + begin match start_tag with + | None -> false (* let the default HTML renderer handle that *) + | Some (start_tag, src) -> + (* More could be done with the reference title and link text *) + Cmarkit_renderer.Context.string c start_tag; + Cmarkit_renderer.Context.string c {| src="|}; + Cmarkit_html.pct_encoded_string c src; + Cmarkit_renderer.Context.string c {|" />|}; + true + end + | None | Some _ -> false (* let the default HTML renderer that *) + +let custom_html = + let inline c = function + | Cmarkit.Inline.Image (l, _) -> media_link c l + | _ -> false (* let the default HTML renderer handle that *) + in + let block c = function + | Doc d -> + (* It's important to recurse via Cmarkit_renderer.Context.block *) + Cmarkit_renderer.Context.block c (Cmarkit.Doc.block d); true + | _ -> false (* let the default HTML renderer handle that *) + in + Cmarkit_renderer.make ~inline ~block () (* 2 *) + +let custom_html_of_doc ~safe doc = + let default = Cmarkit_html.renderer ~safe () in + let r = Cmarkit_renderer.compose default custom_html in (* 3 *) + Cmarkit_renderer.doc_to_string r doc +]} + +The [custom_html_of_doc] function performs your extended +renderings. *) + +(*--------------------------------------------------------------------------- + Copyright (c) 2021 The cmarkit programmers + + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + ---------------------------------------------------------------------------*) diff --git a/ocaml-lsp-server/vendor/cmarkit/dune b/ocaml-lsp-server/vendor/cmarkit/dune new file mode 100644 index 000000000..c1a666805 --- /dev/null +++ b/ocaml-lsp-server/vendor/cmarkit/dune @@ -0,0 +1,3 @@ +(library + (name cmarkit) + (wrapped false))