Skip to content
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
Cannot retrieve contributors at this time
;;; tuareg.el --- OCaml mode -*- coding: utf-8; lexical-binding:t -*-
;; Copyright (C) 1997-2006 Albert Cohen, all rights reserved.
;; Copyright (C) 2011-2022 Free Software Foundation, Inc.
;; Copyright (C) 2009-2010 Jane Street Holding, LLC.
;; Author: Albert Cohen <>
;; Sam Steingold <>
;; Christophe Troestler <>
;; Till Varoquaux <>
;; Sean McLaughlin <>
;; Stefan Monnier <>
;; Maintainer: Christophe Troestler <>
;; Stefan Monnier <>
;; Created: 8 Jan 1997
;; Version: 3.0.2-snapshot
;; Package-Requires: ((emacs "26.3") (caml "4.8"))
;; Keywords: ocaml languages
;; Homepage:
;; EmacsWiki: TuaregMode
;; This file is *NOT* part of GNU Emacs.
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <>.
;;; Commentary:
;; Tuareg helps editing OCaml code, to highlight important parts of
;; the code, to run an OCaml REPL, and to run the OCaml debugger
;; within Emacs.
;; See for customization tips.
;;; Installation:
;; If you have permissions to the local `site-lisp' directory, you
;; only have to copy `tuareg.el', `ocamldebug.el'
;; and `tuareg-site-file.el'. Otherwise, copy the previous files
;; to a local directory and add the following line to your `.emacs':
;; (add-to-list 'load-path "DIR")
;;; Usage:
;; Tuareg allows you to run batch OCaml compilations from Emacs (using
;; M-x compile) and browse the errors (C-x `). Typing C-x ` sets the
;; point at the beginning of the erroneous program fragment, and the
;; mark at the end. Under Emacs, the program fragment is temporarily
;; highlighted.
;; M-x tuareg-run-ocaml (or simply `run-ocaml') starts an OCaml
;; REPL (aka toplevel) with input and output in an Emacs buffer named
;; `*OCaml*. This gives you the full power of Emacs to edit
;; the input to the OCaml REPL. This mode is based on comint so
;; you get all the usual comint features, including command history. A
;; hook named `tuareg-interactive-mode-hook' may be used for
;; customization.
;; Typing C-c C-e in a buffer in tuareg mode sends the current phrase
;; (containing the point) to the OCaml REPL, and evaluates it. If
;; you type one of these commands before M-x tuareg-run-ocaml, the
;; REPL will be started automatically.
;; M-x ocamldebug FILE starts the OCaml debugger ocamldebug on the
;; executable FILE, with input and output in an Emacs buffer named
;; *ocamldebug-FILE*. It is similar to April 1996 version, with minor
;; changes to support XEmacs, Tuareg and OCaml. Furthermore, package
;; `thingatpt' is not required any more.
;;; Code:
(require 'cl-lib)
(require 'easymenu)
(require 'find-file)
(require 'subr-x)
(require 'seq)
(require 'caml-help nil t)
(require 'caml-types nil t)
(require 'tuareg-opam)
(require 'tuareg-compat)
;; Compatibility functions
(defun tuareg-editing-ls3 ()
"Tell whether we are editing Lucid Synchrone syntax."
(string-match-p "\\.ls\\'" (or buffer-file-name (buffer-name))))
(defun tuareg-editing-ocamllex ()
"Tell whether we are editing OCamlLex syntax."
(string-match-p "\\.mll\\'" (or buffer-file-name (buffer-name))))
;; Import types and help features
(defvar tuareg-with-caml-mode-p
(and (featurep 'caml-types) (featurep 'caml-help)))
;; User customizable variables
(require 'smie)
;; Use the standard `customize' interface or `tuareg-mode-hook' to
;; Configure these variables
(require 'custom)
(defgroup tuareg nil
"Support for the OCaml language."
:link '(url-link "")
:group 'languages)
;; Indentation defaults
(defcustom tuareg-default-indent 2
"Default indentation.
Global indentation variable (large values may lead to indentation overflows).
When no governing keyword is found, this value is used to indent the line
if it has to."
:group 'tuareg :type 'integer)
(defcustom tuareg-support-camllight nil
"If true, handle Caml Light character syntax (incompatible with labels)."
:group 'tuareg :type 'boolean
:set (lambda (var val)
(set-default var val)
(when (boundp 'tuareg-mode-syntax-table)
(modify-syntax-entry ?` (if val "\"" ".")
(defcustom tuareg-support-metaocaml nil
"If true, handle MetaOCaml syntax."
:group 'tuareg :type 'boolean
:set (lambda (var val)
(set-default var val)
(dolist (buf (buffer-list))
(with-current-buffer buf
(when (derived-mode-p 'tuareg-mode 'tuareg-interactive-mode)
(defcustom tuareg-in-indent 0 ; tuareg-default-indent
"How many spaces to indent from a `in' keyword.
Upstream <>
recommends 0, and this is what we default to since 2.0.1
instead of the historical `tuareg-default-indent'."
:group 'tuareg :type 'integer)
(defcustom tuareg-with-indent 0
"How many spaces to indent from a `with' keyword.
The examples at <>
show the `|' is aligned with `match', thus 0 is the default value."
:group 'tuareg :type 'integer)
(defcustom tuareg-match-clause-indent 1
"How many spaces to indent a clause of match after a pattern `| ... ->'
or `... ->' (pattern without preceding `|' in the first clause of a matching).
To respect <>
the default is 1."
:type 'integer)
(defcustom tuareg-match-when-indent (+ 4 tuareg-match-clause-indent)
"How many spaces from `|' to indent `when' in a pattern match
| patt
when cond ->
:type 'integer)
(defcustom tuareg-match-patterns-aligned nil
"Non-nil means that the pipes for multiple patterns of a single case
are aligned instead of being slightly shifted to spot the multiple
patterns better.
function v.s. function
| A | A
| B -> ... | B -> ...
| C -> ... | C -> ... "
:group 'tuareg :type 'boolean)
;; Tuareg-Interactive
;; Configure via `tuareg-mode-hook'
;; Automatic indentation
(make-obsolete-variable 'tuareg-use-abbrev-mode
"Use `electric-indent-mode' instead." "2.2.0")
(defcustom tuareg-electric-indent nil
"Whether to automatically indent the line after typing one of
the words in `tuareg-electric-indent-keywords'. Lines starting
with `|', `)', `]`, and `}' are always indented when the
`electric-indent-mode' is turned on."
:group 'tuareg :type 'boolean)
(defcustom tuareg-electric-close-vector t
"Non-nil means electrically insert `|' before a vector-closing `]' or
`>' before an object-closing `}'.
Many people find electric keys irritating, so you can disable them by
setting this variable to nil. You should probably have this on,
though, if you also have `tuareg-electric-indent' on."
:group 'tuareg :type 'boolean)
(defcustom tuareg-highlight-all-operators nil
"If t, highlight all operators (as opposed to unusual ones).
This is not turned on by default because this makes font-lock
much less efficient."
:group 'tuareg :type 'boolean)
(defcustom tuareg-other-file-alist
'(("\\.pp\\.mli\\'" (".ml" ".mll" ".mly" ""))
("\\.mli\\'" (".ml" ".mll" ".mly" ""))
("\\.pp\\.ml\\'" (".mli"))
("\\.ml\\'" (".mli"))
("\\.mll\\'" (".mli"))
("\\.mly\\'" (".mli"))
("\\.eliomi\\'" (".eliom"))
("\\.eliom\\'" (".eliomi")))
"Associative list of alternate extensions to find.
See `ff-other-file-alist'."
:group 'tuareg
:type '(repeat (list regexp (choice (repeat string) function))))
(defcustom tuareg-comment-show-paren t
"Highlight comment delimiters in `show-paren-mode' if non-nil."
:group 'tuareg
:type 'boolean)
(defcustom tuareg-interactive-scroll-to-bottom-on-output nil
"Controls when to scroll to the bottom of the interactive buffer
upon evaluating an expression.
See `comint-scroll-to-bottom-on-output' for details."
:group 'tuareg :type 'boolean
:set (lambda (var val)
(set-default var val)
(when (boundp 'comint-scroll-to-bottom-on-output)
(dolist (buf (buffer-list))
(with-current-buffer buf
(when (derived-mode-p 'tuareg-interactive-mode)
(setq-local comint-scroll-to-bottom-on-output val)))))))
(defcustom tuareg-skip-after-eval-phrase t
"Non-nil means skip to the end of the phrase after evaluation in the
OCaml REPL."
:group 'tuareg :type 'boolean)
(defcustom tuareg-interactive-read-only-input nil
"Non-nil means input sent to the OCaml REPL is read-only."
:group 'tuareg :type 'boolean)
(defcustom tuareg-interactive-echo-phrase t
"Non-nil means echo phrases in the REPL buffer when sending
them to the OCaml REPL."
:group 'tuareg :type 'boolean)
(defcustom tuareg-interactive-input-font-lock t
"Non nil means Font-Lock for REPL input phrases."
:group 'tuareg :type 'boolean)
(defcustom tuareg-interactive-output-font-lock t
"Non nil means Font-Lock for REPL output messages."
:group 'tuareg :type 'boolean)
(defcustom tuareg-interactive-error-font-lock t
"Non nil means Font-Lock for REPL error messages."
:group 'tuareg :type 'boolean)
(defcustom tuareg-display-buffer-on-eval t
"Non nil means pop up the OCaml REPL when evaluating code."
:group 'tuareg :type 'boolean)
(defcustom tuareg-manual-url
"URL to the OCaml reference manual."
:group 'tuareg :type 'string)
(defcustom tuareg-browser #'browse-url
"Name of function that displays the OCaml reference manual.
Valid names are `browse-url', `browse-url-firefox', etc."
:group 'tuareg :type 'function)
(defcustom tuareg-library-path "/usr/local/lib/ocaml/"
"Name of directory holding the OCaml library."
:group 'tuareg :type 'string)
(defcustom tuareg-mode-line-other-file nil
"If non-nil, display the (extension of the) alternative file in mode line."
:type 'boolean)
(defvar tuareg-options-list
`(["Prettify symbols" prettify-symbols-mode
:style toggle :selected prettify-symbols-mode :active t])
"List of menu-configurable Tuareg options.")
(defvar tuareg-interactive-options-list
'(("Skip phrase after evaluation" . 'tuareg-skip-after-eval-phrase)
("Echo phrase in interactive buffer" . 'tuareg-interactive-echo-phrase)
("Font-lock interactive input" . 'tuareg-interactive-input-font-lock)
("Font-lock interactive output" . 'tuareg-interactive-output-font-lock)
("Font-lock interactive error" . 'tuareg-interactive-error-font-lock)
("Read only input" . 'tuareg-interactive-read-only-input))
"List of menu-configurable Tuareg options.")
(defvar tuareg-interactive-program "ocaml -nopromptcont"
"Default program name for invoking an OCaml REPL (aka toplevel) from Emacs.")
;; Could be interesting to have this variable buffer-local
;; (e.g., ocaml vs. metaocaml buffers)
;; (make-variable-buffer-local 'tuareg-interactive-program)
(defgroup tuareg-faces nil
"Special faces for the Tuareg mode."
:group 'tuareg)
(defface tuareg-font-lock-governing-face
'((((class color) (type tty)) (:bold t))
(((background light)) (:foreground "black" :bold t))
(t (:foreground "wheat" :bold t)))
"Face description for governing/leading keywords."
:group 'tuareg-faces)
(defvar tuareg-font-lock-governing-face
(defface tuareg-font-lock-multistage-face
'((((background light))
(:foreground "darkblue" :background "lightgray" :bold t))
(t (:foreground "steelblue" :background "darkgray" :bold t)))
"Face description for MetaOCaml staging operators."
:group 'tuareg-faces)
(defvar tuareg-font-lock-multistage-face
(defface tuareg-font-lock-line-number-face
'((((background light)) (:foreground "dark gray"))
(t (:foreground "gray60")))
"Face description for line numbering directives."
:group 'tuareg-faces)
(defvar tuareg-font-lock-line-number-face
(defface tuareg-font-lock-operator-face
'((((background light)) (:foreground "brown"))
(t (:foreground "khaki")))
"Face description for all operators."
:group 'tuareg-faces)
(defvar tuareg-font-lock-operator-face
(defface tuareg-font-lock-module-face
'((t (:inherit font-lock-type-face))); backward compatibility
"Face description for modules and module paths."
:group 'tuareg-faces)
(defvar tuareg-font-lock-module-face
(defface tuareg-font-lock-constructor-face
'((t (:inherit default))) ;FIXME: Why not just nil?
"Face description for constructors of (polymorphic) variants and exceptions."
:group 'tuareg-faces)
(defvar tuareg-font-lock-constructor-face
(defface tuareg-font-lock-label-face
'((t (:inherit font-lock-constant-face keep)))
"Face description for labels."
:group 'tuareg-faces)
(defvar tuareg-font-lock-label-face
(defface tuareg-font-double-semicolon-face
'((t (:foreground "OrangeRed")))
"Face description for ;; which is not needed in standard code."
:group 'tuareg-faces)
(defvar tuareg-font-double-semicolon-face
(defface tuareg-font-lock-error-face
'((t (:foreground "yellow" :background "red" :bold t)))
"Face description for all errors reported to the source."
:group 'tuareg-faces)
(defvar tuareg-font-lock-error-face
(defface tuareg-font-lock-interactive-output-face
'((((background light))
(:foreground "blue4"))
(t (:foreground "grey")))
"Face description for all outputs in the REPL."
:group 'tuareg-faces)
(defvar tuareg-font-lock-interactive-output-face
(defface tuareg-font-lock-interactive-error-face
'((t :inherit font-lock-warning-face))
"Face description for all REPL errors."
:group 'tuareg-faces)
(defvar tuareg-font-lock-interactive-error-face
(defface tuareg-font-lock-interactive-directive-face
'((((background light)) (:foreground "slate gray"))
(t (:foreground "light slate gray")))
"Face description for all REPL directives such as #load."
:group 'tuareg-faces)
(defvar tuareg-font-lock-interactive-directive-face
(defface tuareg-font-lock-attribute-face
'((t :inherit font-lock-preprocessor-face))
"Face description for OCaml attribute annotations."
:group 'tuareg-faces)
(defvar tuareg-font-lock-attribute-face
(defface tuareg-font-lock-infix-extension-node-face
'((t :inherit font-lock-preprocessor-face))
"Face description for OCaml the infix extension node."
:group 'tuareg-faces)
(defvar tuareg-font-lock-infix-extension-node-face
(defface tuareg-font-lock-extension-node-face
'((default :inherit tuareg-font-lock-infix-extension-node-face)
(((background dark)) :foreground "LightSteelBlue")
(t :background "gray92"))
"Face description for OCaml extension nodes."
:group 'tuareg-faces)
(defvar tuareg-font-lock-extension-node-face
(defface tuareg-font-lock-doc-markup-face
`((t :inherit ,(if (facep 'font-lock-doc-markup-face)
'font-lock-doc-markup-face ; Emacs ≥28.
"Face for mark-up syntax in OCaml doc comments."
:group 'tuareg-faces)
(defface tuareg-font-lock-doc-verbatim-face
'((t :inherit fixed-pitch)) ; FIXME: find something better
"Face for verbatim text in OCaml doc comments (inside {v ... v})."
:group 'tuareg-faces)
;; Support definitions
;; This function is different from the standard in that it does NOT signal
;; errors at beginning-of-buffer.
(defun tuareg-backward-char (&optional step)
(if step (goto-char (- (point) step))
(goto-char (1- (point)))))
(defun tuareg-in-indentation-p ()
"Return non-nil if all chars between beginning of line and point are blanks."
(skip-chars-backward " \t")
(defun tuareg-in-literal-or-comment-p (&optional pos)
"Return non-nil if point is inside an OCaml literal or comment."
(nth 8 (syntax-ppss pos)))
(defun tuareg--point-after-comment-p ()
"Return non-nil if a comment precedes the point."
(and (eq (char-before) ?\))
(eq (char-before (1- (point))) ?*) ; implies position is in range
(nth 4 (syntax-ppss (1- (point)))))))
(defun tuareg-backward-up-list ()
;; FIXME: not clear if moving out of a string/comment should count as 1 or no.
(condition-case nil
(scan-error (goto-char (point-min)))))
;; Font-lock in Emacs
;; Originally by Stefan Monnier
(defcustom tuareg-font-lock-symbols nil
"Display fun and -> and such using symbols in fonts.
This may sound like a neat trick, but note that it can change the
alignment and can thus lead to surprises. On recent Emacs >= 24.4,
use `prettify-symbols-mode'."
:group 'tuareg :type 'boolean)
(make-obsolete-variable 'tuareg-font-lock-symbols
'prettify-symbols-mode "Emacs-24.4")
(defcustom tuareg-prettify-symbols-full nil
"If non-nil, add fun and -> and such to be prettified with symbols.
This may sound like a neat trick, but note that it can change the
alignment and can thus lead to surprises. By default, only symbols that
do not perturb in essential ways the alignment are used. See
`tuareg-prettify-symbols-basic-alist' and
:group 'tuareg :type 'boolean)
(defvar tuareg-prettify-symbols-basic-alist
`(("sqrt" . ?√)
("cbrt" . ?∛)
("&&" . ?∧) ; 'LOGICAL AND' (U+2227)
("||" . ?∨) ; 'LOGICAL OR' (U+2228)
("+." . ?∔) ;DOT PLUS (U+2214)
("-." . ?∸) ;DOT MINUS (U+2238)
;;("*." . ?×)
("*." . ?∙) ; BULLET OPERATOR
("/." . )
("+:" . "̈+"); (⨥ + ➕ ⨁ ⨢)
("-:" . "̈-"); COMBINING DIAERESIS ̈- (⨪ - ➖)
("*:" . "̈∙"); (⨱ * ✕ ✖ ⁑ ◦ ⨰ ⦿ ⨀ ⨂)
("/:" . "̈÷"); (➗)
("+^" . ?⨣)
("*^" . "̂∙")
("/^" . "̂÷")
("+~" . ?⨤)
("-~" . "̃-") ; COMBINING TILDE
("*~" . "̃∙")
("/~" . "̃÷")
("<-" . ?←)
("<=" . ?≤)
(">=" . ?≥)
("<>" . ?≠)
("==" . ?≡)
("!=" . ?≢)
("<=>" . ?⇔)
("infinity" . ?∞)
;; Some greek letters for type parameters.
("'a" . )
("'b" . )
("'c" . )
("'d" . )
("'e" . )
("'f" . )
("'i" . )
("'k" . )
("'m" . )
("'n" . )
("'o" . )
("'p" . )
("'r" . )
("'s" . )
("'t" . )
("'x" . )))
(defvar tuareg-prettify-symbols-extra-alist
`(("fun" . )
("not" . )
;;("or" . ?∨); should not be used as ||
("[|" . ?〚) ;; 〚
("|]" . ?〛) ;; 〛
("->" . ?→)
(":=" . ?⇐)
("::" . ?∷)))
(defun tuareg--prettify-symbols-compose-p (start end match)
"Return true iff the symbol MATCH should be composed.
See `prettify-symbols-compose-predicate'."
;; Refine `prettify-symbols-default-compose-p' so as not to compose
;; symbols for errors,...
(and (fboundp 'prettify-symbols-default-compose-p)
(prettify-symbols-default-compose-p start end match)
(not (memq (get-text-property start 'face)
(defun tuareg-font-lock-compose-symbol (alist)
"Compose a sequence of ascii chars into a symbol.
Regexp match data 0 points to the chars."
;; Check that the chars should really be composed into a symbol.
(let* ((mbegin (match-beginning 0))
(mend (match-end 0))
(syntax (char-syntax (char-after mbegin))))
(if (or (eq (char-syntax (or (char-before mbegin) ?\ )) syntax)
(eq (char-syntax (or (char-after mend) ?\ )) syntax)
(memq (get-text-property mbegin 'face)
;; No composition for you. Let's actually remove any composition
;; we may have added earlier and which is now incorrect.
(remove-text-properties mbegin mend '(composition))
;; That's a symbol alright, so add the composition.
(compose-region mbegin mend (cdr (assoc (match-string 0) alist)))))
;; Return nil because we're not adding any face property.
(defun tuareg-font-lock-symbols-keywords ()
(let ((alist (if tuareg-prettify-symbols-full
(append tuareg-prettify-symbols-basic-alist
(dolist (x alist)
(when (and (or (and (number-or-marker-p (cdr x))
(char-displayable-p (cdr x)))
(seq-every-p #'char-displayable-p (cdr x)))
(not (assoc (car x) alist))) ; not yet in alist.
(push x alist)))
(when alist
`((,(regexp-opt (mapcar #'car alist) t)
(0 (tuareg-font-lock-compose-symbol ',alist)))))))
(defvar tuareg-mode-syntax-table
(let ((st (make-syntax-table)))
(modify-syntax-entry ?_ "_" st)
(modify-syntax-entry ?. "'" st) ;Make qualified names a single symbol.
(modify-syntax-entry ?# "." st)
(modify-syntax-entry ?? ". p" st)
(modify-syntax-entry ?~ ". p" st)
;; See
(dolist (c '(?! ?$ ?% ?& ?+ ?- ?/ ?: ?< ?= ?> ?@ ?^ ?|))
(modify-syntax-entry c "." st))
(modify-syntax-entry ?' "_" st) ; ' is part of symbols (for primes).
;; ` is punctuation or character delimiter (Caml Light compatibility).
?` (if tuareg-support-camllight "\"" ".") st)
(modify-syntax-entry ?\" "\"" st) ; " is a string delimiter
(modify-syntax-entry ?\\ "\\" st)
(modify-syntax-entry ?* ". 23" st)
(modify-syntax-entry ?\( "()1n" st)
(modify-syntax-entry ?\) ")(4n" st)
"Syntax table in use in Tuareg mode buffers.")
;; Font-Lock
(defconst tuareg-font-lock-syntactic-keywords
;; Char constants start with ' but ' can also appear in identifiers.
;; Beware not to match things like '*)hel' or '"hel' since the first '
;; might be inside a string or comment.
;; Note: for compatibility with Emacs<23, we use "\\<" rather than "\\_<",
;; which depends on tuareg-font-lock-syntax turning all "_" into "w".
'(("\\<\\('\\)\\([^'\\\n]\\|\\\\.[^\\'\n \")]*\\)\\('\\)"
(1 '(7)) (3 '(7)))))
(defun tuareg-syntax-propertize (start end)
(goto-char start)
(tuareg--syntax-quotation end)
;; When we see a '"', knowing whether it's a literal char (as opposed to
;; the end of a string followed by the beginning of a literal char)
;; requires checking syntax-ppss as in:
;; ("\\_<\\('\"'\\)"
;; (1 (unless (nth 3 (save-excursion (syntax-ppss (match-beginning 0))))
;; (string-to-syntax "\""))))
;; Not sure if it's worth the trouble since adding a space between the
;; string and the literal char is easy enough and is the usual
;; style anyway.
;; For all other cases we don't need to check syntax-ppss because, if the
;; first quote is within a string (or comment), the whole match is within
;; the string (or comment), so the syntax-properties don't hurt.
;; Note: we can't just use "\\<" here because syntax-propertize is also
;; used outside of font-lock.
("\\_<\\('\\)\\(?:[^'\\\n]\\|\\\\.[^\\'\n \")]*\\)\\('\\)"
(1 "\"") (2 "\""))
(1 (prog1 "|" (tuareg--syntax-quotation end))))
(point) end))
(defun tuareg--syntax-quotation (end)
(let ((ppss (syntax-ppss)))
(when (eq t (nth 3 ppss))
(pcase (char-after (nth 8 ppss))
;; We're indeed inside a quotation.
(when (re-search-forward ">>" end 'move)
(put-text-property (1- (point)) (point)
'syntax-table (string-to-syntax "|"))))
;; We're inside a quoted string
(let ((id (save-excursion
(goto-char (1+ (nth 8 ppss)))
(buffer-substring (point)
(progn (skip-chars-forward "a-z_")
(when (search-forward (concat "|" id "}") end 'move)
(put-text-property (1- (point)) (point)
'syntax-table (string-to-syntax "|")))))
(c (error "Unexpected char '%c' starting delimited string" c))))))
(defmacro tuareg--syntax-rules (&rest rules)
"Generate a function to parse according to RULES.
Each argument has the form (RE BODY...) where RE is a regexp to
match and BODY what to execute upon match. BODY is executed with
point at the end of the match, `start' bound to the start of the
match and `group' to the number of the first group in RE, if any.
The returned function takes the two arguments BEGIN and END
delimiting the region of interest. "
(let ((group-number 1)
(clauses nil)
(regexps nil))
(dolist (rule rules)
(let* ((re (macroexpand (car rule)))
(body (cdr rule))
(re-ngroups (regexp-opt-depth re))
(if (> re-ngroups 0)
`((let ((group ,(1+ group-number)))
(push re regexps)
(push `((match-beginning ,group-number) . ,clause-body)
(setq group-number (+ group-number 1 re-ngroups))))
(let ((combined-re (mapconcat (lambda (re) (concat "\\(" re "\\)"))
(nreverse regexps) "\\|"))
(begin (make-symbol "begin"))
(end (make-symbol "end")))
`(lambda (,begin ,end)
(goto-char ,begin)
(while (and (< (point) ,end)
(re-search-forward ,combined-re ,end t)
(let ((start (match-beginning 0)))
(cond . ,(nreverse clauses))
;; FIXME: using nil here is a tad unstable -- sometimes we get a full
;; fontification as code (which is nice!), sometimes not.
(defconst tuareg-font-lock-doc-code-face nil
"Face to use for parts of a doc comment marked up as code (ie, [TEXT]).")
(defun tuareg-fontify-doc-comment (state)
(let ((beg (nth 8 state))
(end (save-excursion
(parse-partial-sexp (point) (point-max) nil nil state
(put-text-property beg end 'face 'font-lock-doc-face)
(when (and (eq (char-after (- end 2)) ?*)
(eq (char-after (- end 1)) ?\)))
(setq end (- end 2))) ; stop before closing "*)"
(let ((case-fold-search nil))
((rx (or "[" "{["))
;; Fontify opening bracket.
(put-text-property start (point) 'face
;; Skip balanced set of brackets.
(let ((start-end (point))
(level 1))
(while (and (< (point) end)
(re-search-forward (rx (? "\\") (in "[]"))
end 'noerror)
(let ((next (char-after (match-beginning 0))))
((eq next ?\[)
(setq level (1+ level))
((eq next ?\])
(setq level (1- level))
(if (> level 0)
(forward-char -1)
(t t)))))
(put-text-property start-end (point) 'face
(if (> level 0)
;; Highlight unbalanced opening bracket.
(put-text-property start start-end 'face
;; Fontify closing bracket.
(put-text-property (point) (1+ (point)) 'face
(forward-char 1))))
((rx "]")
(put-text-property start (1+ start) 'face
;; @-tag.
((rx "@" (group (or "author" "deprecated" "param" "raise" "return"
"see" "since" "before" "version"))
(put-text-property start (point) 'face
;; Use code face for the first argument of some tags.
(when (and (member (match-string group)
'("param" "raise" "before"))
(looking-at (rx (+ space)
(+ (in "a-zA-Z0-9" "_.'-"))))))
(put-text-property (match-beginning 1) (match-end 1) 'face
(goto-char (match-end 0))))
;; Cross-reference.
((rx (or "{!" "{{!")
(? (or "tag" "module" "modtype" "class" "classtype" "val" "type"
"exception" "attribute" "method" "section" "const"
(group (* (in "a-zA-Z0-9" "_.'"))))
(put-text-property start (match-beginning group) 'face
;; Use code face for the reference.
(put-text-property (match-beginning group) (match-end group) 'face
;; {v ... v}
((rx "{v" (in " \t\n"))
(put-text-property start (+ 3 start) 'face
(let ((verbatim-end end))
(when (re-search-forward (rx (in " \t\n") "v}")
end 'noerror)
(setq verbatim-end (match-beginning 0))
(put-text-property verbatim-end (point) 'face
(put-text-property (+ 3 start) verbatim-end 'face
;; Other {..} and <..> constructs.
((rx (or (seq "{"
(or (or "-" ":" "_" "^"
"b" "i" "e" "C" "L" "R"
"ul" "ol" "%"
;; Section header with optional label.
(seq (+ digit)
(? ":"
(+ (in "a-zA-Z0-9" "_"))))))
;; HTML-style tags
(seq "<" (? "/")
(or "b" "i" "code" "ul" "ol" "li"
"center" "left" "right"
(seq "h" (+ digit)))
(put-text-property start (point) 'face
;; Escaped syntax characters.
((rx "\\" (in "{}[]@"))))
beg end))))
(defun tuareg-font-lock-syntactic-face-function (state)
"`font-lock-syntactic-face-function' for Tuareg."
(if (nth 3 state)
(let ((start (nth 8 state)))
(if (and (> (point-max) (+ start 2))
(eq (char-after (+ start 2)) ?*)
(not (eq (char-after (+ start 3)) ?*)))
;; This is a documentation comment
(tuareg-fontify-doc-comment state)
;; Initially empty, set in `tuareg--install-font-lock-1'
(defvar tuareg-font-lock-keywords ()
"Font-Lock patterns for Tuareg mode (basic level).")
(defvar tuareg-font-lock-keywords-1 ()
"Font-Lock patterns for Tuareg mode (intermediate level).")
(defvar tuareg-font-lock-keywords-2 ()
"Font-Lock patterns for Tuareg mode (maximum level).")
(defconst tuareg-font-lock-syntax
;; Note: as a general rule, changing syntax-table during font-lock
;; is a potential problem for syntax-ppss.
`((?_ . "w") (?' . "w"))
"Syntax changes for Font-Lock.")
(defconst tuareg--whitespace-re
;; QUESTION: Why not just "[ \t\n]*"?
;; It used to be " *[\t\n]? *" but this is inefficient since it can match
;; N spaces in N+1 different ways :-(
" *\\(?:[\t\n] *\\)?")
(defconst tuareg--id-re "\\_<[A-Za-z_][A-Za-z0-9_']*\\_>"
"Regular expression for identifiers.")
(defconst tuareg--lid-re "\\_<[a-z_][A-Za-z0-9_']*\\_>"
"Regular expression for variable names.")
(defconst tuareg--uid-re "\\_<[A-Z][A-Za-z0-9_']*\\_>"
"Regular expression for module and constructor names.")
(defun tuareg--install-font-lock (&optional interactive-p)
"Setup `font-lock-defaults'. INTERACTIVE-P says whether it is
for the interactive mode."
(let* ((id tuareg--id-re)
(lid tuareg--lid-re)
(uid tuareg--uid-re)
(attr-id1 "\\<[A-Za-z_][A-Za-z0-9_']*\\>")
(attr-id (concat attr-id1 "\\(?:\\." attr-id1 "\\)*"))
(maybe-infix-extension (concat "\\(?:%" attr-id "\\)?")); at most 1
;; Matches braces balanced on max 3 levels.
(let ((b "\\(?:[^()]\\|(")
(e ")\\)*"))
(concat b b b "[^()]*" e e e)))
(let ((b "\\(?:[^()\"]\\|(")
(e ")\\)*"))
(concat b b b "[^()\"]*" e e e)))
(balanced-braces-no-end-operator ; non-empty
(let* ((b "\\(?:[^()]\\|(")
(e ")\\)*")
(braces (concat b b "[^()]*" e e))
(end-op (concat "\\(?:[^()!$%&*+-./:<=>?@^|~]\\|("
braces ")\\)")))
(concat "\\(?:[^()!$%&*+-./:<=>?@^|~]"
;; Operator not starting with ~
"\\|[!$%&*+-./:<=>?@^|][!$%&*+-./:<=>?@^|~]*" end-op
;; Operator or label starting with ~
"\\|~\\(?:[!$%&*+-./:<=>?@^|~]+" end-op
"\\|[a-z][a-zA-Z0-9]*[: ]\\)"
"\\|(" braces e)))
(let ((b "\\(?:[^][]\\|\\[")
(e "\\]\\)*"))
(concat b b b "[^][]*" e e e)))
(concat "\\(?:\\[@" attr-id balanced-brackets "\\]\\)*"))
(concat maybe-infix-extension maybe-infix-attribute))
;; FIXME: module paths with functor applications
(module-path (concat uid "\\(?:\\." uid "\\)*"))
(typeconstr (concat "\\(?:" module-path "\\.\\)?" lid))
(concat uid "\\(?: *([ A-Z]" balanced-braces ")\\)*"))
(concat extended-module-name
"\\(?: *\\. *" extended-module-name "\\)*"))
(modtype-path (concat "\\(?:" extended-module-path "\\.\\)*" id))
(typevar "'[A-Za-z_][A-Za-z0-9_']*\\>")
(typeparam (concat "\\(?:[+-]?" typevar "\\|_\\)"))
(typeparams (concat "\\(?:" typeparam "\\|( *"
typeparam " *\\(?:, *" typeparam " *\\)*)\\)"))
(typedef (concat "\\(?:" typeparams " *\\)?" lid))
;; Define 2 groups: possible path, variables
(let-ls3 (regexp-opt '("clock" "node" "static"
"present" "automaton" "where" "match"
"with" "do" "done" "unless" "until"
"reset" "every")))
(before-operator-char "[^-!$%&*+./:<=>?@^|~#?]")
(operator-char "[-!$%&*+./:<=>?@^|~]")
(operator-char-no> "[-!$%&*+./:<=?@^|~]"); for "->"
(concat "\\(?:[-$&*+/<=>@^|]" operator-char "*\\)"))
(let-binding-g4 ; 4 groups
(concat "\\_<\\(?:\\(let\\_>" binding-operator-char "?\\)"
"\\(" maybe-infix-ext+attr
"\\)\\(?: +\\(" (if (tuareg-editing-ls3) let-ls3 "rec\\_>")
"\\)\\)?\\|\\(and\\_>" binding-operator-char "?\\)\\)"))
;; group for possible class param
(concat "\\(\\_<class\\(?: +type\\)?\\(?: +virtual\\)?\\_>\\)"
" *\\(\\[ *" typevar " *\\(?:, *" typevar " *\\)*\\] *\\)?"))
;; font-lock rules common to all levels
`(("^#[0-9]+ *\\(?:\"[^\"]+\"\\)?"
0 tuareg-font-lock-line-number-face t)
;; cppo
(,(concat "^ *#"
(regexp-opt '("define" "undef" "if" "ifdef" "ifndef"
"else" "elif" "endif" "include"
"warning" "error" "ext" "endext")
. font-lock-preprocessor-face)
;; Directives
,@(if interactive-p
`((,(concat "^# +\\(#" lid "\\)")
1 tuareg-font-lock-interactive-directive-face)
(,(concat "^ *\\(#" lid "\\)")
1 tuareg-font-lock-interactive-directive-face))
`((,(concat "^\\(#" lid "\\)")
. tuareg-font-lock-interactive-directive-face)))
(,(concat (if interactive-p "^ *#\\(?: +#\\)?" "^#")
"show\\(?:_module\\)? +\\(" uid "\\)")
1 tuareg-font-lock-module-face)
(";;+" 0 tuareg-font-double-semicolon-face)
;; Attributes (`keep' to highlight except strings & chars)
(,(concat "\\[@\\(?:@@?\\)?" attr-id balanced-brackets "\\]")
0 tuareg-font-lock-attribute-face keep)
;; Extension nodes.
(,(concat "\\(\\[%%?" attr-id "\\)" balanced-brackets "\\(\\]\\)")
(1 tuareg-font-lock-extension-node-face)
(2 tuareg-font-lock-extension-node-face))
(,(concat "[^;];\\(" maybe-infix-extension "\\)")
1 tuareg-font-lock-infix-extension-node-face)
(,(concat "\\_<\\(function\\)\\_>\\(" maybe-infix-ext+attr "\\)"
tuareg--whitespace-re "\\(" lid "\\)?")
(1 font-lock-keyword-face)
(2 tuareg-font-lock-infix-extension-node-face keep)
(3 font-lock-variable-name-face nil t))
(,(concat "\\_<\\(fun\\|match\\)\\_>\\(" maybe-infix-ext+attr "\\)")
(1 font-lock-keyword-face)
(2 tuareg-font-lock-infix-extension-node-face keep))
;; "type" to introduce a local abstract type considered a keyword
(,(concat "( *\\(type\\) +\\(" lid " *\\)+)")
(1 font-lock-keyword-face)
(2 font-lock-type-face))
(":[\n]? *\\(\\<type\\>\\)"
(1 font-lock-keyword-face))
;; (lid: t), before function definitions
(,(concat "(" lid " *:\\(['_A-Za-z]"
balanced-braces-no-string "\\))")
1 font-lock-type-face keep)
;; "module type of" module-expr (here "of" is a governing
;; keyword). Must be before the modules highlighting.
(,(concat "\\<\\(module +type +of\\)\\>\\(?: +\\("
module-path "\\)\\)?")
(1 tuareg-font-lock-governing-face keep)
(2 tuareg-font-lock-module-face keep t))
;; First class modules. In these contexts, "val" and "module"
;; are not considered as "governing" (main structure of the code).
(,(concat "( *\\(module\\) +\\(" module-path "\\) *\\(?:: *\\("
balanced-braces-no-string "\\)\\)?)")
(1 font-lock-keyword-face)
(2 tuareg-font-lock-module-face)
(3 tuareg-font-lock-module-face keep t))
(,(concat "( *\\(val\\) +\\("
balanced-braces-no-end-operator "\\): +\\("
balanced-braces-no-string "\\))")
(1 font-lock-keyword-face)
(2 tuareg-font-lock-module-face)
(3 tuareg-font-lock-module-face))
(,(concat "\\_<\\(module\\)\\(" maybe-infix-ext+attr "\\)"
"\\(\\(?: +type\\)?\\(?: +rec\\)?\\)\\>\\(?: *\\("
uid "\\)\\)?")
(1 tuareg-font-lock-governing-face)
(2 tuareg-font-lock-infix-extension-node-face)
(3 tuareg-font-lock-governing-face)
(4 tuareg-font-lock-module-face keep t))
("\\_<let +exception\\_>" . tuareg-font-lock-governing-face)
(,(concat (regexp-opt '("sig" "struct" "functor" "inherit"
"initializer" "object" "begin")
"\\(" maybe-infix-ext+attr "\\)")
(1 tuareg-font-lock-governing-face)
(2 tuareg-font-lock-infix-extension-node-face keep))
(,(regexp-opt '("constraint" "in" "end") 'symbols)
. tuareg-font-lock-governing-face)
,@(if (tuareg-editing-ls3)
`((,(concat "\\<\\(let[ \t]+" let-ls3 "\\)\\>")
. tuareg-font-lock-governing-face)))
;; "with type": "with" treated as a governing keyword
(,(concat "\\<\\(\\(?:with\\|and\\) +type\\(?: +nonrec\\)?\\_>\\) *"
"\\(" typeconstr "\\)?")
(1 tuareg-font-lock-governing-face keep)
(2 font-lock-type-face keep t))
(,(concat "\\<\\(\\(?:with\\|and\\) +module\\>\\) *\\(?:\\("
module-path "\\) *\\)?\\(?:= *\\("
extended-module-path "\\)\\)?")
(1 tuareg-font-lock-governing-face keep)
(2 tuareg-font-lock-module-face keep t)
(3 tuareg-font-lock-module-face keep t))
;; "!", "mutable", "virtual" treated as governing keywords
(,(concat "\\<\\(\\(?:val\\(" maybe-infix-ext+attr "\\)"
(if (tuareg-editing-ls3) "\\|reset\\|do")
"\\)!? +\\(?:mutable\\(?: +virtual\\)?\\_>"
"\\|virtual\\(?: +mutable\\)?\\_>\\)"
"\\|val!\\(" maybe-infix-ext+attr "\\)\\)"
"\\(?: *\\(" lid "\\)\\)?")
(2 tuareg-font-lock-infix-extension-node-face keep t)
(3 tuareg-font-lock-infix-extension-node-face keep t)
(1 tuareg-font-lock-governing-face keep t)
(4 font-lock-variable-name-face nil t))
;; "val" without "!", "mutable" or "virtual"
(,(concat "\\_<\\(val\\)\\_>\\(" maybe-infix-ext+attr "\\)"
"\\(?: +\\(" lid "\\)\\)?")
(1 tuareg-font-lock-governing-face keep)
(2 tuareg-font-lock-infix-extension-node-face keep)
(3 font-lock-function-name-face keep t))
;; "private" treated as governing keyword
(,(concat "\\(\\<method!?\\(?: +\\(?:private\\(?: +virtual\\)?"
"\\|virtual\\(?: +private\\)?\\)\\)?\\>\\)"
" *\\(" lid "\\)?")
(1 tuareg-font-lock-governing-face keep t)
(2 font-lock-function-name-face keep t)); method name
(,(concat "\\<\\(open\\(?:! +\\|\\> *\\)\\)\\(" module-path "\\)?")
(1 tuareg-font-lock-governing-face)
(2 tuareg-font-lock-module-face keep t))
;; (expr: t) and (expr :> t) If `t' is longer then one
;; word, require a space before. Not only this is more
;; readable but it also avoids that `~label:expr var` is
;; taken as a type annotation when surrounded by
;; parentheses. Done last so that it does not apply if
;; already highlighted (let x : t = u in ...) but before
;; module paths (expr : X.t).
(,(concat "(" balanced-braces-no-end-operator ":>? *\\(?:\n *\\)?"
"\\(['_A-Za-z]" balanced-braces-no-string
"\\|(" balanced-braces-no-string ")"
1 font-lock-type-face)
;; module paths A.B.
(,(concat module-path "\\.") . tuareg-font-lock-module-face)
,@(and tuareg-support-metaocaml
1 tuareg-font-lock-multistage-face)))
;; External function declaration
(,(concat "\\_<\\(external\\)\\_>\\(?: +\\(" lid "\\)\\)?")
(1 tuareg-font-lock-governing-face)
(2 font-lock-function-name-face keep t))
;; Binding operators
(,(concat "( *\\(\\(?:let\\|and\\)\\_>"
binding-operator-char "\\) *)")
1 font-lock-function-name-face)
;; Highlight "let" and function names (their argument
;; patterns can then be treated uniformly with variable bindings)
(,(concat let-binding-g4 " *\\(?:\\(" lid "\\) *"
"\\(?:[^ =,:a]\\|a\\(?:[^s]\\|s[^[:space:]]\\)\\)\\)?")
(1 tuareg-font-lock-governing-face keep t)
(2 tuareg-font-lock-infix-extension-node-face keep t)
(3 tuareg-font-lock-governing-face keep t)
(4 tuareg-font-lock-governing-face keep t)
(5 font-lock-function-name-face keep t))
(,(concat "\\_<\\(include\\)\\_>\\(?: +\\("
extended-module-path "\\|( *"
extended-module-path " *: *" balanced-braces " *)\\)\\)?")
(1 tuareg-font-lock-governing-face)
(2 tuareg-font-lock-module-face keep t))
;; module type A = B
(,(concat "\\_<\\(module +type\\)\\_>\\(?: +" id
" *= *\\(" modtype-path "\\)\\)?")
(1 tuareg-font-lock-governing-face)
(2 tuareg-font-lock-module-face keep t))
;; "class [params] name"
(,(concat gclass-gparams "\\(" lid "\\)?")
(1 tuareg-font-lock-governing-face keep)
(2 font-lock-type-face keep t)
(3 font-lock-function-name-face keep t))
;; "type lid" anywhere (e.g. "let f (type t) x =")
;; introduces a new type
(,(concat "\\_<\\(type\\_>\\)\\(" maybe-infix-ext+attr
"\\)\\(?: +\\(nonrec\\_>\\)\\)?\\(?:"
"\\(" typedef "\\)\\)?")
(1 tuareg-font-lock-governing-face)
(2 tuareg-font-lock-infix-extension-node-face keep)
(3 tuareg-font-lock-governing-face keep t)
(4 font-lock-type-face keep t))))
`(;; Basic way of matching functions
(,(concat let-binding-g4 " *\\("
lid "\\) *= *\\(fun\\(?:ction\\)?\\)\\>")
(5 font-lock-function-name-face)
(6 font-lock-keyword-face))
`((,(regexp-opt '("true" "false" "__LOC__" "__FILE__" "__LINE__"
"__MODULE__" "__POS__" "__LOC_OF__" "__LINE_OF__"
. font-lock-constant-face)
(,(let ((kwd '("as" "do" "done" "downto" "else" "for" "if"
"then" "to" "try" "when" "while" "new"
"lazy" "assert" "exception")))
(if (tuareg-editing-ls3)
(progn (push "reset" kwd) (push "merge" kwd)
(push "emit" kwd) (push "period" kwd)))
(regexp-opt kwd 'symbols))
. font-lock-keyword-face)
(,(concat "\\_<exception +\\(" uid "\\)")
1 tuareg-font-lock-constructor-face)
;; (M: S) -- only color S here (may be "A.T with type t = s")
(,(concat "( *" uid " *: *\\("
modtype-path "\\(?: *\\_<with\\_>"
balanced-braces "\\)?\\) *)")
1 tuareg-font-lock-module-face keep)
;; module A(B: _)(C: _) : D = E, including "module A : E"
(,(concat "\\_<module +" uid tuareg--whitespace-re
"\\(\\(?:( *" uid " *: *"
modtype-path "\\(?: *\\_<with\\_>" balanced-braces "\\)?"
" *)" tuareg--whitespace-re "\\)*\\)\\(?::"
tuareg--whitespace-re "\\(" modtype-path
"\\) *\\)?\\(?:=" tuareg--whitespace-re
"\\(" extended-module-path "\\)\\)?")
(1 font-lock-variable-name-face keep); functor (module) variable
(2 tuareg-font-lock-module-face keep t)
(3 tuareg-font-lock-module-face keep t))
(,(concat "\\_<functor\\> *( *\\(" uid "\\) *: *\\("
modtype-path "\\) *)")
(1 font-lock-variable-name-face keep); functor (module) variable
(2 tuareg-font-lock-module-face keep))
;; Other uses of "with", "mutable", "private", "virtual"
(,(regexp-opt '("of" "with" "mutable" "private" "virtual") 'symbols)
. font-lock-keyword-face)
;; labels
(,(concat "\\([?~]" lid "\\)" tuareg--whitespace-re ":[^:>=]")
1 tuareg-font-lock-label-face keep)
;; label in a type signature
(,(concat "\\(?:->\\|:[^:>=]\\)" tuareg--whitespace-re
"\\(" lid "\\)[ \t]*:[^:>=]")
1 tuareg-font-lock-label-face keep)
;; Polymorphic variants (take precedence on builtin names)
(,(concat "`" id) . tuareg-font-lock-constructor-face)
(,(regexp-opt '("failwith" "failwithf" "exit" "at_exit" "invalid_arg"
"parser" "raise" "raise_notrace" "ref" "ignore"
"Match_failure" "Assert_failure" "Invalid_argument"
"Failure" "Not_found" "Out_of_memory" "Stack_overflow"
"Sys_error" "End_of_file" "Division_by_zero"
"Sys_blocked_io" "Undefined_recursive_module")
. font-lock-builtin-face)
("\\[[ \t]*\\]" . tuareg-font-lock-constructor-face) ; []
("[])a-zA-Z0-9 \t]\\(::\\)[[(a-zA-Z0-9 \t]" ; :: (not not ::…)
1 tuareg-font-lock-constructor-face)
;; Constructors
(,(concat "\\(" uid "\\)[^.]") 1 tuareg-font-lock-constructor-face)
(,(concat "\\_<let +exception +\\(" uid "\\)")
1 tuareg-font-lock-constructor-face)
;; let-bindings (let f : type = fun)
(,(concat let-binding-g4 " *\\(" lid "\\) *\\(?:: *\\([^=]+\\)\\)?= *"
(5 font-lock-function-name-face nil t)
(6 font-lock-type-face keep t))
;; let binding variables
(,(concat "\\(?:" let-binding-g4 "\\|" gclass-gparams "\\)")
(tuareg--pattern-vars-matcher (tuareg--pattern-pre-form-let) nil
(0 font-lock-variable-name-face keep))
(tuareg--pattern-maybe-type-matcher nil nil ; def followed by type
(1 font-lock-type-face keep)))
(,(concat "\\_<fun\\_>" maybe-infix-ext+attr)
(tuareg--pattern-vars-matcher (tuareg--pattern-pre-form-fun) nil
(0 font-lock-variable-name-face keep)))
(,(concat "\\_<method!? +\\(" lid "\\)")
(1 font-lock-function-name-face keep t); method name
(tuareg--pattern-vars-matcher (tuareg--pattern-pre-form-let) nil
(0 font-lock-variable-name-face keep))
(tuareg--pattern-maybe-type-matcher nil nil ; method followed by type
(1 font-lock-type-face keep)))
(,(concat "\\_<object *(\\(" lid "\\) *\\(?:: *\\("
balanced-braces "\\)\\)?)")
(1 font-lock-variable-name-face)
(2 font-lock-type-face keep t))
(,(concat "\\_<object *( *\\(" typevar "\\|_\\) *)")
1 font-lock-type-face)
,@(and tuareg-font-lock-symbols
(append common-keywords
(,(concat "( *\\([-=<>@^|&+*/$%!]" operator-char
"*\\|[#?~]" operator-char "+\\) *)")
1 font-lock-function-name-face)
;; By default do no highlight relation operators (=, <, >) nor
;; arithmetic operators because it is slow. However,
;; optionally allow it by popular demand.
,@(if tuareg-highlight-all-operators
;; Highlight "@", "+",... after "let…[@…]" but before
;; "let" rules remove the highlighting of "=".
`((,(concat before-operator-char
"\\([=<>@^&+*/$%!]" operator-char "*\\|:=\\|"
"[|#?~]" operator-char "+\\)")
1 tuareg-font-lock-operator-face)
;; "-" is special: avoid "->" and "-13"
(,(concat "\\(-\\)\\(?:[^0-9>]\\|\\("
operator-char-no> operator-char "*\\)\\)")
(1 tuareg-font-lock-operator-face)
(2 tuareg-font-lock-operator-face keep t))
(,(regexp-opt '("type" "module" "module type"
"val" "val mutable")
(tuareg--pattern-equal-matcher nil nil nil)))
`((,(concat "[@^&$%!]" operator-char "*\\|"
"[|#?~]" operator-char "+")
. tuareg-font-lock-operator-face)))
(if (tuareg-editing-ls3)
'("asr" "asl" "lsr" "lsl" "or" "lor" "and" "land" "lxor"
"not" "lnot" "mod" "fby" "pre" "last" "at")
'("asr" "asl" "lsr" "lsl" "or" "lor" "land"
"lxor" "not" "lnot" "mod"))
1 tuareg-font-lock-operator-face)
(setq font-lock-defaults
nil nil
,tuareg-font-lock-syntax nil
. tuareg-font-lock-syntactic-face-function)))
;; (push 'smie-backward-sexp-command font-lock-extend-region-functions)
(defvar tuareg--pattern-matcher-limit 0
"Limit for the matcher of function arguments")
(make-variable-buffer-local 'tuareg--pattern-matcher-limit)
(defvar tuareg--pattern-matcher-type-limit 0
"Limit for the type of a let bound definition.")
(make-variable-buffer-local 'tuareg--pattern-matcher-type-limit)
(defun tuareg--font-lock-in-string-or-comment ()
"Returns t if the point is inside a string or a comment.
This based on the fontification and is faster than calling `syntax-ppss'.
It must not be used outside fontification purposes."
(let* ((face (get-text-property (point) 'face)))
(and (symbolp face)
(memq face '(font-lock-comment-face
(defun tuareg--pattern-pre-form-let ()
"Return the position of \"=\" marking the end of \"let\"."
(if (or (tuareg--font-lock-in-string-or-comment)
(looking-at "[ \t\n]*open\\_>") ; "let open"
(looking-at "[ \t\n]*exception\\_>")) ; "let exception"
(progn ; bail out
(setq tuareg--pattern-matcher-limit (point))
(setq tuareg--pattern-matcher-type-limit (point)))
(let* ((opoint (point))
(limit (+ opoint 800))
(setq tuareg--pattern-matcher-limit nil)
(while (and
(setq pos (re-search-forward "[=({:]" limit t))
((memq (char-after) '(?\( ?\{))
;; Skip balanced braces
(if (ignore-errors (forward-list))
(goto-char (1- pos))
nil)) ; If braces are not balanced, stop.
((char-equal ?: (char-after))
;; Make sure it is not a label
(skip-chars-backward "a-zA-Z0-9_'")
(if (not (memq (char-before) '(?~ ??)))
(setq tuareg--pattern-matcher-limit (1- pos)))
(goto-char pos)
(t nil)))))
(setq tuareg--pattern-matcher-type-limit (1+ (point))); include "="
(unless tuareg--pattern-matcher-limit
(setq tuareg--pattern-matcher-limit (point)))
;; Remove any possible highlighting on "="
(unless (eobp)
(put-text-property (point) (1+ (point)) 'face nil))
;; move the point back for the sub-matcher
(goto-char opoint))
(put-text-property (point) tuareg--pattern-matcher-limit
'font-lock-multiline t)
(defun tuareg--pattern-pre-form-fun ()
"Return the position of \"->\" marking the end of \"fun\"."
(if (tuareg--font-lock-in-string-or-comment)
(setq tuareg--pattern-matcher-limit (point))
(let* ((opoint (point))
(limit (+ opoint 800))
(while (and
(setq pos (re-search-forward "[-({]" limit t))
((or (char-equal ?\( (char-before))
(char-equal ?{ (char-before)))
(if (ignore-errors (forward-list))
(goto-char (1- pos))
nil)) ; If braces are not balanced, stop.
(t (not (char-equal ?> (char-after)))))))
(setq tuareg--pattern-matcher-limit (point))
;; move the point back for the sub-matcher
(goto-char opoint)
(put-text-property (point) tuareg--pattern-matcher-limit
'font-lock-multiline t))
(defun tuareg--pattern-equal-matcher (limit)
"Find \"=\" and \"+=\" and remove its highlithing."
(unless (tuareg--font-lock-in-string-or-comment)
(let (pos)
(while (and
(<= (point) limit)
(setq pos (re-search-forward "[+=({[]" limit t))
((or (char-equal ?\( (char-after))
(char-equal ?{ (char-after))
(char-equal ?\[ (char-after)))
;; Skip balanced braces
(if (ignore-errors (forward-list))
(goto-char (1- pos))
nil)) ; If braces are not balanced, stop.
((char-equal ?+ (char-after))
(if (char-equal ?= (char-after pos))
(put-text-property (point) (1+ pos) 'face nil)
(put-text-property (point) (1+ (point)) 'face nil))))))
(defun tuareg--pattern-vars-matcher (_limit)
"Match a variable name after the point.
If it succeeds, it moves the point after the variable name and set
`match-data'. See e.g., `font-lock-keywords'."
(when (and (<= (point) tuareg--pattern-matcher-limit)
(re-search-forward tuareg--lid-re tuareg--pattern-matcher-limit t))
(skip-chars-forward " \t\n")
(if (>= (point) tuareg--pattern-matcher-limit)
((char-equal ?= (char-after))
;; Remove possible fontification of "=" (e.g. as an operator)
(put-text-property (point) (1+ (point)) 'face nil)
;; Decide whether it is ?(v =...) or {x = v; x = v}
(goto-char (match-beginning 0))
(skip-chars-backward " \t\n")
(if (char-equal (char-before) ?\() ; (var = expr)
(progn (up-list) ; keep match, skip expr
;; This is a label record, variable after
(goto-char (match-end 0))
(re-search-forward tuareg--lid-re tuareg--pattern-matcher-limit t)))
((char-equal ?: (char-after))
(let ((beg-ty (1+ (point))))
(goto-char (match-beginning 0))
(skip-chars-backward " \t\n")
(if (char-equal (char-before) ?\() ; (var : t)
(up-list) ; keep match, skip type
(put-text-property beg-ty (1- (point)) 'face
(goto-char (match-end 0)))))
(t t)))))
(defun tuareg--pattern-maybe-type-matcher (limit)
"Match a possible type after a let binding.
Run only once."
;; This function is needed because we want to ensure that the search
;; is bounded by the detected "=".
(when (and (<= (point) tuareg--pattern-matcher-type-limit)
(re-search-forward "[ \t\n]*:\\([^=]+\\)="
tuareg--pattern-matcher-type-limit t))
(goto-char limit) ; Do not run a second time
;; Keymap
;; Functions from the caml-mode package that may or may not be
;; available during compilation.
(declare-function caml-help "caml-help" (arg))
(declare-function ocaml-open-module "caml-help" (arg))
(declare-function ocaml-close-module "caml-help" (arg))
(declare-function ocaml-add-path "caml-help" (dir &optional path))
(declare-function caml-types-explore "caml-types" (event))
(declare-function caml-types-mouse-ignore "caml-types" (event))
(declare-function caml-types-show-ident "caml-types" (arg))
(declare-function caml-types-show-call "caml-types" (arg))
(declare-function caml-types-show-type "caml-types" (arg))
(defvar tuareg-mode-map
(let ((map (make-sparse-keymap)))
(define-key map "\M-q" #'tuareg-indent-phrase)
(define-key map [?\C-c ?\C-\;] #'tuareg-comment-dwim)
(define-key map "\C-c\C-q" #'tuareg-indent-phrase)
(define-key map "\C-c\C-a" #'tuareg-find-alternate-file)
(define-key map "\C-c\C-c" #'compile)
(define-key map "\C-c\C-w" #'tuareg-opam-update-env)
(define-key map "\M-\C-x" #'tuareg-eval-phrase)
(define-key map "\C-x\C-e" #'tuareg-eval-phrase)
(define-key map "\C-c\C-e" #'tuareg-eval-phrase)
(define-key map "\C-c\C-r" #'tuareg-eval-region)
(define-key map "\C-c\C-b" #'tuareg-eval-buffer)
(define-key map "\C-c\C-s" #'tuareg-run-ocaml)
(define-key map "\C-c\C-z" #'tuareg-switch-to-repl)
(define-key map "\C-c\C-i" #'tuareg-interrupt-ocaml)
(define-key map "\C-c\C-k" #'tuareg-kill-ocaml)
(define-key map "\C-c`" #'tuareg-interactive-next-error-source)
(define-key map "\C-c?" #'tuareg-interactive-next-error-source)
(define-key map "\C-c.c" #'tuareg-insert-class-form)
(define-key map "\C-c.b" #'tuareg-insert-begin-form)
(define-key map "\C-c.f" #'tuareg-insert-for-form)
(define-key map "\C-c.w" #'tuareg-insert-while-form)
(define-key map "\C-c.i" #'tuareg-insert-if-form)
(define-key map "\C-c.l" #'tuareg-insert-let-form)
(define-key map "\C-c.m" #'tuareg-insert-match-form)
(define-key map "\C-c.t" #'tuareg-insert-try-form)
(when tuareg-with-caml-mode-p
;; Trigger caml-types
(define-key map [?\C-c ?\C-t] #'caml-types-show-type) ; "type"
(define-key map [?\C-c ?\C-f] #'caml-types-show-call) ; "function"
(define-key map [?\C-c ?\C-l] #'caml-types-show-ident) ; "let"
;; To prevent misbehavior in case of error during exploration.
(define-key map [?\C-c mouse-1] #'caml-types-mouse-ignore)
(define-key map [?\C-c down-mouse-1] #'caml-types-explore)
;; Trigger caml-help
(define-key map [?\C-c ?\C-i] #'ocaml-add-path)
(define-key map [?\C-c ?\[] #'ocaml-open-module)
(define-key map [?\C-c ?\]] #'ocaml-close-module)
(define-key map [?\C-c ?\C-h] #'caml-help)
(define-key map [?\C-c ?\t] #'tuareg-complete))
"Keymap used in Tuareg mode.")
(defvar tuareg-electric-indent-keywords
'("module" "class" "functor" "object" "type" "val" "inherit"
"include" "virtual" "constraint" "exception" "external" "open"
"method" "and" "initializer" "to" "downto" "do" "done" "else"
"begin" "end" "let" "in" "then" "with"))
(defun tuareg--electric-indent-predicate (char)
"Check whether we should auto-indent.
For use on `electric-indent-functions'."
(tuareg-backward-char);; Go before the inserted char.
(let ((syntax (char-syntax char)))
(if (tuareg-in-indentation-p)
(or (eq char ?|) (eq syntax ?\)))
(or (pcase char
(`?\) (char-equal ?* (preceding-char)))
(`?\} (and (char-equal ?> (preceding-char))
(progn (tuareg-backward-char)
(`?\] (and (char-equal ?| (preceding-char))
(progn (tuareg-backward-char)
(and tuareg-electric-indent
(not (eq syntax ?w))
(let ((end (point)))
(skip-syntax-backward "w_")
(member (buffer-substring (point) end)
(defun tuareg--electric-close-vector ()
;; Function for use on post-self-insert-hook.
(when tuareg-electric-close-vector
(let ((inners (cdr (assq last-command-event
'((?\} ?> "{<") (?\] ?| "\\[|"))))))
(and inners
(eq (char-before) last-command-event) ;; Sanity check.
(not (eq (car inners) (char-before (1- (point)))))
(not (tuareg-in-literal-or-comment-p))
(when (ignore-errors (backward-sexp 1) t)
(looking-at (nth 1 inners))))
(goto-char (1- (point)))
(insert (car inners)))))))
;;; SMIE
;; TODO:
;; - Obey tuareg-*-indent customization variables.
;; - Fix use of tuareg-indent-command in tuareg-auto-fill-insert-leading-star.
;; - Use it by default (when possible).
;; - Move the old indentation code to a separate file.
(defconst tuareg-smie-grammar
;; Problems:
;; - "let D in E" expression vs "let D" declaration. This is solved
;; by making the lexer return "d-let" for the second case.
;; - FIXME: SMIE assumes that concatenation binds tighter than
;; everything else, whereas OCaml gives tighter precedence to ".".
;; - "x : t1; (y : (t2 -> t3)); z : t4" but
;; "when (x1; x2) -> (z1; z2)". We solve this by distinguishing
;; the two kinds of arrows, using "t->" for the type arrow.
;; - The "with" in modules's "with type" has different precedence.
;; - Big problem with "if...then": because of SMIE's transitivity of the
;; precedence relation, we can't properly parse both "if A then B; C" and
;; "if A then let x = E in B; C else D" (IOW I think a non-transitive OPG
;; could do it). We could try and fix the problem in the lexer, but it's
;; far from obvious how (we'd probably end up having to pre-parse the text
;; in the lexer to decide which kind of "if" and "then" we're looking
;; at). A good solution could be found maybe if SMIE let us disambiguate
;; lexemes late, i.e. at a time where we have access to the relevant parse
;; stack. Or maybe by allowing smie-grammar to use a non-transitive
;; precedence relation. But until that happens, we will live with an
;; incorrect parse, and instead we try to patch up the result with ad-hoc
;; hacks in tuareg-smie-rules.
;; - The "<module-type> with <mod-constraints>" syntax introduces many
;; conflicts:
;; "... with module M = A with module B = C"
;; vs "... module M = A with module B = C"
;; In the first, the second "with" should either have the first "with" as
;; sibling, or have some earlier construct as parent, whereas in the second
;; the "with" should have the first "=" (or maybe the first "module", tho
;; that would not correspond to the actual language syntax and would
;; probably break other cases) as parent. Other problems in this
;; mod-constraints syntax: we need a precedence along the lines of
;; "with" < "and" < "module/type", whereas the rest of the syntax wants
;; "module/type" < "and" < "with", so basically all the keywords involved
;; in mod-constraints need to be handled specially in the lexer :-(
;; - and then some...
(let ((bnfprec2
'((decls (decls "type" decls) (decls "d-let" decls)
(decls "and" decls) (decls ";;" decls)
(decls "exception" decls)
(decls "module" decls)
(decls "class" decls)
(decls "val" decls) (decls "external" decls)
(decls "open" decls) (decls "include" decls)
;; Hack: at the top-level, a "let D in E" can appear in
;; decls as well, but the lexer classifies it as "d-let",
;; so we need to make sure that "d-let D in E" doesn't
;; end up matching the "in" with some far away thingy.
(def-in-exp (defs "in" exp))
(def (var "d=" exp) (id "d=" datatype) (id "d=" module))
(idtype (id ":" type))
(var (id) ("m-type" var) ("d-type" var) ("c-type" var) ("rec" var)
("private" var) (idtype)
("l-module" var) ("l-class" var))
(exception (id "of" type))
(datatype ("{" typefields "}") (typebranches)
(typebranches "with" id))
(typebranches (typebranches "|" typebranches) (id "of" type))
(typefields (typefields ";" typefields) (idtype))
(type (type "*…" type) (type "t->" type)
;; ("<" ... ">") ;; FIXME!
(type "as" id))
(module ("struct" decls "end")
("sig" decls "end")
("functor" id "->" module)
(module "m-with" mod-constraints))
(simpledef (id "c=" type))
(mod-constraints (mod-constraints "m-and" mod-constraints)
("w-type" simpledef)
("w-module" simpledef))
;; exp1 is "all exps except for `if exp then'".
(exp1 ("begin" exp "end")
("(" exp:type ")")
("[|" exp "|]")
("{" fields "}")
("if" exp "then" exp1 "else" exp1)
;; ("if" exp "then" exp)
("while" exp "do" exp "done")
("for" forbounds "do" exp "done")
(exp1 ";" exp1)
("match" exp "with" branches)
("function" branches)
("fun" patterns* "->" exp1)
("try" exp "with" branches)
("let" defs "in" exp1)
("let" "exception-let" exception "in" exp1)
("object" class-body "end")
("(" exp:>type ")")
("{<" fields ">}")
;; MetaOCaml thingies.
;; Let's not do anything special for .~ for now,
;; as for !. it's deprecated anyway!
(".<" exp ">."))
;; Like `exp' but additionally allow if-then without else.
(exp (exp1) ("if" exp "then" exp))
(forbounds (iddef "to" exp) (iddef "downto" exp))
(defs (def) (defs "and" defs) ("l-open" id))
(exp:>type (exp:type ":>" type))
(exp:type (exp)) ;; (exp ":" type)
(fields (fields1) (exp "with" fields1))
(fields1 (fields1 ";" fields1) (iddef))
(iddef (id "f=" exp1))
(branches (branches "|" branches) (branch))
(branch (patterns "->" exp1))
(patterns* ("-dlpd-" patterns*) (patterns)) ;See use of "-dlpd-".
(patterns (pattern) (pattern "when" exp1)
;; Since OCaml 4.02, `match' expressions allow
;; `exception' branches.
("exception-case" pattern))
(pattern (id) (pattern "as" id) (pattern "|-or" pattern)
(pattern "," pattern))
(class-body (class-body "inherit" class-body)
(class-body "method" class-body)
(class-body "initializer" class-body)
(class-body "val" class-body)
(class-body "constraint" class-body)
(class-field (exp) ("mutable" idtype)
("virtual" idtype) ("private" idtype))
;; We get cyclic dependencies between ; and | because things like
;; "branches | branches" implies that "; > |" whereas "exp ; exp"
;; implies "| > ;" and while those two do not directly conflict
;; because they're constraints on precedences of different sides,
;; they do introduce a cycle later on because those operators are
;; declared associative, which adds a constraint that both sides
;; must be of equal precedence. So we declare here a dummy rule
;; to force a direct conflict, that we can later resolve with
;; explicit precedence rules.
(foo1 (foo1 ";" foo1) (foo1 "|" foo1))
;; "mutable x : int ; y : int".
(foo2 ("mutable" id) (foo2 ";" foo2))
;; Type precedence rules.
'((nonassoc "as") (assoc "t->") (assoc "*…"))
;; Pattern precedence rules.
'((nonassoc "as") (assoc "|-or") (assoc ",") (assoc "::"))
;; Resolve "{a=(1;b=2)}" vs "{(a=1);(b=2)}".
'((nonassoc ";") (nonassoc "f="))
;; Resolve "(function a -> b) | c -> d".
'((nonassoc "function") (nonassoc "|"))
;; Resolve "when (function a -> b) -> c".
'((nonassoc "function") (nonassoc "->"))
;; Resolve ambiguity "(let d in e2); e3" vs "let d in (e2; e3)".
'((nonassoc "in" "match" "->" "with") (nonassoc ";"))
;; Resolve "(if a then b else c);d" vs "if a then b else (c; d)".
'((nonassoc ";") (nonassoc "else")) ;; ("else" > ";")
;; Resolve "match e1 with a → (match e2 with b → e3 | c → e4)"
;; vs "match e1 with a → (match e2 with b → e3) | c → e4"
'((nonassoc "with") (nonassoc "|"))
;; Resolve "functor A -> (M with MC)".
'((nonassoc "->") (nonassoc "m-with"))
;; Resolve the conflicts caused by "when" and by SMIE's assumption
;; that all non-terminals can match the empty string.
'((nonassoc "with") (nonassoc "->")) ; "when (match a with) -> e"
'((nonassoc "|") (nonassoc "->")) ; "when (match a with a|b) -> e"
;; Fix up conflict between (decls "and" decls) and (defs "in" exp).
'((nonassoc "in") (nonassoc "and"))
;; Resolve the "artificial" conflict introduced by the `foo1' rule.
'((assoc "|") (assoc ";"))
;; Fix up associative declaration keywords.
'((assoc "type" "d-let" "exception" "module" "val" "open"
"external" "include" "class" ";;")
(assoc "and"))
'((assoc "val" "method" "inherit" "constraint" "initializer"))
;; Declare associativity of remaining sequence separators.
'((assoc ";")) '((assoc "|")) '((assoc "m-and")))))
;; (dolist (pair '()) ;; ("then" . "|") ("|" . "then")
;; (display-warning 'prec2 (format "%s %s %s"
;; (car pair)
;; (gethash pair bnfprec2)
;; (cdr pair))))
;; SMIE takes for granted that all non-terminals can match the empty
;; string, which can lead to the addition of unnecessary constraints.
;; Let's remove the ones that cause cycles without causing conflicts.
;; This comes from "exp ; exp" and "function branches", where
;; SMIE doesn't realize that `branches' has to have a -> before ;.
(cl-assert (eq '> (gethash (cons "function" ";") bnfprec2)))
(remhash (cons "function" ";") bnfprec2))
;; Precedence of operators.
'((nonassoc ".")
;; function application, constructor application, assert, lazy
;; - -. (prefix) –
(right "**…" "lsl" "lsr" "asr")
(nonassoc "*…" "/…" "%…" "mod" "land" "lor" "lxor")
(left "+…" "-…")
(assoc "::")
(right "@…" "^…")
(left "=…" "<…" ">…" "|…" "&…" "$…")
(right "&" "&&")
(right "or" "||")
(assoc ",")
(right "<-" ":=")
(assoc ";"))))))))
(defun tuareg-smie--search-backward (tokens)
(let (tok)
(while (progn
(setq tok (tuareg-smie--backward-token))
(if (not (zerop (length tok)))
(not (member tok tokens))
(unless (bobp)
(condition-case err
(progn (backward-sexp) t)
(setq tok (buffer-substring (nth 3 err) (1+ (nth 3 err))))
(defun tuareg-smie--search-forward (tokens)
(let (tok)
(while (progn
(setq tok (tuareg-smie--forward-token))
(if (not (zerop (length tok)))
(not (member tok tokens))
(unless (eobp)
(condition-case err
(progn (forward-sexp) t)
(setq tok (buffer-substring (nth 2 err) (nth 3 err)))
(defun tuareg-skip-blank-and-comments ()
(forward-comment (point-max)))
(defconst tuareg-smie--type-label-leader
'("->" ":" "=" ""))
(defconst tuareg-smie--exp-operator-leader
(delq nil (mapcar (lambda (x) (if (numberp (nth 2 x)) (car x)))
(defconst tuareg-smie--float-re "[0-9]+\\(?:\\.[0-9]*\\)?\\(?:e[-+]?[0-9]+\\)")
(defun tuareg-smie--forward-token ()
(let ((start (point))
(end nil))
(if (zerop (skip-syntax-forward "."))
(let ((start (point)))
(skip-syntax-forward "w_'")
;; Watch out for floats!
(and (memq (char-after) '(?- ?+))
(eq (char-before) ?e)
(goto-char start)
(looking-at tuareg-smie--float-re))
(goto-char (match-end 0))))
;; The "." char is given symbol property so that "M.x" is
;; considered as a single symbol, but in reality, it's part of the
;; operator chars, since "+." and friends are operators.
(while (not (and (zerop (skip-chars-forward "."))
(zerop (skip-syntax-forward ".")))))
(when (and (eq (char-before) ?%)
(looking-at "[[:alpha:]]+"))
;; Infix extension nodes, bug#121
(setq end (1- (point)))
(goto-char (match-end 0))))
(buffer-substring-no-properties start (or end (point)))))
(defun tuareg-smie--backward-token ()
(forward-comment (- (point)))
(let ((end (point)))
(if (and (zerop (skip-chars-backward "."))
(zerop (skip-syntax-backward ".")))
(skip-syntax-backward "w_'")
;; Watch out for floats!
(pcase (char-before)
((or `?- `?+)
(and (memq (char-after) '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?0))
(forward-char -1) (skip-syntax-backward "w_")
(looking-at tuareg-smie--float-re))
(>= (match-end 0) (point))
(goto-char (match-beginning 0))))
(`?% ;extension node, bug#121
(let ((pos (point)))
(forward-char -1)
(if (and (zerop (skip-chars-backward "."))
(zerop (skip-syntax-backward ".")))
(goto-char pos)
(setq end (1- pos)))))))
((memq (char-after) '(?\; ?,)) nil) ; ".;" is not a token.
((and (eq (char-after) ?\.)
(memq (char-before) '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?0)))
(skip-chars-backward "0-9")) ; A float number!
(t ;; The "." char is given symbol property so that "M.x" is
;; considered as a single symbol, but in reality, it's part of
;; the operator chars, since "+." and friends are operators.
(while (not (and (zerop (skip-chars-backward "."))
(zerop (skip-syntax-backward "."))))))))
(buffer-substring-no-properties (point) end)))
(defun tuareg-smie-forward-token ()
"Move point to the end of the next token and return its SMIE name."
(let ((tok (tuareg-smie--forward-token)))
((zerop (length tok))
(if (not (looking-at "{<\\|\\[|"))
(goto-char (match-end 0))
(match-string 0)))
((and (equal tok "|") (looking-at-p "\\]")) (forward-char 1) "|]")
((and (equal tok ">") (looking-at-p "}")) (forward-char 1) ">}")
((and (equal tok ".") (memq (char-after) '(?< ?~)))
(forward-char 1) (string ?. (char-before)))
((or (member tok '("let" "=" "->"
"module" "class" "open" "type" "with" "and"
;; lists
;; the tokens whose precedence is based on their prefix.
(memq (aref tok 0) '(?* ?/ ?% ?+ ?- ?@ ?^ ?= ?< ?> ?| ?& ?$)))
;; When indenting, the movement is mainly backward, so it's OK to make
;; the forward tokenizer a bit slower.
(save-excursion (tuareg-smie-backward-token)))
((and (member tok '("~" "?"))
(looking-at "[[:alpha:]_][[:alnum:]'_]*:"))
(goto-char (match-end 0))
((and (looking-at-p ":\\(?:[^:]\\|\\'\\)")
(string-match-p "\\`[[:alpha:]_]" tok)
(tuareg-smie--backward-token) ;Go back.
(member (tuareg-smie--backward-token)
(forward-char 1)
((string-match-p "\\`[[:alpha:]_].*\\.\\'" tok)
(forward-char -1) (substring tok 0 -1))
(t tok))))
(defconst tuareg-smie--exp-leaders
;; (let ((leaders ()))
;; (dolist (cat tuareg-smie-bnf)
;; (dolist (rule (cdr cat))
;; (setq rule (reverse rule))
;; (while (setq rule (cdr (cl-member 'dummy rule
;; :test (lambda (_ x)
;; (memq x '(exp exp1))))))
;; (push (car rule) leaders))))
;; (prin1-to-string (sort (delete-dups leaders) #'string-lessp)))
;; BEWARE: In let-disambiguate, we compare this against the output of
;; tuareg-smie--backward-token which never returns refined tokens like "d=",
;; so we manually replace those with just "=" here!
'("->" ".<" ";" "[|" "begin" "=" "do" "downto" "else" "if" "in"
"match" "then" "to" "try" "when" "while"))
(defun tuareg-smie--let-disambiguate ()
"Return \"d-let\" if \"let\" at point is a decl, or just \"let\" if it's an exp."
(let ((prev (tuareg-smie--backward-token)))
(if (or (member prev tuareg-smie--exp-leaders)
(if (zerop (length prev))
(and (not (bobp))
;; See if prev char has open-paren syntax.
(eq 4 (mod (car (syntax-after (1- (point)))) 256)))
(and (eq ?. (char-syntax (aref prev 0)))
(and (not (equal prev ";;"))
(let ((tokinfo (assoc prev smie-grammar)))
;; Check that prev is not a closing token like ">."
(or (null tokinfo)
(integerp (nth 2 tokinfo))))))))
(defun tuareg-smie--label-colon-p ()
(and (not (zerop (skip-chars-backward "[:alnum:]_")))
(or (not (zerop (skip-chars-backward "?~")))
(member (tuareg-smie--backward-token)
(defun tuareg-smie--=-disambiguate ()
"Return which kind of \"=\" we've just found.
Point is not moved and should be right in front of the equality.
Return values can be
\"f=\" for field definition,
\"d=\" for a normal definition,
\"c=\" for a type equality constraint, and
\"=…\" for an equality test."
(let* ((pos (point))
(telltale '("type" "let" "module" "class" "and" "external"
"val" "method" "=" ":="
"if" "then" "else" "->" ";" ))
(nearest (tuareg-smie--search-backward telltale)))
((and (member nearest '("{" ";"))
(let ((field t))
(let ((x (tuareg-smie--forward-token)))
(and (< (point) pos)
((zerop (length x)) (setq field nil))
((memq (char-syntax (aref x 0)) '(?w ?_)))
((member x '("." ";")))
(t (setq field nil))))))
(while (and (equal nearest "->")
(forward-char 2)
(equal (tuareg-smie-backward-token) "t->")))
(setq nearest (tuareg-smie--search-backward telltale)))
((and (member nearest '("=" ":="))
(member (tuareg-smie--search-backward telltale)
'("type" "module")))
;; Second equality in "type t = M.t = C" or after mod-constraint
((not (member nearest '("type" "let" "module" "class" "and"
"external" "val" "method")))
((and (member nearest '("type" "module"))
;; Maybe a module's type equality constraint?
(or (member (tuareg-smie--backward-token) '("with" "and"))
;; Or maybe an alias as part of a definition?
(and (equal nearest "type")
(goto-char (1+ pos)) ;"1+" to skip the `=' itself!
(let ((tok (tuareg-smie--search-forward
(cons "=" (mapcar #'car
(equal tok "=")))))
(t "d=")))))
(defun tuareg-smie--:=-disambiguate ()
"Return which kind of \":=\" we've just found.
Point is not moved and should be right in front of the equality.
Return values can be
\":=\" for assignment definition,
\"c=\" for destructive equality constraint."
(let* ((telltale '("type" "let" "module" "class" "and" "external"
"val" "method" "=" ":="
"if" "then" "else" "->" ";" ))
(nearest (tuareg-smie--search-backward telltale)))
(cond ;Issue #7
((and (member nearest '("type" "module"))
(member (tuareg-smie--backward-token) '("with" "and"))) "c=")
(t ":=")))))
(defun tuareg-smie--|-or-p ()
"Return non-nil if we're just in front of an or pattern \"|\"."
(let ((tok (tuareg-smie--search-backward
;; Stop at the first | or any token which should
;; never appear between a "|" and a "|-or".
'("|" "[" "->" "with" "function" "=" "of" "in" "then"))))
((equal tok "(") t)
((equal tok "|")
;; Maybe we have a "|-or". Then again maybe not. We should make sure
;; that `tok' is really either a "|-or" or the | of a match (and not
;; the | of a datatype definition).
(equal "|"
(setq tok
'("|" "with" "function" "=" "of" "in" "then")))))
((equal tok "=")
(not (equal (tuareg-smie--=-disambiguate) "d=")))
((equal tok "of") nil)
((member tok '("[" "{" "(")) nil)
(t t)))))))
(defun tuareg-smie-backward-token ()
"Move point to the beginning of the previous token and return its SMIE name."
(let ((tok (tuareg-smie--backward-token)))
;; Distinguish a let expression from a let declaration.
((equal tok "let") (tuareg-smie--let-disambiguate))
((equal ".<.~" tok) (forward-char 2) ".~") ;FIXME: Likely too ad-hoc!
;; Handle "let module" and friends.
((member tok '("module" "class" "open"))
(let ((prev (save-excursion (tuareg-smie--backward-token))))
((equal prev "let") (concat "l-" tok))
((and (member prev '("with" "and")) (equal tok "module")) "w-module")
(t tok))))
;; Distinguish a "type ->" from a "case ->".
((equal tok "->")
(let (nearest)
(while (progn
(setq nearest (tuareg-smie--search-backward
'("with" "|" "fun" "function" "functor"
"type" ":" "of")))
(and (equal nearest ":")
(if (member nearest '("with" "|" "fun" "function" "functor"))
tok "t->"))))
;; Handle "module type", "class type", mod-constraint's "with/and type"
;; and polymorphic syntax.
((equal tok "type")
(let ((prev (tuareg-smie--backward-token)))
(cond ((equal prev "module") "m-type")
((equal prev "class") "c-type")
((member prev '("and" "with")) "w-type")
((equal prev ":") "d-type"); ": type a. ..."
(t tok)))))
;; Disambiguate mod-constraint's "and" and "with".
((member tok '("with" "and"))
(if (member (tuareg-smie--forward-token) '("type" "module"))
(concat "m-" tok) tok)))
;; Distinguish a defining = from a comparison-=.
((equal tok "=")
((equal tok ":=") (tuareg-smie--:=-disambiguate))
((zerop (length tok))
(if (not (and (memq (char-before) '(?\} ?\]))
(save-excursion (forward-char -2)
(looking-at ">}\\||\\]"))))
(goto-char (match-beginning 0))
(match-string 0)))
((and (equal tok "|") (eq (char-before) ?\[)) (forward-char -1) "[|")
((and (equal tok "<") (eq (char-before) ?\{)) (forward-char -1) "{<")
((equal tok "|")
;; Check if it's the | of an or-pattern, since it has a slightly
;; different precedence (see Issue #71 for an example).
(if (tuareg-smie--|-or-p) "|-or" "|"))
;; Some infix operators get a precedence based on their prefix, so we
;; collapse them into a canonical representative.
;; See
((memq (aref tok 0) '(?* ?/ ?% ?+ ?- ?@ ?^ ?= ?< ?> ?| ?& ?$))
((member tok '("|" "||" "&" "&&" "<-" "->" ">.")) tok)
((and (eq (aref tok 0) ?*) (> (length tok) 1) (eq (aref tok 1) ?*))
(t (string (aref tok 0) ?…))))
((equal tok ":")
(let ((pos (point)))
(if (tuareg-smie--label-colon-p)
(goto-char pos)
((equal tok "exception")
(let ((back-tok (save-excursion (tuareg-smie--backward-token))))
((member back-tok '("|" "with")) "exception-case")
((equal back-tok "let") "exception-let")
(t tok))))
((string-match-p "\\`[[:alpha:]_].*\\.\\'" tok)
(forward-char (1- (length tok))) ".")
(t tok))))
(defun tuareg-smie-rules (kind token)
;; FIXME: Handling of "= |", "with |", "function |", and "[ |" is
;; problematic.
;; FIXME: Start with (pcase (cons kind token) ...) so Edebug jumps
;; straight to the appropriate branch!
;; Special indentation for module fields.
((and (eq kind :after) (member token '("." ";"))
(smie-rule-parent-p "with")
((and (eq kind :after) (equal token ";;"))
;; Special indentation for monadic >>>, >>|, >>=, and >|= operators.
((and (eq kind :before) (tuareg-smie--monadic-rule token)))
((and (equal token "and") (smie-rule-parent-p "type"))
((member token '(";" "|" "," "and" "m-and"))
((and (eq kind :before) (member token '("|" ";"))
(smie-rule-parent-p "then")
;; We have misparsed the code: TOKEN is not a child of `then' but
;; should have closed the "if E1 then E2" instead!
(tuareg-smie--if-then-hack token)))
;; FIXME: smie-rule-separator doesn't behave correctly when the separator
;; is right after the parent (on another line).
((and (smie-rule-bolp) (smie-rule-prev-p "d=" "with" "[" "function"))
(if (and (eq kind :before) (smie-rule-bolp)
(smie-rule-prev-p "[" "d=" "function"))
0 tuareg-with-indent))
((and (equal token "|") (smie-rule-bolp) (not (smie-rule-prev-p "d="))
(smie-rule-parent-p "d="))
;; FIXME: Need a comment explaining what this tries to do.
;; FIXME: Should this only apply when (eq kind :before)?
;; FIXME: Don't use smie--parent.
(when (bound-and-true-p smie--parent)
(goto-char (cadr smie--parent))
`(column . ,(- (current-column) 2))))
(t (smie-rule-separator kind))))
(pcase kind
(`:elem (cond
((eq token 'basic) tuareg-default-indent)
;; The default tends to indent much too deep.
((eq token 'empty-line-token) ";;")))
(`:list-intro (member token '("fun")))
(`:close-all t)
((equal token "d=") (smie-rule-parent 2))
((member token '("fun" "match"))
(and (not (smie-rule-bolp))
(cond ((smie-rule-prev-p "d=")
(smie-rule-parent tuareg-default-indent))
((smie-rule-prev-p "begin") (smie-rule-parent)))))
((equal token "then") (smie-rule-parent))
((equal token "if") (if (and (not (smie-rule-bolp))
(smie-rule-prev-p "else"))
((and (equal token "with") (smie-rule-parent-p "{"))
((and (equal token "with") (smie-rule-parent-p "d="))
(let ((td (smie-backward-sexp "with")))
(cl-assert (equal (nth 2 td) "d="))
(goto-char (nth 1 td))
(setq td (smie-backward-sexp "d="))
;; Presumably (equal (nth 1 td) "type").
(goto-char (nth 1 td))
`(column . ,(smie-indent-virtual))))
;; Align the "with" of "module type A = B \n with ..." w.r.t "module".
((and (equal token "m-with") (smie-rule-parent-p "d="))
(smie-backward-sexp token)
(goto-char (nth 1 (smie-backward-sexp 'halfsexp)))
(cons 'column (+ 2 (current-column)))))
;; Treat purely syntactic block-constructs as being part of their
;; parent, when the opening statement is hanging.
((member token '("let" "(" "[" "{" "sig" "struct" "begin"))
(when (and (smie-rule-hanging-p)
(apply #'smie-rule-prev-p
(if (let ((openers '("{" "(" "{<" "[" "[|")))
(or (apply #'smie-rule-prev-p openers)
(not (apply #'smie-rule-parent-p openers))))
(let ((offset (if (and (member token '("(" "struct" "sig"))
(not (smie-rule-parent-p "let" "d-let")))
(smie-rule-parent offset))
;; In "{ a = (", "{" and "a =" are not part of the same
;; syntax rule, so "(" is part of "a =" but not of the
;; surrounding "{".
(smie-backward-sexp 'halfsexp)
(cons 'column (smie-indent-virtual))))))
((and tuareg-match-patterns-aligned
(equal token "|-or") (smie-rule-parent-p "|"))
;; If we're looking at the first class-field-spec
;; in a "object(type)...end", don't rely on the default behavior which
;; will treat (type) as a previous element with which to align.
((tuareg-smie--object-hanging-rule token))
;; Apparently, people like their `| pattern when test -> body' to have
;; the `when' indented deeper than the body.
((equal token "when") (smie-rule-parent tuareg-match-when-indent))))
((equal token "d=")
(and (smie-rule-parent-p "type")
(not (smie-rule-next-p "["))
((equal token "->")
((smie-rule-parent-p "with")
;; Align with "with" but only if it's the only branch (often
;; the case in try..with), since otherwise subsequent
;; branches can't be both indented well and aligned.
(if (save-excursion
(and (not (equal "|" (nth 2 (smie-forward-sexp "|"))))
;; Since we may misparse "if..then.." we need to
;; double check that smie-forward-sexp indeed got us
;; to the right place.
(equal (nth 2 (smie-backward-sexp "|")) "with")))
(smie-rule-parent 2)
;; Align with other clauses, even with no preceding "|"
((smie-rule-parent-p "function")
;; Similar to the previous rule but for "function"
(if (save-excursion
(and (not (equal "|" (nth 2 (smie-forward-sexp "|"))))
(equal (nth 2 (smie-backward-sexp "|")) "function")))
(smie-rule-parent tuareg-default-indent)
((smie-rule-parent-p "|") tuareg-match-clause-indent)
;; Special case for "CPS style" code.
((smie-rule-parent-p "fun")
(smie-backward-sexp "->")
(if (eq ?\( (char-before))
(cons 'column
(+ tuareg-default-indent
(backward-char 1)
(t 0)))
((equal token ":")
((smie-rule-parent-p "val" "external") (smie-rule-parent 2))
((smie-rule-parent-p "module") (smie-rule-parent))
(t 2)))
((equal token "in") tuareg-in-indent) ;;(if (smie-rule-hanging-p)
((equal token "with")
;; ((smie-rule-next-p "|") 2)
((smie-rule-parent-p "{") nil)
(t (+ 2 tuareg-with-indent))))
((or (member token '("." "t->" "]"))
(consp (nth 2 (assoc token tuareg-smie-grammar)))) ;; Closer.
((member token '("{" "("))
;; The virtual indent after ( can be higher than the actual one
;; because it might be "column + tuareg-default-indent", whereas
;; the token only occupies a single column. So make sure we don't
;; get caught in this trap.
(let ((vi (smie-indent-virtual)))
(forward-char 1) ;Skip paren.
(skip-chars-forward " \t")
(unless (eolp)
. ,(min (current-column)
(+ tuareg-default-indent vi))))))
(t tuareg-default-indent)))))))
(defun tuareg-smie--with-module-fields-rule ()
;; Indentation of fields after "{ E with Module." where the "Module."
;; syntactically only applies to the first field, but has
;; semantically a higher position since it applies to all fields.
(forward-char 1)
(smie-backward-sexp 'halfsexp)
(when (looking-at "\\(?:\\sw\\|\\s_\\)+\\.[ \t]*$")
(smie-backward-sexp 'halfsexp)
(cons 'column (current-column)))))
(defconst tuareg-smie--monadic-operators '(">>|" ">>=" ">>>" ">|=")
"Monadic infix operators")
(defconst tuareg-smie--monadic-op-re
(regexp-opt tuareg-smie--monadic-operators))
(defun tuareg-smie--monadic-rule (token)
;; When trying to indent a >>=, try to look back to find any earlier
;; >>= in a sequence of "monadic steps".
(or (and (equal token ">…") (looking-at tuareg-smie--monadic-op-re)
(let ((indent nil))
(let ((parent-data (smie-backward-sexp 'halfsexp)))
((car parent-data) (member (nth 2 parent-data) '("->")))
((member (nth 2 parent-data) '(";" "d=")) nil)
((member (nth 2 parent-data) '("fun" "function"))
(if (member (tuareg-smie--backward-token)
(setq indent (cons 'column
;; In "foo >>= fun x -> bar" indent `bar' relative to `foo'.
(and (member token '("fun" "function")) (not (smie-rule-bolp))
(let ((prev (tuareg-smie-backward-token)))
;; FIXME: Should we use the same loop as above?
(and (equal prev ">…") (looking-at tuareg-smie--monadic-op-re)
(progn (smie-backward-sexp prev)
(cons 'column (current-column)))))))))
(defun tuareg-smie--object-hanging-rule (token)
;; If we're looking at the first class-field-spec
;; in a "object(type)...end", don't rely on the default behavior which
;; will treat (type) as a previous element with which to align.
;; An important role of this first condition is to call smie-indent-virtual
;; so that we get called back to compute the (virtual) indentation of
;; "object", thus making sure we get called back to apply the second rule.
((and (member token '("inherit" "val" "method" "constraint" "initializer"))
(smie-rule-parent-p "object"))
(forward-word 1)
(goto-char (nth 1 (smie-backward-sexp 'halfsexp)))
(let ((col (smie-indent-virtual)))
`(column . ,(+ tuareg-default-indent col)))))
;; For "class foo = object(type)...end", align object...end with class.
((and (equal token "object") (smie-rule-parent-p "class")
(not (smie-rule-bolp)))
(defun tuareg-smie--if-then-hack (token)
;; Getting SMIE's parser to properly parse "if E1 then E2" is difficult, so
;; instead we live with a confused parser and try to work around the mess
;; here, although it clearly won't help other uses of the parser
;; (e.g. navigation).
(let (pd)
(while (equal (nth 2 (setq pd (smie-backward-sexp token))) "then")
(let ((pdi (smie-backward-sexp 'halfsexp)))
(cl-assert (equal (nth 2 pdi) "if"))))
((equal (nth 2 pd) token)
(goto-char (nth 1 pd))
(cons 'column (smie-indent-virtual)))
((and (equal token "|") (equal (nth 2 pd) "with")
(not (smie-rule-bolp)))
(goto-char (nth 1 pd))
(cons 'column (+ 3 (current-column))))
(t (cons 'column (current-column)))))))
(defun tuareg-smie--inside-string ()
(when (nth 3 (syntax-ppss))
(goto-char (1+ (nth 8 (syntax-ppss))))
(defcustom tuareg-indent-align-with-first-arg nil
"Non-nil if indentation should try to align arguments on the first one.
With a non-nil value you get
let x = (fun x -> 5)
my list
whereas with a nil value you get