src/lexer.co

# The Lexer Object
# ----------------
# Reads a string of Coco code and divvies it up into tagged tokens.
# Tokens are in the form:
#
#     ['TAG', 'value', lineNumber = 0]
#
# which is a format that can be fed directly into
# [Jison](http://github.com/zaach/jison) generated [parser](../lib/parser.js).
# Some potential ambiguity in the grammar has been avoided by
# pushing some extra smarts into Lexer.

exports import
  #### Public Methods

  # The entry point: lexes `code` on a clone of this lexer object.
  #
  # - `code`: Coco source to be parsed into an array of tokens.
  #   A falsy value is treated as the empty string.
  # - `options`: optional settings; a falsy value is treated as `{}`.
  #   - `.raw`  <br> Suppresses token rewriting if truthy.
  #   - `.line` <br> Specifies the starting line. Default `0`.
  lex: (code, options) ->
    # Copy, set, go! Work on a clone (`^exports`) rather than `exports` itself.
    lexer = ^exports
    lexer.tokenize code||'', options||{}

  # Runs every rewriting pass over the token stream, one logical filter
  # at a time, and returns the same (mutated) stream.
  # The order of the passes matters--indentation must be corrected
  # before implicit parentheses can be wrapped around blocks of code.
  # Operates on `@tokens` when no stream is passed in.
  rewrite: (it || @tokens) ->
    passes = [addImplicitIndentation, rewriteBlockless, addImplicitParentheses, addImplicitBraces, expandLiterals]
    for pass of passes then pass it
    # Drop a leading NEWLINE token, if there is one.
    it.shift! if it.0?0 is \NEWLINE
    it

  #### Main Loop

  # Tokenizes `code`, (re)initializing the lexer state stored on `this`,
  # and returns the resulting `@tokens` array.
  #
  # - `code`: source text to lex.
  # - `o`: options object; `o.line` seeds the line counter and a truthy
  #   `o.raw` skips the rewriting passes.
  #
  # Throws (via `@carp`) on unclosed pairs or an unterminated interpolation.
  tokenize: (code, o) ->
    # Unless lexing an interpolation (`@inter`), strip carriage returns,
    # U+2028/U+2029 separators and U+FEFF from the source.
    @inter or code.=replace /[\r\u2028\u2029\uFEFF]/g ''
    # Prepend a newline to handle leading INDENT.
    code = \\n + code
    # Stream of parsed tokens,
    # initialized with a NEWLINE token to ensure `@last` always exists.
    @tokens = [@last = [\NEWLINE \\n 0]]
    # The current line number. Starts one below `o.line` (`~-x` is `x - 1`)
    # to offset the prepended newline.
    @line = ~-o.line
    # The stack of all current indentation levels.
    @dents = []
    # The stack for pairing tokens.
    @closes = []
    # The stack for tagging parameters.
    @parens = []
    # The stack of flags per nesting level.
    @flags = []
    # Call tokenizers based on the character at current `i`ndex.
    # Each tokenizing method is responsible for
    # returning the number of characters it has consumed.
    i = 0
    while c = code.charAt i
      switch c
      case ' '      then i += @doSpace     code, i
      case \\n      then i += @doLine      code, i
      case \\       then i += @doBackslash code, i
      case \' \"    then i += @doString    code, i, c
      case \0 to \9 then i += @doNumber    code, i
      case \/
        switch code.charAt i+1
        case \* then i += @doComment code, i
        case \/ then i += @doHeregex code, i
        default      i += @doRegex code, i or @doLiteral code, i
      case \` then i += @doJS code, i
      # Anything else: try an identifier, then a symbol, then whitespace.
      default i += @doID code, i or @doLiteral code, i or @doSpace code, i
    # Close up all remaining open blocks.
    @dedent @dent
    # Anything still left on the pairing stack was never closed.
    @carp "missing `#that`" if @closes.pop!
    # An interpolation lexer must have recorded `@rest` (the source following
    # its closing brace); a regular lexer finishes with a trailing newline.
    if @inter
    then @rest ? @carp 'unterminated interpolation'
    else @last.spaced = true; @newline!
    # Rewrite the token stream unless explicitly asked not to.
    o.raw or @rewrite!
    @tokens

  # The current indentation level.
  # Adjusted by `indent` (`+=`) and `dedent` (`-=`); starts at `0`.
  dent: 0

  #### Tokenizers

  # Matches an identifying literal: variables, keywords, accessors, etc.
  # Returns the number of characters consumed, or `0` when no identifier
  # starts at `index`.
  doID: (code, index) ->
    [input] = match = (ID <<< lastIndex: index)exec code
    return 0 unless input
    id = camelize match.1
    # Identifiers matching `NONASCII` are vetted by compiling a `var`
    # declaration with them.
    if NONASCII.test id
      try Function "var #id" catch @carp "invalid identifier \"#id\""
    {last} = this
    # `id:_` `_.id` `@id`
    if match.2 or last.0 is \DOT or @adi!
      # Keywords used as property names are kept as IDs but flagged `reserved`.
      @token \ID if id of KEYWORDS then Object(id) <<< {+reserved} else id
      @token \: \: if match.2
      return input.length
    # keywords
    switch id
    case \true \false \null \void \arguments \debugger then tag = \LITERAL
    case \new \do \typeof \delete                      then tag = \UNARY
    case \return \throw                                then tag = \HURL
    case \break  \continue                             then tag = \JUMP
    case \this \eval \super then return @token(\LITERAL id, true)length
    # `for` carries an array as its value; `let`/`own` get pushed onto it below.
    case \for
      id = []
      @fset \for true
      @fset \to false
    case \then
      @fset \for false
      @fset \to false
    case \catch \function then id = ''
    case \in \of
      if @fget \for
        @fset \for false
        if id is \of
          @fset \by true
          # OF holds the index variable.
          id = ''
          if last.0 is \ID and @tokens[*-2]0 of <[ , ] } ]>
            id = @tokens.pop!1
            @tokens.pop! if @tokens[*-1]0 is \,
        break
      fallthrough
    case \instanceof
      # Merge a preceding `!` into the relation (e.g. `!in`).
      id  = @tokens.pop!1 + id if last.1 is \!
      tag = \RELATION
    case \not
      # `is not` collapses into a single `!==` comparison.
      return (last.1 = \!==; 3) if last.alias and last.1 is \===
      tag = \UNARY; id = \!
    case \and \or \is
      @unline!
      if id is \is
      then @token \COMPARE \===
      else @token \LOGIC if id is \or then \|| else \&&
      # Mark the operator as having come from a word alias.
      @last.alias = true
      return id.length
    case \unless then tag = \IF
    case \until  then tag = \WHILE
    case \import
      # After an `able` token, `import` acts as the `<<<` operator.
      if able @tokens then id = \<<<; break
      fallthrough
    case \export \const \var then tag = \DECL
    case \let \own
      # Directly after FOR, record the modifier on the FOR token instead.
      if last.0 is \FOR and id not of last.1
        last.1.push id
        return 3
      fallthrough
    default
      break if id of KEYWORDS_SHARED
      @carp "reserved word \"#id\"" if id of KEYWORDS_UNUSED
      # Name a preceding nameless CATCH/FUNCTION/LABEL token with this id.
      if not last.1 and last.0 of <[ CATCH FUNCTION LABEL ]>
        last <<< {1: id, -spaced}
        return input.length
      tag = \ID
      # contextual keywords (reserved only in specific places)
      switch id
      case \all
        if last.1 is \<<<    and \<
        or last.1 is \import and \All
          last.1 += that
          return 3
      case \from then @forange! and tag = \FROM
      case \to \til
        # `for i to n` without `from`: supply an implicit `from 0`.
        @forange! and @tokens.push [\FROM '' @line] [\STRNUM \0 @line]
        if @fget \from
          @fset \from false
          @fset \by   true
          tag = \TO
        else if last.0 is \STRNUM and not last.callable
          # Literal range: retag the preceding STRNUM as RANGE.
          last <<< 0:\RANGE op:id
          return id.length
      case \by
        if last.0 is \STRNUM and @tokens[*-2]0 is \RANGE
          tag = \RANGE_BY
        else if @fget \by
          tag = \BY
          @fset \by false
      case \ever then if last.0 is \FOR
        # `for ever` becomes `while true`.
        @fset \for false
        last.0 = \WHILE; tag = \LITERAL; id = \true
    # Remaining keywords are tagged with their own upper-cased name.
    tag ||= match.1.toUpperCase!
    @unline! if tag of <[ RELATION THEN ELSE CASE DEFAULT CATCH FINALLY
                          IN OF FROM TO BY EXTENDS IMPLEMENTS ]>
    if tag of <[ THEN IF WHILE ]>
      @fset \for false
      @fset \by  false
    @token tag, id
    input.length

  # Matches a number, including decimal, hex and exponential notation.
  # The second argument (the index to lex from) is assigned straight to
  # `NUMBER.lastIndex` by the parameter list.
  # Returns the number of characters consumed (`0` if nothing matched).
  doNumber: (code, NUMBER.lastIndex) ->
    return 0 unless input = (match = NUMBER.exec code)0
    {last} = this
    # `. 0.0` => `. 0 . 0`
    if match.5 and (last.0 is \DOT or @adi!)
      @token \STRNUM match.4.replace NUMBER_OMIT, ''
      return match.4.length
    if radix = match.1
      num = parseInt rnum = match.2.replace(NUMBER_OMIT, ''), radix
      # Reject if unparsable, or if dropping the final digit yields the same
      # value (i.e. `parseInt` stopped before the end of the digits).
      if isNaN num or num is parseInt rnum.slice(0 -1), radix
        @carp "invalid number #rnum in base #radix"
      num += ''
    else
      num = (match.3 or input)replace NUMBER_OMIT, ''
      # A leading `0` followed by anything but `.` (or nothing) is rejected.
      if match.3 and num.charAt! is \0 and num.charAt(1) not of ['' \.]
        @carp "deprecated octal literal #{match.4}"
    # Fold an immediately preceding sign into the number token.
    if not last.spaced and last.0 is \+-
      last.0 = \STRNUM; last.1 += num
      return input.length
    @strnum num
    input.length

  # Matches a string literal starting with the quote character `q`
  # at `index`. Returns the number of characters consumed.
  doString: (code, index, q) ->
    # Two quotes in a row: either a heredoc opener (three) or an empty string.
    if q is code.charAt index+1
      return if q is code.charAt index+2
             then @doHeredoc code, index, q
             else @strnum q+q; 2
    # Double-quoted strings may contain interpolations.
    if q is \"
      parts = @interpolate code, index, q
      @addInterpolated parts, unlines
      # `parts.size` excludes the opening quote, hence the `1`.
      return 1 + parts.size
    str = (SIMPLESTR <<< lastIndex: index)exec code .0
          or @carp 'unterminated string'
    @strnum unlines @string q, str.slice 1 -1
    @countLines(str)length

  # Matches heredocs, adjusting indentation to the correct level,
  # as they ignore indentation to the left.
  # `q` is the quote character; returns the number of characters consumed.
  doHeredoc: (code, index, q) ->
    # Single-quoted heredocs are taken verbatim (no interpolation).
    if q is \'
      ~(end = code.indexOf q+q+q, index+3) or @carp 'unterminated heredoc'
      raw = code.slice index+3 end
      # Remove trailing indent.
      doc = raw.replace LASTDENT, ''
      @strnum enlines @string q, lchomp detab doc, heretabs doc
      # `6` accounts for the opening and closing triple quotes.
      return @countLines(raw)length + 6
    parts = @interpolate code, index, q+q+q
    # Common indentation, measured over the raw heredoc text.
    tabs  = heretabs code.slice(index+3 index+parts.size)replace LASTDENT, ''
    # Only string parts (`S`) are dedented; the last one also loses its
    # trailing indent and the first one its leading newline.
    for t, i of parts then if t.0 is \S
      t.1.=replace LASTDENT, '' if i+1 is parts.length
      t.1 = detab  t.1, tabs
      t.1 = lchomp t.1 if i is 0
    @addInterpolated parts, enlines
    3 + parts.size

  # Matches a block comment (`/* ... */`) starting at `index`.
  # An unterminated comment runs to the end of `code` and has `*/` appended.
  # The comment is emitted as a COMMENT token (followed by a NEWLINE) only
  # when the previous token is NEWLINE, INDENT or THEN.
  # Returns the length of the comment text.
  doComment: (code, index) ->
    end = code.indexOf \*/ index+2
    comment = if ~end
              then code.slice index, end+2
              else code.slice(index) + \*/
    if @last.0 of <[ NEWLINE INDENT THEN ]>
      @token \COMMENT detab comment, @dent
      @token \NEWLINE \\n
    {length} = @countLines comment
    length

  # Matches embedded JavaScript (a backtick-delimited literal).
  # The index to lex from is assigned to `JSTOKEN.lastIndex` by the
  # parameter list. Returns the number of characters consumed.
  doJS: (code, JSTOKEN.lastIndex) ->
    # `lit` is the whole match; `js` is the text between the backtick fences.
    [lit,, js] = JSTOKEN.exec code
    lit or @carp 'unterminated JS literal'
    # The value is a String object flagged with `js: true`.
    @token \LITERAL Object(detab js, @dent) <<< {+js}, true
    @countLines(lit)length

  # Matches a regular expression literal aka _regex_,
  # disambiguating from division operators.
  # Returns the number of characters consumed; `0` tells the caller
  # to lex the slash as an ordinary symbol instead.
  doRegex: (code, index) ->
    # To coexist with implicit call and ACI,
    # disallow leading space or equal sign when applicable.
    #
    #     f /re/ 9 /ex/   # f(/re/, 9, /ex/)
    #     a /= b / c / d  # division
    #
    if divisible = able @tokens or @last.0 is \CREMENT
      return 0 if not @last.spaced or code.charAt(index+1) of [' ' \=]
    [input, body, flag] = (REGEX <<< lastIndex: index)exec code
    # No match is only an error where a division was impossible anyway.
    if input
    then @regex body, flag
    else divisible or @carp 'unterminated regex'
    input.length

  # Matches a multiline, extended regex literal (`// ... //`).
  # Without interpolations it becomes a plain regex token; with them it is
  # expanded into a `RegExp(...)` call over concatenated parts, or, with
  # the `$` flag, into a parenthesized string concatenation.
  # Returns the number of characters consumed.
  doHeregex: (code, index) ->
    {tokens, last} = this
    parts = @interpolate code, index, \//
    rest  = code.slice index + 2 + parts.size
    # Flags are regular ones (`gimy`) or a single special `?` / `$`.
    flag  = @validate /^(?:[gimy]{1,4}|[?$]?)/exec(rest)0
    if parts.1
      if flag is \$
        @adi!; @token \( \"
      else
        tokens.push [\ID \RegExp last.2] [\CALL( '' last.2]
        # With `?`, the last interpolation supplies the flags dynamically.
        if flag is \?
          for t, i of parts by -1 then if t.0 is \TOKENS
            dynaflag = parts.splice(i, 1).0.1
            break
      for t, i of parts
        if t.0 is \TOKENS
          tokens.push ...t.1
        else
          val = t.1.replace HEREGEX_OMIT, ''
          # Skip empty string parts once a string part has been emitted.
          continue if one and not val
          one = tokens.push t <<< [\STRNUM @string \' enslash val]
        tokens.push [\+- \+ tokens[*-1]2]
      # Drop the trailing `+`.
      --tokens.length
      if dynaflag or flag >= \g
        @token \, \,
        if dynaflag then tokens.push ...dynaflag else @token \STRNUM "'#flag'"
      @token (if flag is \$ then \) else \)CALL), ''
    else @regex reslash(parts.0.1.replace HEREGEX_OMIT, ''), flag
    2 + parts.size + flag.length

  # Matches a word literal, or ignores a sequence of whitespaces.
  # The index to lex from is assigned to `BSTOKEN.lastIndex` by the
  # parameter list. Returns the number of characters consumed.
  doBackslash: (code, BSTOKEN.lastIndex) ->
    [input, word] = BSTOKEN.exec code
    # `\word` becomes a single-quoted string; a backslash followed by
    # whitespace only has its newlines counted.
    if word then @strnum @string \' word else @countLines input
    input.length

  # Matches newlines and {in,de}dents, determining which is which.
  # If we can detect that the current line is continued onto the next line,
  # then the newline is suppressed:
  #
  #     elements
  #     .map -> ...
  #     .get()
  #
  # Keeps track of the level of indentation, because a single dedent
  # can close multiple indents, so we need to know how far in we happen to be.
  # Returns the number of characters consumed.
  doLine: (code, index) ->
    # `tabs` is the indentation of the last line matched.
    [input, tabs] = (MULTIDENT <<< lastIndex: index)exec code
    {length} = @countLines input
    {last} = this; last <<< {+eol, +spaced}
    # Trailing blank lines at the end of input produce no tokens.
    return length if index + length >= code.length
    # The first indent character seen fixes the only one allowed (`@emender`).
    if tabs and (@emender ||= //[^#{ tabs.charAt! }]//)exec tabs
      @carp "contaminated indent #{ escape that }"
    if 0 > delta = tabs.length - @dent
      @dedent -delta
      @newline!
    else
      [tag, val] = last
      # A line ending in one of these operators continues onto the next.
      if tag is \ASSIGN  and val+'' not of <[ = := += ]>
      or tag is \BITWISE and val is not \&
      or tag of <[ +- DOT LOGIC MATH COMPARE RELATION SHIFT
                   IN OF TO BY FROM EXTENDS IMPLEMENTS ]>
        return length
      if delta then @indent delta else @newline!
    @fset \for false
    @fset \by  false
    length

  # Consumes non-newline whitespace and/or a line comment starting at
  # `index`, marking the previous token as `spaced` when anything was
  # consumed. Returns the number of characters consumed.
  doSpace: (code, index) ->
    SPACE.lastIndex = index
    input = SPACE.exec(code)0
    @last.spaced = true if input
    input.length

  # We treat all other single characters as a token. e.g.: `( ) , . !`
  # Multi-character operators are also literal tokens, so that Jison can assign
  # the proper order of operations.
  # There are some symbols that we tag specially here.
  # `;` and newlines are both treated as a NEWLINE, we distinguish parentheses
  # that indicate a method call from regular parentheses, and so on.
  # Returns the number of characters consumed (`0` if no symbol matched).
  doLiteral: (code, index) ->
    return 0 unless sym = (SYMBOL <<< lastIndex: index)exec(code)0
    # By default both the tag and the value are the symbol itself.
    switch tag = val = sym
    case \+ \-        then tag = \+-
    case \&& \||      then tag = \LOGIC
    case \?  \!?      then tag = \LOGIC if @last.spaced
    case \/ \% \**    then tag = \MATH
    case \++ \--      then tag = \CREMENT
    case \<<< \<<<<   then tag = \IMPORT
    case \;
      @fset \by false
      tag = \NEWLINE
    case \.
      @last.0 = \? if @last.1 is \?
      tag = \DOT
    case \,
      # Fill in omitted elements/variables before the comma.
      switch @last.0
      case \, \[ \( \CALL( then @token \LITERAL \void
      case \FOR \OWN       then @token \ID ''
    case \!=
      # Where no operand precedes, `!=` is split into unary `!` and `=`.
      unless able @tokens or @last.0 is \CREMENT
        @tokens.push [\UNARY \! @line] [\ASSIGN \= @line]
        return 2
      fallthrough
    case <[ === !== < > <= >= == ]> then tag = \COMPARE
    case <[ <<  >>  >>>   <?  >? ]> then tag = \SHIFT
    case \(
      # A plain parenthesis; remembered in `@parens` so that it can be
      # retagged later (see the `)` case and `parameters`).
      unless @last.0 of <[ FUNCTION LET ]> or @able true
        @token \( \(
        @closes.push \)
        @parens.push @last
        return 1
      tag = \CALL(
      @closes.push \)CALL
    case \[ \{
      @adi!
      @closes.push ']}'charAt val is \{
    case \}
      # Inside an interpolation, an unmatched `}` ends it: stash the rest
      # of the source and return a huge count to stop the main loop.
      if @inter and val is not @closes[*-1]
        @rest = code.slice index+1
        return 9e9
      fallthrough
    case \] \)
      if \) is tag = val = @pair val
        # `()` with nothing in between is retagged as a call.
        if @last is @lpar = @parens.pop!
          @last.0 = \CALL(
          tag     = \)CALL
    case \:
      switch @last.0
      case \ID \STRNUM \) then break
      case \...           then @last.0 = \STRNUM
      default tag = \LABEL; val = ''
    case <[ = := += -= *= /= %= &= ^= |= <<= >>= >>>= <?= >?= **= ]>
      # Directly after a dot, the operator is merged into the DOT token.
      if @last.1 is \. or @last.0 is \? and @adi!
        @last.1 += val
        return val.length
      if @last.0 is \LOGIC
        # Logical assignment: absorb the preceding LOGIC token.
        (val = Object val)logic = @tokens.pop!1
      else if val of <[ += -= ^= ]> and not able @tokens and
              @last.0 not of <[ +- ^ UNARY LABEL ]>
        # No assignee precedes: split into a unary operator and `=`.
        @token \UNARY val.charAt!; val = \=
      tag = \ASSIGN
    case \*
      return that if @last.0 of <[ NEWLINE INDENT THEN ]>
                  and @doInlinedent code, index+1, \list
      tag = if able @tokens
            or @last.0 is \CREMENT and able @tokens, @tokens.length-1
            then \MATH else \STRNUM
    case \@ \@@
      @dotcat val or if val is \@
        then @token \LITERAL \this true
        else @token \LITERAL \arguments
      return val.length
    case \!
      switch then unless @last.spaced
        if able @tokens, null true
          @token \CALL( \!
          @token \)CALL \)
        else if @last.1 is \typeof
          @last.1 = \classof
        else break
        return 1
      tag = \UNARY
    case \<>
      @token \LITERAL \<> true
      return 2
    case \&
      unless able @tokens
        @token \LITERAL \& true
        return 1
      fallthrough
    case \| then tag = \BITWISE
    case \~
      return 1 if @dotcat val
      tag = \UNARY
    case \-> \~> then up = \->; fallthrough
    case \<- \<~ then @parameters tag = up || \<-
    case \::     then up = \prototype; fallthrough
    case \..     then @adi!; tag = \ID; val = up || \constructor
    case \=>
      @unline!
      @fset \for false
      return 1 + that if @doInlinedent code, index+2
      tag = \THEN
    # Anything else starting with `<` here is a words literal.
    default if \< is val.charAt 0
      @carp 'unterminated words' if val.length < 4
      @token \WORDS, val, @adi!
      return val.length
    @unline! if tag of <[ , |> DOT LOGIC COMPARE MATH IMPORT SHIFT BITWISE ]>
    @token tag, val
    sym.length

  # Tries to open an inline indentation at `index` (used after `*` at the
  # start of a line and after `=>`). Returns the number of characters
  # matched by `INLINEDENT`, or `0` if it does not match there.
  # A truthy `list` first pushes `void =` tokens.
  doInlinedent: (code, index, list) ->
    return 0 unless d = (INLINEDENT <<< lastIndex: index)exec code .0.length
    list and @tokens.push [\LITERAL \void @line] [\ASSIGN \= @line]
    # The indent delta is derived from the column after the match
    # (measured from the preceding newline), relative to `@dent`.
    @indent index + d - @dent - 2 - code.lastIndexOf \\n index
    d

  #### Token Manipulators

  # Appends a `[tag, value, line]` token to the stream and makes it `@last`.
  # A truthy `callable` flags the token as callable.
  # Returns `value`.
  token: (tag, value, callable) ->
    entry = [tag, value, @line]
    entry.callable = true if callable
    @tokens.push @last = entry
    value

  # Records an INDENT of `delta` columns: bumps the current level, emits
  # the token, and remembers the size on `@dents` plus a pending DEDENT
  # on `@closes`.
  indent: !(delta) ->
    @dent += delta
    @token \INDENT delta
    @dents.push delta
    @closes.push \DEDENT

  # Records DEDENTs against matching INDENTs.
  # `debt` is the number of columns to give back; indents are popped off
  # `@dents` until it is paid.
  dedent: !(debt) ->
    @dent -= debt
    while debt > 0 and dent = @dents.pop!
      # Dedenting to a column that matches no open indent is an error
      # (tolerated inside interpolations).
      if debt < dent and not @inter
        @carp "unmatched dedent (#debt for #dent)"
      @pair \DEDENT
      # A non-number entry is an indent that `unline` marked as dummy:
      # it is paid off without emitting a DEDENT token.
      debt -= if typeof dent is \number then @token \DEDENT dent else dent

  # Emits a NEWLINE token (flagged `spaced`) and makes it `@last`.
  # Does nothing when the last token's value is already a newline,
  # so consecutive newlines get merged together.
  newline: !->
    return if @last.1 is \\n
    @tokens.push @last = [\NEWLINE \\n @line] <<< {+spaced}

  # Cancels an immediate newline: removes a trailing INDENT or NEWLINE token
  # (but not a `;`) and resets `@last` to the token before it.
  unline: !->
    # Never remove the initial NEWLINE token.
    return unless @tokens.1
    switch @last.0
    case \INDENT
      # Mark the last indent as dummy.
      # (Turning it into a string; `dedent` checks for a non-number entry.)
      @dents[*-1] += ''
    case \NEWLINE
      return if @last.1 is \;
    default return
    # Drop the last token and step `@last` back to its predecessor.
    @last = @tokens[--*-1]

  # (Re)tags function parameters when an arrow is met.
  # `arrow` is `->` for functions or `<-` for backcalls.
  parameters: !(arrow) ->
    # An explicit parenthesized list just closed: retag its delimiters.
    # (`@lpar` is the opener recorded when the `)` was lexed.)
    if @last.0 is \) is @last.1
      @lpar.0 = \PARAM(
      @last.0 = \)PARAM
      return
    # Otherwise supply an empty list. For a backcall the opener is
    # inserted after the nearest NEWLINE/INDENT/THEN/`(` looking backwards.
    if arrow is \-> then @token \PARAM( '' else
      break if t.0 of <[ NEWLINE INDENT THEN ( ]> for t, i of @tokens by -1
      @tokens.splice i+1 0 [\PARAM( '' t.2]
    @token \)PARAM ''

  # Expands variables and expressions inside double-quoted strings or heregexes
  # using Ruby-like notation for substitution of arbitrary expressions.
  #
  #     "Hello #{name.capitalize()}."
  #
  # Will recursively create a new lexer for each interpolation,
  # tokenizing the contents and merging them into the token stream.
  #
  # - `str`: the source; `idx`: index of the opening delimiter;
  #   `end`: the closing delimiter.
  #
  # Returns an array of parts, each either `[\S, text, line]` or
  # `[\TOKENS, tokens]`, with a `.size` property holding the number of
  # characters consumed after the opening delimiter (closing one included).
  # Carps if the closing delimiter is never found.
  interpolate: !(str, idx, end) ->
    # `pos` counts characters already cut off the front of `str`.
    parts = []; end0 = end.charAt 0; pos = 0; i = -1
    str.=slice idx + end.length
    while ch = str.charAt ++i
      switch ch
      case end0
        continue unless end is str.slice i, i + end.length
        parts.push [\S; @countLines str.slice 0 i; @line]
        return parts <<< size: pos + i + end.length
      case \#
        # `#@`, `#&`, `#<>` and `#identifier` are shorthand interpolations.
        c1 = str.charAt i+1
        id = c1 of <[ @ & ]> and c1
          or c1 is \< and \> is str.charAt i+2 and \<>
          or (ID <<< lastIndex: i+1)exec str .1
        continue unless id or c1 is \{
      # Skip the character following a backslash.
      case \\ then ++i; fallthrough
      default continue
      # `"#a#{b || c}"` => `a + "" + (b || c)`
      if i or nested and not stringified
        stringified = parts.push [\S; @countLines str.slice 0 i; @line]
      if id
        {length} = id
        id = \this if id is \@
        if id of <[ this & <> ]>
          tag = \LITERAL
        else
          id = camelize id
          try Function "'use strict'; var #id" catch
            @carp "invalid variable interpolation \"#id\""
          tag = \ID
        str.=slice delta = i + 1 + length
        parts.push [\TOKENS nested = [[tag, id, @line]]]
      else
        # `#{...}`: lex the contents with a cloned lexer in `inter` mode,
        # which stops at the unmatched `}` and leaves the remainder in `rest`.
        clone  = ^exports <<< {+inter, @emender}
        nested = clone.tokenize str.slice(i+2), {@line, +raw}
        delta  = str.length - clone.rest.length
        {rest: str, @line} = clone
        nested.shift! while nested.0?0 is \NEWLINE
        if nested.length
          nested.unshift [\( \( nested.0.2]
          nested.push    [\) \) @line]
          parts.push [\TOKENS nested]
      # Restart scanning from the beginning of the shortened `str`.
      pos += delta; i = -1
    @carp "missing `#end`"

  # Merges `@interpolate`d strings into the token stream.
  # `parts` is the result of `@interpolate`; `nlines` is the function used
  # to process newlines in each string part.
  addInterpolated: !(parts, nlines) ->
    # A single part means there was no interpolation: emit a plain string.
    return @strnum nlines @string \" parts.0.1 unless parts.1
    {tokens, last} = this
    # An unspaced `%` directly before the string selects the list form:
    # the `%` token is dropped and parts are joined with `,` inside `[ ]`.
    # Otherwise parts are concatenated with `+` inside parentheses.
    [left, right, joint] = if not last.spaced and last.1 is \%
      --tokens.length
      @last = last = tokens[*-1]
      [\[ \] [\,  \,]]
    else
      [\( \) [\+- \+]]
    callable = @adi!
    tokens.push [left, \", last.2]
    for t, i of parts
      if t.0 is \TOKENS
        tokens.push ...t.1
      else
        # Empty string parts are skipped, except at the first two positions.
        continue if i > 1 and not t.1
        tokens.push [\STRNUM; nlines @string \" t.1; t.2]
      tokens.push joint.concat tokens[*-1]2
    # Drop the trailing joint.
    --tokens.length
    @token right, '', callable

  # Records a string/number token, supplying implicit dot if applicable.
  # The token is flagged callable when an implicit dot was supplied or
  # when it directly follows a DOT.
  strnum: !-> @token \STRNUM it, @adi! || @last.0 is \DOT

  # Records a regex token for `body` with `flag`.
  # The body is first compiled with `RegExp` so that errors are reported
  # through `@carp`. With the `$` flag the regex source is emitted as a
  # string instead; an empty body is emitted as `(?:)`.
  regex: (body, flag) ->
    try RegExp body catch @carp e.message
    return @strnum @string \' enslash body if flag is \$
    @token \LITERAL "/#{ body or '(?:)' }/#{ @validate flag }"

  # Supplies an implicit DOT if applicable, i.e. when the last token is
  # unspaced and either `!?` or `able`.
  # Returns the DOT value (truthy) when one was added, `undefined` otherwise.
  adi: ->
    return if @last.spaced
    if @last.0 is \!?
      # Expand `!?` into an empty call followed by `?`.
      @last.0 = \CALL(
      @token \)CALL ''
      @token \? \?
    else unless able @tokens
      return
    @token \DOT \.

  # Resolves `.~` etc.: appends `it` to the value of the last token when
  # that token is a `.` or an implicit dot has just been supplied.
  # Returns the combined value, or `undefined` when nothing was appended.
  dotcat: -> @last.1 += it if @last.1 is \. or @adi!

  # Pairs up the closing token `it` against the top of `@closes`.
  # Returns the popped closer, which is `)CALL` rather than `)` when the
  # opener was a call parenthesis. Carps on a mismatch.
  pair: ->
    unless it is (wanted = @closes[*-1]) or \)CALL is wanted and it is \)
      @carp "unmatched `#it`" unless \DEDENT is wanted
      # Auto-close DEDENT to support code like:
      #
      #     [ a
      #       b ]
      #
      @dedent @dents[*-1]
      return @pair it
    @unline!
    @closes.pop!

  #### Helpers

  # Checks if the last token is
  #
  # - `f()`: `call`able via explicit parentheses.
  # - `x''`: indexable via implicit dots.
  #
  # Always false when the last token is followed by whitespace.
  # Delegates to the top-level `able` with its default index.
  able: (call) -> not @last.spaced and able @tokens, null call

  # Increments `@line` by the number of newlines in a string.
  # Returns the string itself so that calls can be chained.
  countLines: -> ++@line while pos = 1 + it.indexOf \\n pos; it

  # Checks whether a FOR token sits right before the last token, in which
  # case the `for` flag is swapped for the `from` flag.
  # Returns `true` if so, `false` otherwise.
  forange: ->
    # Look one token further back when the last token is a line break.
    skip = @last.0 of <[NEWLINE INDENT]>
    return false unless @tokens[* - 2 - skip]?0 is \FOR
    @fset \for  false
    @fset \from true
    true

  # Complains when any character occurs twice in the regex `flag` string.
  # Returns `flag` unchanged otherwise.
  validate: (flag) ->
    dup = flag and /(.).*\1/exec flag
    @carp "duplicate regex flag `#{dup.1}`" if dup
    flag

  # {G,S}ets a per-nest flag.
  # Flags are stored in `@flags`, indexed by the current nesting depth
  # (the length of `@closes`), so each open pair gets its own set.
  # `fget` yields `undefined` when nothing has been set at this depth.
  fget: (key) ->
    @flags[@closes.length]?[key]
  # Creates the flag container for the current depth on demand.
  fset: (key, val) ->
    @flags@[@closes.length][key] = val

  # Raises a syntax error for `message`, tagged with the current line number.
  carp: !(message) -> carp message, @line

  # Builds a string literal from `body` quoted with `q`,
  # passing along the current line number for error reporting.
  string: (q, body) ->
    string q, body, @line

#### Helpers

# Throws a `SyntaxError` whose message is `msg` followed by the line number.
# `-~lno` is `lno + 1` with integer coercion, turning the internal line
# index into a 1-based one.
function carp msg, lno
  humanLine = -~lno
  throw SyntaxError "#msg on line #humanLine"

# Checks whether or not the previous token is {index,`call`}able.
#
# - `tokens`: the token stream.
# - `i`: index of the current position; the token inspected is `tokens[i-1]`.
#   Defaults to `tokens.length`, i.e. the last token.
# - `call`: if truthy, test for callability instead of indexability.
function able tokens, i ? tokens.length, call
  [tag] = token = tokens[i-1]
  # IDs, `]` and `?` always qualify, as does an unspaced `&` that
  # follows a spaced token.
  return true if tag of <[ ID ] ? ]>
              or tag is \BITWISE and token.1 is \& and
                 not token.spaced and tokens[i-2]spaced
  # For closing parentheses in call mode, a non-empty value is required.
  if call then token.callable or tag of <[ ) )CALL ]> and token.1
          else tag of <[ } ) )CALL STRNUM LITERAL WORDS ]>

#### String Manipulators
# Constructs a string literal by (un)escaping quotes etc.
#
# - `q`: the quote character to wrap `body` with.
# - `body`: the raw contents.
# - `lno`: line number used when reporting a malformed escape.
#
# The regex captures, in order: an octal escape (`oct`), an incomplete
# `\x`/`\u` escape (`xu`), and any other escaped character (`rest`).
string = let do
  re = // ['"] |
    \\ (?: ([0-3]?[0-7]{2} | [1-7] | 0(?=[89]))
         | x[\dA-Fa-f]{2} | u[\dA-Fa-f]{4} | ([xu])
         | [\\0bfnrtv] | [^\n\S] | ([\w\W])
       )? //g
then (q, body, lno) ->
  body.=replace re, (it, oct, xu, rest) ->
    # Escape the enclosing quote and lone backslashes.
    return \\ + it if it of [q, \\]
    # Convert octal to hex for strict mode.
    return \\\x + (0x100 + parseInt oct, 8)toString 16 .slice 1 if oct
    carp 'malformed character escape sequence' lno if xu
    # Needless escapes are dropped, unless they escape the enclosing quote.
    if not rest or q is rest then it else rest
  q + body + q

# Detects the minimum indent count for a heredoc, ignoring empty lines.
# Returns `NaN` when `doc` contains no line that `TABS` matches.
function heretabs doc
  # Start from NaN so that the first match always replaces it.
  dent = 0/0
  # Each match includes the leading newline, hence the `- 1`.
  dent <?= that.0.length - 1 while TABS.exec doc
  dent
# A newline not directly followed by a line end, plus the indentation after it.
TABS = /\n(?!$)[^\n\S]*/mg

# Erases all external indentations up to specified length:
# after each newline, up to `len` non-newline whitespace characters
# are removed. A falsy `len` returns `str` untouched.
# The regex for each `len` is cached as a property of `detab` itself.
function detab str, len
  if len then str.replace detab[len]||=//\n[^\n\S]{1,#len}//g \\n else str

# Creates a function that applies `.replace re, to` to its argument.
function replacer re, to
  (str) -> str.replace re, to

# Erases all newlines and indentations
# (each newline together with the whitespace that follows it).
unlines = replacer /\n[^\n\S]*/g ''

# Converts newlines/backslashes to their quoted form.
enlines = replacer /\n/g \\\n
enslash = replacer /\\/g \\\\

# Quotes slashes unless already quoted.
# Backslash escapes are captured and passed through unchanged.
reslash = replacer /(\\.)|\//g -> @@1 or \\\/

# Transforms hyphenated-words to camelCase.
camelize = replacer /-[a-z]/ig -> it.char-at 1 .to-upper-case!

# Returns the string without its first character if that is a newline,
# otherwise returns it whole.
function lchomp str
  if str.charAt(0) is \\n then str.slice 1 else str.slice 0

# Decodes a range endpoint given as a token value.
# Returns `[number]` for a numeric value, or `[charCode, true]` for a
# string literal of exactly one character.
# Carps with `bad string in range` (on line `lno`) for any other string.
function decode val, lno
  return [+val] unless isNaN val
  # Evaluate the literal to obtain the actual character. Overlong literals
  # are swapped for a two-character dummy so that the length check fails.
  val = if val.length > 8 then \ng else do Function \return + val
  val.length is 1 or carp 'bad string in range' lno
  [val.charCodeAt!, true]

# Renders a character code as a double-quoted `\uXXXX` escape,
# zero-padded to four hex digits.
function uxxxx then \"\\u + (\000 + it.toString 16)slice(-4) + \"
# Renders a character code as a quoted string literal.
# Uses `JSON.stringify` when `JSON` exists, except for U+2028/U+2029 which
# are always escaped; falls back to `uxxxx` for everything otherwise.
character = if JSON!? then uxxxx else ->
  switch it case 0x2028 0x2029 then uxxxx it
  default JSON.stringify String.fromCharCode it

#### Rewriters

# The Coco language has a good deal of optional, implicit, and/or shorthand
# syntax. This can greatly complicate a grammar and bloat the resulting parse
# table. Instead of making the parser handle it all, we take a series of passes
# over the token stream, convert shorthand into the unambiguous long form,
# add implicit indentation and parentheses, and generally clean things up.

# - Tag postfix conditionals.
# - Fill in empty blocks for bodyless `class`es.
#
# Mutates `tokens` in place.
!function rewriteBlockless tokens
  for [tag]:token, i of tokens
    detectEnd tokens, i+1, ok, go if tag of <[ IF CLASS ]>
  # The search stops at the first NEWLINE or INDENT of the same level.
  function  ok then it.0 of <[ NEWLINE INDENT ]>
  !function go it, i
    if tag is \IF
      # An IF not followed by its own block is a postfix conditional.
      token.0 = \POST_IF if it.0 is not \INDENT
                         or not it.1 and not it.then
                         or tokens[i-1]0 of BLOCK_USERS
    else unless it.0 is \INDENT
      # A CLASS without a body gets an empty INDENT/DEDENT pair.
      tokens.splice i, 0 [\INDENT 0 lno = tokens[i-1]2] [\DEDENT 0 lno]

# Wrap single-line blocks with regular INDENT/DEDENT pairs.
# Because our grammar is LALR(1), it can't handle some sequences
# that lack ending delimiters.
#
# Mutates `tokens` in place.
!function addImplicitIndentation tokens
  i = 0
  while token = tokens[++i]
    [tag] = token
    continue unless tag of
      <[ -> THEN ELSE DEFAULT TRY CATCH FINALLY DECL ]>
    switch next = tokens[i+1]0
    case \IF then continue if tag is \ELSE
    case \INDENT \THEN
      # A real block follows: a THEN in front of it is redundant.
      tokens.splice i-- 1 if tag is \THEN
      continue
    indent = [\INDENT 0 token.2]; dedent = [\DEDENT 0]
    # A THEN is replaced by the INDENT (flagged `then`);
    # otherwise the INDENT is inserted after the token.
    if tag is \THEN
    then (tokens[i] = indent)then = true
    else tokens.splice ++i, 0 indent
    switch
    case tag is \DECL then break
    # ->,
    case next of <[ DOT ? , |> ]> then --i; fallthrough
    # -> 0,
    case next of <[ ID STRNUM LITERAL ]> and \, is tokens[i+2]?0
      go 0 i+=2; ++i
      continue
    # -> [0],
    case next of <[ ( [ { ]>
     and \, is tokens[idx = 1 + indexOfPair tokens, i+1]?0
      go 0 idx; ++i
      continue
    detectEnd tokens, i+1, ok, go
  # Decides whether token `t0` at index `i` terminates the implicit block
  # opened by the current `tag`.
  function ok [t0]:token, i
    # Handle nesting intuitively:
    #   `try try a finally b` => `try (try a finally b)`
    t = tag
    tag := '' if tag is t0 or tag is \THEN and t0 is \SWITCH
    switch t0
    case \NEWLINE       then token.1 is not \;
    case \DOT \? \, \|> then tokens[i-1]eol
    case \ELSE          then t is \THEN
    case \CATCH         then t is \TRY
    case \FINALLY       then t of <[ TRY CATCH THEN ]>
    case \CASE \DEFAULT then t of <[ CASE THEN ]>
  # Inserts the DEDENT at `i` (or before a comma directly preceding it),
  # giving it the line number of the previous token.
  !function go [] i
    prev = tokens[i-1]
    tokens.splice if prev.0 is \, then i-1 else i, 0, dedent <<< {prev.2}

# Functions may be optionally called without parentheses for simple cases.
# Insert the missing parentheses here to aid the parser.
#
# Mutates `tokens` in place.
!function addImplicitParentheses tokens
  # `brackets` records, for each open `[`, whether it follows a DOT.
  i = 0; brackets = []
  while token = tokens[++i]
    if token.1 is \do and tokens[i+1]0 is \INDENT
      endi = indexOfPair tokens, i+1
      if tokens[endi+1]0 is \NEWLINE and tokens[endi+2]?0 is \WHILE
        # `do` block followed by `while`: a do-while loop.
        token.0 = \DO
        tokens[endi+2]done = true
        tokens.splice endi+1 1
      else
        # Otherwise the block's INDENT/DEDENT become parentheses
        # and the `do` token itself is removed.
        (token = tokens[1+ i])0 = \(
        (tpair = tokens[endi])0 = \)
        token.doblock = true
        tokens.splice i, 1
    [tag] = token; prev = tokens[i-1]
    tag is \[ and brackets.push prev.0 is \DOT
    if prev.0 is \]
      if brackets.pop! then prev.index = true else continue
    if tag is \BITWISE and token.1 is \& and not exp tokens[i+1]
      tag = token.0 = \LITERAL
    # An implicit call needs a callee followed by whitespace.
    continue unless prev.0 of <[ FUNCTION LET ]>
                 or prev.spaced and able tokens, i, true
    if token.doblock
      token.0 = \CALL(
      tpair.0 = \)CALL
      continue
    continue unless exp token
    if tag is \CREMENT
      continue if token.spaced or tokens[i+1]0 not of CHAIN
    skipBlock = seenSwitch = false
    tokens.splice i++, 0 [\CALL( '' token.2]
    detectEnd tokens, i, ok, go
  # Does the token start an expression?
  function exp [tag]:token
    tag of ARG or not token.spaced and tag of <[ +- ^ ]>
  # Does `token` at index `i` end the implicit call?
  function ok token, i
    tag = token.0
    return true if tag of <[ |> POST_IF ]>
    unless skipBlock
      return true if token.alias and token.1 of <[ && || ]>
                  or tag of <[ TO BY IMPLEMENTS ]>
    pre = tokens[i-1]
    switch tag
    case \NEWLINE
      return pre.0 is not \,
    case \DOT \?
      return not skipBlock and (pre.spaced or pre.0 is \DEDENT)
    case \SWITCH                         then seenSwitch := true; fallthrough
    case \IF \CLASS \FUNCTION \LET \WITH then skipBlock  := true
    case \CASE
      if seenSwitch then skipBlock := true else return true
    case \INDENT
      return skipBlock := false if skipBlock
      return pre.0 not of BLOCK_USERS
    case \WHILE
      return false if token.done
      fallthrough
    case \FOR
      skipBlock := true
      return able tokens, i
          or pre.0 is \CREMENT
          or pre.0 is \... and pre.spaced
    false
  # Closes the implicit call right before the terminating token.
  !function go token, i then tokens.splice i, 0 [\)CALL '' tokens[i-1]2]

# Object literals may be written without braces for simple cases.
# Insert the missing braces here to aid the parser.
#
# Mutates `tokens` in place.
!function addImplicitBraces tokens
  # `stack` tracks open pairs as `[tag, index]` entries.
  stack = []; i = 0
  while token = tokens[++i]
    unless \: is tag = token.0
      switch
      case tag of CLOSERS then start = stack.pop!
      case tag of OPENERS
        tag = \{ if tag is \INDENT and tokens[i-1]0 is \{
        stack.push [tag, i]
      continue
    # A colon: locate the start of its key, which is either the previous
    # token or a parenthesized expression that just closed.
    paren = tokens[i-1]0 is \)
    index = if paren then start.1 else i-1
    pre   = tokens[index-1]
    # Nothing to do when already directly inside explicit braces.
    continue unless pre.0 of <[ : ASSIGN IMPORT ]> or stack[*-1]?0 is not \{
    stack.push [\{]
    inline = not pre.doblock and pre.0 not of <[ NEWLINE INDENT ]>
    # Put the brace in front of any comments preceding the key.
    index -= 2 while tokens[index-2]?0 is \COMMENT
    tokens.splice index, 0 [\{ \{ tokens[index]2]
    detectEnd tokens, ++i+1, ok, go
  # Does `token` at index `i` end the implicit object?
  function ok token, i
    switch tag = token.0
    case \,       then break
    case \NEWLINE then return true if inline
    case \DEDENT  then return true
    case \POST_IF \FOR \WHILE \|> then return inline
    default return false
    # After a comma or newline, the object continues only if
    # another `key:` follows.
    t1 = tokens[i+1]?0
    t1 is not (if tag is \, then \NEWLINE else \COMMENT) and
    \: is not tokens[if t1 is \( then 1 + indexOfPair tokens, i+1 else i+2]?0
  # Inserts the closing brace before the terminating token.
  !function go token, i then tokens.splice i, 0 [\} '' token.2]

# - Slip unary {pl,min}uses off signed numbers.
# - Expand ranges and words.
# - Insert `()` after each `function`/`let` facing a block.
# - Insert `, ` after each non-callable token facing an argument token.
#
# Mutates `tokens` in place; carps on malformed or oversized ranges.
!function expandLiterals tokens
  i = 0
  while token = tokens[++i]
    switch token.0
    case \STRNUM
      # Split a leading sign off into its own `+-` token.
      if ~'-+'indexOf sig = token.1.charAt 0
        token.1.=slice 1
        tokens.splice i++ 0 [\+- sig, token.2]
      continue if token.callable
    case \RANGE
      next = tokens[i+1]
      lno  = token.2
      [from, char] = decode token.1, lno
      [to, tochar] = next.0 is \STRNUM and decode next.1, lno
      # Both ends must be of the same kind (numbers or characters).
      carp 'bad "to" in range' lno if to!? or char ^ tochar
      by = 1
      if byp = tokens[i+2]?0 is \RANGE_BY
        carp 'bad "by" in range' tokens[i+2]2 unless by = +tokens[i+3]?1
      else if from > to
        by = -1
      ts  = []
      enc = if char then character else String
      # Each element is pushed together with a comma; the total number of
      # tokens generated is capped at 0x10000.
      add = !->
         if 0x10000 < ts.push [\STRNUM enc n; lno] [\, \, lno]
           carp 'range limit exceeded' lno
      if token.op is \to
      then add! for n from from to  to by by
      else add! for n from from til to by by
      # Drop the trailing comma; no elements at all is an error.
      ts.pop! or carp 'empty range' lno
      # Replace the range tokens (and the `by` pair, if any).
      tokens.splice i, 2 + 2 * byp, ...ts
      i += ts.length - 1
    case \WORDS
      ts = [[\[ \[ lno = token.2]]
      for word of token.1.slice 2 -2 .match /\S+/g or ''
        ts.push [\STRNUM; string \', word, lno; lno] [\, \, lno]
      tokens.splice i, 1, ...ts, [\] \] lno]
      i += ts.length
    case \INDENT
      if tokens[i-1]
        if that.1 is \new
          # `new` facing a block: insert an empty parameter list and `->`.
          tokens.splice i++ 0 \
            [\PARAM( '' token.2] [\)PARAM '' token.2] [\-> '' token.2]
        else if that.0 of <[ FUNCTION LET ]>
          tokens.splice i, 0 [\CALL( '' token.2] [\)CALL '' token.2]
          i += 2
      continue
    # The remaining cases select the tokens after which an implicit comma
    # may be inserted (see below); all others `continue`.
    case \LITERAL \} \!? then break
    case \) \)CALL then continue if token.1
    case \]        then continue if token.index
    case \CREMENT  then continue unless able tokens, i
    default continue
    if token.spaced and tokens[i+1]0 of ARG
      tokens.splice ++i, 0 [\, \, token.2]

# Seeks `tokens` from `i`ndex and `go` for a token of the same level
# that's `ok` or an unmatched closer.
#
# - `ok(token, i)`: predicate, consulted only at nesting level zero.
# - `go(token, i)`: callback invoked once with the terminating token.
#
# Does nothing if the end of `tokens` is reached first.
!function detectEnd tokens, i, ok, go
  levels = 0
  while token = tokens[i], ++i
    return go token, i if not levels and ok token, i
    [tag] = token
    # Openers raise the level and closers lower it; going below zero
    # means the enclosing pair has been closed.
    return go token, i if 0 > levels += tag of OPENERS or -(tag of CLOSERS)

# Finds the index of the token that closes the pair opened at `tokens[i]`,
# skipping over nested pairs of the same kind.
# Returns `-1` when the tokens end before the pair is closed.
function indexOfPair tokens, i
  opener = tokens[i]0
  closer = INVERSES[opener]
  depth  = 1
  while tok = tokens[++i]
    tag = tok.0
    if tag is opener
      ++depth
    else if tag is closer
      return i unless --depth
  -1

#### Constants

##### Keywords

# Keywords that Coco shares in common with JavaScript.
KEYWORDS_SHARED = <[
  true false null this void super return throw break continue
  if else for while switch case default try catch finally
  function class extends implements new do delete typeof in instanceof
  let with var const import export debugger
]>

# The list of keywords that are reserved by JavaScript, but not used.
# We throw a syntax error for these to avoid runtime errors.
KEYWORDS_UNUSED =
  <[ enum interface package private protected public static yield ]>

# Both lists combined into one: the shared keywords first,
# followed by the unused ones.
KEYWORDS = KEYWORDS_SHARED.concat KEYWORDS_UNUSED

##### Regexes
# Some of these are given `g` flag and made sure to match empty string
# so that they can lex from any index by receiving `.lastIndex` beforehand.

# An identifier, matched case-insensitively.
#
#  - Group 1 is the name: a non-whitespace start character (letter, `_`, `$`
#    or anything in `\xAA-\uFFDC`) followed by word characters, `$`,
#    that same range, or a hyphen immediately followed by a letter.
#  - Group 2, when present, is a trailing `:` (with optional horizontal
#    whitespace before it) that is not the start of `::` or `:=`.
#
# The empty last alternative lets the pattern match the empty string.
ID = //
  ( (?!\s)[a-z_$\xAA-\uFFDC](?:(?!\s)[\w$\xAA-\uFFDC]|-[a-z])* )
  ( [^\n\S]* : (?![:=]) )?  # Is this a property name?
|//ig
# Operators and punctuation. Alternatives are tried top to bottom, so the
# multi-character forms listed first win over the catch-all at the end,
# which takes any single character other than whitespace or `#`,
# or nothing at all.
SYMBOL = //
  [-+*/%&|^:]=               # compound assign
| \.{1,3}                    # dot / `constructor` / splat/placeholder/yada*3
| ([-+&|@:])\1               # crement / logic / `prototype` / `arguments`
| [-~=|]>                    # (bound-)function / inlinedent / pipe
| [!=]==?                    # equality
| <(?: <(?:=|<{0,2}) | [-~>]
     | \[(?:[\s\S]*?\]>)? )  # left shift / import / backcall / lef / words
| >>>?=?                     # rite shift
| [<>]\??=?                  # {less,greater}-than(-or-equal-to) / min/max
| !\?                        # inexistence
| \*\*=?                     # pow
| [^\s#]?
//g
# Horizontal whitespace (any whitespace except a newline), optionally
# followed by a `#` comment running to the end of the line.
SPACE     = /[^\n\S]*(?:#.*)?/g
# Zero or more `#` comments, each with any leading whitespace (newlines
# included), then zero or more newlines each followed by horizontal
# whitespace. Group 1 holds the whitespace after the last such newline.
MULTIDENT = /(?:\s*#.*)*(?:\n([^\n\S]*))*/g
# A single-quoted string in which a backslash escapes whatever character
# follows it (newlines included), or the empty string.
SIMPLESTR = /'[^\\']*(?:\\[\s\S][^\\']*)*'|/g
# A backslash followed by either a word (captured in group 1: a non-space
# character, then anything up to whitespace or one of `,;)}]`)
# or a possibly empty run of whitespace.
BSTOKEN   = // \\ (?: (\S[^\s,;)}\]]*) | \s* ) //g
# Text delimited by equal-length runs of backticks, or the empty string.
# Group 1 is the opening run; group 2 is the content, which must not start
# with a backtick and ends at the first repetition of group 1.
JSTOKEN   = /(`+)([^`][\s\S]*?)\1|/g

# A numeric literal in one of three forms, or the empty string.
#
#  - Hexadecimal: `0x` followed by hex digits, underscores allowed
#    after the first digit.
#  - Radix: group 1 is the base (2 to 36), then `r`, then group 2 as
#    the digits.
#  - Decimal: group 3 is the whole number, group 4 its integer part and
#    group 5 the optional fraction; an exponent may follow. Any `$` or word
#    characters trailing the number are consumed but not captured.
NUMBER = //
  0x[\dA-Fa-f][\dA-Fa-f_]*                 # hex
| ([2-9]|[12]\d|3[0-6]) r ([\dA-Za-z]\w*)  # 2~36 base
| ( (\d[\d_]*)(\.\d[\d_]*)? (?:e[+-]?\d[\d_]*)? ) [$\w]*
|//g
# Runs of underscores -- presumably the digit separators to drop from a
# matched number (the use site is not visible here; confirm there).
NUMBER_OMIT = /_+/g

# A regex literal confined to a single line, or the empty string.
#
#  - Group 1 is the body between the slashes: escaped characters and
#    bracketed character classes are skipped as units, so a `/` inside
#    either does not end the literal.
#  - Group 2 is what follows the closing slash: one to four of the letters
#    `gimy`, or an optional `$`.
REGEX = //
  /( [^ [ / \n \\ ]*                              # every other thing
     (?: (?: \\.                                  # anything escaped
           | \[ [^\]\n\\]* (?:\\.[^\]\n\\]*)* \]  # or character class
         ) [^ [ / \n \\ ]*                        # every other thing again
     )*
  )/ ([gimy]{1,4}|\$?)
|//g
# A run of whitespace, optionally followed by a `#` comment to the end of
# the line -- presumably the parts stripped from a multiline regex body
# (the use site is not visible here; confirm there).
HEREGEX_OMIT = /\s+(?:#.*)?/g

# A newline followed only by horizontal whitespace up to the end of
# the string, i.e. the indentation of a trailing blank line.
LASTDENT   = /\n[^\n\S]*$/
# Horizontal whitespace, then optionally one character that is neither
# `#` nor whitespace. Can match the empty string.
INLINEDENT = /[^\n\S]*[^#\s]?/g

# Any single character in the range U+0080 to U+FFFF.
NONASCII = /[\x80-\uFFFF]/

##### Tokens

# Tokens that signal the start/end of a balanced pair.
# The two lists are index-aligned: `OPENERS[i]` is closed by `CLOSERS[i]`,
# so any addition must be made to both at the same position.
OPENERS = <[ ( [ { CALL( PARAM( INDENT ]>
CLOSERS = <[ ) ] } )CALL )PARAM DEDENT ]>

# The inverse mappings of {OPEN,CLOS}ERS to look things up from either end.
# A single object on which every opener maps to its closer
# and every closer maps back to its opener.
INVERSES = new -> @[@[o] = CLOSERS[i]] = o for o, i of OPENERS

# Tokens that can start a dot/call chain.
CHAIN = <[ ( { [ ID STRNUM LITERAL LET WITH WORDS ]>

# Tokens that can start an argument list.
# Everything in `CHAIN` qualifies, plus the tags listed here.
ARG = CHAIN.concat <[ ... UNARY CREMENT PARAM( FUNCTION
                      IF SWITCH TRY CLASS RANGE LABEL DECL DO ]>

# Tokens that expect INDENT on the right.
BLOCK_USERS = <[ , : -> ELSE ASSIGN IMPORT UNARY DEFAULT TRY CATCH FINALLY
                 HURL DECL DO LET FUNCTION ]>