no-context · tjvr · Sep 19, 2018 · Sep 16, 2018 · Sep 16, 2018 · Sep 16, 2018
diff --git a/README.md b/README.md
@@ -175,13 +175,13 @@ Moo makes it convenient to define literals.
 
 It'll automatically compile them into regular expressions, escaping them where necessary.
 
-**Keywords** should be written using the `keywords` attribute.
+**Keywords** should be written using the `moo.keywords` transform, passed as the rule's `type`.
 
 ```js
     moo.compile({
-      IDEN: {match: /[a-zA-Z]+/, keywords: {
+      IDEN: {match: /[a-zA-Z]+/, type: moo.keywords({
         KW: ['while', 'if', 'else', 'moo', 'cows'],
-      }},
+      })},
       SPACE: {match: /\s+/, lineBreaks: true},
     })
 ```
@@ -209,11 +209,11 @@ Keywords can also have **individual types**.
 
 ```js
     let lexer = moo.compile({
-      name: {match: /[a-zA-Z]+/, keywords: {
+      name: {match: /[a-zA-Z]+/, type: moo.keywords({
         'kw-class': 'class',
         'kw-def': 'def',
         'kw-if': 'if',
-      }},
+      })},
       // ...
     })
     lexer.reset('def foo')

diff --git a/moo.js b/moo.js
@@ -93,15 +93,15 @@
         }
         continue
       }
-      if (!obj.name) {
-        throw new Error('Rule has no name: ' + JSON.stringify(obj))
+      if (!obj.type) {
+        throw new Error('Rule has no type: ' + JSON.stringify(obj))
       }
-      result.push(ruleOptions(obj.name, obj))
+      result.push(ruleOptions(obj.type, obj))
     }
     return result
   }
 
-  function ruleOptions(name, obj) {
+  function ruleOptions(type, obj) {
     if (!isObject(obj)) {
       obj = { match: obj }
     }
@@ -111,15 +111,15 @@
 
     // nb. error and fallback imply lineBreaks
     var options = {
-      tokenType: name,
+      defaultType: type,
       lineBreaks: !!obj.error || !!obj.fallback,
       pop: false,
       next: null,
       push: null,
       error: false,
       fallback: false,
       value: null,
-      getType: null,
+      type: null,
       shouldThrow: false,
     }
 
@@ -130,16 +130,18 @@
       }
     }
 
+    // type transform cannot be a string
+    if (typeof options.type === 'string' && type !== options.type) {
+      throw new Error("Type transform cannot be a string (type '" + options.type + "' for token '" + type + "')")
+    }
+
     // convert to array
     var match = options.match
     options.match = Array.isArray(match) ? match : match ? [match] : []
     options.match.sort(function(a, b) {
       return isRegExp(a) && isRegExp(b) ? 0
            : isRegExp(b) ? -1 : isRegExp(a) ? +1 : b.length - a.length
     })
-    if (options.keywords) {
-      options.getType = keywordTransform(options.keywords)
-    }
     return options
   }
 
@@ -166,9 +168,9 @@
         // errorRule can only be set once
         if (errorRule) {
           if (!options.fallback === !errorRule.fallback) {
-            throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.tokenType + "')")
+            throw new Error("Multiple " + (options.fallback ? "fallback" : "error") + " rules not allowed (for token '" + options.defaultType + "')")
           } else {
-            throw new Error("fallback and error are mutually exclusive (for token '" + options.tokenType + "')")
+            throw new Error("fallback and error are mutually exclusive (for token '" + options.defaultType + "')")
           }
         }
         errorRule = options
@@ -185,10 +187,10 @@
       // Warn about inappropriate state-switching options
       if (options.pop || options.push || options.next) {
         if (!hasStates) {
-          throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.tokenType + "')")
+          throw new Error("State-switching options are not allowed in stateless lexers (for token '" + options.defaultType + "')")
         }
         if (options.fallback) {
-          throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.tokenType + "')")
+          throw new Error("State-switching options are not allowed on fallback tokens (for token '" + options.defaultType + "')")
         }
       }
 
@@ -244,10 +246,10 @@
   function checkStateGroup(g, name, map) {
     var state = g && (g.push || g.next)
     if (state && !map[state]) {
-      throw new Error("Missing state '" + state + "' (in token '" + g.tokenType + "' of state '" + name + "')")
+      throw new Error("Missing state '" + state + "' (in token '" + g.defaultType + "' of state '" + name + "')")
     }
     if (g && g.pop && +g.pop !== 1) {
-      throw new Error("pop must be 1 (in token '" + g.tokenType + "' of state '" + name + "')")
+      throw new Error("pop must be 1 (in token '" + g.defaultType + "' of state '" + name + "')")
     }
   }
   function compileStates(states, start) {
@@ -342,7 +344,7 @@
       source += '}\n'
     }
     source += '}\n'
-    return Function('value', source) // getType
+    return Function('value', source) // type
   }
 
   /***************************************************************************/
@@ -500,8 +502,8 @@
     }
 
     var token = {
-      type: (group.getType && group.getType(text)) || group.tokenType,
-      value: group.value ? group.value(text) : text,
+      type: (typeof group.type === 'function' && group.type(text)) || group.defaultType,
+      value: typeof group.value === 'function' ? group.value(text) : text,
       text: text,
       toString: tokenToString,
       offset: offset,
@@ -558,19 +560,7 @@
   }
 
   Lexer.prototype.has = function(tokenType) {
-    for (var s in this.states) {
-      var state = this.states[s]
-      if (state.error && state.error.tokenType === tokenType) return true
-      var groups = state.groups
-      for (var i = 0; i < groups.length; i++) {
-        var group = groups[i]
-        if (group.tokenType === tokenType) return true
-        if (group.keywords && hasOwnProperty.call(group.keywords, tokenType)) {
-          return true
-        }
-      }
-    }
-    return false
+    return true // NOTE(review): always true, ignoring tokenType — presumably because a function-valued `type` can emit any type name, so membership cannot be decided up front; confirm
   }
 
 
@@ -579,6 +569,7 @@
     states: compileStates,
     error: Object.freeze({error: true}),
     fallback: Object.freeze({fallback: true}),
+    keywords: keywordTransform,
   }
 
 }));
diff --git a/test/test.js b/test/test.js
@@ -110,10 +110,10 @@ describe('compiler', () => {
 
   test('accepts rules in an array', () => {
     const lexer = compile([
-      { name: 'keyword', match: 'Bob'},
-      { name: 'word', match: /[a-z]+/},
-      { name: 'number', match: /[0-9]+/},
-      { name: 'space', match: / +/},
+      { type: 'keyword', match: 'Bob'},
+      { type: 'word', match: /[a-z]+/},
+      { type: 'number', match: /[0-9]+/},
+      { type: 'space', match: / +/},
     ])
     lexer.reset('Bob ducks are 123 bad')
     expect(lexer.next()).toMatchObject({type: 'keyword', value: 'Bob'})
@@ -304,22 +304,22 @@ describe('keywords', () => {
     }
 
     check(compile({
-      identifier: {match: /[a-zA-Z]+/, keywords: {keyword: 'class'}},
+      identifier: {match: /[a-zA-Z]+/, type: moo.keywords({keyword: 'class'})},
     }))
     check(compile({
-      identifier: {match: /[a-zA-Z]+/, keywords: {keyword: ['class']}},
+      identifier: {match: /[a-zA-Z]+/, type: moo.keywords({keyword: ['class']})},
     }))
   })
 
   test('keywords can have individual tokenTypes', () => {
     let lexer = compile({
       identifier: {
         match: /[a-zA-Z]+/,
-        keywords: {
+        type: moo.keywords({
           'kw-class': 'class',
           'kw-def': 'def',
           'kw-if': 'if',
-        },
+        }),
       },
       space: {match: /\s+/, lineBreaks: true},
     })
@@ -335,15 +335,95 @@ describe('keywords', () => {
     expect(() => compile({
       identifier: {
         match: /[a-zA-Z]+/,
-        keywords: {
+        type: moo.keywords({
           'kw-class': {foo: 'bar'},
-        },
+        }),
       },
     })).toThrow("keyword must be string (in keyword 'kw-class')")
   })
 
 })
 
+describe('type transforms', () => {
+
+  test('can use moo.keywords as type', () => {
+    let lexer = compile({
+      identifier: {
+        match: /[a-zA-Z]+/,
+        type: moo.keywords({
+          'kw-class': 'class',
+          'kw-def': 'def',
+          'kw-if': 'if',
+        }),
+      },
+      space: {match: /\s+/, lineBreaks: true},
+    })
+    lexer.reset('foo def')
+    expect(Array.from(lexer).map(t => t.type)).toEqual([
+        'identifier',
+        'space',
+        'kw-def',
+    ])
+  })
+
+  test('type can be a function', () => {
+    let lexer = compile({
+      identifier: {
+        match: /[a-zA-Z]+/,
+        type: () => 'moo',
+      },
+    })
+    lexer.reset('baa')
+    expect(lexer.next()).toMatchObject({ type: 'moo' })
+  })
+
+  test('supports case-insensitive keywords', () => {
+    const caseInsensitiveKeywords = map => {
+      const transform = moo.keywords(map)
+      return text => transform(text.toLowerCase())
+    }
+    let lexer = compile({
+      space: ' ',
+      identifier: {
+        match: /[a-zA-Z]+/,
+        type: caseInsensitiveKeywords({
+          keyword: ['moo'],
+        }),
+      },
+    })
+    lexer.reset('mOo')
+    expect(lexer.next()).toMatchObject({ type: 'keyword', value: 'mOo' })
+    lexer.reset('cheese')
+    expect(lexer.next()).toMatchObject({ type: 'identifier', value: 'cheese'})
+  })
+
+  test('cannot set type to a string', () => {
+    expect(() => compile({
+      identifier: {
+        type: 'moo',
+      },
+    })).toThrow("Type transform cannot be a string (type 'moo' for token 'identifier')")
+  })
+
+  test('can be used in an array', () => {
+    const lexer = compile([
+      { type: (name) => 'word-' + name, match: /[a-z]+/},
+      { type: 'space', match: / +/},
+    ])
+    lexer.reset('foo ')
+    expect(lexer.next()).toMatchObject({type: 'word-foo', value: 'foo'})
+    expect(lexer.next()).toMatchObject({type: 'space', value: ' '})
+  })
+
+  test('may result in questionable errors', () => {
+    const myTransform = function() {}
+    expect(() => compile([
+      { type: myTransform, next: 'moo'},
+    ])).toThrow("State-switching options are not allowed in stateless lexers (for token 'function () {}')")
+  })
+
+})
+
 describe('value transforms', () => {
 
   test('forbid capture groups', () => {
@@ -454,7 +534,7 @@ describe('lexer', () => {
     // TODO: why does toString() return the value?
     const lexer = compile({
       apples: 'a',
-      name: {match: /[a-z]/, keywords: { kw: ['m'] }},
+      name: {match: /[a-z]/, type: moo.keywords({ kw: ['m'] })},
     }).reset('azm')
     expect(String(lexer.next())).toBe('a')
     expect(String(lexer.next())).toBe('z')
@@ -498,27 +578,22 @@ describe('Lexer#has', () => {
     expect(basicLexer.has('error')).toBe(true)
   })
 
-  test('returns false for nonexistent junk', () => {
-    expect(basicLexer.has('random')).toBe(false)
-  })
-
-  test('returns false for stuff inherited from Object', () => {
-    expect(basicLexer.has('hasOwnProperty')).toBe(false)
+  test('returns true even for nonexistent junk', () => {
+    expect(basicLexer.has('random')).toBe(true)
   })
 
   const keywordLexer = compile({
     identifier: {
       match: /[a-zA-Z]+/,
-      keywords: {
+      type: moo.keywords({
         'kw-class': 'class',
         'kw-def': 'def',
         'kw-if': 'if',
-      },
+      }),
     },
   })
 
-  test('works with keywords', () => {
-    expect(keywordLexer.has('identifier')).toBe(true)
+  test("returns true even for keywords", () => {
     expect(keywordLexer.has('kw-class')).toBe(true)
   })
 
@@ -550,20 +625,12 @@ describe('Lexer#has', () => {
     expect(statefulLexer.has('interp')).toEqual(true)
   })
 
-	test('works with error tokens - for first state', () => {
-		expect(statefulLexer.has('mainErr')).toEqual(true)
-	})
-
-	test('works with error tokens - for second state', () => {
-		expect(statefulLexer.has('litErr')).toEqual(true)
-	})
-
-  test('returns false for the state names themselves', () => {
-    expect(statefulLexer.has('main')).toEqual(false)
+  test('works with error tokens - for first state', () => {
+    expect(statefulLexer.has('mainErr')).toEqual(true)
   })
 
-  test('returns false for stuff inherited from Object when using states', () => {
-    expect(statefulLexer.has('toString')).toEqual(false)
+  test('works with error tokens - for second state', () => {
+    expect(statefulLexer.has('litErr')).toEqual(true)
   })
 
 })
@@ -857,7 +924,6 @@ describe('errors', () => {
       digits: /[0-9]+/,
       error: moo.error,
     })
-    expect(lexer.error).toMatchObject({tokenType: 'error'})
     lexer.reset('123foo')
     expect(lexer.next()).toMatchObject({type: 'digits', value: '123'})
     expect(lexer.next()).toMatchObject({type: 'error', value: 'foo', offset: 3})