mapbox · miccolis · Dec 6, 2018 · Nov 30, 2018 · Dec 3, 2018 · Dec 6, 2018
diff --git a/API.md b/API.md
@@ -79,7 +79,7 @@ slightly in specific field names and types.
 
 -   `phrase` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The matched string
 -   `weight` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** A float between 0 and 1 representing how much of the query this string covers
--   `prefix` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** whether or not to do a prefix scan (as opposed to an exact match scan); used for autocomplete
+-   `prefix` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** whether to do an exact match (0), a prefix scan (1), or a word boundary scan (2); used for autocomplete
 -   `idx` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** an identifier of the index the match came from; opaque to carmen-cache but returned in results
 -   `zoom` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** the configured tile zoom level for the index
 -   `mask` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** a bitmask representing which tokens in the original query the subquery covers
@@ -142,7 +142,7 @@ Retrieves data exactly matching phrase and language settings by id
 **Parameters**
 
 -   `id` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 
--   `matches_prefixes` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** : T if it matches exactly, F: if it does not
+-   `matches_prefixes` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** whether to do an exact match (0), a prefix scan (1), or a word boundary scan (2); used for autocomplete
 -   `optional` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)** ; array of languages
 
 **Examples**
@@ -185,7 +185,7 @@ and with a relevance penalty applied to languages that don't match those request
 **Parameters**
 
 -   `id` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 
--   `matches_prefixes` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** : T if it matches exactly, F: if it does not
+-   `matches_prefixes` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** whether to do an exact match (0), a prefix scan (1), or a word boundary scan (2); used for autocomplete
 -   `optional` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)** ; array of languages
 
 **Examples**
@@ -368,7 +368,7 @@ Retrieves data exactly matching phrase and language settings by id
 **Parameters**
 
 -   `id` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 
--   `matches_prefixes` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** : T if it matches exactly, F: if it does not
+-   `matches_prefixes` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** whether to do an exact match (0), a prefix scan (1), or a word boundary scan (2); used for autocomplete
 -   `optional` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)** ; array of languages
 
 **Examples**
@@ -390,7 +390,7 @@ Retrieves grid that at least partially matches phrase and/or language inputs
 **Parameters**
 
 -   `id` **[String](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** 
--   `matches_prefixes` **[Boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)** : T if it matches exactly, F: if it does not
+-   `matches_prefixes` **[Number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** whether to do an exact match (0), a prefix scan (1), or a word boundary scan (2); used for autocomplete
 -   `optional` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)** ; array of languages
 
 **Examples**

diff --git a/bench/coalesce.bench.test.js b/bench/coalesce.bench.test.js
@@ -14,7 +14,7 @@ const test = require('tape');
         zoom: 14,
         weight: 1,
         phrase: '3848571113',
-        prefix: false,
+        prefix: 0,
         mask: 1 << 0
     }];
     test('coalesceSingle', (t) => {
@@ -84,15 +84,15 @@ const test = require('tape');
         zoom: 12,
         weight: 0.25,
         phrase: '1965155344',
-        prefix: false
+        prefix: 0
     }, {
         cache: b,
         mask: 1 << 1,
         idx: 1,
         zoom: 14,
         weight: 0.75,
         phrase: '3848571113',
-        prefix: false
+        prefix: 0
     }];
     test('coalesceMulti', (t) => {
         const time = +new Date;

diff --git a/index.js b/index.js
@@ -1,2 +1,8 @@
 'use strict';
 exports = module.exports = require('./lib/carmen.node');
+
+exports.PREFIX_SCAN = {
+    disabled: 0,
+    enabled: 1,
+    word_boundary: 2
+};
diff --git a/src/binding.cpp b/src/binding.cpp
@@ -225,7 +225,6 @@ NAN_METHOD(JSCache<MemoryCache>::New) {
   * @name get
   * @memberof JSCache
   * @param {String} id
-  * @param {Boolean} matches_prefixes: T if it matches exactly, F: if it does not
   * @param {Array} optional; array of languages
   * @returns {Array} integers referring to grids
   * @example
@@ -287,7 +286,7 @@ NAN_METHOD(JSCache<T>::_get) {
  * @name get
  * @memberof JSCache
  * @param {String} id
- * @param {Boolean} matches_prefixes: T if it matches exactly, F: if it does not
+ * @param {Number} matches_prefix - whether to do an exact match (0), a prefix scan (1), or a word boundary scan (2); used for autocomplete
  * @param {Array} optional; array of languages
  * @returns {Array} integers referring to grids
  * @example
@@ -307,8 +306,8 @@ NAN_METHOD(JSCache<T>::_getmatching) {
     if (!info[0]->IsString()) {
         return Nan::ThrowTypeError("first arg must be a String");
     }
-    if (!info[1]->IsBoolean()) {
-        return Nan::ThrowTypeError("second arg must be a Bool");
+    if (!info[1]->IsNumber()) {
+        return Nan::ThrowTypeError("second arg must be an integer between 0 - 2");
     }
     try {
         Nan::Utf8String utf8_id(info[0]);
@@ -317,12 +316,16 @@ NAN_METHOD(JSCache<T>::_getmatching) {
         }
         std::string id(*utf8_id);
 
-        bool match_prefixes = info[1]->BooleanValue();
+        int32_t int32_prefix = info[1]->Int32Value();
+        if (int32_prefix < 0 || int32_prefix > 2) {
+            return Nan::ThrowTypeError("second arg must be an integer between 0 - 2");
+        }
+        PrefixMatch match_prefixes = static_cast<PrefixMatch>(int32_prefix);
 
         langfield_type langfield;
         if (info.Length() > 2 && !(info[2]->IsNull() || info[2]->IsUndefined())) {
             if (!info[2]->IsArray()) {
-                return Nan::ThrowTypeError("third arg, if supplied must be an Array");
+                return Nan::ThrowTypeError("third arg, if supplied, must be an Array");
             }
             langfield = langarrayToLangfield(Local<Array>::Cast(info[2]));
         } else {
@@ -419,7 +422,7 @@ NAN_METHOD(JSCache<MemoryCache>::_set) {
  * @type {Object}
  * @property {String} phrase - The matched string
  * @property {Number} weight - A float between 0 and 1 representing how much of the query this string covers
- * @property {Boolean} prefix - whether or not to do a prefix scan (as opposed to an exact match scan); used for autocomplete
+ * @property {Number} prefix - whether to do an exact match (0), a prefix scan (1), or a word boundary scan (2); used for autocomplete
  * @property {Number} idx - an identifier of the index the match came from; opaque to carmen-cache but returned in results
  * @property {Number} zoom - the configured tile zoom level for the index
  * @property {Number} mask - a bitmask representing which tokens in the original query the subquery covers
@@ -514,7 +517,7 @@ NAN_METHOD(JSCoalesce) {
 
             double weight;
             std::string phrase;
-            bool prefix;
+            PrefixMatch prefix;
             unsigned short idx;
             unsigned short zoom;
             uint32_t mask;
@@ -581,10 +584,15 @@ NAN_METHOD(JSCoalesce) {
                 return Nan::ThrowTypeError("missing prefix property");
             } else {
                 Local<Value> prop_val = jsStack->Get(Nan::New("prefix").ToLocalChecked());
-                if (!prop_val->IsBoolean()) {
-                    return Nan::ThrowTypeError("prefix value must be a boolean");
+                if (!prop_val->IsNumber()) {
+                    return Nan::ThrowTypeError("prefix value must be a integer between 0 - 2");
+                }
+
+                int32_t int32_prefix = prop_val->Int32Value();
+                if (int32_prefix < 0 || int32_prefix > 2) {
+                    return Nan::ThrowTypeError("prefix value must be a integer between 0 - 2");
                 }
-                prefix = prop_val->BooleanValue();
+                prefix = static_cast<PrefixMatch>(int32_prefix);
             }
 
             if (!jsStack->Has(Nan::New("mask").ToLocalChecked())) {

diff --git a/src/cpp_util.hpp b/src/cpp_util.hpp
@@ -43,6 +43,12 @@ class noncopyable {
     noncopyable& operator=(noncopyable const&) = delete;
 };
 
+typedef enum {
+    disabled,
+    enabled,
+    word_boundary
+} PrefixMatch;
+
 typedef unsigned __int128 langfield_type;
 constexpr uint64_t LANGUAGE_MATCH_BOOST = static_cast<const uint64_t>(1) << 63;
 
@@ -70,7 +76,7 @@ struct PhrasematchSubq {
                     char t,
                     double w,
                     std::string p,
-                    bool pf,
+                    PrefixMatch pf,
                     unsigned short i,
                     unsigned short z,
                     uint32_t m,
@@ -87,7 +93,7 @@ struct PhrasematchSubq {
     char type;
     double weight;
     std::string phrase;
-    bool prefix;
+    PrefixMatch prefix;
     unsigned short idx;
     unsigned short zoom;
     uint32_t mask;

diff --git a/src/memorycache.cpp b/src/memorycache.cpp
@@ -18,11 +18,11 @@ intarray MemoryCache::__get(const std::string& phrase, langfield_type langfield)
     return array;
 }
 
-intarray MemoryCache::__getmatching(const std::string& phrase_ref, bool match_prefixes, langfield_type langfield) {
+intarray MemoryCache::__getmatching(const std::string& phrase_ref, PrefixMatch match_prefixes, langfield_type langfield) {
     intarray array;
     std::string phrase = phrase_ref;
 
-    if (!match_prefixes) phrase.push_back(LANGFIELD_SEPARATOR);
+    if (match_prefixes == PrefixMatch::disabled) phrase.push_back(LANGFIELD_SEPARATOR);
     size_t phrase_length = phrase.length();
     const char* phrase_data = phrase.data();
     // Load values from memory cache
@@ -34,6 +34,12 @@ intarray MemoryCache::__getmatching(const std::string& phrase_ref, bool match_pr
         if (item_length < phrase_length) continue;
 
         if (memcmp(phrase_data, item_data, phrase_length) == 0) {
+            if (match_prefixes == PrefixMatch::word_boundary) {
+                size_t end = phrase_length;
+                if (item_data[end] != LANGFIELD_SEPARATOR && item_data[end] != ' ') {
+                    continue;
+                }
+            }
             langfield_type message_langfield = extract_langfield(item.first);
 
             if ((message_langfield & langfield) != 0u) {

diff --git a/src/memorycache.hpp b/src/memorycache.hpp
@@ -16,10 +16,10 @@ class MemoryCache {
     void _set(std::string key_id, std::vector<uint64_t>, langfield_type langfield, bool append);
 
     std::vector<uint64_t> _get(std::string& phrase, std::vector<uint64_t> languages);
-    std::vector<uint64_t> _getmatching(std::string phrase, bool match_prefixes, std::vector<uint64_t> languages);
+    std::vector<uint64_t> _getmatching(std::string phrase, PrefixMatch match_prefixes, std::vector<uint64_t> languages);
 
     std::vector<uint64_t> __get(const std::string& phrase, langfield_type langfield);
-    std::vector<uint64_t> __getmatching(const std::string& phrase_ref, bool match_prefixes, langfield_type langfield);
+    std::vector<uint64_t> __getmatching(const std::string& phrase_ref, PrefixMatch match_prefixes, langfield_type langfield);
 
     arraycache cache_;
 };

diff --git a/src/rocksdbcache.cpp b/src/rocksdbcache.cpp
@@ -18,20 +18,27 @@ intarray RocksDBCache::__get(const std::string& phrase, langfield_type langfield
     return array;
 }
 
-intarray RocksDBCache::__getmatching(const std::string& phrase_ref, bool match_prefixes, langfield_type langfield) {
+intarray RocksDBCache::__getmatching(const std::string& phrase_ref, PrefixMatch match_prefixes, langfield_type langfield) {
     intarray array;
     std::string phrase = phrase_ref;
 
-    if (!match_prefixes) {
+    if (match_prefixes == PrefixMatch::disabled) {
         phrase.push_back(LANGFIELD_SEPARATOR);
     }
+
     size_t phrase_length = phrase.length();
+    if (match_prefixes == PrefixMatch::word_boundary) {
+        // If we're looking for a word boundary we need to have one more character
+        // available than the phrase is long. Incrementing this length ensures we
+        // don't use a prefix cache that could cut off the word break.
+        phrase_length++;
+    }
 
     // Load values from message cache
     std::vector<std::tuple<std::string, bool>> messages;
     std::vector<sortableGrid> grids;
 
-    if (match_prefixes) {
+    if (match_prefixes != PrefixMatch::disabled) {
         // if this is an autocomplete scan, use the prefix cache
         if (phrase_length <= MEMO_PREFIX_LENGTH_T1) {
             phrase = "=1" + phrase.substr(0, MEMO_PREFIX_LENGTH_T1);
@@ -46,6 +53,15 @@ intarray RocksDBCache::__getmatching(const std::string& phrase_ref, bool match_p
     for (rit->Seek(phrase); rit->Valid() && rit->key().ToString().compare(0, phrase.size(), phrase) == 0; rit->Next()) {
         std::string key = rit->key().ToString();
 
+        if (match_prefixes == PrefixMatch::word_boundary) {
+            // Read one character beyond the input prefix length; this should
+            // always be safe because of the LANGFIELD_SEPARATOR
+            char endChar = key.at(phrase.length());
+            if (endChar != LANGFIELD_SEPARATOR && endChar != ' ') {
+                continue;
+            }
+        }
+
         // grab the langfield from the end of the key
         langfield_type message_langfield = extract_langfield(key);
         auto matches_language = static_cast<bool>(message_langfield & langfield);

diff --git a/src/rocksdbcache.hpp b/src/rocksdbcache.hpp
@@ -76,7 +76,7 @@ class RocksDBCache {
     std::vector<std::pair<std::string, langfield_type>> list();
 
     std::vector<uint64_t> __get(const std::string& phrase, langfield_type langfield);
-    std::vector<uint64_t> __getmatching(const std::string& phrase_ref, bool match_prefixes, langfield_type langfield);
+    std::vector<uint64_t> __getmatching(const std::string& phrase_ref, PrefixMatch match_prefixes, langfield_type langfield);
 
     std::shared_ptr<rocksdb::DB> db;
 };

diff --git a/test/coalesce.proximity.test.js b/test/coalesce.proximity.test.js
@@ -7,6 +7,7 @@
 const MemoryCache = require('../index.js').MemoryCache;
 const Grid = require('./grid.js');
 const coalesce = require('../index.js').coalesce;
+const scan = require('../index.js').PREFIX_SCAN;
 const test = require('tape');
 
 (function() {
@@ -56,7 +57,7 @@ const test = require('tape');
             zoom: 14,
             weight: 1,
             phrase: '1',
-            prefix: false
+            prefix: scan.disabled
         }], {
             radius: 200,
             centerzxy: [14, 100 + 10, 100 + 15]
@@ -76,7 +77,7 @@ const test = require('tape');
             zoom: 14,
             weight: 1,
             phrase: '1',
-            prefix: false
+            prefix: scan.disabled
         }], {
             radius: 200,
             centerzxy: [14, 100 + 10, 100 - 15]
@@ -96,7 +97,7 @@ const test = require('tape');
             zoom: 14,
             weight: 1,
             phrase: '1',
-            prefix: false
+            prefix: scan.disabled
         }], {
             radius: 200,
             centerzxy: [14, 100 - 10, 100 - 15]
@@ -116,7 +117,7 @@ const test = require('tape');
             zoom: 14,
             weight: 1,
             phrase: '1',
-            prefix: false
+            prefix: scan.disabled
         }], {
             radius: 200,
             centerzxy: [14, 100 - 10, 100 + 15]