Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

preserve trailing word boundaries #1545

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sanitizer/_text.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ function _sanitize( raw, clean ){
// strip surrounding whitespace first, then any surrounding quote characters
text = _.trim(_.trim(raw.text), QUOTES);

// if trimming removed one or more characters (whitespace or quotes) from the
// end of the text, append an artificial space to preserve the trailing word boundary
// see: https://github.com/pelias/api/issues/1544
if (!_.isEmpty(text) && !_.toString(raw.text).endsWith(text)) {
text += ' ';
}

// validate input 'text'
if( !_.isString(text) || _.isEmpty(text) ){
messages.errors.push(`invalid param 'text': text length, must be >0`);
Expand Down
16 changes: 8 additions & 8 deletions test/unit/sanitizer/_text.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ module.exports.tests.text_parser = function(test, common) {
var raw = { text: ` test \n ` };
const messages = sanitizer.sanitize(raw, clean);

t.equals(clean.text, 'test');
t.equals(clean.text, 'test ');
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');

Expand All @@ -63,7 +63,7 @@ module.exports.tests.text_parser = function(test, common) {
var raw = { text: ` "test" \n ` };
const messages = sanitizer.sanitize(raw, clean);

t.equals(clean.text, 'test');
t.equals(clean.text, 'test ');
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');

Expand All @@ -75,7 +75,7 @@ module.exports.tests.text_parser = function(test, common) {
var raw = { text: ` 'test' \n ` };
const messages = sanitizer.sanitize(raw, clean);

t.equals(clean.text, 'test');
t.equals(clean.text, 'test ');
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');

Expand All @@ -87,7 +87,7 @@ module.exports.tests.text_parser = function(test, common) {
var raw = { text: ` „test“ \n ` };
const messages = sanitizer.sanitize(raw, clean);

t.equals(clean.text, 'test');
t.equals(clean.text, 'test ');
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');

Expand All @@ -99,7 +99,7 @@ module.exports.tests.text_parser = function(test, common) {
var raw = { text: ` »test« \n ` };
const messages = sanitizer.sanitize(raw, clean);

t.equals(clean.text, 'test');
t.equals(clean.text, 'test ');
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');

Expand All @@ -111,7 +111,7 @@ module.exports.tests.text_parser = function(test, common) {
var raw = { text: ` ﹁「test」﹂ \n ` };
const messages = sanitizer.sanitize(raw, clean);

t.equals(clean.text, 'test');
t.equals(clean.text, 'test ');
t.deepEquals(messages.errors, [], 'no errors');
t.deepEquals(messages.warnings, [], 'no warnings');

Expand Down Expand Up @@ -142,9 +142,9 @@ module.exports.tests.text_parser = function(test, common) {
test('should truncate very long text inputs', (t) => {
const raw = { text: `
Sometimes we make the process more complicated than we need to.
We will never make a journey of a thousand miles by fretting about
We will never make a journey of a thousand miles by fretting about
how long it will take or how hard it will be.
We make the journey by taking each day step by step and then repeating
We make the journey by taking each day step by step and then repeating
it again and again until we reach our destination.` };
const clean = {};
const messages = sanitizer.sanitize(raw, clean);
Expand Down