Skip to content

Commit

Permalink
Leave invisible characters in string fields
Browse files Browse the repository at this point in the history
  • Loading branch information
garethbowen committed Aug 15, 2022
1 parent 4382617 commit 3d00f5f
Show file tree
Hide file tree
Showing 4 changed files with 99 additions and 44 deletions.
20 changes: 16 additions & 4 deletions api/src/services/records.js
Expand Up @@ -171,22 +171,34 @@ const createByForm = (data, { locale }={}) => {
throw new PublicError('Missing required field: message');
}

data.message = data.message.replace(ZERO_WIDTH_UNICODE_CHARACTERS, '');

const rawMessage = data.message;
let formCode = smsparser.getFormCode(data.message);
let formDefinition = getForm(formCode);
if (formCode && !formDefinition) {
// try again, this time without invisible characters
const cleaned = formCode.replace(ZERO_WIDTH_UNICODE_CHARACTERS, '');
formDefinition = getForm(cleaned);
if (formDefinition) {
// update the message to use the correct form code
data.message = data.message.replace(formCode, cleaned);
formCode = cleaned;
}
}
const content = {
type: 'sms_message',
message: data.message,
form: smsparser.getFormCode(data.message),
form: formCode,
reported_date: data.sent_timestamp,
locale: data.locale || locale,
from: data.from,
gateway_ref: data.gateway_ref,
};
const formDefinition = getForm(content.form);

let formData;
if (content.form && formDefinition) {
formData = smsparser.parse(formDefinition, data);
}
content.message = rawMessage; // reset this to the original message now that parsing is complete
return getDataRecord(formData, content);
};

Expand Down
85 changes: 50 additions & 35 deletions api/src/services/report/smsparser.js
Expand Up @@ -10,6 +10,9 @@ const moment = require('moment');
const bs = require('bikram-sambat');

// Matches text that begins with optional whitespace, then an optional ASCII
// letter followed by exactly one digit (captured as group 1), then `!`, at
// least one character, another `!`, and at least one more character.
// NOTE(review): presumably this identifies Muvuku-formatted messages - confirm against callers.
const MUVUKU_REGEX = /^\s*([A-Za-z]?\d)!.+!.+/;
// matches invisible characters that can mess up our parsing
// specifically: u200B, u200C, u200D, uFEFF
// The global flag makes String#replace remove every occurrence rather than
// only the first one. Because the regex is global it keeps `lastIndex` state
// when used with `.test()`/`.exec()`, so prefer `.replace()` with it.
const ZERO_WIDTH_UNICODE_CHARACTERS = /[\u200B-\u200D\uFEFF]/g;

// Devanagari
const T_TABLE = {
Expand Down Expand Up @@ -44,6 +47,13 @@ const regexEscape = s => {
return s.replace(/[-/\\^$*+?.()|[\]{}]/g, '\\$&');
};

/**
 * Removes every match of ZERO_WIDTH_UNICODE_CHARACTERS from a string.
 *
 * @param {*} s - the value to clean
 * @returns {*} the cleaned string, or `s` itself untouched when it is not a string
 */
const stripInvisibleCharacters = s => (
  typeof s === 'string' ? s.replace(ZERO_WIDTH_UNICODE_CHARACTERS, '') : s
);

// Remove the form code from the beginning of the message since it does
// not belong to the TextForms format but is just a convention to
// identify the message.
Expand Down Expand Up @@ -140,14 +150,14 @@ const getFieldByType = (def, type) => {

/**
 * Lower-cases a value when it is truthy and exposes a `toLowerCase` member.
 * Any other value (falsy values, numbers, plain objects, ...) is returned unchanged.
 *
 * @param {*} str - the value to lower-case
 * @returns {*} the lower-cased result, or `str` itself when it cannot be lower-cased
 */
const lower = str => {
  if (!str || !str.toLowerCase) {
    return str;
  }
  return str.toLowerCase();
};

exports.parseField = (field, raw) => {
switch (field.type) {
case 'integer':
const fieldParsers = {
integer: (raw, field) => {
// store list value since it has more meaning.
// TODO we don't have locale data inside this function so calling
// translate does not resolve locale.
const cleaned = stripInvisibleCharacters(String(raw));
if (field.list) {
const item = field.list.find(item => String(item[0]) === String(raw));
const item = field.list.find(item => String(item[0]) === cleaned);
if (!item) {
logger.warn(
`Option not available for ${JSON.stringify(raw)} in list.`
Expand All @@ -156,44 +166,37 @@ exports.parseField = (field, raw) => {
}
return config.translate(item[1]);
}
return parseNum(raw);
case 'string':
if (raw === undefined) {
return;
}
if (raw === '') {
return null;
}
return parseNum(cleaned);
},
string: (raw, field, key) => {
if (field.list) {
const cleaned = stripInvisibleCharacters(raw);
for (const i of field.list) {
const item = field.list[i];
if (item[0] === raw) {
if (item[0] === cleaned) {
return item[1];
}
}
logger.warn(`Option not available for ${raw} in list.`);
} else if (key === 'patient_id' || key === 'place_id') {
// special handling for string IDs which must be [0-9]
return stripInvisibleCharacters(raw);
}
return raw;
case 'date':
if (!raw) {
return null;
}
},
date: (raw) => {
// YYYY-MM-DD assume muvuku format for now
// store in milliseconds since Epoch
return moment(raw).valueOf();
case 'bsDate': {
if (!raw) {
return null;
}
const separator = raw[raw.search(/[^0-9]/)];//non-numeric character
const dateParts = raw.split(separator);
return moment(stripInvisibleCharacters(raw)).valueOf();
},
bsDate: (raw) => {
const cleaned = stripInvisibleCharacters(raw);
const separator = cleaned[cleaned.search(/[^0-9]/)];//non-numeric character
const dateParts = cleaned.split(separator);
return bsToEpoch(...dateParts);
}
case 'boolean': {
if (raw === undefined) {
return;
}
const val = parseNum(raw);
},
boolean: (raw) => {
const val = parseNum(stripInvisibleCharacters(raw));
if (val === 1) {
return true;
}
Expand All @@ -202,14 +205,26 @@ exports.parseField = (field, raw) => {
}
// if we can't parse a number then return null
return null;
}
case 'month':
},
month: (raw) => {
// keep months integers, not their list value.
return parseNum(raw);
default:
return parseNum(stripInvisibleCharacters(raw));
}
};

exports.parseField = (field, raw, key) => {
const parser = fieldParsers[field.type];
if (!parser) {
logger.warn(`Unknown field type: ${field.type}`);
return raw;
}
if (raw === undefined) {
return;
}
if (raw === '') {
return null;
}
return parser(raw, field, key);
};

/**
Expand Down Expand Up @@ -261,7 +276,7 @@ exports.parse = (def, doc) => {
// parse field types and resolve dot notation keys
for (const k of Object.keys(def.fields)) {
if (msgData[k] || addOmittedFields) {
const value = exports.parseField(def.fields[k], msgData[k]);
const value = exports.parseField(def.fields[k], msgData[k], k);
createDeepKey(formData, k.split('.'), value);
}
}
Expand Down
36 changes: 32 additions & 4 deletions api/tests/mocha/services/records.spec.js
Expand Up @@ -54,14 +54,42 @@ describe('records service', () => {
});

it('strips unicode whitespace from textforms submission - #7654', () => {
// contains a zero width invisible unicode characters which should be stripped out
const message = 'A\u200B B\u200C C\u200D';
sinon.stub(config, 'get').returns(definitions.forms);

const formDefinition = 'YY\u200BYY'; // strip the invisible character to match form defn
const facilityId = 'Faci\u200Clity'; // string value - do not strip
const year = '19\u200C99'; // integer value - strip
const month = '1\u200D2'; // enum value - strip to match
const misoprostol = '1\uFEFF'; // boolean value with trailing invisible character

const message = `${formDefinition} ${facilityId} ${year} ${month} ${misoprostol}`;
const actual = service.createByForm({
message,
from: '+123'
});
chai.expect(actual.sms_message.message).to.equal(message); // the given string is unchanged
chai.expect(actual.sms_message.form).to.equal('YYYY'); // correct form is found
chai.expect(actual.fields.facility_id).to.equal(facilityId); // character not stripped from string
chai.expect(actual.fields.year).to.equal(1999); // integers parse
chai.expect(actual.fields.month).to.equal('December'); // list items are found
chai.expect(actual.fields.misoprostol_administered).to.equal(true); // booleans parse
});

it('strips unicode whitespace from textforms submission patient_id and place_id fields - #7676', () => {
sinon.stub(config, 'get').returns(definitions.forms);

const formDefinition = 'YYYR';
const patientId = '012\u200C34'; // patient_id should be stripped
const bsYear = '2068';

const message = `${formDefinition} ${patientId} ${bsYear}`;
const actual = service.createByForm({
message,
from: '+123'
});
chai.expect(actual.sms_message.message).to.equal('A B C');
chai.expect(actual.sms_message.form).to.equal('A');
chai.expect(actual.sms_message.message).to.equal(message);
chai.expect(actual.sms_message.form).to.equal('YYYR');
chai.expect(actual.fields.patient_id).to.equal('01234'); // character stripped from patient_id
});

it('create json', () => {
Expand Down
2 changes: 1 addition & 1 deletion api/tests/mocha/services/report/smsparser.spec.js
Expand Up @@ -1000,7 +1000,7 @@ describe('sms parser', () => {
chai.expect(actual).to.deep.equal({
facility_id: 'fa\\cility#2#3',
year: undefined,
month: null,
month: undefined,
misoprostol_administered: undefined,
quantity_dispensed: {
la_6x1: undefined,
Expand Down

0 comments on commit 3d00f5f

Please sign in to comment.