Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix luis converter to enable escaping specific chars #1111

Merged
merged 3 commits into from
Feb 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 4 additions & 3 deletions packages/lu/src/parser/lufile/visitor.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class Visitor {
let utterance = '';
let entities = [];
let errorMsgs = [];
for (const node of ctx.children) {
for (const [index, node] of ctx.children.entries()) {
const innerNode = node;
switch (innerNode.symbol.type) {
case lp.DASH: break;
Expand All @@ -23,7 +23,7 @@ class Visitor {
}
case lp.ESCAPE_CHARACTER: {
let escapeCharacters = innerNode.getText();
let escapedUtterace = escapeCharacters.length > 1 && EscapeCharsInUtterance.includes(escapeCharacters[1]) ? escapeCharacters.slice(1) : escapeCharacters;
let escapedUtterace = escapeCharacters.length > 1 && (EscapeCharsInUtterance.includes(escapeCharacters[1]) || (escapeCharacters[1] === '\\' && index + 1 < ctx.children.length && ctx.children[index + 1].symbol.type === lp.EXPRESSION)) ? escapeCharacters.slice(1) : escapeCharacters;
utterance = utterance.concat(escapedUtterace);
break;
}
Expand Down Expand Up @@ -98,7 +98,8 @@ class Visitor {
let expChars = exp.split('');
let escapeChar = false;
expChars.forEach(function (char, index) {
if (char === '\\' && expChars.length > index + 1 && EscapeCharsInUtterance.includes(expChars[index + 1])) {
if (char === '\\' && !escapeChar && expChars.length > index + 1
&& (EscapeCharsInUtterance.includes(expChars[index + 1]) || expChars[index + 1] === '\\')) {
escapeChar = true;
} else if (char === '{' && !escapeChar) {
let newEntity = {entityName : '', role : '', entityValue : undefined, parent : curEntity};
Expand Down
30 changes: 28 additions & 2 deletions packages/lu/src/parser/luis/luConverter.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
const NEWLINE = require('os').EOL;
const helperClasses = require('./../lufile/classes/hclasses')
const EntityTypeEnum = require('./../utils/enums/luisEntityTypes');
const EscapeCharsInUtterance = require('./../utils/enums/escapechars').EscapeCharsInUtterance;
const helpers = require('./../utils/helpers');

/**
* Parses a Luis object into Lu Content
Expand Down Expand Up @@ -89,7 +91,7 @@ const parseUtterancesToLu = function(utterances, luisJSON){
if(luisJSON.test === true && utterance.predictedResult !== undefined){
fileContent += parsePredictedResultToLu(utterance, luisJSON)
}
if(utterance.entities.length >= 0) {
if(utterance.entities.length > 0) {
// update utterance for each entity
let text = utterance.text;
// flatten entities
Expand All @@ -99,10 +101,25 @@ const parseUtterancesToLu = function(utterances, luisJSON){
// remove all children
sortedEntitiesList.forEach(entity => delete entity.children);
let tokenizedText = text.split('');
tokenizedText.forEach(function (token, index) {
tokenizedText[index] = EscapeCharsInUtterance.includes(token) ? `\\${token}` : token;
});
// handle cases where we have both child as well as cases where more than one entity can have the same start position
// if there are multiple entities in the same start position, then order them by composite, nDepth, regular entity
getEntitiesByPositionList(sortedEntitiesList, tokenizedText);
updatedText = tokenizedText.join('');
} else {
// will not add escape char for pattern utterances since brackets are strictly used in pattern
// so there are no exceptions that need to be handled in pattern
if (helpers.isUtterancePattern(utterance)) {
updatedText = utterance.text;
} else {
let tokenizedText = utterance.text.split('');
tokenizedText.forEach(function (token, index) {
tokenizedText[index] = EscapeCharsInUtterance.includes(token) ? `\\${token}` : token;
});
updatedText = tokenizedText.join('');
}
}

// remove duplicated whitespaces between words inside utterance to make sure they are aligned with the luis portal
Expand Down Expand Up @@ -138,7 +155,16 @@ const updateTokenizedTextByEntity = function(tokenizedText, entity) {
} else {
tokenizedText[parseInt(entity.startPos)] = `{@${entity.entity}=${tokenizedText[parseInt(entity.startPos)]}`;
}
tokenizedText[parseInt(entity.endPos)] = tokenizedText[parseInt(entity.endPos)] + '}';

// check for a backslash before the entity definition
// a backslash before { or } will be recognized as escaping { or }
// to avoid such an escape, add another backslash before the backslash
if (parseInt(entity.startPos) > 0 && tokenizedText[parseInt(entity.startPos) - 1] === '\\') {
tokenizedText[parseInt(entity.startPos) - 1] += '\\'
}

tokenizedText[parseInt(entity.endPos)] = tokenizedText[parseInt(entity.endPos)] === '\\' ?
tokenizedText[parseInt(entity.endPos)] + '\\}' : tokenizedText[parseInt(entity.endPos)] + '}';
}

const parsePredictedResultToLu = function(utterance, luisJSON){
Expand Down
2 changes: 1 addition & 1 deletion packages/lu/src/parser/utils/enums/escapechars.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
*/
// Escape chars in utterance
module.exports = {
EscapeCharsInUtterance: ['{', '}', '\\']
EscapeCharsInUtterance: ['{', '}']
};
8 changes: 8 additions & 0 deletions packages/lu/test/commands/luis/convert.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,14 @@ describe('luis:convert', () => {
await assertToLu('./../../fixtures/verified/nDepthEntityInUtterance.json', './../../fixtures/verified/nDepthEntityInUtterance.lu')
})

it('luis:convert successfully reconstructs a markdown file from a LUIS input file (with escape characters in utterances)', async () => {
await assertToLu('./../../fixtures/verified/escapeCharactersInUtterances.json', './../../fixtures/verified/escapeCharactersInUtterances.lu')
})

it('luis:convert Utterances with escape characters correctly', async () => {
await assertToJSON('./../../fixtures/verified/escapeCharactersInUtterances.lu', './../../fixtures/verified/escapeCharactersInUtterances.json')
})

it('luis:convert Simple intent and utterances are parsed correctly', async () => {
await assertToJSON('./../../fixtures/examples/1.lu', './../../fixtures/verified/1.json', '1')
})
Expand Down