From d0ef508eb81c4d96371a1227225ebd84eb0a324c Mon Sep 17 00:00:00 2001 From: Fei Chen Date: Mon, 8 Feb 2021 20:11:46 +0800 Subject: [PATCH 1/2] fix luis converter --- packages/lu/src/parser/lufile/visitor.js | 3 +- packages/lu/src/parser/luis/luConverter.js | 30 +++++++++++++++++-- .../lu/src/parser/utils/enums/escapechars.js | 2 +- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/packages/lu/src/parser/lufile/visitor.js b/packages/lu/src/parser/lufile/visitor.js index 3f4f8925b..05e6d9daf 100644 --- a/packages/lu/src/parser/lufile/visitor.js +++ b/packages/lu/src/parser/lufile/visitor.js @@ -98,7 +98,8 @@ class Visitor { let expChars = exp.split(''); let escapeChar = false; expChars.forEach(function (char, index) { - if (char === '\\' && expChars.length > index + 1 && EscapeCharsInUtterance.includes(expChars[index + 1])) { + if (char === '\\' && !escapeChar && expChars.length > index + 1 + && (EscapeCharsInUtterance.includes(expChars[index + 1]) || expChars[index + 1] === '\\')) { escapeChar = true; } else if (char === '{' && !escapeChar) { let newEntity = {entityName : '', role : '', entityValue : undefined, parent : curEntity}; diff --git a/packages/lu/src/parser/luis/luConverter.js b/packages/lu/src/parser/luis/luConverter.js index db228dd2f..175fe523d 100644 --- a/packages/lu/src/parser/luis/luConverter.js +++ b/packages/lu/src/parser/luis/luConverter.js @@ -1,6 +1,8 @@ const NEWLINE = require('os').EOL; const helperClasses = require('./../lufile/classes/hclasses') const EntityTypeEnum = require('./../utils/enums/luisEntityTypes'); +const EscapeCharsInUtterance = require('./../utils/enums/escapechars').EscapeCharsInUtterance; +const helpers = require('./../utils/helpers'); /** * Parses a Luis object into Lu Content @@ -89,7 +91,7 @@ const parseUtterancesToLu = function(utterances, luisJSON){ if(luisJSON.test === true && utterance.predictedResult !== undefined){ fileContent += parsePredictedResultToLu(utterance, luisJSON) } - 
if(utterance.entities.length >= 0) { + if(utterance.entities.length > 0) { // update utterance for each entity let text = utterance.text; // flatten entities @@ -99,10 +101,25 @@ const parseUtterancesToLu = function(utterances, luisJSON){ // remove all children sortedEntitiesList.forEach(entity => delete entity.children); let tokenizedText = text.split(''); + tokenizedText.forEach(function (token, index) { + tokenizedText[index] = EscapeCharsInUtterance.includes(token) ? `\\${token}` : token; + }); // handle cases where we have both child as well as cases where more than one entity can have the same start position // if there are multiple entities in the same start position, then order them by composite, nDepth, regular entity getEntitiesByPositionList(sortedEntitiesList, tokenizedText); updatedText = tokenizedText.join(''); + } else { + // will not add escape char for pattern utterances since brackets are strictly used in pattern + // so there are no exceptions that need to be handled in pattern + if (helpers.isUtterancePattern(utterance)) { + updatedText = utterance.text; + } else { + let tokenizedText = utterance.text.split(''); + tokenizedText.forEach(function (token, index) { + tokenizedText[index] = EscapeCharsInUtterance.includes(token) ? 
`\\${token}` : token; + }); + updatedText = tokenizedText.join(''); + } } // remove duplicated whitespaces between words inside utterance to make sure they are aligned with the luis portal @@ -138,7 +155,16 @@ const updateTokenizedTextByEntity = function(tokenizedText, entity) { } else { tokenizedText[parseInt(entity.startPos)] = `{@${entity.entity}=${tokenizedText[parseInt(entity.startPos)]}`; } - tokenizedText[parseInt(entity.endPos)] = tokenizedText[parseInt(entity.endPos)] + '}'; + + // check for a backslash before the entity definition + // a backslash before { or } is recognized as escaping { or } + // to avoid such an escape, add another backslash before the backslash + if (parseInt(entity.startPos) > 0 && tokenizedText[parseInt(entity.startPos) - 1] === '\\') { + tokenizedText[parseInt(entity.startPos) - 1] += '\\' + } + + tokenizedText[parseInt(entity.endPos)] = tokenizedText[parseInt(entity.endPos)] === '\\' ? + tokenizedText[parseInt(entity.endPos)] + '\\}' : tokenizedText[parseInt(entity.endPos)] + '}'; } const parsePredictedResultToLu = function(utterance, luisJSON){ diff --git a/packages/lu/src/parser/utils/enums/escapechars.js b/packages/lu/src/parser/utils/enums/escapechars.js index 0dc5aa636..88e3e3024 100644 --- a/packages/lu/src/parser/utils/enums/escapechars.js +++ b/packages/lu/src/parser/utils/enums/escapechars.js @@ -4,5 +4,5 @@ */ // Escape chars in utterance module.exports = { - EscapeCharsInUtterance: ['{', '}', '\\'] + EscapeCharsInUtterance: ['{', '}'] }; \ No newline at end of file From 952595a0877beca52310ef3299594115d87ad2d7 Mon Sep 17 00:00:00 2001 From: feich-ms Date: Mon, 8 Feb 2021 22:29:15 +0800 Subject: [PATCH 2/2] add test cases --- packages/lu/src/parser/lufile/visitor.js | 4 +- .../lu/test/commands/luis/convert.test.js | 8 + .../escapeCharactersInUtterances.json | 452 ++++++++++++++++++ .../verified/escapeCharactersInUtterances.lu | 66 +++ 4 files changed, 528 insertions(+), 2 deletions(-) create mode 100644 
packages/lu/test/fixtures/verified/escapeCharactersInUtterances.json create mode 100644 packages/lu/test/fixtures/verified/escapeCharactersInUtterances.lu diff --git a/packages/lu/src/parser/lufile/visitor.js b/packages/lu/src/parser/lufile/visitor.js index 05e6d9daf..c90264671 100644 --- a/packages/lu/src/parser/lufile/visitor.js +++ b/packages/lu/src/parser/lufile/visitor.js @@ -12,7 +12,7 @@ class Visitor { let utterance = ''; let entities = []; let errorMsgs = []; - for (const node of ctx.children) { + for (const [index, node] of ctx.children.entries()) { const innerNode = node; switch (innerNode.symbol.type) { case lp.DASH: break; @@ -23,7 +23,7 @@ class Visitor { } case lp.ESCAPE_CHARACTER: { let escapeCharacters = innerNode.getText(); - let escapedUtterace = escapeCharacters.length > 1 && EscapeCharsInUtterance.includes(escapeCharacters[1]) ? escapeCharacters.slice(1) : escapeCharacters; + let escapedUtterace = escapeCharacters.length > 1 && (EscapeCharsInUtterance.includes(escapeCharacters[1]) || (escapeCharacters[1] === '\\' && index + 1 < ctx.children.length && ctx.children[index + 1].symbol.type === lp.EXPRESSION)) ? 
escapeCharacters.slice(1) : escapeCharacters; utterance = utterance.concat(escapedUtterace); break; } diff --git a/packages/lu/test/commands/luis/convert.test.js b/packages/lu/test/commands/luis/convert.test.js index f01ab020e..3ba150852 100644 --- a/packages/lu/test/commands/luis/convert.test.js +++ b/packages/lu/test/commands/luis/convert.test.js @@ -62,6 +62,14 @@ describe('luis:convert', () => { await assertToLu('./../../fixtures/verified/nDepthEntityInUtterance.json', './../../fixtures/verified/nDepthEntityInUtterance.lu') }) + it('luis:convert successfully reconstructs a markdown file from a LUIS input file (with escape characters in utterances)', async () => { + await assertToLu('./../../fixtures/verified/escapeCharactersInUtterances.json', './../../fixtures/verified/escapeCharactersInUtterances.lu') + }) + + it('luis:convert Utterances with escape characters correctly', async () => { + await assertToJSON('./../../fixtures/verified/escapeCharactersInUtterances.lu', './../../fixtures/verified/escapeCharactersInUtterances.json') + }) + it('luis:convert Simple intent and utterances are parsed correctly', async () => { await assertToJSON('./../../fixtures/examples/1.lu', './../../fixtures/verified/1.json', '1') }) diff --git a/packages/lu/test/fixtures/verified/escapeCharactersInUtterances.json b/packages/lu/test/fixtures/verified/escapeCharactersInUtterances.json new file mode 100644 index 000000000..f76c05b51 --- /dev/null +++ b/packages/lu/test/fixtures/verified/escapeCharactersInUtterances.json @@ -0,0 +1,452 @@ +{ + "intents": [ + { + "name": "Test" + } + ], + "entities": [ + { + "name": "BoldAction", + "roles": [] + }, + { + "name": "Action", + "roles": [] + }, + { + "name": "ActionTargetPhrase", + "roles": [] + }, + { + "name": "Command", + "roles": [] + }, + { + "name": "ActionTargetStart", + "roles": [] + }, + { + "name": "ActionTargetSeparator", + "roles": [] + }, + { + "name": "ActionTargetEnd", + "roles": [] + }, + { + "name": "ActionTargetRange", + 
"roles": [] + }, + { + "name": "AllCapsAction", + "roles": [] + }, + { + "name": "DeleteAction", + "roles": [] + }, + { + "name": "ItalicsAction", + "roles": [] + }, + { + "name": "RemoveFormattingAction", + "roles": [] + }, + { + "name": "StrikeThroughAction", + "roles": [] + }, + { + "name": "RemoveSuperscriptAction", + "roles": [] + } + ], + "composites": [], + "closedLists": [], + "regex_entities": [], + "model_features": [], + "regex_features": [], + "utterances": [ + { + "text": "bold {directions from seattle to portland},", + "intent": "Test", + "entities": [ + { + "entity": "BoldAction", + "startPos": 0, + "endPos": 3 + }, + { + "entity": "Action", + "startPos": 0, + "endPos": 3 + }, + { + "entity": "ActionTargetPhrase", + "startPos": 6, + "endPos": 40 + }, + { + "entity": "Command", + "startPos": 0, + "endPos": 40 + } + ] + }, + { + "text": "emphasise from \"{\"\"result\"\":{<num_leds < bnxt_max_led) {", + "intent": "Test", + "entities": [ + { + "entity": "ItalicsAction", + "startPos": 0, + "endPos": 8 + }, + { + "entity": "Action", + "startPos": 0, + "endPos": 8 + }, + { + "entity": "ActionTargetPhrase", + "startPos": 10, + "endPos": 38 + }, + { + "entity": "Command", + "startPos": 0, + "endPos": 38 + } + ] + }, + { + "text": "clear formatting from \"\"\"notification\"\": {\"", + "intent": "Test", + "entities": [ + { + "entity": "RemoveFormattingAction", + "startPos": 0, + "endPos": 20 + }, + { + "entity": "Action", + "startPos": 0, + "endPos": 20 + }, + { + "entity": "ActionTargetPhrase", + "startPos": 25, + "endPos": 36 + }, + { + "entity": "Command", + "startPos": 0, + "endPos": 36 + } + ] + }, + { + "text": "strikethrough from \"{(\"\"customuri\"\", through ),\"", + "intent": "Test", + "entities": [ + { + "entity": "StrikeThroughAction", + "startPos": 0, + "endPos": 17 + }, + { + "entity": "Action", + "startPos": 0, + "endPos": 17 + }, + { + "entity": "ActionTargetStart", + "startPos": 24, + "endPos": 32 + }, + { + "entity": "ActionTargetSeparator", + 
"startPos": 37, + "endPos": 43 + }, + { + "entity": "ActionTargetEnd", + "startPos": 45, + "endPos": 51 + }, + { + "entity": "ActionTargetRange", + "startPos": 24, + "endPos": 51 + }, + { + "entity": "Command", + "startPos": 0, + "endPos": 51 + } + ] + }, + { + "text": "remove superscript on \"\"\"from\"\": {\"", + "intent": "Test", + "entities": [ + { + "entity": "RemoveSuperscriptAction", + "startPos": 0, + "endPos": 20 + }, + { + "entity": "Action", + "startPos": 0, + "endPos": 20 + }, + { + "entity": "ActionTargetPhrase", + "startPos": 25, + "endPos": 28 + }, + { + "entity": "Command", + "startPos": 0, + "endPos": 28 + } + ] + } + ], + "patterns": [], + "patternAnyEntities": [], + "prebuiltEntities": [], + "luis_schema_version": "3.2.0", + "versionId": "0.1", + "name": "", + "desc": "", + "culture": "en-us" + } \ No newline at end of file diff --git a/packages/lu/test/fixtures/verified/escapeCharactersInUtterances.lu b/packages/lu/test/fixtures/verified/escapeCharactersInUtterances.lu new file mode 100644 index 000000000..6b57cb834 --- /dev/null +++ b/packages/lu/test/fixtures/verified/escapeCharactersInUtterances.lu @@ -0,0 +1,66 @@ + +> LUIS application information +> !# @app.versionId = 0.1 +> !# @app.culture = en-us +> !# @app.luis_schema_version = 3.2.0 + + +> # Intent definitions + +# Test +- {@Command={@Action={@BoldAction=bold}} \{{@ActionTargetPhrase=directions from seattle to portland}}\}, +- {@Command={@Action={@BoldAction=emphasise from}} "\{""{@ActionTargetRange={@ActionTargetStart=result"":\{<num_leds < bnxt_max_led}}) \{ +- {@Command={@Action={@RemoveFormattingAction=clear formatting from}} """{@ActionTargetPhrase=notification}}"": \{" +- {@Command={@Action={@StrikeThroughAction=strikethrough from}} "\{(""{@ActionTargetRange={@ActionTargetStart=customuri}"", {@ActionTargetSeparator=through} {@ActionTargetEnd=}}})," +- {@Command={@Action={@RemoveSuperscriptAction=remove superscript on}} """{@ActionTargetPhrase=from}}"": \{" + + +> # Entity 
definitions + +@ ml BoldAction + +@ ml Action + +@ ml ActionTargetPhrase + +@ ml Command + +@ ml ActionTargetStart + +@ ml ActionTargetSeparator + +@ ml ActionTargetEnd + +@ ml ActionTargetRange + +@ ml AllCapsAction + +@ ml DeleteAction + +@ ml ItalicsAction + +@ ml RemoveFormattingAction + +@ ml StrikeThroughAction + +@ ml RemoveSuperscriptAction + + +> # PREBUILT Entity definitions + + +> # Phrase list definitions + + +> # List entities + +> # RegEx entities + +