Skip to content

Commit

Permalink
Merge pull request #852 from stanford-oval/wip/fix-dataset-parser
Browse files Browse the repository at this point in the history
Fix dialogue ID of parsed dialogues if the previous dialogue is empty
  • Loading branch information
gcampax committed Dec 13, 2021
2 parents e65af78 + 910ec76 commit 9e162a6
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 8 deletions.
21 changes: 14 additions & 7 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ branches:
- "/^dependabot/"
language: node_js
node_js: '12'
dist: xenial
python: '3.8'
dist: bionic
env:
global:
- COVERALLS_PARALLEL=true
Expand Down Expand Up @@ -54,7 +55,8 @@ jobs:
-
name: "Translation Tests"
install:
- pyenv global 3.7.2
- pyenv global 3.8.1
- virtualenv --py $(pyenv which python3) venv && . ./venv/bin/activate
- ./travis/install-starter-deps.sh
- pip3 install polib
- travis_retry npm ci
Expand All @@ -63,38 +65,43 @@ jobs:
-
name: "Starter Code Tests (Basic)"
install:
- pyenv global 3.7.2
- pyenv global 3.8.1
- virtualenv --py $(pyenv which python3) venv && . ./venv/bin/activate
- ./travis/install-starter-deps.sh
- travis_retry npm ci
script: npx nyc ./test/basic-starter.sh
after_success: npm run coverage
-
name: "Starter Code Tests (Schema.org)"
install:
- pyenv global 3.7.2
- pyenv global 3.8.1
- virtualenv --py $(pyenv which python3) venv && . ./venv/bin/activate
- ./travis/install-starter-deps.sh
- travis_retry npm ci
script: npx nyc ./test/schemaorg-starter.sh
after_success: npm run coverage
- name: "Starter Code Tests (Wikidata)"
install:
- pyenv global 3.7.2
- pyenv global 3.8.1
- virtualenv --py $(pyenv which python3) venv && . ./venv/bin/activate
- ./travis/install-starter-deps.sh
- travis_retry npm ci
script: npx nyc ./test/wikidata-starter.sh
after_success: npm run coverage
-
name: "Starter Code Tests (MultiWOZ)"
install:
- pyenv global 3.7.2
- pyenv global 3.8.1
- virtualenv --py $(pyenv which python3) venv && . ./venv/bin/activate
- ./travis/install-starter-deps.sh
- travis_retry npm ci
script: npx nyc ./test/multiwoz-starter.sh
after_success: npm run coverage
-
name: "Starter Code Tests (Custom Skills)"
install:
- pyenv global 3.7.2
- pyenv global 3.8.1
- virtualenv --py $(pyenv which python3) venv && . ./venv/bin/activate
- ./travis/install-starter-deps.sh
- travis_retry npm ci
script: npx nyc ./test/custom-starter.sh
Expand Down
4 changes: 4 additions & 0 deletions lib/dataset-tools/parsers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,10 @@ class DialogueParser extends Stream.Transform {
// ignore `====` at the beginning or at the end
// or consecutive appearances of `====`
// this simplifies concatenating datasets

// but reset the ID, otherwise the next dialogue will have the wrong
// ID
this._id = undefined;
callback();
return;
}
Expand Down
1 change: 1 addition & 0 deletions test/unit/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ do_test([
('./test_new_tokenizer_en'),
('./test_new_tokenizer_it'),
('./test_new_tokenizer_zh'),
('./test_parsers'),
('./test_pos_nfa'),
('./test_postprocess_nlg_en'),
('./test_priority_queue'),
Expand Down
133 changes: 133 additions & 0 deletions test/unit/test_parsers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// -*- mode: js; indent-tabs-mode: nil; js-basic-offset: 4 -*-
//
// This file is part of Genie
//
// Copyright 2021 The Board of Trustees of the Leland Stanford Junior University
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Giovanni Campagna <gcampagn@cs.stanford.edu>

import assert from 'assert';

import { DialogueParser } from '../../lib/dataset-tools/parsers';

// Fixtures for the DialogueParser tests.
//
// Each entry is a two-element array:
//   [0] the raw dataset text that is fed to the parser, split on newlines
//   [1] the dialogues expected out of the parser, in order, each described
//       as `{ id, turns }` where `turns` lists the expected turn objects
//
// The template literals are parser input: their exact line contents matter,
// so do not re-indent them.
const DIALOGUE_PARSER_TEST_CASES = [
// Case 1: consecutive `====` separators before the first dialogue, followed
// by a single dialogue "foo" with one user turn.
[`
====
====
# foo
U: hello
UT: $dialogue @org.thingpedia.dialogue.transaction.greet;
====
`, [{
id: 'foo',
turns: [{
user: 'hello',
user_target: '$dialogue @org.thingpedia.dialogue.transaction.greet;',
agent: '',
agent_target: '',
context: '',
intermediate_context: '',
comment: '',
}]
}]
],

// Case 2: a two-turn dialogue. The `C:`, `A:` and `AT:` lines that follow the
// first user turn are expected to show up as the context, agent utterance and
// agent target of the second turn.
[`
====
====
# foo
U: hello
UT: $dialogue @org.thingpedia.dialogue.transaction.greet;
C: $dialogue @org.thingpedia.dialogue.transaction.greet;
A: hello! how can i help you?
AT: $dialogue @org.thingpedia.dialogue.transaction.sys_greet;
U: thank you
UT: $dialogue @org.thingpedia.dialogue.transaction.cancel;
====
`, [{
id: 'foo',
turns: [{
user: 'hello',
user_target: '$dialogue @org.thingpedia.dialogue.transaction.greet;',
agent: '',
agent_target: '',
context: '',
intermediate_context: '',
comment: '',
}, {
user: 'thank you',
user_target: '$dialogue @org.thingpedia.dialogue.transaction.cancel;',
agent: 'hello! how can i help you?',
agent_target: '$dialogue @org.thingpedia.dialogue.transaction.sys_greet;',
context: '$dialogue @org.thingpedia.dialogue.transaction.greet;',
intermediate_context: '',
comment: '',
}]
}]
],

// Case 3: an ID line (`# foo`) with no turns, immediately followed by `====`
// and a real dialogue "bar". Only "bar" is expected, and it must carry the
// ID 'bar' rather than the ID of the preceding empty dialogue.
[`
====
# foo
====
# bar
U: hello
UT: $dialogue @org.thingpedia.dialogue.transaction.greet;
====
`, [{
id: 'bar',
turns: [{
user: 'hello',
user_target: '$dialogue @org.thingpedia.dialogue.transaction.greet;',
agent: '',
agent_target: '',
context: '',
intermediate_context: '',
comment: '',
}]
}]
]
];

/**
 * Runs one fixture from DIALOGUE_PARSER_TEST_CASES through a fresh
 * DialogueParser and checks the dialogues it emits against the expectation.
 *
 * The input text is split on newlines and written to the parser one line at
 * a time; the parser is then ended and its output consumed with async
 * iteration. For every emitted dialogue the id, the number of turns and each
 * turn object are compared to the expected values. Finally the number of
 * emitted dialogues must match the number of expected ones.
 *
 * @param {number} testId - zero-based index into DIALOGUE_PARSER_TEST_CASES
 * @returns {Promise<void>} rejects with an AssertionError on any mismatch
 */
async function dialogueParserTest(testId) {
    console.log(`# Dialogue Parser Test Case ${testId+1}`);
    const testCase = DIALOGUE_PARSER_TEST_CASES[testId];
    const input = testCase[0];
    const expected = testCase[1];

    // feed the whole input up front, one line per write
    const parser = new DialogueParser();
    input.split('\n').forEach((line) => {
        parser.write(line);
    });
    parser.end();

    let seen = 0;
    for await (const dialogue of parser) {
        // check the bound before indexing into the expectations
        assert(seen < expected.length, `too many dialogues generated`);

        const wanted = expected[seen];
        assert.strictEqual(dialogue.id, wanted.id);
        assert.strictEqual(dialogue.length, wanted.turns.length);
        // lengths are equal at this point, so walking the expected turns
        // covers every parsed turn
        for (const [turnIdx, wantedTurn] of wanted.turns.entries())
            assert.deepStrictEqual(dialogue[turnIdx], wantedTurn);

        seen += 1;
    }
    assert.strictEqual(seen, expected.length, `not enough dialogues generated`);
}

/**
 * Entry point of this test module: runs every dialogue parser test case
 * sequentially, in fixture order, stopping at the first failure.
 *
 * Exported as the default export, and also invoked immediately when this
 * module has no parent module.
 *
 * @returns {Promise<void>}
 */
export default async function main() {
    for (const testId of DIALOGUE_PARSER_TEST_CASES.keys())
        await dialogueParserTest(testId);
}
if (!module.parent)
    main();
2 changes: 1 addition & 1 deletion travis/install-starter-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ srcdir=`dirname $0`/..
srcdir=`realpath $srcdir`

pip3 install --upgrade pip
which genienlp >/dev/null 2>&1 || pip3 install --user 'genienlp==v0.7.0a3'
which genienlp >/dev/null 2>&1 || pip3 install 'genienlp==v0.7.0a3'
which genienlp

mkdir -p $srcdir/test/embeddings

0 comments on commit 9e162a6

Please sign in to comment.