From a19cd63c7e0becda6ccdfe0b5315d17feea1cb8f Mon Sep 17 00:00:00 2001
From: Yuhao Zhang
Date: Mon, 18 Feb 2019 23:49:50 -0800
Subject: [PATCH] Fix tests

---
 .travis.yml            |   2 +-
 tests/test_protobuf.py |   2 +-
 tests/test_read.py     | 145 -----------------------------------------
 3 files changed, 2 insertions(+), 147 deletions(-)
 delete mode 100644 tests/test_read.py

diff --git a/.travis.yml b/.travis.yml
index 53c31c5..001706e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,4 +6,4 @@ notifications:
 install:
   - pip install --quiet -e .
 script:
-  - pytest tests/test_read.py
+  - python -m pytest
diff --git a/tests/test_protobuf.py b/tests/test_protobuf.py
index ca7d5da..d86096c 100644
--- a/tests/test_protobuf.py
+++ b/tests/test_protobuf.py
@@ -14,7 +14,7 @@
 from stanfordnlp.protobuf import parseFromDelimitedString, writeToDelimitedString, to_text
 
 
-# Thext that was annotated
+# Text that was annotated
 TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
 
 
diff --git a/tests/test_read.py b/tests/test_read.py
deleted file mode 100644
index ca7d5da..0000000
--- a/tests/test_read.py
+++ /dev/null
@@ -1,145 +0,0 @@
-"""
-Tests to read a stored protobuf.
-Also serves as an example of how to parse sentences, tokens, pos, lemma,
-ner, dependencies and mentions.
-
-The test corresponds to annotations for the following sentence:
-    Chris wrote a simple sentence that he parsed with Stanford CoreNLP.
-"""
-
-import os
-from pytest import fixture
-from stanfordnlp.protobuf import Document, Sentence, Token, DependencyGraph,\
-    CorefChain
-from stanfordnlp.protobuf import parseFromDelimitedString, writeToDelimitedString, to_text
-
-
-# Thext that was annotated
-TEXT = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP.\n"
-
-
-@fixture
-def doc_pb():
-    test_dir = os.path.dirname(os.path.abspath(__file__))
-    test_data = os.path.join(test_dir, 'data', 'test.dat')
-    with open(test_data, 'rb') as f:
-        buf = f.read()
-    doc = Document()
-    parseFromDelimitedString(doc, buf)
-    return doc
-
-def test_parse_protobuf(doc_pb):
-    assert doc_pb.ByteSize() == 4239
-
-def test_write_protobuf(doc_pb):
-    stream = writeToDelimitedString(doc_pb)
-    buf = stream.getvalue()
-    stream.close()
-
-    doc_pb_ = Document()
-    parseFromDelimitedString(doc_pb_, buf)
-    assert doc_pb == doc_pb_
-
-def test_document_text(doc_pb):
-    assert doc_pb.text == TEXT
-
-
-def test_sentences(doc_pb):
-    assert len(doc_pb.sentence) == 1
-
-    sentence = doc_pb.sentence[0]
-    assert isinstance(sentence, Sentence)
-    # check sentence length
-    assert sentence.characterOffsetEnd - sentence.characterOffsetBegin == 67
-    # Note that the sentence text should actually be recovered from the tokens.
-    assert sentence.text == ''
-    assert to_text(sentence) == TEXT[:-1]
-
-
-def test_tokens(doc_pb):
-    sentence = doc_pb.sentence[0]
-    tokens = sentence.token
-    assert len(tokens) == 12
-    assert isinstance(tokens[0], Token)
-
-    # Word
-    words = "Chris wrote a simple sentence that he parsed with Stanford CoreNLP .".split()
-    words_ = [t.word for t in tokens]
-    assert words_ == words
-
-    # Lemma
-    lemmas = "Chris write a simple sentence that he parse with Stanford CoreNLP .".split()
-    lemmas_ = [t.lemma for t in tokens]
-    assert lemmas_ == lemmas
-
-    # POS
-    pos = "NNP VBD DT JJ NN IN PRP VBD IN NNP NNP .".split()
-    pos_ = [t.pos for t in tokens]
-    assert pos_ == pos
-
-    # NER
-    ner = "PERSON O O O O O O O O ORGANIZATION O O".split()
-    ner_ = [t.ner for t in tokens]
-    assert ner_ == ner
-
-    # character offsets
-    begin = [int(i) for i in "0 6 12 14 21 30 35 38 45 50 59 66".split()]
-    end = [int(i) for i in "5 11 13 20 29 34 37 44 49 58 66 67".split()]
-    begin_ = [t.beginChar for t in tokens]
-    end_ = [t.endChar for t in tokens]
-    assert begin_ == begin
-    assert end_ == end
-
-
-def test_dependency_parse(doc_pb):
-    """
-    Extract the dependency parse from the annotation.
-    """
-    sentence = doc_pb.sentence[0]
-
-    # You can choose from the following types of dependencies.
-    # In general, you'll want enhancedPlusPlus
-    assert sentence.basicDependencies.ByteSize() > 0
-    assert sentence.enhancedDependencies.ByteSize() > 0
-    assert sentence.enhancedPlusPlusDependencies.ByteSize() > 0
-
-    tree = sentence.enhancedPlusPlusDependencies
-    isinstance(tree, DependencyGraph)
-    # Indices are 1-indexd with 0 being the "pseudo root"
-    assert tree.root # 'wrote' is the root. == [2]
-    # There are as many nodes as there are tokens.
-    assert len(tree.node) == len(sentence.token)
-
-    # Enhanced++ depdencies often contain additional edges and are
-    # not trees -- here, 'parsed' would also have an edge to
-    # 'sentence'
-    assert len(tree.edge) == 12
-
-    # This edge goes from "wrote" to "Chirs"
-    edge = tree.edge[0]
-    assert edge.source == 2
-    assert edge.target == 1
-    assert edge.dep == "nsubj"
-
-
-def test_coref_chain(doc_pb):
-    """
-    Extract the corefence chains from the annotation.
-    """
-    # Coreference chains span sentences and are stored in the
-    # document.
-    chains = doc_pb.corefChain
-
-    # In this document there is 1 chain with Chris and he.
-    assert len(chains) == 1
-    chain = chains[0]
-    assert isinstance(chain, CorefChain)
-    assert chain.mention[0].beginIndex == 0 # 'Chris'
-    assert chain.mention[0].endIndex == 1
-    assert chain.mention[0].gender == "MALE"
-
-    assert chain.mention[1].beginIndex == 6 # 'he'
-    assert chain.mention[1].endIndex == 7
-    assert chain.mention[1].gender == "MALE"
-
-    assert chain.representative == 0 # Head of the chain is 'Chris'
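For reference, the delimited-protobuf round trip that test_write_protobuf exercises can also be run standalone. The following is a minimal sketch, not part of the patch, assuming the serialized annotation from the deleted test still lives at tests/data/test.dat; it uses only the helpers the tests themselves import from stanfordnlp.protobuf:

    # Sketch only: the path below is the deleted test's fixture location,
    # not a packaged resource.
    from stanfordnlp.protobuf import Document, parseFromDelimitedString, \
        writeToDelimitedString, to_text

    with open('tests/data/test.dat', 'rb') as f:
        buf = f.read()

    # Deserialize the annotated document from its length-delimited encoding.
    doc = Document()
    parseFromDelimitedString(doc, buf)

    # Serialize it back out and parse it again; the round trip should be lossless.
    stream = writeToDelimitedString(doc)
    doc_copy = Document()
    parseFromDelimitedString(doc_copy, stream.getvalue())
    stream.close()
    assert doc == doc_copy

    # Sentence text is stored on the tokens, so recover it with to_text().
    print(to_text(doc.sentence[0]))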