/
OpenIEITest.java
200 lines (178 loc) · 8.43 KB
/
OpenIEITest.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
package edu.stanford.nlp.naturalli;
import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import org.junit.Ignore;
import org.junit.Test;
import java.util.*;
import static org.junit.Assert.*;
/**
* Test the natural logic OpenIE extractor at {@link edu.stanford.nlp.naturalli.OpenIE}.
*
* @author Gabor Angeli
*/
public class OpenIEITest {
protected static StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties() {{
setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
setProperty("openie.splitter.threshold", "0.25");
setProperty("openie.ignoreaffinity", "false");
setProperty("openie.max_entailments_per_clause", "1000");
setProperty("openie.triple.strict", "true");
// setProperty("openie.splitter.model", "/home/gabor/tmp/clauseSearcher.ser.gz");
setProperty("ssplit.isOneSentence", "true");
setProperty("tokenize.class", "PTBTokenizer");
setProperty("tokenize.language", "en");
setProperty("enforceRequirements", "true");
}});
public CoreMap annotate(String text) {
Annotation ann = new Annotation(text);
pipeline.annotate(ann);
return ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
}
public void assertExtracted(String expected, String text) {
boolean found = false;
Collection<RelationTriple> extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
for (RelationTriple extraction : extractions) {
if (extraction.toString().equals("1.0\t" + expected)) {
found = true;
}
}
assertTrue("The extraction '" + expected + "' was not found in '" + text + "'", found);
}
public void assertExtracted(Set<String> expectedSet, String text) {
Collection<RelationTriple> extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
String actual = StringUtils.join(
extractions.stream().map(x -> x.toString().substring(x.toString().indexOf("\t") + 1).toLowerCase()).sorted(),
"\n");
String expected = StringUtils.join(expectedSet.stream().map(String::toLowerCase).sorted(), "\n");
assertEquals(expected, actual);
}
public void assertEntailed(String expected, String text) {
boolean found = false;
Collection<SentenceFragment> extractions = annotate(text).get(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
for (SentenceFragment extraction : extractions) {
if (extraction.toString().equals(expected)) {
found = true;
}
}
assertTrue("The sentence '" + expected + "' was not entailed from '" + text + "'", found);
}
@Test
public void testAnnotatorRuns() {
annotate("all cats have tails");
}
@Test
public void testBasicEntailments() {
assertEntailed("some cats have tails", "some blue cats have tails");
assertEntailed("blue cats have tails", "some blue cats have tails");
assertEntailed("cats have tails", "some blue cats have tails");
}
@Test
public void testBasicExtractions() {
assertExtracted("cats\thave\ttails", "some cats have tails");
}
@Test
public void testExtractionsGeorgeBoyd() {
assertExtracted(new HashSet<String>() {{
add("George Boyd\tjoined on\t21 february 2013");
add("George Boyd\tjoined for\tremainder");
add("George Boyd\tjoined for\tremainder of season");
add("George Boyd\tjoined on\tloan");
add("George Boyd\tjoined on\tloan from peterborough united");
}}, "On 21 February 2013 George Boyd joined on loan from Peterborough United for the remainder of the season.");
}
@Test
public void testExtractionsObamaWikiOne() {
assertExtracted(new HashSet<String>() {{
add("Barack Hussein Obama II\tis 44th and current President of\tUnited States");
add("Barack Hussein Obama II\tis 44th President of\tUnited States");
// add("Barack Hussein Obama II\tis current President of\tUnited States");
add("Barack Hussein Obama II\tis President of\tUnited States");
add("Barack Hussein Obama II\tis\tPresident");
// add("Barack Hussein Obama II\tis\tcurrent President");
add("Barack Hussein Obama II\tis\t44th President");
}}, "Barack Hussein Obama II is the 44th and current President of the United States, and the first African American to hold the office.");
}
@Test
@Ignore // TODO(gabor) dependency parse error.
public void testExtractionsObamaWikiTwo() {
assertExtracted(new HashSet<String>() {{
add("Obama\tis graduate of\tColumbia University");
// add("Obama\tis graduate of\tHarvard Law School");
add("Obama\tborn in\tHawaii");
add("Obama\tborn in\tHonolulu");
add("he\tserved as\tpresident of Harvard Law Review");
add("he\tserved as\tpresident");
add("he\tpresident of\tHarvard Law Review");
add("Obama\tis\tgraduate");
}}, "Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he served as president of the Harvard Law Review.");
}
@Test
public void testExtractionsObamaWikiThree() {
assertExtracted(new HashSet<String>() {{
add("He\twas\tcommunity organizer in Chicago");
add("He\twas\tcommunity organizer");
// add("He\tearning\tlaw degree");
add("He\tearning\this law degree");
}}, "He was a community organizer in Chicago before earning his law degree.");
}
@Test
public void testExtractionsObamaWikiFour() {
assertExtracted(new HashSet<String>() {{
add("He\tworked as\tcivil rights attorney");
add("He\tworked as\trights attorney");
add("He\ttaught\tconstitutional law");
add("He\ttaught\tlaw");
add("He\ttaught law at\tUniversity of Chicago Law School");
// add("He\ttaught law at\tUniversity of Chicago Law School from 1992");
add("He\ttaught law at\tUniversity");
add("He\ttaught law from\t1992 to 2004"); // shouldn't be here, but sometimes appears?
}}, "He worked as a civil rights attorney and taught constitutional law at the University of Chicago Law School from 1992 to 2004.");
}
@Test
public void testExtractionsObamaWikiFive() {
assertExtracted(new HashSet<String>() {{
add("He\tserved\tthree terms");
// note[gabor] Should get these
// add("He\trepresenting\t13th District in Illinois Senate");
// add("He\trepresenting\t13th District");
// add("He\trepresenting\tDistrict in Illinois Senate");
// add("He\trepresenting\tDistrict");
// add("He\trunning unsuccessfully for\tUnited States House of Representatives in 2000");
add("He\trunning unsuccessfully for\tUnited States House of Representatives");
add("He\trunning unsuccessfully for\tUnited States House");
// add("He\trunning for\tUnited States House of Representatives in 2000");
add("He\trunning for\tUnited States House of Representatives");
add("He\trunning for\tUnited States House");
add("He\trunning in\t2000");
add("He\trunning unsuccessfully in\t2000");
}}, "He served three terms representing the 13th District in the Illinois Senate from 1997 to 2004, running unsuccessfully for the United States House of Representatives in 2000.");
}
@Test
public void testExtractionsObamaWikiSix() {
assertExtracted(new HashSet<String>() {{
add("He\tdefeated\tRepublican nominee John McCain");
add("He\tdefeated\tnominee John McCain");
add("He\tdefeated nominee John McCain in\telection");
add("He\tdefeated nominee John McCain in\tgeneral election");
add("He\twas inaugurated as\tpresident on January 20 2009");
add("He\twas inaugurated as\tpresident");
add("He\twas\tinaugurated");
// note[gabor] these are wrong!
// add("nominee john mccain\twas\tinaugurated");
// add("nominee john mccain\twas inaugurated as\tpresident");
// add("nominee john mccain\twas inaugurated as\tpresident on january 20 2009");
// add("Republican nominee John McCain\twas\tinaugurated");
// add("Republican nominee John McCain\twas inaugurated as\tpresident");
// add("Republican nominee John McCain\twas inaugurated as\tpresident on january 20 2009");
// note[gabor] end wrong extractions
}}, "He then defeated Republican nominee John McCain in the general election, and was inaugurated as president on January 20, 2009.");
}
@Test
public void dummyTest() {
assertTrue(true);
}
}