-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
EnglishGrammaticalRelations.java
1805 lines (1638 loc) · 93.3 KB
/
EnglishGrammaticalRelations.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Stanford Dependencies - Code for producing and using Stanford dependencies.
// Copyright © 2005-2014,2019 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see http://www.gnu.org/licenses/ .
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 2A
// Stanford CA 94305-9020
// USA
// parser-support@lists.stanford.edu
// http://nlp.stanford.edu/software/stanford-dependencies.html
package edu.stanford.nlp.trees;
import static edu.stanford.nlp.trees.EnglishPatterns.*;
import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Generics;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import static edu.stanford.nlp.trees.GrammaticalRelation.*;
/**
* {@code EnglishGrammaticalRelations} is a
* set of {@link GrammaticalRelation} objects for the English language.
* These relations are commonly called Stanford Dependencies (SD).
*
* Grammatical relations can either be shown in their basic form, where each
* input token receives a relation, or "collapsed", which applies certain normalizations
* that group words or turn them into relations. See
* {@link EnglishGrammaticalStructure}. What is presented here mainly
* shows the basic form, though there is some mixture. The "collapsed" grammatical
* relations primarily differ as follows:
* <ul>
* <li>Some multiword conjunctions and prepositions are treated as single
* words, and then processed as below.</li>
* <li>Prepositions do not appear as words but are turned into new "prep" or "prepc"
* grammatical relations, one for each preposition.</li>
* <li>Conjunctions do not appear as words but are turned into new "conj"
* grammatical relations, one for each conjunction.</li>
* <li>The possessive "'s" is deleted, leaving just the relation between the
* possessor and possessum.</li>
* <li>Agents of passive sentences are recognized and marked as agent and not as prep_by.</li>
* </ul>
* <br>
* This set of English grammatical relations is not intended to be
* exhaustive or immutable. It's just where we're at now.
* <br>
* <br>
* See {@link GrammaticalRelation} for details of fields and matching.
* <br>
* <br>
* If using LexicalizedParser, it should be run with the
* {@code -retainTmpSubcategories} option and one of the
* {@code -splitTMP} options (e.g., {@code -splitTMP 1}) in order to
* get the temporal NP dependencies maximally right!
* <br>
* <i>Implementation notes: </i> Don't change the set of GRs without discussing it
* with people first. If a change is needed, to add a new grammatical relation:
* <ul>
* <li> Governor nodes of the grammatical relations should be the lowest ones.</li>
* <li> Check the semantic head rules in SemanticHeadFinder and
* ModCollinsHeadFinder, both in the trees package. That's what will be used to
* match here.</li>
* <li> Create and define the GrammaticalRelation similarly to the others.</li>
* <li> Add it to the {@code values} array at the end of the file.</li>
* </ul>
* The patterns in this code assume that an NP may be followed by either a
* -ADV or -TMP functional tag but there are no other functional tags represented.
* This corresponds to what we currently get from NPTmpRetainingTreeNormalizer or
* DependencyTreeTransformer.
*
* @author Bill MacCartney
* @author Marie-Catherine de Marneffe
* @author Christopher Manning
* @author Galen Andrew (refactoring English-specific stuff)
* @see GrammaticalStructure
* @see GrammaticalRelation
* @see EnglishGrammaticalStructure
*/
public class EnglishGrammaticalRelations {
// TODO: Things still to fix: comparatives, "in order to" clauses, automatic Vadas-like NP structure
/**
 * Not meant to be instantiated: this class is only a holder for static
 * members that together act a bit like an enum.
 */
private EnglishGrammaticalRelations() {
  // intentionally empty
}
// The HeadFinder is deliberately null: if the HeadFinder for the
// dependency tregexes has been set incorrectly, we find out right
// away at runtime.
private static final TregexPatternCompiler tregexCompiler =
    new TregexPatternCompiler((HeadFinder) null);
/**
 * The "predicate" grammatical relation. For a clause, the predicate is
 * the main VP of that clause; for a subject, it is the predicate of the
 * clause to which the subject belongs.
 * <br>
 * Example: <br>
 * "Reagan died" → {@code pred}(Reagan, died)
 */
public static final GrammaticalRelation PREDICATE = new GrammaticalRelation(
    Language.English, "pred", "predicate",
    DEPENDENT,
    "S|SINV",
    tregexCompiler,
    "S|SINV <# VP=target");
/**
* The "auxiliary" grammatical relation. An auxiliary of a clause is a
* non-main verb of the clause.
* <br>
* Example: <br>
* "Reagan has died" → {@code aux}(died, has)
*/
public static final GrammaticalRelation AUX_MODIFIER =
new GrammaticalRelation(Language.English, "aux", "auxiliary",
DEPENDENT, "VP|SQ|SINV|CONJP", tregexCompiler,
// a TO, MD, VB*, AUX(G) or POS child of a VP that also has a VP child
"VP < VP < (/^(?:TO|MD|VB.*|AUXG?|POS)$/=target)",
// a verb, modal or AUX in an SQ/SINV with a VP or ADJP sister somewhere to its right
"SQ|SINV < (/^(?:VB|MD|AUX)/=target $++ /^(?:VP|ADJP)/)",
"CONJP < TO=target < VB", // e.g. "to" in (CONJP not to mention)
// add handling of tricky VP fronting cases: the target is a VP whose
// verb matches beAuxiliaryRegex and which follows a sister VP containing a VBG
"SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBG))");
/**
* The "passive auxiliary" grammatical relation. A passive auxiliary of a
* clause is a non-main verb of the clause which contains the passive
* information.
* <br>
* Example: <br>
* "Kennedy has been killed" → {@code auxpass}(killed, been)
*/
public static final GrammaticalRelation AUX_PASSIVE_MODIFIER =
new GrammaticalRelation(Language.English, "auxpass", "passive auxiliary",
AUX_MODIFIER, "VP|SQ|SINV", tregexCompiler,
// a verb matching passiveAuxWordRegex in a VP that also has a VP|ADJP child
// containing a VBN/VBD, either directly or inside a coordinated VP|ADJP
"VP < (/^(?:VB|AUX|POS)/=target < " + passiveAuxWordRegex + " ) < (VP|ADJP [ < VBN|VBD | < (VP|ADJP < VBN|VBD) < CC ] )",
// a verb matching beAuxiliaryRegex in an SQ/SINV, with a VP containing VBD/VBN to its right
"SQ|SINV < (/^(?:VB|AUX|POS)/=target < " + beAuxiliaryRegex + " $++ (VP < VBD|VBN))",
// add handling of tricky VP fronting cases...
"SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBD|VBN))",
// same as the previous pattern, but with the verb one VP level deeper inside the target
"SINV < (VP=target < (VP < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ")) $-- (VP < VBD|VBN))");
/**
* The "copula" grammatical relation. A copula is the relation between
* the complement of a copular verb and the copular verb.
* <br>
* Examples: <br>
* "Bill is big" → {@code cop}(big, is) <br>
* "Bill is an honest man" → {@code cop}(man, is)
*/
public static final GrammaticalRelation COPULA =
new GrammaticalRelation(Language.English, "cop", "copula",
AUX_MODIFIER, "VP|SQ|SINV|SBARQ", tregexCompiler,
// a verb matching copularWordRegex inside a VP, with an ADJP/NP/WHNP or a
// single-ADJP S somewhere to its right
"VP < (/^(?:VB|AUX)/=target < " + copularWordRegex + " [ $++ (/^(?:ADJP|NP$|WHNP$)/ !< (VBN|VBD !$++ /^N/)) | $++ (S <: (ADJP < JJ)) ] )",
// the same idea for inverted and question clauses
"SQ|SINV < (/^(?:VB|AUX)/=target < " + copularWordRegex + " [ $++ (ADJP !< VBN|VBD) | $++ (NP $++ NP) | $++ (S <: (ADJP < JJ)) ] )",
// matches (what, is) in "what is that" after the SQ has been flattened out of the tree
"SBARQ < (/^(?:VB|AUX)/=target < " + copularWordRegex + ") < (WHNP < WP)",
// "Such a great idea this was"
"SINV <# (NP $++ (NP $++ (VP=target < (/^(?:VB|AUX)/ < " + copularWordRegex + "))))");
/**
* The "conjunct" grammatical relation. A conjunct is the relation between
* two elements connected by a conjunction word. We treat conjunctions
* asymmetrically: The head of the relation is the first conjunct and the other
* conjuncts depend on it via the <i>conj</i> relation.
* <br>
* Example: <br>
* "Bill is big and honest" → {@code conj}(big, honest)
* <br>
* <i>Note:</i> Modified in 2010 to exclude the case of a CC/CONJP first in its
* phrase: it has to conjoin things.
*/
public static final GrammaticalRelation CONJUNCT =
new GrammaticalRelation(Language.English, "conj", "conjunct",
DEPENDENT, "VP|(?:WH)?NP(?:-TMP|-ADV)?|ADJP|PP|QP|ADVP|UCP(?:-TMP|-ADV)?|S|NX|SBAR|SBARQ|SINV|SQ|JJP|NML|RRC", tregexCompiler,
// clausal/verbal phrase: the node immediately after a CC/CONJP that is
// not itself first in the phrase
"VP|S|SBAR|SBARQ|SINV|SQ|RRC < (CC|CONJP $-- !/^(?:``|-LRB-|PRN|PP|ADVP|RB)/ $+ !/^(?:SBAR|PRN|``|''|-[LR]RB-|,|:|\\.)$/=target)",
// This case is separated out from the previous case to
// avoid conflicts with advcl when you have phrases such as
// "but only because ..."
"SBAR < (CC|CONJP $-- @SBAR $+ @SBAR=target)",
// non-parenthetical or comma in suitable phrase with conj then adverb to left
"VP|S|SBAR|SBARQ|SINV|SQ|RRC < (CC|CONJP $-- !/^(?:``|-LRB-|PRN|PP|ADVP|RB)/ $+ (ADVP $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target))",
// content phrase to the right of a comma or a parenthetical
// The test at the end is to make sure that a conjunction or
// comma etc actually show up between the target of the conj
// dependency and the head of the phrase. Otherwise, a
// different relationship is probably more appropriate.
// Note that this test looks for one of two things: a
// cc/conjp which does not have a , between it and the
// target or a , which does not appear to the right of a
// cc/conjp. This test eliminates things such as
// parentheticals which come after a list, such as in the
// sentence "to see the market go down and dump everything,
// which ..." where "go down and dump everything, which..."
// is all in one VP node.
"VP|S|SBAR|SBARQ|SINV|SQ=root < (CC|CONJP $-- !/^(?:``|-LRB-|PRN|PP|ADVP|RB)/) < (/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/ $+ (/^S|SINV$|^(?:A|N|V|PP|PRP|J|W|R)/=target [$-- (CC|CONJP $-- (__ ># =root) !$++ (/^:|,$/ $++ =target)) | $-- (/^:|,$/ $-- (__ ># =root) [!$-- /^CC|CONJP$/ | $++ (=target < (/^,$/ $++ (__ ># =target)))])] ) )",
// non-parenthetical or comma in suitable phrase with conjunction to left
"/^(?:ADJP|JJP|PP|QP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ [ < (CC|CONJP $-- !/^(?:``|-LRB-|PRN)$/ $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target) | < " + ETC_PAT_target + " | < " + FW_ETC_PAT_target + "]",
// non-parenthetical or comma in suitable phrase with conj then adverb to left
"/^(?:ADJP|PP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ < (CC|CONJP $-- !/^(?:``|-LRB-|PRN)$/ $+ (ADVP $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target))",
// content phrase to the right of a comma or a parenthetical
"/^(?:ADJP|PP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ [ < (CC|CONJP $-- !/^(?:``|-LRB-|PRN)$/) | < " + ETC_PAT + " | < " + FW_ETC_PAT + "] < (/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/ [ $+ /^S|SINV$|^(?:A|N|V|PP|PRP|J|W|R)/=target | $+ " + ETC_PAT_target + " ] )",
// content phrase to the left of a comma for at least NX
"NX|NML [ < (CC|CONJP $- __) | < " + ETC_PAT + "] < (/^,$/ $- /^(?:A|N|V|PP|PRP|J|W|R|S)/=target)",
// to take the conjunct in a preconjunct structure "either X or Y"
// also catches some missing examples of etc as conj
"/^(?:VP|S|SBAR|SBARQ|SINV|ADJP|PP|QP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ [ < (CC $++ (CC|CONJP $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target)) | <- " + ETC_PAT_target + " | <- " + FW_ETC_PAT_target + " ]");
/**
 * The "coordination" grammatical relation: the relation that holds
 * between an element and a conjunction.
 * <br>
 * Example: <br>
 * "Bill is big and honest." → {@code cc}(big, and)
 */
public static final GrammaticalRelation COORDINATION = new GrammaticalRelation(
    Language.English, "cc", "coordination",
    DEPENDENT,
    ".*",
    tregexCompiler,
    "__ [ < (CC=target !< /^(?i:either|neither|both)$/ ) | < (CONJP=target !< (RB < /^(?i:not)$/ $+ (RB|JJ < /^(?i:only|just|merely)$/))) ]");
/**
 * The "punctuation" grammatical relation. It covers any piece of
 * punctuation in a clause, provided punctuation is being retained in
 * the typed dependencies.
 * <br>
 * Example: <br>
 * "Go home!" → {@code punct}(Go, !)
 * <br>
 * An NFP only appears here when it does not match the emoticon patterns
 * under discourse.
 */
public static final GrammaticalRelation PUNCTUATION = new GrammaticalRelation(
    Language.English, "punct", "punctuation",
    DEPENDENT,
    ".*",
    tregexCompiler,
    "__ < /^(?:\\.|:|,|''|``|\\*|-LRB-|-RRB-|HYPH)$/=target",
    "__ < (NFP=target !< " + WESTERN_SMILEY + " !< " + ASIAN_SMILEY + ")");
/**
 * The "argument" grammatical relation. A subject or complement of a VP
 * is an argument of that VP; the arguments of a clause are the arguments
 * of the VP which is the predicate of that clause.
 * <br>
 * Example: <br>
 * "Clinton defeated Dole" → {@code arg}(defeated, Clinton), {@code arg}(defeated, Dole)
 */
public static final GrammaticalRelation ARGUMENT = new GrammaticalRelation(
    Language.English,
    "arg",
    "argument",
    DEPENDENT);
/**
 * The "subject" grammatical relation. The noun or clause that performs
 * or experiences a VP is the subject of that VP; the subject of a clause
 * is the subject of the VP which is the predicate of that clause.
 * <br>
 * Examples: <br>
 * "Clinton defeated Dole" → {@code subj}(defeated, Clinton) <br>
 * "What she said is untrue" → {@code subj}(is, What she said)
 */
public static final GrammaticalRelation SUBJECT = new GrammaticalRelation(
    Language.English,
    "subj",
    "subject",
    ARGUMENT);
/**
* The "nominal subject" grammatical relation. A nominal subject is
* a subject which is a noun phrase.
* <br>
* Example: <br>
* "Clinton defeated Dole" → {@code nsubj}(defeated, Clinton)
*/
public static final GrammaticalRelation NOMINAL_SUBJECT =
new GrammaticalRelation(Language.English, "nsubj", "nominal subject",
SUBJECT, "S|SQ|SBARQ|SINV|SBAR|PRN", tregexCompiler,
// basic case: a non-expletive NP/WHNP not headed by a time word, with a VP to its right
"S=subj < ((NP|WHNP=target !< EX !<# (/^NN/ < (" + timeWordRegex + "))) $++ VP=verb) : (=subj !> VP | !<< (=verb < TO))",
// an NP headed by a time word, when no other NP follows it before the VP
"S < ( NP=target <# (/^NN/ < " + timeWordRegex + ") !$++ NP $++VP)",
"SQ|PRN < (NP=target !< EX $++ VP)",
"SQ < (NP=target !< EX $- (/^(?:VB|AUX)/ < " + copularWordRegex + ") !$++ VP)",
// Allows us to match "Does it?" without matching "Who does it?"
"SQ < (NP=target !< EX $- /^(?:VB|AUX)/ !$++ VP) !$-- NP|WHNP",
"SQ < ((NP=target !< EX) $- (RB $- /^(?:VB|AUX)/) ![$++ VP])",
"SBARQ < WHNP=target < (SQ < (VP !$-- NP))",
// This will capture incorrectly parsed trees in sentences
// such as "What disease causes cancer" without capturing
// correctly parsed trees such as "What do elephants eat?"
"SBARQ < WHNP=target < (SQ < ((/^(?:VB)/ !< " + copularWordRegex + ") !$-- NP !$++ VP))",
"SBARQ < (SQ=target < (/^(?:VB|AUX)/ < " + copularWordRegex + ") !< VP)",
// matches subj in SINV
"SINV < (NP|WHNP=target [ $- VP|VBZ|VBD|VBP|VB|MD|AUX | $- (@RB|ADVP $- VP|VBZ|VBD|VBP|VB|MD|AUX) | !$- __ !$ @NP] )",
// Another SINV subj, such as "Such a great idea this was"
"SINV < (NP $++ (NP=target $++ (VP < (/^(?:VB|AUX)/ < " + copularWordRegex + "))))",
// matches subj in xcomp like "He considered him a friend"
"S < (NP=target $+ NP|ADJP) > VP",
// matches subj in relative clauses
"SBAR < WHNP=target [ < (S < (VP !$-- NP) !< SBAR) | < (VP !$-- NP) !< S ]", // second disjunct matches errors where there is no S under SBAR and otherwise does no harm
// matches subj in relative clauses
"SBAR !< WHNP < (S !< (NP $++ VP)) > (VP > (S $- WHNP=target))",
// matches subj in existential "there" SQ
"SQ < ((NP < EX) $++ NP=target)",
// matches subj in existential "there" S
"S < (NP < EX) <+(VP) (VP < NP=target)",
// matches (what, that) in "what is that" after the SQ has been flattened out of the tree
"SBARQ < (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (WHNP < WP) < NP=target",
// matches (what, wrong) in "what is wrong with ..." after the SQ has been flattened out of the tree
// note that in that case "wrong" is taken as the head thanks to SemanticHeadFinder hackery
// The !$++ matches against (what, worth) in What is UAL stock worth?
"SBARQ < (WHNP=target $++ ((/^(?:VB|AUX)/ < " + copularWordRegex + ") $++ ADJP=adj !$++ (NP $++ =adj)))",
// the (NP < EX) matches (is, WHNP) in "what dignity is there in ..."
// the PP matches (is, WHNP) in "what is on the test"
"SBARQ <1 WHNP=target < (SQ < (/^(?:VB|AUX)/ < " + copularWordRegex + ") [< (NP < EX) | < PP])");
/**
 * The "nominal passive subject" grammatical relation: a subject of a
 * passive which is a noun phrase.
 * <br>
 * Example: <br>
 * "Dole was defeated by Clinton" → {@code nsubjpass}(defeated, Dole)
 * <p>
 * Only basic (non-coordinated) examples are recognized by this pattern.
 * The coordinated examples are currently handled by correctDependencies()
 * in EnglishGrammaticalStructure, which seemed more accurate than any
 * tregex expression we could come up with.
 */
public static final GrammaticalRelation NOMINAL_PASSIVE_SUBJECT = new GrammaticalRelation(
    Language.English, "nsubjpass", "nominal passive subject",
    NOMINAL_SUBJECT,
    "S|SQ",
    tregexCompiler,
    "S|SQ < (WHNP|NP=target !< EX) < (VP < (/^(?:VB|AUX)/ < " + passiveAuxWordRegex + ") < (VP < VBN|VBD))");
/**
 * The "clausal subject" grammatical relation: a subject which is a
 * clause.
 * <br>
 * Examples: (subject is "what she said" in both examples) <br>
 * "What she said makes sense" → {@code csubj}(makes, said) <br>
 * "What she said is untrue" → {@code csubj}(untrue, said)
 */
public static final GrammaticalRelation CLAUSAL_SUBJECT = new GrammaticalRelation(
    Language.English, "csubj", "clausal subject",
    SUBJECT,
    "S",
    tregexCompiler,
    "S < (SBAR|S=target !$+ /^,$/ $++ (VP !$-- NP))");
/**
 * The "clausal passive subject" grammatical relation: a subject of a
 * passive verb which is a clause.
 * <br>
 * Example: (subject is "that she lied") <br>
 * "That she lied was suspected by everyone" → {@code csubjpass}(suspected, lied)
 */
public static final GrammaticalRelation CLAUSAL_PASSIVE_SUBJECT = new GrammaticalRelation(
    Language.English, "csubjpass", "clausal passive subject",
    CLAUSAL_SUBJECT,
    "S",
    tregexCompiler,
    "S < (SBAR|S=target !$+ /^,$/ $++ (VP < (VP < VBN|VBD) < (/^(?:VB|AUXG?)/ < " + passiveAuxWordRegex + ") !$-- NP))",
    "S < (SBAR|S=target !$+ /^,$/ $++ (VP <+(VP) (VP < VBN|VBD > (VP < (/^(?:VB|AUX)/ < " + passiveAuxWordRegex + "))) !$-- NP))");
/**
 * The "complement" grammatical relation. Any object (direct or indirect)
 * of a VP is a complement of that VP, as is a clause or adjectival phrase
 * which functions like an object; a complement of a clause is a
 * complement of the VP which is the predicate of that clause.
 * <br>
 * Examples: <br>
 * "She gave me a raise" →
 * {@code comp}(gave, me),
 * {@code comp}(gave, a raise) <br>
 * "I like to swim" →
 * {@code comp}(like, to swim)
 */
public static final GrammaticalRelation COMPLEMENT = new GrammaticalRelation(
    Language.English,
    "comp",
    "complement",
    ARGUMENT);
/**
 * The "object" grammatical relation. Any direct object or indirect
 * object of a VP is an object of that VP; an object of a clause is an
 * object of the VP which is the predicate of that clause.
 * <br>
 * Examples: <br>
 * "She gave me a raise" →
 * {@code obj}(gave, me),
 * {@code obj}(gave, raise)
 */
public static final GrammaticalRelation OBJECT = new GrammaticalRelation(
    Language.English,
    "obj",
    "object",
    COMPLEMENT);
/**
* The "direct object" grammatical relation. The direct object
* of a verb is the noun phrase which is the (accusative) object of
* the verb; the direct object of a clause or VP is the direct object of
* the head predicate of that clause.
* <br>
* Example: <br>
* "She gave me a raise" →
* {@code dobj}(gave, raise) <br>
* Note that dobj can also be assigned by the conversion of rel in the postprocessing.
*/
public static final GrammaticalRelation DIRECT_OBJECT =
new GrammaticalRelation(Language.English, "dobj", "direct object",
OBJECT, "VP|SQ|SBARQ?", tregexCompiler,
// basic case: an NP/WHNP child of a VP whose verb matches neither copularWordRegex
// nor clausalComplementRegex; time-word NPs are only allowed as the target when
// another temporal NP immediately follows
"VP !< (/^(?:VB|AUX)/ [ < " + copularWordRegex + " | < " + clausalComplementRegex + " ]) < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] ) " +
// The next qualification eliminates parentheticals that
// come after the actual dobj
" <# (__ !$++ (NP $++ (/^[:]$/ $++ =target))) ",
// Examples such as "Rolls-Royce expects sales to remain steady"
"VP < (S < (NP|WHNP=target $++ (VP < TO)))",
// This matches rare cases of misparses, such as "What
// disease causes cancer?" where the "causes" does not get a
// surrounding VP. Hopefully it does so without overlapping
// any other dependencies.
"SQ < (/^(?:VB)/=verb !< " + copularWordRegex + ") $-- WHNP !< VP !< (/^(?:VB)/ ! == =verb) < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] )",
// The rule for Wh-questions
// cdm Jul 2010: No longer require WHNP as first child of SBARQ below: often not because of adverbials, quotes, etc., and removing restriction does no harm
// this next pattern used to assume no empty NPs. Corrected.
// One could require the VP at the end of the <+ to also be !< (/^(?:VB|AUX)/ $. SBAR) . This would be right for complement SBAR, but often avoids good matches for adverbial SBAR. Adding it kills 4 good matches for avoiding 2 wrong matches on sum of TB3-train and EWT
"SBARQ < (WHNP=target !< WRB !<# (/^NN/ < " + timeWordRegex + ")) <+(SQ|SINV|S|VP) (VP !< NP|TO !< (S < (VP < TO)) !< (/^(?:VB|AUX)/ < " + copularWordRegex + " $++ (VP < VBN|VBD)) !< (PP <: IN|TO) $-- (NP !< /^-NONE-$/))",
// matches direct object in relative clauses with relative pronoun "I saw the book that you bought". Seems okay. If this is changed, also change the pattern for "rel"
// TODO: this can occasionally produce incorrect dependencies, such as the sentence
// "with the way which his split-fingered fastball is behaving"
// eg take a tree where the verb doesn't have an object
"SBAR < (WHNP=target !< WRB) < (S < NP < (VP !< SBAR !<+(VP) (PP <- IN|TO) !< (S < (VP < TO))))",
// // matches direct object for long dependencies in relative clause without explicit relative pronouns
// "SBAR !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !$-- CC $-- NP > NP=target " +
// // avoid conflicts with rcmod. TODO: we could look for
// // empty nodes in this kind of structure and use that to
// // find dobj, tmod, advmod, etc. won't help the parser,
// // of course, but will help when converting a treebank
// // which contains empties
// // Example: "with the way his split-fingered fastball is behaving"
// "!($-- @NP|WHNP|NML > @NP|WHNP <: (S !< (VP < TO)))",
// If there was an NP between the WHNP and the ADJP, we want
// that NP to have the nsubj relation, and the WHNP is either
// a dobj or a pobj instead. For example, dobj(What, worth)
// in "What is UAL stock worth?"
"SBARQ < (WHNP=target $++ ((/^(?:VB|AUX)/ < " + copularWordRegex + ") $++ (ADJP=adj !< (PP !< NP)) $++ (NP $++ =adj)))"
// The remaining comments record patterns that were tried and disabled; they are kept for history.
// Now allow $++ in main pattern above so don't need this.
// "SBAR !< (WHPP|WHNP|WHADVP) < (S < (@NP $+ (ADVP $+ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO))))) !$-- CC $-- NP > NP=target"
// Excluding BE doesn't allow cases of NP-PRD followed by NP-TMP or NP-LOC like "These are Europeans next door."
// Doc said: case with an iobj before dobj as two regular NPs. (This won't match if second one is explicitly NP-TMP.) But basic case covers this case. Does nothing.
// "VP < (NP $+ (NP|WHNP=target !< (/^NN/ < " + timeWordLotRegex + "))) !<(/^(?:VB|AUX)/ < " + copularWordRegex + ")", // this time one also included "lot"
// Doc said: match "give it next week". CDM 2013: I think this was put in to handle parse errors where the 2 NPs of a ditransitive were grouped into 1. But it is in principle wrong, and including it seems to be a no-op on TB3 WSJ. So exclude for now.
// "VP < (NP < (NP $+ (/^(NP|WHNP)$/=target !< (/^NN/ < " + timeWordLotRegex + "))))!< (/^(?:VB|AUX)/ < " + copularWordRegex + ")", // this time one also included "lot"
// Doc said: matches direct object in relative clauses "I saw the book that you said you bought". But it didn't seem to determine anything.
// This was various attempts at handling a long distance dependency, but that doesn't work; now handled through rel mechanism.
// "SBAR !< WHNP|WHADVP < (S < (@NP $++ (VP !$++ NP))) > (VP > (S < NP $- WHNP=target))",
// "SBAR !< WHNP|WHADVP|IN < (S < @NP < (VP !< (NP !<<# " + timeWordRegex + "))) > (VP > (S < NP $- WHNP=target))",
// "S < (@NP !< /^-NONE-$/) <+(VP) (VP !< (@NP !< /^-NONE-$/ < (/^VB/ !< " + copularWordRegex + ")) !< CONJP|CC|SBAR) > (@SBAR !< @WHNP|WHADVP $- /^VB/ >+(VP|S|SBAR) (S < (@NP !< /^-NONE-$/ !<<# " + timeWordRegex + ") $- (@WHNP=target !< /^-NONE-$/ !<# WRB)))",
// we now don't match "VBG > PP $+ NP=target", since it seems better to CM to regard these quasi preposition uses (like "including soya") as prepositions rather than verbs with objects -- that's certainly what the phrase structure at least suggests in the PTB. They're now matched as pobj
);
/**
* The "indirect object" grammatical relation. The indirect
* object of a VP is the noun phrase which is the (dative) object
* of the verb; the indirect object of a clause is the indirect
* object of the VP which is the predicate of that clause.
* <br>
* Example: <br>
* "She gave me a raise" →
* {@code iobj}(gave, me)
*/
public static final GrammaticalRelation INDIRECT_OBJECT =
new GrammaticalRelation(Language.English, "iobj", "indirect object",
OBJECT, "VP", tregexCompiler,
// an NP immediately followed by another NP inside a VP, where neither is
// headed by a time word and the first contains no "$"
"VP < (NP=target !< /\\$/ !<# (/^NN/ < " + timeWordRegex + ") $+ (NP !<# (/^NN/ < " + timeWordRegex + ")))",
// this next one was meant to fix common mistakes of our parser, but is perhaps too dangerous to keep
// excluding selfRegex leaves out phrases such as "I cooked dinner myself"
// excluding DT leaves out phrases such as "My dog ate it all"
"VP < (NP=target < (NP !< /\\$/ $++ (NP !<: (PRP < " + selfRegex + ") !<: DT !< (/^NN/ < " + timeWordLotRegex + ")) !$ CC|CONJP !$ /^,$/ !$++ /^:$/))");
/**
* The "prepositional object" grammatical relation. The object of a
* preposition is the head of a noun phrase following the preposition, or
* the adverbs "here" and "there".
* (The preposition in turn may be modifying a noun, verb, etc.)
* We here define cases of VBG quasi-prepositions like "including",
* "concerning", etc. as instances of pobj (unlike the Penn Treebank).
* <br>
* Example: <br>
* "I sat on the chair" →
* {@code pobj}(on, chair)
* <br>
* (The preposition can be called a FW for pace, versus, etc. It can also
* be called a CC - but we don't currently handle that and would need to
* distinguish from conjoined PPs.) Jan 2010 update: We now insist that the
* NP must follow the preposition. This prevents a preceding NP measure
* phrase being matched as a pobj. We do allow a preposition tagged RB
* followed by an NP pobj, as happens in the Penn Treebank for adverbial uses
* of PP like "up 19%".
*/
public static final GrammaticalRelation PREPOSITIONAL_OBJECT =
new GrammaticalRelation(Language.English, "pobj", "prepositional object",
OBJECT, "SBARQ|PP(?:-TMP)?|WHPP|PRT|ADVP|WHADVP|XS", tregexCompiler,
// basic case: an NP/ADJP (optionally WH-, -TMP or -ADV) to the right of the
// preposition-like word, and not immediately preceded by another NP
"/^(?:PP(?:-TMP)?|(?:WH)?(?:PP|ADVP))$/ < (SYM|IN|VBG|VBN|TO|FW|RB|RBR $++ (/^(?:WH)?(?:NP|ADJP)(?:-TMP|-ADV)?$/=target !$- @NP) !< /^(?i:not)$/)",
// We allow ADVP with NP objects for cases like (ADVP earlier this year)
"/^PP(?:-TMP)?$/ < (/^(?:IN|VBG|VBN|TO)$/ $+ (ADVP=target [ < (RB < /^(?i:here|there)$/) | < (ADVP < /^NP(?:-TMP)?$/) ] ))",
// second disjunct of the pattern above is weird ADVP, only matches 1 tree in 2-21
// to deal with preposition stranding in questions (e.g., "Which city do you live in?") -- the preposition is sometimes treated as a particle by the parser (works well but doesn't preserve the tree structure!)
"PRT >- (VP !< (S < (VP < TO)) >+(SQ|SINV|S|VP) (SBARQ <, (WHNP=target !< WRB)) $-- (NP !< /^-NONE-$/))",
"(PP <: IN|TO) >- (VP !< (S < (VP < TO)) >+(SQ|SINV|S|VP) (SBARQ <, (WHNP=target !< WRB)) $-- (NP !< /^-NONE-$/))",
"(PP <: IN|TO) $- (NP $-- (VBZ|VBD) !$++ VP) >+(SQ) (SBARQ <, (WHNP=target !< WRB)) $-- (NP !< /^-NONE-$/)",
"XS|ADVP < (IN < /^(?i:at)$/) < JJS|DT=target", // at least, at most, at best, at worst, at all
//"PP < (CC < less) < NP",
"@PP < CC < @NP=target !< @IN|TO|VBG|VBN|RB|RP|PP", // for cases where "preposition" like "plus", "but", or "versus"
// to handle "in and out of government"
"@WHPP|PP < (@WHPP|PP $++ (CC|CONJP $++ (@WHPP|PP $+ (NP=target !$+ __))))",
// to handle "What weapon is Apollo most proficient with?"
"SBARQ < (WHNP=target $++ ((/^(?:VB|AUX)/ < " + copularWordRegex + ") $++ (ADJP=adj < (PP !< NP)) $++ (NP $++ =adj)))");
/**
* The "prepositional complement" grammatical relation.
* This is used when the complement of a preposition is a clause or
* an adverbial or prepositional phrase.
* The prepositional complement of
* a preposition is the head of the sentence following the preposition,
* or the preposition head of the PP.
* <br>
* Examples: <br>
* "We have no useful information on whether users are at risk" →
* {@code pcomp}(on, are) <br>
* "They heard about you missing classes." →
* {@code pcomp}(about, missing) <br>
* "It is warmer in Greece than in Italy" →
* {@code pcomp}(than, in)
*/
public static final GrammaticalRelation PREPOSITIONAL_COMPLEMENT =
new GrammaticalRelation(Language.English, "pcomp", "prepositional complement",
COMPLEMENT, "(?:WH)?PP(?:-TMP)?", tregexCompiler,
"@PP|WHPP < (IN|VBG|VBN|TO $+ @SBAR|S|PP|ADVP=target)", // no intervening NP; VBN is for "compared with"
"@PP|WHPP < (RB $+ @SBAR|S=target)", // RB is for weird tagging like "after/RB adjusting for inflation"
// PP with no IN or TO child of its own: the target SBAR starts with an IN that is
// immediately followed by an S
"@PP|WHPP !< IN|TO < (SBAR=target <, (IN $+ S))");
// /**
// * The "attributive" grammatical relation. The attributive is the complement of a
// * verb such as "to be, to seem, to appear".
// * <p>
// * These mainly occur in questions. Arguably they shouldn't and we should treat the question
// * WHNP and WHADJP as predicates (as we do for ADJP and NP complements (NP-PRD and ADJP-PRD)),
// * but we at present don't produce this.
// */
// public static final GrammaticalRelation ATTRIBUTIVE =
// new GrammaticalRelation(Language.English, "attr", "attributive",
// COMPLEMENT, "VP|SBARQ|SQ", tregexCompiler,
// new String[] {
// "VP < NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + ") !$ (NP < EX)",
// // "What is that?"
// "SBARQ < (WHNP|WHADJP=target $+ (SQ < (/^(?:VB|AUX)/ < " + copularWordRegex + " !$++ VP) !< (VP <- (PP <:IN)) !<- (PP <: IN)))",
// "SBARQ < (WHNP|WHADJP=target !< WRB) <+(SQ|SINV|S|VP) (VP !< (S < (VP < TO)) < (/^(?:VB|AUX)/ < " + copularWordRegex + " $++ (VP < VBN|VBD)) !<- PRT !<- (PP <: IN) $-- (NP !< /^-NONE-$/))",
// // "Is he the man?"
// "SQ <, (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (NP=target $-- (NP !< EX))"
// });
/**
* The "clausal complement" grammatical relation. A clausal complement of
* a verb or adjective is a dependent clause with an internal subject which
* functions like an object of the verb, or adjective. Clausal complements
* for nouns are limited to complement clauses with a subset of nouns
* like "fact" or "report". We analyze them the same (parallel to the
* analysis of this class as "content clauses" in Huddleston and Pullum 2002).
* Clausal complements are usually finite (though there
* are occasional exceptions including remnant English subjunctives, and we
* also classify the complement of causative "have" (She had him arrested)
* in this category).<p>
* <br>
* Examples: <br>
* "He says that you like to swim" →
* {@code ccomp}(says, like) <br>
* "I am certain that he did it" →
* {@code ccomp}(certain, did) <br>
* "I admire the fact that you are honest" →
* {@code ccomp}(fact, honest)
*/
public static final GrammaticalRelation CLAUSAL_COMPLEMENT =
new GrammaticalRelation(Language.English, "ccomp", "clausal complement",
COMPLEMENT, "VP|SINV|S|ADJP|ADVP|NP(?:-.*)?", tregexCompiler,
// Weird case of verbs with direct S complement that is not an infinitive or participle
// ("I saw [him take the cake].", "making [him go crazy]")
"VP < (S=target < (VP !<, TO|VBG|VBN) !$-- NP)",
// the canonical case of a SBAR[that] with an overt "that" or "whether"
"VP < (SBAR=target < (S <+(S) VP) <, (IN|DT < /^(?i:that|whether)$/))",
// Conjoined SBAR otherwise in the canonical case
"VP < (SBAR=target < (SBAR < (S <+(S) VP) <, (IN|DT < /^(?i:that|whether)$/)) < CC|CONJP)",
// This finds most ccomp SBAR[that] with omission of that, but only ones without dobj
"VP < (SBAR=target < (S < VP) !$-- NP !<, (IN|WHADVP) !<2 (IN|WHADVP $- ADVP|RB))",
// Find ccomp SBAR[that] after dobj for clear marker verbs
"VP < (/^V/ < " + ccompObjVerbRegex + ") < (SBAR=target < (S < VP) $-- NP !<, (IN|WHADVP) !<2 (IN|WHADVP $- ADVP|RB))",
// SBAR introduced by "how", with no NP sister before it
"VP < (SBAR=target < (S < VP) !$-- NP <, (WHADVP < (WRB < /^(?i:how)$/)))",
"VP < @SBARQ=target", // Direct question: She asked "Who is in trouble"
// S complement (with its own NP and VP) of a verb matching haveRegex;
// presumably the causative "have" case from the Javadoc ("She had him arrested")
"VP < (/^VB/ < " + haveRegex + ") < (S=target < @NP < VP)",
// !$-- @SBAR|S handles cases where the answer to the question
// "What do they ccompVerb?"
// is already answered by a different node
// the ccompObjVerbRegex/NP test distinguishes "He told me why ..."
// vs "They know my order when ..."
"VP < (@SBAR=target !$-- @SBAR|S !$-- /^:$/ [ == @SBAR=sbar | <# @SBAR=sbar ] ) < (/^V/ < " + ccompVerbRegex + ") [ < (/^V/ < " + ccompObjVerbRegex + ") | < (=target !$-- NP) ] : (=sbar < (WHADVP|WHNP < (WRB !< /^(?i:how)$/) !$-- /^(?!RB|ADVP).*$/) !< (S < (VP < TO)))",
// to find "...", he said or "...?" he asked.
// We eliminate conflicts with conj by looking for CC
// Matching against "!< (VP < TO|VBG|VBN)" matches against vmod
// "!< (VP <1 (VP [ <1 VBG|VBN | <2 (VBG|VBN $-- ADVP) ])))" also matches against vmod
"@S|SINV < (@S|SBARQ=target $+ /^(,|\\.|'')$/ !$- /^(?:CC|CONJP|:)$/ !$- (/^(?:,)$/ $- CC|CONJP) !< (VP < TO|VBG|VBN) !< (VP <1 (VP [ <1 VBG|VBN | <2 (VBG|VBN $-- ADVP) ]))) !< (@S !== =target $++ =target !$++ @CC|CONJP)",
// ADVP is things like "As long as they spend ..."
// < WHNP captures phrases such as "no matter what", "no matter how", etc
"ADVP < (SBAR=target [ < WHNP | ( < (IN < /^(?i:as|that)/) < (S < (VP !< TO))) ])",
"ADJP < (SBAR=target !< (IN < as) < S)", // ADJP is things like "sure (that) he'll lose" or for/to ones or object of comparison with than "than we were led to expect"; Leave aside "as" in "as clever as we thought".
// Sentence-initial SBAR introduced by "that" or "whether" and not directly followed by a VP
"S <, (SBAR=target <, (IN < /^(?i:that|whether)$/) !$+ VP)",
// JJ catches a couple of funny NPs with heads like "enough"
// Note that we eliminate SBAR which also match a vmod pattern
"@NP < JJ|NN|NNS < (SBAR=target [ !<(S < (VP < TO )) | !$-- NP|NN|NNP|NNS ] )",
// New ones to pick up some more "say" patterns (2019); avoid S-ADV descendants
"VP < (/^V/ < " + sayVerbRegex + ") < (S|S-CLF|S-TTL|SQ=target <+(S) (VP < /^VB[DZP]$/))",
// S with a child whose label starts with S-TPC (presumably a topicalized clause) plus a VP
"@S < /^S-TPC/=target < VP"
);
/**
* An open clausal complement (<i>xcomp</i>) of a VP or an ADJP is a clausal
* complement without its own subject, whose reference is determined by an
* external subject. These complements are always non-finite.
* The name <i>xcomp</i> is borrowed from Lexical-Functional Grammar.
* (Mainly TO-clauses are recognized, but also some VBG clauses like "stop eating".)
* <br>
* <br>
* Examples: <br>
* "I like to swim" →
* {@code xcomp}(like, swim) <br>
* "I am ready to leave" →
* {@code xcomp}(ready, leave)
*/
public static final GrammaticalRelation XCLAUSAL_COMPLEMENT =
new GrammaticalRelation(Language.English, "xcomp", "xclausal complement",
COMPLEMENT, "VP|ADJP|SINV", tregexCompiler,
// TO-clause under a VP: either no NP sister precedes it, or a verb matching
// xcompVerbRegex does; "in order to ..." clauses are excluded
"VP < (S=target [ !$-- NP | $-- (/^V/ < " + xcompVerbRegex + ") ] !$- (NN < order) < (VP < TO))", // used to have !> (VP < (VB|AUX < be))
// S under an ADJP whose first child is a VP that starts with TO
"ADJP < (S=target <, (VP <, TO))",
// S containing an NP immediately followed by an NP or ADJP
"VP < (S=target !$- (NN < order) < (NP $+ NP|ADJP))",
// to find "help sustain ..."
"VP <# (/^(?:VB|AUX)/ $+ (VP=target < VB|VBG))",
"VP < (SBAR=target < (S !$- (NN < order) < (VP < TO))) !> (VP < (VB|AUX < be)) ",
"VP < (S=target !$- (NN < order) <: NP) > VP",
"VP < (/^VB/ $+ (@S=target < (@ADJP < /^JJ/ ! $-- @NP|S))) $-- (/^VB/ < " + copularWordRegex + " )",
// stop eating
// note that we eliminate parentheticals and clauses that could match a vmod
// the clause !$-- VBG eliminates matches such as "What are you wearing dancing tonight"
"(VP < (S=target < (VP < VBG ) !< NP !$- (/^,$/ [$- @NP|VP | $- (@PP $-- @NP ) |$- (@ADVP $-- @NP)]) !$-- /^:$/ !$-- VBG))",
// Detects xcomp(becoming, requirement) in "Hand-holding is becoming an investment banking job requirement"
// Also, xcomp(becoming, problem) in "Why is Dave becoming a problem?"
"(VP $-- (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (/^VB/ < " + clausalComplementRegex + ") < NP=target)",
"VP < (/^(?:VB|AUX)/ < " + clausalComplementRegex + ") < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] ) " +
// The next qualification eliminates parentheticals that
// come after the actual dobj
" <# (__ !$++ (NP $++ (/^[:]$/ $++ =target))) ",
// The old attr relation, used here to recover xcomp relations instead.
"VP=vp < NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + " >># =vp) !$ (NP < EX)",
// "Such a great idea this was" if "was" is the root, eg -makeCopulaHead
"SINV <# (VP < (/^(?:VB|AUX)/ < " + copularWordRegex + ") $-- (NP $-- NP=target))",
// For new treebank xcomp changes, match V + NP + xcomp patterns
"VP < (/^V/ < " + xcompVerbRegex + ") < NP < (S=target < (VP < TO))"
);
/**
 * The RELATIVE grammatical relation: a temporary placeholder relation.
 * When one of these tregex patterns fires, the target should really be
 * either a dobj or a pobj; which of the two is decided later, in a
 * post-processing step that inspects the surrounding dependencies.
 */
public static final GrammaticalRelation RELATIVE = new GrammaticalRelation(
    Language.English,
    "rel",
    "relative",
    COMPLEMENT,
    "SBAR|SBARQ",
    tregexCompiler,
    // Non-WRB WHNP in an SBAR whose clause VP has an SBAR child, a PP ending
    // in IN|TO reached through a chain of VPs, or a TO-clause child
    "SBAR < (WHNP=target !< WRB) < (S < NP < (VP [ < SBAR | <+(VP) (PP <- IN|TO) | < (S < (VP < TO)) ] ))",
    // Copular Wh-questions such as "What am I good at?"
    "SBARQ < (WHNP=target !< WRB !<# (/^NN/ < " + timeWordRegex + ")) <+(SQ|SINV) (/^(?:VB|AUX)/ < " + copularWordRegex + " !$++ VP)");
/**
 * The "referent" grammatical relation. For an NP modified by a relative
 * clause, the referent is the relative word (the Wh-word) that introduces
 * that relative clause.
 * <br>
 * Examples: <br>
 * "I saw the book which you bought" →
 * {@code ref}(book, which) <br>
 * "I saw the book the cover of which you designed" →
 * {@code ref}(book, which)
 */
public static final GrammaticalRelation REFERENT = new GrammaticalRelation(
    Language.English,
    "ref",
    "referent",
    DEPENDENT);
/**
 * The "expletive" grammatical relation, which captures an existential
 * "there".
 * <br>
 * <br>
 * Example: <br>
 * "There is a statue in the corner" →
 * {@code expl}(is, there)
 */
public static final GrammaticalRelation EXPLETIVE = new GrammaticalRelation(
    Language.English,
    "expl",
    "expletive",
    DEPENDENT,
    "S|SQ|SINV",
    tregexCompiler,
    // An NP child that reaches an EX tag through an unbroken chain of NPs
    "S|SQ|SINV < (NP=target <+(NP) EX)");
/**
* The "adjectival complement" grammatical relation. An
* adjectival complement of a VP is an adjectival phrase which
* functions as the complement (like an object of the verb); an adjectival
* complement of a clause is the adjectival complement of the VP which is
* the predicate of that clause.<p>
* <br>
* Example: <br>
* "She looks very beautiful" →
* {@code acomp}(looks, beautiful)
*/
public static final GrammaticalRelation ADJECTIVAL_COMPLEMENT =
new GrammaticalRelation(Language.English, "acomp", "adjectival complement",
COMPLEMENT, "VP|SQ", tregexCompiler,
// Either an ADJP directly under the VP, or - provided no verb matching
// copularWordRegex precedes the VP - a VP following a verb that matches
// clausalComplementRegex, or an S holding an adjectival ADJP right after the verb
"VP [ < ADJP=target | ( < (/^VB/ [ ( < " + clausalComplementRegex + " $++ VP=target ) | $+ (@S=target < (@ADJP < /^JJ/ ! $-- @NP|S)) ] ) !$-- (/^VB/ < " + copularWordRegex + " )) ]",
// Questions like "What am I good at?" with the copula being the head
"SQ < (/^VB/ < " + copularWordRegex + " $++ ADJP=target !$++ VP)");
/**
 * The "modifier" grammatical relation. A modifier of a VP is any
 * constituent that modifies the meaning of the VP without being an
 * {@code ARGUMENT} of that VP. A modifier of a clause is a modifier of
 * the VP which is the predicate of that clause.<p>
 * <br>
 * Examples: <br>
 * "Last night, I swam in the pool" →
 * {@code mod}(swam, in the pool),
 * {@code mod}(swam, last night)
 */
public static final GrammaticalRelation MODIFIER = new GrammaticalRelation(
    Language.English,
    "mod",
    "modifier",
    DEPENDENT);
/**
* The "adverbial clause modifier" grammatical relation. An adverbial clause
* modifier of a predicate, such as a VP or an (inverted) sentence, is a clause modifying the verb
* (temporal clauses, consequences, conditional clauses, etc.).
* <br>
* Examples: <br>
* "The accident happened as the night was falling" →
* {@code advcl}(happened, falling) <br>
* "If you know who did it, you should tell the teacher" →
* {@code advcl}(tell, know)
*/
public static final GrammaticalRelation ADV_CLAUSE_MODIFIER =
new GrammaticalRelation(Language.English, "advcl", "adverbial clause modifier",
MODIFIER, "VP|S|SQ|SINV|SBARQ|NP|ADVP", tregexCompiler,
// SBAR under a VP where the SBAR itself, or an SBAR child of it, has an IN other
// than "that"/"whether", consists only of an SINV starting with a verb, modal or
// auxiliary, or is introduced by "so that" / "now that"
"VP < (@SBAR=target <= (@SBAR [ < (IN !< /^(?i:that|whether)$/) | <: (SINV <1 /^(?:VB|MD|AUX)/) | < (RB|IN < so|now) < (IN < that) | <1 (ADVP < (RB < now)) <2 (IN < that) ] ))",
// Clause-level SBAR whose first child is an IN other than "that"/"whether" (and not
// directly followed by the noun "order"); no sister may precede it unless its label
// starts with CC, CONJP, ``, a comma, INTJ or PP, and it is not directly followed by a VP
"S|SQ|SINV < (SBAR|SBAR-TMP=target <, (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) !$-- /^(?!CC|CONJP|``|,|INTJ|PP(-.*)?).*$/ !$+ VP)",
// to get "rather than"
"S|SQ|SINV < (SBAR|SBAR-TMP=target <2 (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) !$-- /^(?!CC|CONJP|``|,|INTJ|PP(-.*)?$).*$/)",
// this one might just be better, but at any rate license one with quotation marks or a conjunction beforehand
"S|SQ|SINV < (SBAR|SBAR-TMP=target <, (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) !$+ @VP $+ /^,$/ $++ @NP)",
// the last part should probably only be @SQ, but this captures some strays at no cost
"SBARQ < (SBAR|SBAR-TMP|SBAR-ADV=target <, (IN !< /^(?i:that|whether)$/ !$+ (NN < order)) $+ /^,$/ $++ @SQ|S|SBARQ)",
// added the (S < (VP < TO)) part so that "I tell them how to do so" doesn't get a wrong advcl
// note that we allow adverb phrases to come before the WHADVP, which allows for phrases such as "even when"
// ":" indicates something that should be a parataxis
// in cases where there are two SBARs conjoined, we're happy
// to use the head SBAR as a candidate for this relation
"S|SQ < (@SBAR=target [ == @SBAR=sbar | <# @SBAR=sbar ] ): (=sbar < (WHADVP|WHNP < (WRB !< /^(?i:how)$/) !$-- /^(?!RB|ADVP).*$/) !< (S < (VP < TO)) !$-- /^:$/)",
// VP counterpart of the previous pattern; the bracketed alternatives use
// ccompVerbRegex / ccompObjVerbRegex and preceding SBAR|S or NP sisters
"VP < (@SBAR=target !$-- /^:$/ [ == @SBAR=sbar | <# @SBAR=sbar ] ) [ !< (/^V/ < " + ccompVerbRegex + ") | < (=target $-- @SBAR|S) | ( !< (/^V/ < " + ccompObjVerbRegex + ") < (=target $-- NP)) ] : (=sbar < (WHADVP|WHNP < (WRB !< /^(?i:how)$/) !$-- /^(?!RB|ADVP).*$/) !< (S < (VP < TO)))",
// "S|SQ < (PP=target <, RB < @S)", // caught as prep and pcomp.
"@S < (@SBAR=target $++ @NP $++ @VP)", // fronted adverbial clause
"@S < (@S=target < (VP < TO) $+ (/^,$/ $++ @NP))", // part of former purpcl: This is fronted infinitives: "To find out why, we went to ..."
// "VP > (VP < (VB|AUX < be)) < (S=target !$- /^,$/ < (VP < TO|VBG) !$-- NP)", // part of former purpcl [cdm 2010: this pattern was added by me in 2006, but it is just bad!]
// // matches direct object for long dependencies in relative clause without explicit relative pronouns
// "SBAR !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !$-- CC $-- NP > NP=target " +
// // avoid conflicts with rcmod. TODO: we could look for
// // empty nodes in this kind of structure and use that to
// // find dobj, tmod, advmod, etc. won't help the parser,
// // of course, but will help when converting a treebank
// // which contains empties
// // Example: "with the way his split-fingered fastball is behaving"
// "!($-- @NP|WHNP|NML > @NP|WHNP <: (S !< (VP < TO)))",
// SBAR introduced by "than" that follows an NP inside an NP
"NP < (NP $++ (SBAR=target < (IN < /^(?i:than)$/) !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !<: (S !< (VP < TO))) !$++ (CC $++ =target))",
// this is for comparative or as ... as complements: sold more quickly [than they had expected]
// available as long [as they install a crash barrier]
"ADVP < ADVP < SBAR=target"
);
/*
* The "purpose clause modifier" grammatical relation has been discontinued.
* It is now just seen as a special case of an advcl. A purpose clause
* modifier of a VP is a clause headed by "(in order) to" specifying a
* purpose. Note: at present we only recognize ones that have
* "in order to" or are fronted. Otherwise we can't use our surface representations to
* distinguish these from xcomp's. We can also recognize "to" clauses
* introduced by "be VBN".
* <br>
* Example: <br>
* "He talked to the president in order to secure the account" →
* {@code purpcl}(talked, secure)
*/
/**
* The "relative clause modifier" grammatical relation. A relative clause
* modifier of an NP is a relative clause modifying the NP. The link
* points from the head noun of the NP to the head of the relative clause,
* normally a verb.
* <br>
* <br>
* Examples: <br>
* "I saw the man you love" →
* {@code rcmod}(man, love) <br>
* "I saw the book which you bought" →
* {@code rcmod}(book, bought)
*/
public static final GrammaticalRelation RELATIVE_CLAUSE_MODIFIER =
new GrammaticalRelation(Language.English, "rcmod", "relative clause modifier",
MODIFIER, "(?:WH)?(?:NP|NML|ADVP)(?:-.*)?", tregexCompiler,
// SBAR to the right of the first NP-like child of an NP: it either reaches a
// WHPP|WHNP through a chain of SBARs or consists solely of an S that is not a TO-clause.
// Excluded when ETC_PAT / FW_ETC_PAT match a later sister, or when a CC|CONJP
// stands between the NP and the SBAR
"@NP|WHNP|NML=np $++ (SBAR=target [ <+(SBAR) WHPP|WHNP | <: (S !< (VP < TO)) ]) !$-- @NP|WHNP|NML !$++ " + ETC_PAT + " !$++ " + FW_ETC_PAT + " > @NP|WHNP : (=np !$++ (CC|CONJP $++ =target))",
// SBAR introduced by a WHADVP whose WRB starts with "where", "why" or "when"
"NP|NML $++ (SBAR=target < (WHADVP < (WRB </^(?i:where|why|when)/))) !$-- NP|NML !$++ " + ETC_PAT + " !$++ " + FW_ETC_PAT + " > @NP",
// for case of relative clauses with no relativizer
// (it doesn't distinguish whether actually gapped).
"@NP|WHNP < RRC=target <# NP|WHNP|NML|DT|S",
// SBAR under an ADVP that also contains an ADVP whose RB ends in "where"
"@ADVP < (@ADVP < (RB < /where$/)) < @SBAR=target",
// SBAR following an NP inside an NP, with no "than"/"that"/"whether" IN and no Wh-phrase
"NP < (NP $++ (SBAR=target !< (IN < /^(?i:than|that|whether)$/) !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !<: (S !< (VP < TO))) !$++ (CC $++ =target))");
/*
* The "complementizer" grammatical relation is a discontinued grammatical relation.
* A complementizer of a clausal complement was the word introducing it.
* It only matched "that" or "whether". We've now merged this in with "mark" which plays a similar
* role with other clausal modifiers.
* <br>
* <br>
* Example: <br>
* "He says that you like to swim" →
* {@code complm}(like, that)
*/
/**
 * The "marker" grammatical relation. A marker is the word that introduces
 * a finite clause subordinate to another clause.
 * In a complement clause this is typically "that" or "whether";
 * in an adverbial clause it is typically a preposition such as "while" or "although".
 * <br>
 * Example: <br>
 * "U.S. forces have been engaged in intense fighting after insurgents launched simultaneous attacks" →
 * {@code mark}(launched, after)
 */
public static final GrammaticalRelation MARKER = new GrammaticalRelation(
    Language.English,
    "mark",
    "marker",
    MODIFIER,
    "SBAR(?:-TMP)?",
    tregexCompiler,
    // An IN or DT that has an S or FRAG sister somewhere to its right
    "SBAR|SBAR-TMP < (IN|DT=target $++ S|FRAG)",
    // "that"/"whether" inside an SBAR, restricted by where that SBAR sits
    "SBAR < (IN|DT=target < that|whether) [ $-- /^(?:VB|AUX)/ | $- NP|NN|NNS | > ADJP|PP | > (@NP|UCP|SBAR < CC|CONJP $-- /^(?:VB|AUX)/) ]");
/**
* The "adjectival modifier" grammatical relation. An adjectival
* modifier of an NP is any adjectival phrase that serves to modify
* the meaning of the NP.<p>
* <br>
* Example: <br>
* "Sam eats red meat" →
* {@code amod}(meat, red) <br>
* The relation amod is also used for multiword country adjectives, despite their
* questionable treebank representation.
* <br>
* Example: <br>
* "the West German economy" →
* {@code amod}(German, West),
* {@code amod}(economy, German)
*/
public static final GrammaticalRelation ADJECTIVAL_MODIFIER =
new GrammaticalRelation(Language.English, "amod", "adjectival modifier",
MODIFIER, "NP(?:-TMP|-ADV)?|NX|NML|NAC|WHNP|ADJP", tregexCompiler,
// Adjective-like child of a nominal; skipped when it directly follows a CC or
// contains a QP that has no "$" child
"/^(?:NP(?:-TMP|-ADV)?|NX|NML|NAC|WHNP)$/ < (ADJP|WHADJP|JJ|JJR|JJS|JJP|VBN|VBG|VBD|IN=target !< (QP !< /^[$]$/) !$- CC)",
// IN above is needed for "next" in "next week" etc., which is often tagged IN.
// JJ/NNP sisters inside an ADJP that has no conjunction
"ADJP !< CC|CONJP < (JJ|NNP $ JJ|NNP=target)",
// Cover the case of "John, 34, works at Stanford" - similar to an expression for appos
"WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (NP=target <: CD $- /^,$/ $-- /^(?:WH)?NP/ !$ CC|CONJP)");
/**
* The "numeric modifier" grammatical relation. A numeric
* modifier of an NP is any number phrase that serves to modify
* the meaning of the NP.
* <br>
* Also, the enumeration of a list has this relation to the head of
* the list item. For that, we allow the list of constituents which
* have a list under them in any of the training data, as the parser
* will likely not produce anything else anyway.
* <br>
* PTB: PP NP X S FRAG <br>
* EWT: SQ SBARQ SINV SBAR NML VP <br>
* Craft: PRN <br>
* OntoNotes: ADJP <br>
* Example: <br>
* "Sam eats 3 sheep" →
* {@code num}(sheep, 3)
*/
public static final GrammaticalRelation NUMERIC_MODIFIER =
new GrammaticalRelation(Language.English, "num", "numeric modifier",
MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NML|NX|ADJP|WHADJP|QP|PP|X|S|FRAG|SQ|SBARQ|SINV|SBAR|VP|PRN", tregexCompiler,
// CD or QP child of a nominal, unless it directly follows a CC
"/^(?:WH)?(?:NP|NX|NML)(?:-TMP|-ADV)?$/ < (CD|QP=target !$- CC)",
// $ is so phrases such as "$ 100 million buyout" get amod(buyout, $)
"/^(?:WH)?(?:NP|NX|NML)(?:-TMP|-ADV)?$/ < (ADJP=target <: (QP !< /^[$]$/))",
// Phrases such as $ 100 million get converted from (QP ($ $) (CD 100) (CD million)) to
// (QP ($ $) (QP (CD 100) (CD million))). This next tregex covers those phrases.
// Note that the earlier tregexes are usually enough to cover those phrases, such as when
// the QP is by itself in an ADJP or NP, but sometimes it can have other siblings such
// as in the phrase "$ 100 million or more". In that case, this next expression is needed.
"QP < QP=target < /^[$]$/",
// Lists are treated as nummod in UD_English-EWT
"PP|NP|X|S|FRAG|SQ|SBARQ|SINV|SBAR|NML|VP|PRN|ADJP < LST=target");
/**
* The "compound number modifier" grammatical relation. A compound number
* modifier is a part of a number phrase or currency amount.
* <br>
* Example: <br>
* "I lost $ 3.2 billion" →
* {@code number}($, billion)
*/
public static final GrammaticalRelation NUMBER_MODIFIER =
new GrammaticalRelation(Language.English, "number", "compound number modifier",
MODIFIER, "QP|ADJP", tregexCompiler,
// The target is a CD, "$" or "#" child that does not directly follow a CC.
// The dollar tag is written as [$] (as in the other patterns of this file)
// because a bare $ inside the alternation is the regex end anchor: it would
// never match the "$" label and would accept an empty label instead.
"QP|ADJP < (/^(?:CD|[$]|#)$/=target !$- CC)");
/**
 * The "quantifier phrase modifier" grammatical relation. A quantifier
 * modifier is an element that modifies the head of a QP constituent.
 * <br>
 * Example: <br>
 * "About 200 people came to the party" →
 * {@code quantmod}(200, About)
 */
public static final GrammaticalRelation QUANTIFIER_MODIFIER = new GrammaticalRelation(
    Language.English,
    "quantmod",
    "quantifier modifier",
    MODIFIER,
    "QP",
    tregexCompiler,
    // XS and XSL are there to match "up to" and similar phrases
    // once the QPTreeTransformer has operated on the tree
    "QP < IN|RB|RBR|RBS|PDT|DT|JJ|JJR|JJS|XS|XSL|RP=target");
/**
* The "noun compound modifier" grammatical relation. A noun compound