Skip to content

Commit

Permalink
Adapt test to German #1
Browse files Browse the repository at this point in the history
  • Loading branch information
tabergma committed Mar 16, 2016
1 parent e88db89 commit 0bee9e8
Show file tree
Hide file tree
Showing 18 changed files with 718 additions and 750 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,12 @@ private Collection<Range> removeRangeOverlapWithRelation(ChunkedExtraction rel,
*/
protected Collection<ChunkedArgumentExtraction> extractCandidates(ChunkedExtraction rel) {
ChunkedSentence sent = rel.getSentence();
Collection<Range>
Collection<Range> npChunkRanges= removeRangeOverlapWithRelation(rel, sent.getNpChunkRanges());
// TODO
if (mode == Mode.RIGHT) {
npChunkRanges =
removeRangeOverlapWithRelation(rel, sent.getNpChunkRanges());
removeRangeOverlapWithRelation(rel, sent.getPpChunkRanges());
}
Collection<ChunkedArgumentExtraction> args = new ArrayList<ChunkedArgumentExtraction>();
for (Range npChunkRange : npChunkRanges) {
if (acceptRange(rel, npChunkRange)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

public abstract class ReVerbRelationExtractor extends RelationFirstNpChunkExtractor {

// TODO
/**
* Definition of the "verb" of the relation pattern.
*/
Expand All @@ -34,7 +35,7 @@ public abstract class ReVerbRelationExtractor extends RelationFirstNpChunkExtrac
* Definition of the "non-verb/prep" part of the relation pattern.
*/
public static final String WORD =
"[NE_pos NN_pos]";
"[NE_pos NN_pos ART_pos]";

/**
* Definition of the "preposition" part of the relation pattern.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,14 @@ public ImmutableList<String> getPosTags(Range range) {
* @return an unmodifiable list over the ranges of the NP chunks in this sentence.
*/
public ImmutableCollection<Range> getNpChunkRanges() {
    // Delegates to getSpans for the "NP" entries of NP_LAYER; exactly one
    // return statement, so no unreachable code follows it.
    return getSpans(NP_LAYER, "NP");
}

/**
 * Gives the ranges of the PP chunks found in this sentence.
 *
 * @return an immutable collection holding the range of every PP chunk
 */
public ImmutableCollection<Range> getPpChunkRanges() {
    // NOTE(review): PP spans are looked up in NP_LAYER, the same layer that
    // getNpChunkRanges() queries — confirm the chunker stores "PP" tags there.
    final ImmutableCollection<Range> ppRanges = getSpans(NP_LAYER, "PP");
    return ppRanges;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ public VerbalRelationNormalizer() {
ignorePosTags = new HashSet<String>();
ignorePosTags.add("VMFIN"); // dürfen
ignorePosTags.add("VMINF"); // wollen
ignorePosTags.add("PTKNEG"); // nicht
ignorePosTags.add("ART"); // der, die, das
ignorePosTags.add("PDS"); // dieser, jener
ignorePosTags.add("ADJA"); // adjectives
ignorePosTags.add("ADJD"); // adjectives
ignorePosTags.add("ADV"); // adverbs
ignorePosTags.add("PPOSAT"); // mein, deine

Expand Down Expand Up @@ -92,7 +93,7 @@ private void removeIgnoredPosTags(List<String> tokens, List<String> posTags) {
int i = 0;
while (i < posTags.size()) {
String tag = posTags.get(i);
boolean isAdj = tag.startsWith("J");
boolean isAdj = tag.startsWith("ADJ");

/*
* This is checking for a special case where the relation phrase
Expand Down Expand Up @@ -124,6 +125,8 @@ private void removeLeadingBeHave(List<String> tokens, List<String> posTags) {
if (lastVerbIndex < 0) {
return;
}

// remove auxiliary verbs before other verbs
int i = 0;
while (i < lastVerbIndex) {
String tok = tokens.get(i);
Expand All @@ -138,5 +141,21 @@ private void removeLeadingBeHave(List<String> tokens, List<String> posTags) {
i++;
}
}

// remove auxiliary verbs after other verbs
i = lastVerbIndex;
while (i > 0) {
String tok = tokens.get(i);
if (i - 1 >= 0 && !posTags.get(i - 1).startsWith("V")) {
break;
}
if (auxVerbs.contains(tok)) {
tokens.remove(i);
posTags.remove(i);
lastVerbIndex--;
} else {
i--;
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
public class LayeredPatternTokenizer {

// The pattern used to match layer/symbol pairs like The_pos. Both sides of the
// underscore accept German umlauts (ö ä ü Ö Ä Ü) and ß in addition to ASCII
// letters and digits, so German tokens can appear in patterns.
private final String tokenPatternStr = "([a-zA-ZöäüßÖÄÜ0-9\\-.,:;?!\"'`$]+)_([a-zA-ZöäüßÖÄÜ0-9\\-]+)";
// Compiled once per instance from tokenPatternStr.
private final Pattern tokenPattern = Pattern.compile(tokenPatternStr);

// The allowed meta-characters
Expand Down

0 comments on commit 0bee9e8

Please sign in to comment.