Skip to content

Commit

Permalink
Include {} as punct words, as some parsers don't produce LCB/RCB
Browse files Browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Jul 7, 2023
1 parent 56cd6bb commit a606afa
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/trees/PennTreebankLanguagePack.java
Expand Up @@ -29,7 +29,7 @@ public PennTreebankLanguagePack() {
private static final String[] collinsPunctTags = {"''", "``", ".", ":", ","};

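// New tokenizers should return ( and ), while old tokenizers return -LRB- and -RRB-, so we anticipate both.
// Likewise, some parsers emit { and } rather than -LCB- and -RCB-, so both brace forms are listed as well.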
private static final String[] pennPunctWords = {"''", "'", "``", "`", "-LRB-", "-RRB-", "(", ")", "-LCB-", "-RCB-", ".", "?", "!", ",", ":", "-", "--", "...", ";"};
private static final String[] pennPunctWords = {"{", "}", "''", "'", "``", "`", "-LRB-", "-RRB-", "(", ")", "-LCB-", "-RCB-", ".", "?", "!", ",", ":", "-", "--", "...", ";"};

private static final String[] pennSFPunctWords = {".", "!", "?"};

Expand Down

0 comments on commit a606afa

Please sign in to comment.