-
Notifications
You must be signed in to change notification settings - Fork 2
/
token-regex.txt
44 lines (40 loc) · 1.58 KB
/
token-regex.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
## example token regular expression file
## Example: (parse "I like Teddy Grams on Tuesday")
## ({:entity-type "PRODUCT",
## :token-range [2 4],
## :sent-index 0,
## :tok-re-ner-tag "PRODUCT",
## :char-range [7 18],
## :text "Teddy Grams"}
## {:entity-type "DAY_OF_WEEK",
## :token-range [5 6],
## :sent-index 0,
## :tok-re-ner-tag "DAY_OF_WEEK",
## :char-range [22 29],
## :text "Tuesday"})
## ...
## {:token-range [2 3],
## :ner-tag "PERSON",
## :pos-tag "NNP",
## :lemma "Teddy",
## :token-index 3,
## :sent-index 0,
## :tok-re-ner-features {:food-type "dessert"},
## :tok-re-ner-tag "PRODUCT",
## :tok-re-ner-item-id 497,
## :char-range [7 12],
## :text "Teddy"}
## ...
## more examples:
## http://nlp.stanford.edu/software/tokensregex.html
# Rule environment, annotation keys and rules for the example in the header.
# Default every rule below to ruleType "tokens" (patterns are written over
# tokens), so the individual rules do not have to repeat it.
ENV.defaults["ruleType"] = "tokens"
# Bind short names to the annotation key classes that the Annotate(...)
# actions below write to.  Judging by the example output in the header:
#   zstrner  -> :tok-re-ner-tag       (entity tag, e.g. "PRODUCT")
#   zstrfeat -> :tok-re-ner-features  (e.g. {:food-type "dessert"})
#   zstriid  -> :tok-re-ner-item-id   (e.g. 497)
# NOTE(review): the classes live in zensols.stanford.nlp.TokenRegexAnnotations,
# which is not part of this file -- confirm the mapping against that class.
zstrner = {type:"CLASS",value:"zensols.stanford.nlp.TokenRegexAnnotations$NERAnnotation"}
zstrfeat = {type:"CLASS",value:"zensols.stanford.nlp.TokenRegexAnnotations$NERFeatureCreateAnnotation"}
zstriid = {type:"CLASS",value:"zensols.stanford.nlp.TokenRegexAnnotations$NERItemIDAnnotation"}
# Optional: make every string pattern case insensitive by default.  The value
# 2 is java.util.regex.Pattern.CASE_INSENSITIVE.  Left disabled here; the
# day-of-week rule below opts in per pattern with the inline (?iu) flags.
#ENV.defaultStringPatternFlags = 2
# Rules.  In each action, $0 refers to the whole matched token sequence.
# Day of week: a token whose text is one of the seven day names.  The inline
# flags (?iu) request case-insensitive, Unicode-aware matching for this
# pattern only.  The match is tagged "DAY_OF_WEEK" through zstrner.
{pattern:(/(?iu)monday|tuesday|wednesday|thursday|friday|saturday|sunday/),action:(Annotate($0,zstrner,"DAY_OF_WEEK"))}
# Product: two consecutive tokens, the first with word "Teddy" and the second
# with lemma "Grams" (note: the second token is tested on its lemma, not its
# surface word).  The match gets three annotations: tag "PRODUCT", item id
# "497", and the feature string "food-type={dessert}".
{pattern:([{word:"Teddy"}] [{lemma:"Grams"}]),action:(Annotate($0,zstrner,"PRODUCT"),Annotate($0,zstriid,"497"),Annotate($0,zstrfeat,"food-type={dessert}"))}