-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
SentimentAnnotator.java
149 lines (133 loc) · 5.71 KB
/
SentimentAnnotator.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
package edu.stanford.nlp.pipeline;
import java.util.*;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.sentiment.CollapseUnaryTransformer;
import edu.stanford.nlp.sentiment.SentimentCoreAnnotations;
import edu.stanford.nlp.sentiment.SentimentCostAndGradient;
import edu.stanford.nlp.sentiment.SentimentModel;
import edu.stanford.nlp.sentiment.SentimentUtils;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.ArraySet;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.PropertiesUtils;
/**
 * This annotator attaches a binarized tree with sentiment annotations
 * to each sentence.  It requires there to already be binarized trees
 * attached to the sentence, which is best done in the
 * ParserAnnotator.
 * <br>
 * The tree will be attached to each sentence in the
 * SentencesAnnotation via the SentimentCoreAnnotations.SentimentAnnotatedTree
 * annotation.  The class name for the top level class is also set
 * using the SentimentCoreAnnotations.SentimentClass annotation.
 * If the sentence also carries a regular (non-binarized) parse tree,
 * the sentiment class names are copied onto the labels of its nodes
 * wherever a node covers the same span as a node of the sentiment tree.
 * <br>
 * The reason the decision was made to do the binarization in the
 * ParserAnnotator is because it may require specific options set in
 * the parser.  An alternative would be to do the binarization here,
 * which would require at a minimum the HeadFinder used in the parser.
 *
 * @author John Bauer
 */
public class SentimentAnnotator extends SentenceAnnotator {

  /** Model location used when the {@code <annotatorName>.model} property is not set. */
  private static final String DEFAULT_MODEL = "edu/stanford/nlp/models/sentiment/sentiment.ser.gz";

  /** Location the sentiment model was loaded from. */
  private final String modelPath;

  /** The deserialized sentiment model used to score every sentence. */
  private final SentimentModel model;

  /** Applied to each binarized tree before it is handed to the model. */
  private final CollapseUnaryTransformer transformer = new CollapseUnaryTransformer();

  /** Value reported to the superclass through {@link #nThreads()}. */
  private final int nThreads;

  /**
   * Stop processing if we exceed this time limit, in milliseconds.
   * Use 0 for no limit.
   * Note that when the property is not set the constructor stores -1.
   * NOTE(review): presumably any non-positive value disables the limit;
   * confirm against SentenceAnnotator.
   */
  private final long maxTime;

  /**
   * Builds the annotator from pipeline properties.
   * <br>
   * Properties read, all prefixed with {@code annotatorName + "."}:
   * <ul>
   * <li>{@code model}: path of the serialized sentiment model, defaulting to the bundled model</li>
   * <li>{@code nthreads}: thread count, falling back to the global {@code nthreads} property, then to 1</li>
   * <li>{@code maxtime}: time limit in milliseconds, defaulting to -1</li>
   * </ul>
   *
   * @param annotatorName prefix used to look up this annotator's properties
   * @param props the pipeline properties
   * @throws IllegalArgumentException if no model path could be determined
   */
  public SentimentAnnotator(String annotatorName, Properties props) {
    this.modelPath = props.getProperty(annotatorName + ".model", DEFAULT_MODEL);
    // Defensive only: with a non-null default, a standard Properties object never returns null here.
    if (modelPath == null) {
      throw new IllegalArgumentException("No model specified for Sentiment annotator");
    }
    this.model = SentimentModel.loadSerialized(modelPath);
    this.nThreads = PropertiesUtils.getInt(props, annotatorName + ".nthreads", PropertiesUtils.getInt(props, "nthreads", 1));
    this.maxTime = PropertiesUtils.getLong(props, annotatorName + ".maxtime", -1);
  }

  /**
   * Returns the annotations this annotator advertises as produced: currently none.
   * NOTE(review): doOneSentence sets SentimentAnnotatedTree and SentimentClass,
   * neither of which is listed here; confirm whether that is intentional.
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requirementsSatisfied() {
    return Collections.emptySet();
  }

  /**
   * Returns the annotations that must already be present: part of speech tags,
   * the parse tree, the binarized parse tree and category annotations.
   * The returned set is unmodifiable.
   */
  @Override
  public Set<Class<? extends CoreAnnotation>> requires() {
    return Collections.unmodifiableSet(new ArraySet<>(Arrays.asList(
            CoreAnnotations.PartOfSpeechAnnotation.class,
            TreeCoreAnnotations.TreeAnnotation.class,
            TreeCoreAnnotations.BinarizedTreeAnnotation.class,
            CoreAnnotations.CategoryAnnotation.class
    )));
  }

  /**
   * Builds a string describing the configuration this annotator would be
   * created with, from the same properties the constructor reads.
   * Unset values appear as their defaults; an unset thread count appears
   * as the empty string.
   *
   * @param annotatorName prefix used to look up this annotator's properties
   * @param props the pipeline properties
   * @return the concatenated {@code name:value} entries for model, nthreads and maxtime
   */
  public static String signature(String annotatorName, Properties props) {
    StringBuilder os = new StringBuilder();
    // The entries are intentionally written back to back, exactly as before,
    // so the resulting signature string is unchanged.
    os.append(annotatorName).append(".model:")
      .append(props.getProperty(annotatorName + ".model", DEFAULT_MODEL));
    os.append(annotatorName).append(".nthreads:")
      .append(props.getProperty(annotatorName + ".nthreads", props.getProperty("nthreads", "")));
    os.append(annotatorName).append(".maxtime:")
      .append(props.getProperty(annotatorName + ".maxtime", "-1"));
    return os.toString();
  }

  /** Returns the thread count configured in the constructor. */
  @Override
  protected int nThreads() {
    return nThreads;
  }

  /** Returns the time limit, in milliseconds, configured in the constructor. */
  @Override
  protected long maxTime() {
    return maxTime;
  }

  /**
   * Intentionally does nothing: a sentence that could not be processed is
   * simply left without sentiment annotations.
   */
  @Override
  public void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
    // not sure what to do here, so just bail
  }

  /**
   * Scores one sentence with the sentiment model.
   * <br>
   * The sentence's binarized tree is transformed by the unary-collapsing
   * transformer, run through the model, and stored under
   * SentimentCoreAnnotations.SentimentAnnotatedTree.  The predicted class
   * of the root is stored under SentimentCoreAnnotations.SentimentClass.
   * If a regular parse tree is present, each of its nodes whose span matches
   * a node of the sentiment tree receives that node's sentiment class.
   *
   * @param annotation the document being annotated (not used here)
   * @param sentence the sentence to annotate
   * @throws AssertionError if the sentence has no binarized tree
   * @throws IllegalStateException if the root of the parse tree already has a SpanAnnotation
   */
  @Override
  protected void doOneSentence(Annotation annotation, CoreMap sentence) {
    Tree binarized = sentence.get(TreeCoreAnnotations.BinarizedTreeAnnotation.class);
    if (binarized == null) {
      throw new AssertionError("Binarized sentences not built by parser");
    }
    Tree collapsedUnary = transformer.transformTree(binarized);
    SentimentCostAndGradient scorer = new SentimentCostAndGradient(model, null);
    scorer.forwardPropagateTree(collapsedUnary);
    sentence.set(SentimentCoreAnnotations.SentimentAnnotatedTree.class, collapsedUnary);
    int sentiment = RNNCoreAnnotations.getPredictedClass(collapsedUnary);
    sentence.set(SentimentCoreAnnotations.SentimentClass.class, SentimentUtils.sentimentString(model, sentiment));

    Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    if (tree == null) {
      // no regular parse tree to decorate
      return;
    }

    collapsedUnary.setSpans();
    // map the sentiment annotations onto the tree
    Map<IntPair,String> spanSentiment = Generics.newHashMap();
    for (Tree bt : collapsedUnary) {
      IntPair p = bt.getSpan();
      int sen = RNNCoreAnnotations.getPredictedClass(bt);
      String sentStr = SentimentUtils.sentimentString(model, sen);
      if ( ! spanSentiment.containsKey(p)) {
        // we'll take the first = highest one discovered
        spanSentiment.put(p, sentStr);
      }
    }

    // The spans written below are scratch data which get removed again,
    // so refuse to run if that would destroy spans somebody else stored.
    if (((CoreLabel) tree.label()).containsKey(CoreAnnotations.SpanAnnotation.class)) {
      throw new IllegalStateException("This code assumes you don't have SpanAnnotation");
    }
    tree.setSpans();
    for (Tree t : tree) {
      IntPair p = t.getSpan();
      String str = spanSentiment.get(p);
      if (str != null) {
        CoreLabel cl = (CoreLabel) t.label();
        cl.set(SentimentCoreAnnotations.SentimentClass.class, str);
      }
      // Always clean up the temporary span, including on nodes for which no
      // sentiment was found; previously those nodes kept a stray SpanAnnotation.
      if (t.label() instanceof CoreLabel) {
        ((CoreLabel) t.label()).remove(CoreAnnotations.SpanAnnotation.class);
      }
    }
  }

}