-
Notifications
You must be signed in to change notification settings - Fork 1.4k
/
Match.java
241 lines (209 loc) · 6.68 KB
/
Match.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/* LanguageTool, a natural language style checker
* Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
* USA
*/
package org.languagetool.rules.patterns;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.synthesis.Synthesizer;
import org.languagetool.tools.StringTools;
/**
* A {@link Match} is the configuration of an algorithm used to match {@link AnalyzedTokenReadings}s.
* In XML, it's the {@code <match/>} element.
* Use {@link #createState(Synthesizer, AnalyzedTokenReadings)} and {@link #createState(Synthesizer, AnalyzedTokenReadings[], int, int)}
* to create a {@link MatchState} used to actually match {@link AnalyzedTokenReadings}.
*
* @author Marcin Miłkowski
*/
public final class Match {

  /** The kinds of case conversion a match can request. */
  public enum CaseConversion {
    NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER, PRESERVE, FIRSTUPPER, NOTASHKEEL
  }

  /** Values accepted for the {@code includeSkipped} constructor argument. */
  public enum IncludeRange {
    NONE, FOLLOWING, ALL
  }

  // ---- State fixed by the constructor ----

  private final String posTag;
  private final String posTagReplace;
  // Pattern that defines parts of the matched token; null if no regex was given.
  private final Pattern pRegexMatch;
  private final String regexReplace;
  private final CaseConversion caseConversionType;
  // True if this match element is used for formatting a POS token.
  private final boolean setPos;
  private final boolean suppressMisspelled;
  private final IncludeRange includeSkipped;

  // ---- State that can change after construction ----

  // Initialised from the constructor; switched on by setLemmaString().
  private boolean postagRegexp;
  // Pattern that defines parts of the matched POS token; stays null until compiled.
  private Pattern pPosRegexMatch;
  // True if this element formats a statically defined lemma enclosed by the
  // element itself, e.g., <match...>word</match>.
  private boolean staticLemma;
  private String lemma;
  private int tokenRef;
  // If true, the suggestion will only be applied if the match is successful.
  private boolean ifMatch;
  // True when the match is not in the suggestion.
  private boolean inMessageOnly;

  /**
   * Builds a match configuration.
   *
   * @param posTag POS tag; compiled as a regular expression when
   *        {@code postagRegexp} is true and this value is not null
   * @param posTagReplace value later returned by {@link #getPosTagReplace()}
   * @param postagRegexp whether {@code posTag} is to be treated as a regular expression
   * @param regexMatch regular expression for the token, or null for none
   * @param regexReplace value later returned by {@link #getRegexReplace()}
   * @param caseConversionType case conversion to apply
   * @param setPOS whether this match is used for setting the part of speech
   * @param suppressMisspelled value later returned by {@link #checksSpelling()}
   * @param includeSkipped value later returned by {@link #getIncludeSkipped()}
   * @throws java.util.regex.PatternSyntaxException if a regular expression that
   *         gets compiled here is syntactically invalid
   */
  public Match(String posTag, String posTagReplace,
               boolean postagRegexp, String regexMatch,
               String regexReplace, CaseConversion caseConversionType,
               boolean setPOS,
               boolean suppressMisspelled,
               IncludeRange includeSkipped) {
    this.posTag = posTag;
    this.posTagReplace = posTagReplace;
    this.regexReplace = regexReplace;
    this.caseConversionType = caseConversionType;
    this.setPos = setPOS;
    this.suppressMisspelled = suppressMisspelled;
    this.includeSkipped = includeSkipped;
    this.postagRegexp = postagRegexp;

    // The token pattern is compiled first, the POS pattern second.
    if (regexMatch == null) {
      pRegexMatch = null;
    } else {
      pRegexMatch = Pattern.compile(regexMatch);
    }
    if (postagRegexp) {
      compilePosTagPattern();
    }
  }

  /** Compiles {@code posTag} into {@code pPosRegexMatch}; does nothing if there is no POS tag. */
  private void compilePosTagPattern() {
    if (posTag != null) {
      pPosRegexMatch = Pattern.compile(posTag);
    }
  }

  /**
   * Creates a new {@link MatchState} for this match and hands it a single token.
   * @since 2.3
   */
  public MatchState createState(Synthesizer synthesizer, AnalyzedTokenReadings token) {
    MatchState matchState = new MatchState(this, synthesizer);
    matchState.setToken(token);
    return matchState;
  }

  /**
   * Creates a new {@link MatchState} for this match and hands it the token
   * array together with the {@code index} and {@code next} values.
   * @since 2.3
   */
  public MatchState createState(Synthesizer synthesizer, AnalyzedTokenReadings[] tokens, int index, int next) {
    MatchState matchState = new MatchState(this, synthesizer);
    matchState.setToken(tokens, index, next);
    return matchState;
  }

  /**
   * Tells whether this Match element sets the part of speech
   * ({@code setpos="yes"} in XML).
   * @return true if the match sets POS
   */
  public boolean setsPos() {
    return setPos;
  }

  /**
   * Tells whether the POS tag of this Match element is regexp-based.
   * @return true if a regexp is used for the POS tag
   */
  public boolean posRegExp() {
    return postagRegexp;
  }

  /**
   * Sets the base form (lemma) that will be formatted, or synthesized, using
   * the specified POS regular expressions. When the argument is accepted, the
   * match is marked as having a static lemma, the POS tag is from then on
   * treated as a regular expression, and a non-null POS tag is compiled.
   * Nothing happens if {@code StringTools.isEmpty} reports the argument as empty.
   * @param lemmaString String that specifies the base form.
   */
  public void setLemmaString(String lemmaString) {
    if (StringTools.isEmpty(lemmaString)) {
      return;
    }
    lemma = lemmaString;
    staticLemma = true;
    postagRegexp = true;
    compilePosTagPattern();
  }

  /**
   * Returns the lemma stored by {@link #setLemmaString(String)}, or null if none was stored.
   * @since 2.3
   */
  public String getLemma() {
    return lemma;
  }

  /**
   * Tells whether a lemma has been stored by {@link #setLemmaString(String)}.
   * @since 2.3
   */
  public boolean isStaticLemma() {
    return staticLemma;
  }

  /**
   * Tells whether the result is to be spell-checked so that misspelled
   * suggestions are suppressed.
   * @return true if misspelled suggestions are suppressed
   */
  public boolean checksSpelling() {
    return suppressMisspelled;
  }

  /**
   * Stores the number of the token this match refers to.
   * @param i Token number.
   */
  public void setTokenRef(int i) {
    tokenRef = i;
  }

  /**
   * Returns the number of the token this match refers to.
   * @return token number.
   */
  public int getTokenRef() {
    return tokenRef;
  }

  /**
   * Lets LT know whether it should change the case of the match.
   * @return true unless the case conversion type is {@link CaseConversion#NONE}
   */
  public boolean convertsCase() {
    return CaseConversion.NONE != caseConversionType;
  }

  /**
   * Returns the case conversion type given to the constructor.
   * @since 2.3
   */
  public CaseConversion getCaseConversionType() {
    return caseConversionType;
  }

  /** Sets the flag saying that the match is not in the suggestion. */
  public void setInMessageOnly(boolean inMessageOnly) {
    this.inMessageOnly = inMessageOnly;
  }

  /** Returns the flag saying that the match is not in the suggestion. */
  public boolean isInMessageOnly() {
    return inMessageOnly;
  }

  /**
   * Returns the POS tag given to the constructor.
   * @since 2.3
   */
  public String getPosTag() {
    return posTag;
  }

  /**
   * Returns the compiled token pattern, or null if no regex was given to the constructor.
   * @since 2.3
   */
  public Pattern getRegexMatch() {
    return pRegexMatch;
  }

  /**
   * Returns the {@code regexReplace} value given to the constructor.
   * @since 2.3
   */
  public String getRegexReplace() {
    return regexReplace;
  }

  /**
   * Returns the compiled POS tag pattern, or null if none has been compiled.
   * @since 2.3
   */
  public Pattern getPosRegexMatch() {
    return pPosRegexMatch;
  }

  /**
   * Tells whether the POS tag is treated as a regular expression;
   * returns the same value as {@link #posRegExp()}.
   * @since 2.3
   */
  public boolean isPostagRegexp() {
    return postagRegexp;
  }

  /**
   * Returns the {@code posTagReplace} value given to the constructor.
   * @since 2.3
   */
  public String getPosTagReplace() {
    return posTagReplace;
  }

  /**
   * Returns the {@code includeSkipped} value given to the constructor.
   * @since 2.3
   */
  public IncludeRange getIncludeSkipped() {
    return includeSkipped;
  }

  /** Returns true if the suggestion will only be applied if the match is successful. */
  public boolean isIfMatch() {
    return ifMatch;
  }

  /** Sets whether the suggestion will only be applied if the match is successful. */
  public void setIfMatch(boolean ifMatch) {
    this.ifMatch = ifMatch;
  }
}