Permalink
Browse files

Support search= fragment identifier for text

  • Loading branch information...
ndw committed Sep 29, 2016
1 parent 21e3647 commit 14dd907a8904cfd05f9e75961869b7edc1ffa82f
@@ -176,6 +176,10 @@ public boolean processStartElement(XdmNode node) throws SaxonApiException {
String accept = node.getAttributeValue(_accept); String accept = node.getAttributeValue(_accept);
String accept_lang = node.getAttributeValue(_accept_language); String accept_lang = node.getAttributeValue(_accept_language);
if (href == null) {
href = "";
}
if (accept != null && accept.matches(".*[^\u0020-\u007e].*")) { if (accept != null && accept.matches(".*[^\u0020-\u007e].*")) {
throw new XProcException("Invalid characters in accept value"); throw new XProcException("Invalid characters in accept value");
} }
@@ -250,9 +254,15 @@ public boolean processStartElement(XdmNode node) throws SaxonApiException {
if (xptr != null) { if (xptr != null) {
/* HACK */ /* HACK */
if ("text".equals(parse) && !xptr.trim().startsWith("text(")) { if ("text".equals(parse)) {
xptr = "text(" + xptr + ")"; String xtrim = xptr.trim();
} // What about spaces around the "=" !
if (xtrim.startsWith("line=") || xtrim.startsWith("char=")) {
xptr = "text(" + xptr + ")";
} else if (xtrim.startsWith("search=")) {
xptr = "search(" + xptr + ")";
}
}
xpointer = new XPointer(runtime, xptr, readLimit); xpointer = new XPointer(runtime, xptr, readLimit);
} }
@@ -27,6 +27,7 @@
private static final QName _element = new QName("", "element"); private static final QName _element = new QName("", "element");
private static final QName _xpath = new QName("", "xpath"); private static final QName _xpath = new QName("", "xpath");
private static final QName _text = new QName("", "text"); private static final QName _text = new QName("", "text");
private static final QName _search = new QName("", "search");
private Vector<XPointerScheme> parts = new Vector<XPointerScheme> (); private Vector<XPointerScheme> parts = new Vector<XPointerScheme> ();
private int readLimit = 0; private int readLimit = 0;
@@ -82,7 +83,11 @@ public String selectText(BufferedReader stream, int contentLength) {
String select = scheme.textEquivalent(); String select = scheme.textEquivalent();
if (result == null && select != null) { if (result == null && select != null) {
try { try {
result = scheme.selectText(stream, contentLength); if (select.startsWith("search=")) {
result = scheme.selectSearchText(stream, contentLength);
} else {
result = scheme.selectText(stream, contentLength);
}
} catch (IllegalArgumentException iae) { } catch (IllegalArgumentException iae) {
// in this case we will never have started reading the file, so we're good to go // in this case we will never have started reading the file, so we're good to go
except = iae; except = iae;
@@ -142,6 +147,8 @@ private String parse(String xpointer) {
parts.add(new XPointerXPathScheme(name, data, readLimit)); parts.add(new XPointerXPathScheme(name, data, readLimit));
} else if (_text.equals(name)) { } else if (_text.equals(name)) {
parts.add(new XPointerTextScheme(name, data, readLimit)); parts.add(new XPointerTextScheme(name, data, readLimit));
} else if (_search.equals(name)) {
parts.add(new XPointerTextSearchScheme(name, data, readLimit));
} else { } else {
parts.add(new XPointerScheme(name, data, readLimit)); parts.add(new XPointerScheme(name, data, readLimit));
} }
@@ -311,4 +318,14 @@ public String textEquivalent() {
return schemeData; return schemeData;
} }
} }
private class XPointerTextSearchScheme extends XPointerScheme {
public XPointerTextSearchScheme(QName name, String data, int readLimit) {
super(name,data,readLimit);
}
public String textEquivalent() {
return schemeData;
}
}
} }
@@ -9,10 +9,11 @@
import net.sf.saxon.s9api.XPathSelector; import net.sf.saxon.s9api.XPathSelector;
import net.sf.saxon.s9api.XdmItem; import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode; import net.sf.saxon.s9api.XdmNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Hashtable; import java.util.Hashtable;
import java.util.Iterator; import java.util.Iterator;
import java.util.Vector; import java.util.Vector;
@@ -29,9 +30,15 @@
public class XPointerScheme { public class XPointerScheme {
protected QName schemeName = null; protected QName schemeName = null;
protected String schemeData = null; protected String schemeData = null;
protected Logger logger = null;
private int readLimit = 0; private int readLimit = 0;
private static final Pattern rangeRE = Pattern.compile("^.*?=(\\d*)?(,(\\d*)?)?$"); private static final Pattern rangeRE = Pattern.compile("^.*?=(\\d*)?(,(\\d*)?)?$");
private static final Pattern lengthRE = Pattern.compile("^length=(\\d+)(,.*)?$"); private static final Pattern lengthRE = Pattern.compile("^length=(\\d+)(,[^;]*)?(.*)$");
private static final Pattern leadingWhitespaceRE = Pattern.compile("^(\\s*)(\\S.*)$");
private static final int INCLUDE_MATCH = 0;
private static final int EXCLUDE_MATCH = 1;
private static final int TRIM = 2;
private long sp = -1; private long sp = -1;
private long ep = -1; private long ep = -1;
@@ -51,10 +58,12 @@ public String getData() {
return schemeData; return schemeData;
} }
public XPointerScheme(QName name, String data, int readLimit) { protected XPointerScheme(QName name, String data, int readLimit) {
this.readLimit = readLimit; this.readLimit = readLimit;
schemeName = name; schemeName = name;
schemeData = data; schemeData = data;
logger = LoggerFactory.getLogger(XPointerScheme.class);
} }
public String xpathEquivalent() { public String xpathEquivalent() {
@@ -108,7 +117,7 @@ public String textEquivalent() {
return selectedNodes; return selectedNodes;
} }
public String selectText(BufferedReader rd, int contentLength) { protected String selectText(BufferedReader rd, int contentLength) {
String select = textEquivalent(); String select = textEquivalent();
if (select == null) { if (select == null) {
@@ -189,7 +198,7 @@ public String selectText(BufferedReader rd, int contentLength) {
try { try {
rd.reset(); rd.reset();
} catch (IOException ioe) { } catch (IOException ioe) {
throw new XProcException(ioe); // oh well
} }
} }
return data; return data;
@@ -228,4 +237,241 @@ private String selectLines(String line) {
lp++; lp++;
return data; return data;
} }
/**
 * Selects a run of lines from a text stream according to a {@code search=} selector.
 *
 * <p>The selector has the shape
 * {@code search=[count]<d>start<d>[opt],[count]<d>end<d>[opt][;strip][;length=N[,charset]]}
 * where {@code <d>} is any single delimiter character that is neither a digit nor
 * (for the start part) a comma. Either part may be omitted, but not both.</p>
 *
 * <ul>
 * <li>{@code count} selects the n-th line containing the search string (default 1).</li>
 * <li>Start options: {@code from} (include the matching line, the default),
 *     {@code after} (exclude it) or {@code trim} (exclude it and drop leading blank lines).</li>
 * <li>End options: {@code to} (include the matching line, the default),
 *     {@code before} (exclude it) or {@code trim} (exclude it and drop trailing blank lines).</li>
 * <li>{@code strip} removes the smallest leading-whitespace prefix shared by the
 *     non-blank selected lines.</li>
 * <li>{@code length=N} is an integrity check against {@code contentLength}; the
 *     optional {@code ,charset} part is accepted but not used.</li>
 * </ul>
 *
 * <p>The reader is marked before reading and reset afterwards (best effort).</p>
 *
 * @param rd the text to search; must support {@code mark}/{@code reset}
 * @param contentLength the known content length, or a negative value to skip the length check
 * @return the selected lines, each terminated by {@code "\n"}
 * @throws XProcException if the scheme has no text selector, the selector is malformed,
 *         no line matches the start condition, or reading fails
 * @throws IllegalArgumentException if the {@code length=} integrity check fails
 */
public String selectSearchText(BufferedReader rd, int contentLength) {
    String select = textEquivalent();
    if (select == null) {
        throw new XProcException("XPointer cannot be used to select text: " + schemeName + "(" + schemeData + ")");
    }

    String origSelect = select;

    String startSearch = null;
    int startOpt = INCLUDE_MATCH;
    int startCount = 1;

    String endSearch = null;
    int endOpt = INCLUDE_MATCH;
    int endCount = 1;

    boolean found = false;
    boolean strip = false;
    int stripWS = Integer.MAX_VALUE;

    // Drop the leading "search=".
    select = select.substring(7).trim();
    if ("".equals(select)) {
        malformedSearch("at least one of start/end required", origSelect);
    }

    // ---- start part: [count]<d>string<d>[trim|from|after] ----
    char ch = select.charAt(0);
    if (ch == ',') {
        // No start part at all; selection begins with the first line.
        select = select.substring(1);
    } else {
        StringBuilder skip = new StringBuilder();
        while (Character.isDigit(ch)) {
            skip.append(ch);
            select = select.substring(1);
            if ("".equals(select)) {
                malformedSearch("start must specify a search string", origSelect);
            }
            ch = select.charAt(0);
        }
        if (skip.length() > 0) {
            startCount = Integer.parseInt(skip.toString());
        }

        // ch is now the delimiter; the search string runs up to its next occurrence.
        select = select.substring(1);
        int pos = select.indexOf(ch);
        if (pos < 0) {
            malformedSearch("unterminated start string", origSelect);
        }
        startSearch = select.substring(0, pos);
        select = select.substring(pos + 1).trim();

        if (select.startsWith("trim")) {
            startOpt = TRIM;
            select = select.substring(4).trim();
        } else if (select.startsWith("from")) {
            startOpt = INCLUDE_MATCH;
            select = select.substring(4).trim();
        } else if (select.startsWith("after")) {
            startOpt = EXCLUDE_MATCH;
            select = select.substring(5).trim();
        } else if ("".equals(select) || select.startsWith(",")) {
            // ok: no start option given
        } else {
            malformedSearch("invalid start option", origSelect);
        }
    }

    if (select.startsWith(",")) {
        select = select.substring(1);
    }

    // ---- end part: [count]<d>string<d>[trim|to|before] ----
    if (!"".equals(select)) {
        StringBuilder skip = new StringBuilder();
        ch = select.charAt(0);
        while (Character.isDigit(ch)) {
            skip.append(ch);
            select = select.substring(1);
            if ("".equals(select)) {
                malformedSearch("end must specify a search string", origSelect);
            }
            ch = select.charAt(0);
        }
        if (skip.length() > 0) {
            endCount = Integer.parseInt(skip.toString());
        }

        select = select.substring(1);
        int pos = select.indexOf(ch);
        if (pos < 0) {
            malformedSearch("unterminated end string", origSelect);
        }
        endSearch = select.substring(0, pos);
        select = select.substring(pos + 1).trim();

        // An unrecognised end option is reported below as trailing garbage.
        if (select.startsWith("trim")) {
            endOpt = TRIM;
            select = select.substring(4).trim();
        } else if (select.startsWith("to")) {
            endOpt = INCLUDE_MATCH;
            select = select.substring(2).trim();
        } else if (select.startsWith("before")) {
            endOpt = EXCLUDE_MATCH;
            select = select.substring(6).trim();
        }
    }

    // A selector such as "search=," names neither a start nor an end string.
    if (startSearch == null && endSearch == null) {
        malformedSearch("at least one of start/end required", origSelect);
    }

    // ---- trailing ";strip" (may also appear after the length check below) ----
    if (select.startsWith(";")) {
        select = select.substring(1).trim();
    }

    if (select.startsWith("strip")) {
        strip = true;
        select = select.substring(5).trim();
        if (select.startsWith(";")) {
            select = select.substring(1).trim();
        }
    }

    if (logger.isTraceEnabled()) {
        logger.trace("XPointer search scheme: search='" + startSearch + "';" + startOpt + ",'" + endSearch + "';" + endOpt);
    }

    StringBuilder data = new StringBuilder();
    try {
        rd.mark(readLimit);

        // ---- optional integrity check: length=N[,charset] ----
        Matcher matcher = lengthRE.matcher(select);
        if (matcher.matches()) {
            int checklen = Integer.parseInt(matcher.group(1));
            // group(2), the optional ",charset" part, is accepted but not used.
            select = matcher.group(3);

            if (contentLength >= 0) {
                if (checklen != contentLength) {
                    throw new IllegalArgumentException("Integrity check failed: " + checklen + " != " + contentLength);
                }
            }

            if (select.startsWith(";")) {
                select = select.substring(1).trim();
            }

            if (select.startsWith("strip")) {
                strip = true;
                select = select.substring(5).trim();
            }
        }

        if (!"".equals(select)) {
            malformedSearch("unexpected characters at end", origSelect);
        }

        // ---- scan the stream line by line ----
        Vector<String> lines = new Vector<> ();
        boolean finished = false;
        boolean output = false;
        String line;
        while (!finished && (line = rd.readLine()) != null) {
            // The end condition is only considered once output has started.
            if (output && endSearch != null && line.contains(endSearch)) {
                if (endCount == 1) {
                    output = false;
                    finished = true;
                    if (endOpt == INCLUDE_MATCH) {
                        lines.add(line);
                    }
                }
                endCount--;
            }

            if (output) {
                lines.add(line);
            }

            // With no start string every line counts as a start match, so
            // output begins on the first line read.
            if (startSearch == null || line.contains(startSearch)) {
                found = true;
                if (startCount == 1) {
                    output = true;
                    if (startOpt == INCLUDE_MATCH) {
                        lines.add(line);
                    }
                }
                startCount--;
            }
        }

        if (!found) {
            throw new XProcException("No matching lines found");
        }

        if (lines.size() > 0) {
            if (strip && stripWS > 0) {
                // Find the smallest indent among non-blank lines; blank lines
                // do not match the pattern and so do not constrain it.
                for (String l : lines) {
                    matcher = leadingWhitespaceRE.matcher(l);
                    if (matcher.matches()) {
                        int wslen = matcher.group(1).length();
                        if (wslen < stripWS) {
                            stripWS = wslen;
                        }
                    }
                }
            }

            while (lines.size() > 0 && startOpt == TRIM && "".equals(lines.firstElement().trim())) {
                lines.remove(0);
            }

            while (lines.size() > 0 && endOpt == TRIM && "".equals(lines.lastElement().trim())) {
                lines.remove(lines.size() - 1);
            }
        }

        for (String l : lines) {
            if (strip && stripWS > 0 && l.length() >= stripWS) {
                l = l.substring(stripWS);
            }
            data.append(l).append("\n");
        }
    } catch (IOException ioe) {
        throw new XProcException(ioe);
    } finally {
        try {
            rd.reset();
        } catch (IOException ignored) {
            // Best effort: failing to rewind must not mask the result or a
            // more meaningful exception from the body above.
        }
    }

    return data.toString();
}
/**
 * Aborts processing of a malformed {@code search=} selector.
 *
 * <p>The message has the form {@code Malformed search: <selector>: <reason>}.</p>
 *
 * @param reason a short explanation of what is wrong
 * @param selector the complete selector as originally supplied
 * @throws XProcException always
 */
private void malformedSearch(String reason, String selector) {
    throw new XProcException("Malformed search: " + selector + ": " + reason);
}
} }

0 comments on commit 14dd907

Please sign in to comment.