Skip to content

Commit

Permalink
[ issue #34 ] SPARQL results pagination
Browse files Browse the repository at this point in the history
  • Loading branch information
agazzarini committed Mar 16, 2015
1 parent cb05291 commit b6e21bf
Show file tree
Hide file tree
Showing 15 changed files with 5,604 additions and 175 deletions.
5 changes: 3 additions & 2 deletions solrdf/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
<dependency>
<groupId>org.apache.solr</groupId>
<artifactId>solr-core</artifactId>
<version>4.8.0</version>
<version>${solr.version}</version>
</dependency>
<dependency>
<groupId>org.apache.jena</groupId>
Expand Down Expand Up @@ -135,7 +135,8 @@
<container>
<systemProperties>
<solr.solr.home>${project.build.directory}/${project.artifactId}-${project.version}-dev/${project.artifactId}</solr.solr.home>
<solr.data.dir>${project.build.directory}/data</solr.data.dir>
<!-- <solr.data.dir>${project.build.directory}/data</solr.data.dir> -->
<solr.data.dir>/work/data/solrdf</solr.data.dir>
<log4j.configuration>file://${basedir}/src/test/resources/log4j.xml</log4j.configuration>
</systemProperties>
<containerId>jetty7x</containerId>
Expand Down
1 change: 0 additions & 1 deletion solrdf/src/dev/eclipse/start-SolRDF.launch
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,5 @@
<booleanAttribute key="M2_UPDATE_SNAPSHOTS" value="false"/>
<stringAttribute key="M2_USER_SETTINGS" value=""/>
<booleanAttribute key="M2_WORKSPACE_RESOLUTION" value="true"/>
<stringAttribute key="org.eclipse.debug.core.source_locator_id" value="org.eclipse.m2e.launching.MavenSourceLocator"/>
<stringAttribute key="org.eclipse.jdt.launching.WORKING_DIRECTORY" value="${workspace_loc:/solrdf}"/>
</launchConfiguration>
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.gazzax.labs.solrdf.NTriples;

import com.google.common.collect.UnmodifiableIterator;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.Triple;

/**
Expand All @@ -35,30 +36,36 @@ public class DeepPagingIterator extends UnmodifiableIterator<Triple> {
TRIPLE_FIELDS.add(Field.O);
}

protected static final Triple DUMMY_TRIPLE = new Triple(Node.ANY, Node.ANY, Node.ANY);

private final SolrIndexSearcher searcher;
final SolrIndexSearcher.QueryCommand queryCommand;
final GraphEventListener listener;
final GraphEventConsumer consumer;
private DocList page;

private CursorMark nextCursorMark;
private CursorMark sentCursorMark;

/**
* Iteration state: we need to (re)execute a query.
* This could be needed the very first time we start iteration and each time the current result
* page has been consumed.
*/
private final Iterator<Triple> executeQuery = new UnmodifiableIterator<Triple>() {
private final Iterator<Triple> firstQueryExecution = new UnmodifiableIterator<Triple>() {
@Override
public boolean hasNext() {
try {
final SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
searcher.search(result, queryCommand);

consumer.onDocSet(result.getDocListAndSet().docSet);
queryCommand.clearFlags(SolrIndexSearcher.GET_DOCSET);

sentCursorMark = queryCommand.getCursorMark();
nextCursorMark = result.getNextCursorMark();

page = result.getDocListAndSet().docList;

return page.size() > 0;
} catch (final Exception exception) {
throw new RuntimeException(exception);
Expand All @@ -70,6 +77,44 @@ public Triple next() {
currentState = iterateOverCurrentPage;
return currentState.next();
}

public String toString() {
return "firstQueryExecution";
};
};

/**
* Iteration state: we need to (re)execute a query.
* This could be needed the very first time we start iteration and each time the current result
* page has been consumed.
*/
private final Iterator<Triple> executeQuery = new UnmodifiableIterator<Triple>() {
@Override
public boolean hasNext() {
try {
final SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
searcher.search(result, queryCommand);

sentCursorMark = queryCommand.getCursorMark();
nextCursorMark = result.getNextCursorMark();

page = result.getDocListAndSet().docList;

return page.size() > 0;
} catch (final Exception exception) {
throw new RuntimeException(exception);
}
}

@Override
public Triple next() {
currentState = iterateOverCurrentPage;
return currentState.next();
}

public String toString() {
return "executeQuery";
};
};

/**
Expand All @@ -93,12 +138,18 @@ public boolean hasNext() {
public Triple next() {
try {
final int nextDocId = iterator().nextDoc();
final Document document = searcher.doc(nextDocId, TRIPLE_FIELDS);
final Triple triple = Triple.create(
NTriples.asURIorBlankNode((String) document.get(Field.S)),
NTriples.asURI((String) document.get(Field.P)),
NTriples.asNode((String) document.get(Field.O)));
listener.afterTripleHasBeenBuilt(triple, nextDocId);

Triple triple = null;
if (consumer.requireTripleBuild()) {
final Document document = searcher.doc(nextDocId, TRIPLE_FIELDS);
triple = Triple.create(
NTriples.asURIorBlankNode((String) document.get(Field.S)),
NTriples.asURI((String) document.get(Field.P)),
NTriples.asNode((String) document.get(Field.O)));
} else {
triple = DUMMY_TRIPLE;
}
consumer.afterTripleHasBeenBuilt(triple, nextDocId);
return triple;
} catch (final IOException exception) {
throw new RuntimeException(exception);
Expand All @@ -109,9 +160,12 @@ DocIterator iterator() {
if (iterator == null) {
iterator = page.iterator();
}
return iterator;

return iterator;
}

public String toString() {
return "iterateOverCurrentPage";
};
};

/**
Expand All @@ -133,9 +187,13 @@ public boolean hasNext() {
public Triple next() {
return currentState.next();
}

public String toString() {
return "checkForConsumptionCompleteness";
};
};

private Iterator<Triple> currentState = executeQuery;
private Iterator<Triple> currentState = firstQueryExecution;

/**
* Builds a new iterator with the given data.
Expand All @@ -144,13 +202,13 @@ public Triple next() {
* @param queryCommand the query command that will be submitted.static
* @param sort the sort specs.
*/
DeepPagingIterator(final SolrIndexSearcher searcher, final SolrIndexSearcher.QueryCommand queryCommand, final SortSpec sort, final GraphEventListener listener) {
DeepPagingIterator(final SolrIndexSearcher searcher, final SolrIndexSearcher.QueryCommand queryCommand, final SortSpec sort, final GraphEventConsumer listener) {
this.searcher = searcher;
this.queryCommand = queryCommand;
sort.setOffset(0);
this.sentCursorMark = new CursorMark(searcher.getSchema(), sort);
this.queryCommand.setCursorMark(sentCursorMark);
this.listener = listener;
this.consumer = listener;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.gazzax.labs.solrdf.graph;

import org.apache.solr.search.DocSet;

import com.hp.hpl.jena.graph.Triple;

/**
* An optional consumer that drives and listens the graph parsing consumption.
*
* @author Andrea Gazzarini
* @since 1.0
*/
public interface GraphEventConsumer {
/**
* A new triples has been built.
* Note that if a preceding call to {@link #requireTripleBuild()} returned false, then
* the given Triple can be just a placeholder ({@link DeepPagingIterator#DUMMY_TRIPLE}).
*
* @param triple the triple.
* @param docId the (internal Lucene) document identifier.
* @see DeepPagingIterator#DUMMY_TRIPLE
*/
void afterTripleHasBeenBuilt(Triple triple, int docId);

/**
* While parsing the graph the consumer can or cannot be interested in effectively building a triple.
* This method will be called each time a graph needs to create a new Triple representation.
*
* For example, if we are just collecting the docIds, the consumer can control the effective triple creation
* therefore avoiding a lot of temporary (and unuseful) objects.
*
* @return if the current triple match must be represented by a new {@link Triple} instance.
*/
boolean requireTripleBuild();

/**
* The consumer is informed about the {@link DocSet} associated with the current search.
* This callbacks happens once per query.
*
* @param docSet the {@link DocSet} associated with the current search.
*/
void onDocSet(DocSet docSet);
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrIndexSearcher;
import org.gazzax.labs.solrdf.Field;
Expand All @@ -28,11 +29,29 @@
* @since 1.0
*/
public class SolRDFDatasetGraph extends DatasetGraphCaching {
final static GraphEventConsumer NULL_GRAPH_EVENT_CONSUMER = new GraphEventConsumer() {

@Override
public boolean requireTripleBuild() {
return true;
}

@Override
public void onDocSet(DocSet docSet) {
// Nothing to be done here.
}

@Override
public void afterTripleHasBeenBuilt(final Triple triple, final int docId) {
// Nothing to be done here.
}
};

final SolrQueryRequest request;
final SolrQueryResponse response;
final QParser qParser;

final GraphEventListener listener;
final GraphEventConsumer listener;

/**
* Builds a new Dataset graph with the given data.
Expand All @@ -58,11 +77,11 @@ public SolRDFDatasetGraph(
final SolrQueryRequest request,
final SolrQueryResponse response,
final QParser qParser,
final GraphEventListener listener) {
final GraphEventConsumer listener) {
this.request = request;
this.response = response;
this.qParser = qParser;
this.listener = listener;
this.listener = listener != null ? listener : NULL_GRAPH_EVENT_CONSUMER;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
* @since 1.0
*/
public final class SolRDFGraph extends GraphBase {
static final int DEFAULT_QUERY_FETCH_SIZE = 10;
static final int DEFAULT_QUERY_FETCH_SIZE = 1000;

private FieldInjectorRegistry registry = new FieldInjectorRegistry();

Expand All @@ -63,7 +63,7 @@ public final class SolRDFGraph extends GraphBase {

final int queryFetchSize;

final GraphEventListener listener;
final GraphEventConsumer listener;

/**
* Creates a Read / Write {@link Graph}.
Expand All @@ -79,7 +79,7 @@ public static SolRDFGraph readableAndWritableGraph(
final SolrQueryRequest request,
final SolrQueryResponse response,
final QParser qParser,
final GraphEventListener listener) {
final GraphEventConsumer listener) {
return new SolRDFGraph(graphNode, request, response, qParser, DEFAULT_QUERY_FETCH_SIZE, listener);
}

Expand All @@ -99,7 +99,7 @@ public static SolRDFGraph readableAndWritableGraph(
final SolrQueryResponse response,
final QParser qParser,
final int fetchSize,
final GraphEventListener listener) {
final GraphEventConsumer listener) {
return new SolRDFGraph(graphNode, request, response, qParser, fetchSize, listener);
}

Expand All @@ -118,7 +118,7 @@ private SolRDFGraph(
final SolrQueryResponse response,
final QParser qparser,
final int fetchSize,
final GraphEventListener listener) {
final GraphEventConsumer listener) {
this.graphNode = graphNode;
this.graphNodeStringified = (graphNode != null) ? asNtURI(graphNode) : null;
this.request = request;
Expand Down Expand Up @@ -239,6 +239,7 @@ Iterator<Triple> query(final TripleMatch pattern) throws SyntaxError {
cmd.setQuery(new MatchAllDocsQuery());
cmd.setSort(sortSpec.getSort());
cmd.setLen(queryFetchSize);
cmd.setFlags(cmd.getFlags() | SolrIndexSearcher.GET_DOCSET);

final List<Query> filters = new ArrayList<Query>();

Expand All @@ -251,7 +252,7 @@ Iterator<Triple> query(final TripleMatch pattern) throws SyntaxError {
}

if (p != null) {
filters.add(new TermQuery(new Term(Field.P, asNt(p))));
filters.add(new TermQuery(new Term(Field.P, asNtURI(p))));
}

if (o != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
public interface MessageCatalog {
String PREFIX = "<SOLRDF";

String _00090_SWITCHING_2_HYB_MODE = PREFIX + "-00090> : Switching to Hybrid mode.";
String _00091_NULL_QUERY_OR_EXECUTION = PREFIX + "-00091> : Query or QueryExecution cannot be null.";
String _00092_NEGOTIATED_CONTENT_TYPE = PREFIX + "-00092> : Query type %s, incoming Accept header is %s, applied Content-type is %s";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,6 @@ public void write(
final Query query = (Query)request.getContext().get(Names.QUERY);
final QueryExecution execution = (QueryExecution)response.getValues().get(Names.QUERY_EXECUTION);
try {
if (query == null || execution == null) {
LOGGER.error(MessageCatalog._00091_NULL_QUERY_OR_EXECUTION);
return;
}

final boolean isHybridMode = Boolean.TRUE.equals(request.getContext().get(Names.HYBRID_MODE));
if (isHybridMode) {
response.add(Names.SOLR_REQUEST, request);
Expand All @@ -109,6 +104,10 @@ public void write(
strategy = strategy != null ? strategy : compositeWriters.get("text/xml");
strategy.doWrite(values, writer, contentType);
} else {
if (query == null || execution == null) {
LOGGER.error(MessageCatalog._00091_NULL_QUERY_OR_EXECUTION);
return;
}
writers.get(query.getQueryType()).doWrite(values, writer, getContentType(request));
}
} finally {
Expand Down
Loading

0 comments on commit b6e21bf

Please sign in to comment.