Permalink
Browse files

git-svn-id: file:///opt/svn/repositories/sonatype.org/nexus/tags/nexus-indexer-3.0.3@7005 2aa8b3fc-8ebb-4439-a84f-95066eaea8ab
  • Loading branch information...
2 parents deda64b + 9ee4cd2 commit 64daa2856341151442699f36bf3b1bda5b8742ac dbradicich committed Aug 13, 2010
@@ -0,0 +1,22 @@
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.LockObtainFailedException;
+
+public class ExtendedIndexWriter
+ extends IndexWriter
+{
+ public ExtendedIndexWriter( Directory d, boolean autoCommit, Analyzer a, boolean create )
+ throws CorruptIndexException, LockObtainFailedException, IOException
+ {
+ super( d, autoCommit, a, create );
+ }
+
+ public boolean hasUncommitedChanges()
+ {
+ return pendingCommit != null;
+ }
+}
@@ -144,6 +144,8 @@
@Deprecated
private transient ArtifactVersion artifactVersion;
+ private transient float luceneScore;
+
public String classifier;
/**
@@ -218,6 +220,16 @@ public ArtifactVersion getArtifactVersion()
return artifactVersion;
}
+ public float getLuceneScore()
+ {
+ return luceneScore;
+ }
+
+ public void setLuceneScore( float score )
+ {
+ this.luceneScore = score;
+ }
+
public String getUinfo()
{
if ( uinfo == null )
@@ -9,8 +9,9 @@
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
+import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Hits;
-import org.apache.lucene.search.MultiSearcher;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
@@ -19,7 +20,6 @@
import org.apache.lucene.search.highlight.TextFragment;
import org.sonatype.nexus.index.context.IndexUtils;
import org.sonatype.nexus.index.context.IndexingContext;
-import org.sonatype.nexus.index.context.NexusIndexSearcher;
import org.sonatype.nexus.index.creator.JarFileContentsIndexCreator;
/**
@@ -39,14 +39,20 @@
// TODO: inspect is this limit actually needed or not.
private static final int HARD_HIT_COUNT_LIMIT = Integer.MAX_VALUE;
+ private final IteratorSearchRequest searchRequest;
+
+ private final IndexSearcher indexSearcher;
+
+ private final List<IndexingContext> contexts;
+
+ private final int[] starts;
+
private final ArtifactInfoFilter filter;
private final ArtifactInfoPostprocessor postprocessor;
private final List<MatchHighlightRequest> matchHighlightRequests;
- private final MultiSearcher searcher;
-
private final Hits hits;
private final int from;
@@ -57,19 +63,37 @@
private int pointer;
+ private int processedArtifactInfoCount;
+
private ArtifactInfo ai;
- protected DefaultIteratorResultSet( AbstractSearchRequest request, MultiSearcher searcher, final Hits hits )
+ protected DefaultIteratorResultSet( final IteratorSearchRequest request, final IndexSearcher indexSearcher,
+ final List<IndexingContext> contexts, final Hits hits )
throws IOException
{
+ this.searchRequest = request;
+
+ this.indexSearcher = indexSearcher;
+
+ this.contexts = contexts;
+
+ {
+ int maxDoc = 0;
+ this.starts = new int[contexts.size() + 1]; // build starts array
+ for ( int i = 0; i < contexts.size(); i++ )
+ {
+ starts[i] = maxDoc;
+ maxDoc += contexts.get( i ).getIndexReader().maxDoc(); // compute maxDocs
+ }
+ starts[contexts.size()] = maxDoc;
+ }
+
this.filter = request.getArtifactInfoFilter();
this.postprocessor = request.getArtifactInfoPostprocessor();
this.matchHighlightRequests = request.getMatchHighlightRequests();
- this.searcher = searcher;
-
this.hits = hits;
this.from = ( request.getStart() == AbstractSearchRequest.UNDEFINED ? 0 : request.getStart() );
@@ -80,6 +104,8 @@ protected DefaultIteratorResultSet( AbstractSearchRequest request, MultiSearcher
this.pointer = from;
+ this.processedArtifactInfoCount = 0;
+
this.maxRecPointer = from + count;
ai = createNextAi();
@@ -108,6 +134,23 @@ public ArtifactInfo next()
return result;
}
+ public void remove()
+ {
+ throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
+ }
+
+ public Iterator<ArtifactInfo> iterator()
+ {
+ return this;
+ }
+
+ public int getTotalProcessedArtifactInfoCount()
+ {
+ return processedArtifactInfoCount;
+ }
+
+ // ==
+
protected ArtifactInfo createNextAi()
throws IOException
{
@@ -122,12 +165,21 @@ protected ArtifactInfo createNextAi()
{
Document doc = hits.doc( pointer );
- IndexingContext context = getIndexingContextForPointer( hits.id( pointer ) );
+ IndexingContext context = getIndexingContextForPointer( doc, hits.id( pointer ) );
result = IndexUtils.constructArtifactInfo( doc, context );
if ( result != null )
{
+ // uncomment this to have explanations too
+ // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
+ // For debugging only
+ //
+ // result.getAttributes().put( Explanation.class.getName(),
+ // indexSearcher.explain( searchRequest.getQuery(), hits.id( pointer ) ).toString() );
+
+ result.setLuceneScore( hits.score( pointer ) );
+
result.repository = context.getRepositoryId();
result.context = context.getId();
@@ -152,6 +204,7 @@ protected ArtifactInfo createNextAi()
}
pointer++;
+ processedArtifactInfoCount++;
}
return result;
@@ -283,18 +336,33 @@ protected IndexerField selectStoredIndexerField( Field field )
return fragTexts;
}
- protected IndexingContext getIndexingContextForPointer( int docPtr )
+ protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
{
- return ( (NexusIndexSearcher) searcher.getSearchables()[searcher.subSearcher( docPtr )] ).getIndexingContext();
+ return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
}
- public void remove()
- {
- throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
- }
+ private static int readerIndex( int n, int[] starts, int numSubReaders )
+ { // find reader for doc n:
+ int lo = 0; // search starts array
+ int hi = numSubReaders - 1; // for first element less
- public Iterator<ArtifactInfo> iterator()
+ while ( hi >= lo )
{
- return this;
+ int mid = ( lo + hi ) >>> 1;
+ int midValue = starts[mid];
+ if ( n < midValue )
+ hi = mid - 1;
+ else if ( n > midValue )
+ lo = mid + 1;
+ else
+ { // found a match
+ while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
+ {
+ mid++; // scan to last match
+ }
+ return mid;
+}
+ }
+ return hi;
}
}
@@ -9,6 +9,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.QueryParser.Operator;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
@@ -223,7 +224,11 @@ else if ( SearchType.SCORED.equals( type ) )
Term t = new Term( indexerField.getKey(), query );
bq.add( new TermQuery( t ), Occur.SHOULD );
- bq.add( new PrefixQuery( t ), Occur.SHOULD );
+
+ PrefixQuery pq = new PrefixQuery( t );
+ pq.setBoost( 0.8f );
+
+ bq.add( pq , Occur.SHOULD );
return bq;
}
@@ -235,8 +240,7 @@ else if ( SearchType.SCORED.equals( type ) )
// tokenization should happen against the field!
QueryParser qp = new QueryParser( indexerField.getKey(), new NexusAnalyzer() );
-
- Query q1 = null;
+ qp.setDefaultOperator( Operator.AND );
// small cheap trick
// if a query is not "expert" (does not contain field:val kind of expression)
@@ -259,16 +263,29 @@ else if ( SearchType.SCORED.equals( type ) )
try
{
- q1 = qp.parse( qpQuery );
+ // qpQuery = "\"" + qpQuery + "\"";
+
+ BooleanQuery q1 = new BooleanQuery();
+
+ q1.add( qp.parse( qpQuery ), Occur.SHOULD );
+
+ if ( qpQuery.contains( " " ) )
+ {
+ q1.add( qp.parse( "\"" + qpQuery + "\"" ), Occur.SHOULD );
+ }
Query q2 = null;
+ // try with KW only if the processed query in qpQuery does not have spaces!
+ if ( !query.contains( " " ) )
+ {
IndexerField keywordField = selectIndexerField( indexerField.getOntology(), SearchType.EXACT );
if ( keywordField.isKeyword() )
{
q2 = constructQuery( indexerField.getOntology(), keywordField, query, type );
}
+ }
if ( q2 == null )
{
@@ -16,11 +16,11 @@
import java.util.TreeSet;
import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.ParallelMultiSearcher;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Searchable;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.codehaus.plexus.component.annotations.Component;
@@ -292,26 +292,37 @@ private IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest reques
request.setCount( IteratorSearchRequest.UNDEFINED );
}
- ArrayList<IndexSearcher> contextsToSearch = new ArrayList<IndexSearcher>( indexingContexts.size() );
+ // to avoid changing the API altogether; we need stable ordering here
+ // filter for those 1st, that take part in here
+ ArrayList<IndexingContext> contexts = new ArrayList<IndexingContext>( indexingContexts.size() );
for ( IndexingContext ctx : indexingContexts )
{
if ( ignoreContext || ctx.isSearchable() )
{
- contextsToSearch.add( ctx.getReadOnlyIndexSearcher() );
+ contexts.add( ctx );
}
}
- ParallelMultiSearcher multiSearcher =
- new ParallelMultiSearcher( contextsToSearch.toArray( new Searchable[contextsToSearch.size()] ) );
+ ArrayList<IndexReader> contextsToSearch = new ArrayList<IndexReader>( contexts.size() );
+
+ for ( IndexingContext ctx : contexts )
+ {
+ contextsToSearch.add( ctx.getIndexReader() );
+ }
+
+ MultiReader multiReader =
+ new MultiReader( contextsToSearch.toArray( new IndexReader[contextsToSearch.size()] ) );
+
+ IndexSearcher indexSearcher = new IndexSearcher( multiReader );
// NEXUS-3482 made us to NOT use reverse ordering (it is a fix I wanted to implement, but user contributed patch
// did come in faster! -- Thanks)
Hits hits =
- multiSearcher.search( request.getQuery(), new Sort( new SortField[] { SortField.FIELD_SCORE,
+ indexSearcher.search( request.getQuery(), new Sort( new SortField[] { SortField.FIELD_SCORE,
new SortField( null, SortField.DOC, false ) } ) );
return new IteratorSearchResponse( request.getQuery(), hits.length(), new DefaultIteratorResultSet( request,
- multiSearcher, hits ) );
+ indexSearcher, contexts, hits ) );
}
}
@@ -14,5 +14,12 @@
public interface IteratorResultSet
extends Iterator<ArtifactInfo>, Iterable<ArtifactInfo>
{
-
+ /**
+ * Returns the up-to-date count of Lucene Documents that have been loaded and converted into
+ * ArtifactInfo objects up to the last next() invocation. Warning: this method returns the count of ALL
+ * touched/loaded documents, including those that were filtered out and NOT returned by the iterator's next() method!
+ *
+ * @return total number of processed ArtifactInfos so far
+ */
+ int getTotalProcessedArtifactInfoCount();
}
Oops, something went wrong.

0 comments on commit 64daa28

Please sign in to comment.