Skip to content

Commit

Permalink
Prefix and seek queries regression
Browse files Browse the repository at this point in the history
Switch to the latest available Lucene 5.5 version.
Introduce a custom PrefixMultiTermsQuery and use it instead of the default PrefixQuery to avoid expensive internal automaton creation.
Switch posting format to BlockTreeOrdsPostingsFormat by default.

Both optimisations are pluggable and can be switched off by a corresponding feature toggle.
Please note: switching the posting format will require index regeneration.
  • Loading branch information
MishaDemianenko committed Feb 26, 2016
1 parent 1d7025e commit 306c259
Show file tree
Hide file tree
Showing 52 changed files with 150 additions and 32 deletions.
1 change: 1 addition & 0 deletions community/bolt/LICENSES.txt
Expand Up @@ -5,6 +5,7 @@ libraries. For an overview of the licenses see the NOTICE.txt file.
Apache Software License, Version 2.0
Apache Commons Lang
ConcurrentLinkedHashMap
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/bolt/NOTICE.txt
Expand Up @@ -28,6 +28,7 @@ Third-party licenses
Apache Software License, Version 2.0
Apache Commons Lang
ConcurrentLinkedHashMap
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/consistency-check-legacy/LICENSES.txt
Expand Up @@ -4,6 +4,7 @@ libraries. For an overview of the licenses see the NOTICE.txt file.
------------------------------------------------------------------------------
Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/consistency-check-legacy/NOTICE.txt
Expand Up @@ -27,6 +27,7 @@ Third-party licenses

Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/consistency-check/LICENSES.txt
Expand Up @@ -4,6 +4,7 @@ libraries. For an overview of the licenses see the NOTICE.txt file.
------------------------------------------------------------------------------
Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/consistency-check/NOTICE.txt
Expand Up @@ -27,6 +27,7 @@ Third-party licenses

Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/cypher/cypher/LICENSES.txt
Expand Up @@ -4,6 +4,7 @@ libraries. For an overview of the licenses see the NOTICE.txt file.
------------------------------------------------------------------------------
Apache Software License, Version 2.0
ConcurrentLinkedHashMap
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/cypher/cypher/NOTICE.txt
Expand Up @@ -27,6 +27,7 @@ Third-party licenses

Apache Software License, Version 2.0
ConcurrentLinkedHashMap
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/import-tool/LICENSES.txt
Expand Up @@ -4,6 +4,7 @@ libraries. For an overview of the licenses see the NOTICE.txt file.
------------------------------------------------------------------------------
Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/import-tool/NOTICE.txt
Expand Up @@ -27,6 +27,7 @@ Third-party licenses

Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
2 changes: 1 addition & 1 deletion community/lucene-index-upgrade/pom.xml
Expand Up @@ -13,7 +13,7 @@
<license-text.header>GPL-3-header.txt</license-text.header>
<licensing.prepend.text>notice-gpl-prefix.txt</licensing.prepend.text>
<lucene4.version>4.10.4</lucene4.version>
<lucene5.version>5.4.0</lucene5.version>
<lucene5.version>5.5.0</lucene5.version>
</properties>

<modelVersion>4.0.0</modelVersion>
Expand Down
Expand Up @@ -66,8 +66,8 @@ default void migrated( String name ) {}
private static final String LIBRARY_DIRECTORY = "lib";
private static final String RESOURCE_SEPARATOR = "/";
private static final String LUCENE4_CORE_JAR_NAME = "lucene-core-4.10.4.jar";
private static final String LUCENE5_CORE_JAR_NAME = "lucene-core-5.4.0.jar";
private static final String LUCENE5_BACKWARD_CODECS_NAME = "lucene-backward-codecs-5.4.0.jar";
private static final String LUCENE5_CORE_JAR_NAME = "lucene-core-5.5.0.jar";
private static final String LUCENE5_BACKWARD_CODECS_NAME = "lucene-backward-codecs-5.5.0.jar";
private static final String SEGMENTS_FILE_NAME_PREFIX = "segments";

private final Path indexRootPath;
Expand Down
1 change: 1 addition & 0 deletions community/lucene-index/LICENSES.txt
Expand Up @@ -4,6 +4,7 @@ libraries. For an overview of the licenses see the NOTICE.txt file.
------------------------------------------------------------------------------
Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
1 change: 1 addition & 0 deletions community/lucene-index/NOTICE.txt
Expand Up @@ -27,6 +27,7 @@ Third-party licenses

Apache Software License, Version 2.0
Apache Commons Lang
Lucene codecs
Lucene Common Analyzers
Lucene Core
Lucene Memory
Expand Down
5 changes: 5 additions & 0 deletions community/lucene-index/pom.xml
Expand Up @@ -91,6 +91,11 @@ the relevant Commercial Agreement.
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-codecs</artifactId>
</dependency>


<dependency>
<groupId>junit</groupId>
Expand Down
Expand Up @@ -377,7 +377,7 @@ private IndexSearcher searcher( boolean allowRefreshSearcher )
try
{
IndexReader newReader = this.reader == null ?
DirectoryReader.open( this.writer, true ) :
DirectoryReader.open( this.writer ) :
DirectoryReader.openIfChanged( (DirectoryReader) this.reader );
if ( newReader == this.reader )
{
Expand Down
Expand Up @@ -226,7 +226,7 @@ private IndexReference refreshSearcher( IndexReference searcher )
// TODO: this cast should always succeed, maybe check nonetheless?
DirectoryReader reader = (DirectoryReader) searcher.getSearcher().getIndexReader();
IndexWriter writer = searcher.getWriter();
IndexReader reopened = DirectoryReader.openIfChanged( reader, writer, true );
IndexReader reopened = DirectoryReader.openIfChanged( reader, writer );
if ( reopened != null )
{
IndexSearcher newSearcher = newIndexSearcher( searcher.getIdentifier(), reopened );
Expand Down Expand Up @@ -305,7 +305,7 @@ synchronized IndexReference syncGetIndexSearcher( IndexIdentifier identifier )
if ( searcher == null )
{
IndexWriter writer = newIndexWriter( identifier );
IndexReader reader = DirectoryReader.open( writer, true );
IndexReader reader = DirectoryReader.open( writer );
IndexSearcher indexSearcher = newIndexSearcher( identifier, reader );
searcher = new IndexReference( identifier, indexSearcher, writer );
indexSearchers.put( identifier, searcher );
Expand Down
Expand Up @@ -19,6 +19,9 @@
*/
package org.neo4j.kernel.api.impl.index;

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.lucene54.Lucene54Codec;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;

Expand All @@ -39,6 +42,8 @@ public final class IndexWriterConfigs
FeatureToggles.getDouble( IndexWriterConfigs.class, "nocfs.ratio", 1.0 );
private static final double MERGE_POLICY_MIN_MERGE_MB =
FeatureToggles.getDouble( IndexWriterConfigs.class, "min.merge", 0.1 );
private static final boolean CODEC_BLOCK_TREE_ORDS_POSTING_FORMAT =
FeatureToggles.flag( IndexWriterConfigs.class, "block.tree.ords.posting.format", true );

private static final int POPULATION_RAM_BUFFER_SIZE_MB =
FeatureToggles.getInteger( IndexWriterConfigs.class, "population.ram.buffer.size", 50 );
Expand All @@ -55,6 +60,16 @@ public static IndexWriterConfig standard()
writerConfig.setMaxBufferedDocs( MAX_BUFFERED_DOCS );
writerConfig.setIndexDeletionPolicy( new MultipleBackupDeletionPolicy() );
writerConfig.setUseCompoundFile( true );
writerConfig.setCodec(new Lucene54Codec()
{
@Override
public PostingsFormat getPostingsFormatForField( String field )
{
PostingsFormat postingFormat = super.getPostingsFormatForField( field );
return CODEC_BLOCK_TREE_ORDS_POSTING_FORMAT ? new BlockTreeOrdsPostingsFormat() :
postingFormat;
}
});

LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setNoCFSRatio( MERGE_POLICY_NO_CFS_RATIO );
Expand Down
Expand Up @@ -560,28 +560,17 @@ public int freq() throws IOException
}

@Override
public int docID()
{
throw new UnsupportedOperationException();
}

@Override
public int nextDoc() throws IOException
public DocIdSetIterator iterator()
{
throw new UnsupportedOperationException();
}

@Override
public int advance( int target ) throws IOException
public int docID()
{
throw new UnsupportedOperationException();
}

@Override
public long cost()
{
return scores.length;
}
}

private static final class DocsInIndexOrderIterator extends AbstractIndexHits<Document>
Expand Down
Expand Up @@ -51,7 +51,7 @@ public IndexPartition( File partitionFolder, Directory directory, IndexWriterCon
this.indexFolder = partitionFolder;
this.directory = directory;
this.indexWriter = new IndexWriter( directory, writerConfig );
this.searcherManager = new SearcherManager( indexWriter, true, new SearcherFactory() );
this.searcherManager = new SearcherManager( indexWriter, new SearcherFactory() );
}

public IndexWriter getIndexWriter()
Expand Down
Expand Up @@ -24,6 +24,7 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
Expand All @@ -32,24 +33,34 @@
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.StringHelper;

import java.io.IOException;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.Map;

import org.neo4j.unsafe.impl.internal.dragons.FeatureToggles;

import static org.apache.lucene.document.Field.Store.YES;

public class LuceneDocumentStructure
{
private static final boolean USE_LUCENE_STANDARD_PREFIX_QUERY =
FeatureToggles.flag( LuceneDocumentStructure.class, "lucene.standard.prefix.query", false );

public static final String NODE_ID_KEY = "id";

// Absolute hard maximum length for a term, in bytes once
Expand Down Expand Up @@ -139,7 +150,7 @@ public static Query newRangeSeekByStringQuery( String lower, boolean includeLowe
builder.add( termRangeQuery, BooleanClause.Occur.SHOULD );
return builder.build();
}
return termRangeQuery;
return new ConstantScoreQuery( termRangeQuery );
}

public static Query newWildCardStringQuery( String searchFor )
Expand All @@ -150,9 +161,12 @@ public static Query newWildCardStringQuery( String searchFor )
return new WildcardQuery( term );
}

public static PrefixQuery newRangeSeekByPrefixQuery( String prefix )
public static Query newRangeSeekByPrefixQuery( String prefix )
{
return new PrefixQuery( new Term( ValueEncoding.String.key(), prefix ) );
Term term = new Term( ValueEncoding.String.key(), prefix );
MultiTermQuery prefixQuery = USE_LUCENE_STANDARD_PREFIX_QUERY ? new PrefixQuery( term ) :
new PrefixMultiTermsQuery( term );
return new ConstantScoreQuery( prefixQuery );
}

public static Term newTermForChangeOrRemove( long nodeId )
Expand Down Expand Up @@ -188,6 +202,54 @@ public static TermsEnum originalTerms( Terms terms, String fieldKey ) throws IOE
: termsEnum;
}

/**
 * Simple implementation of a prefix query that mimics the old Lucene way of handling
 * prefix queries. According to benchmarks this implementation is faster than
 * {@link org.apache.lucene.search.PrefixQuery} because we do not construct an
 * automaton, which is extremely expensive.
 */
private static class PrefixMultiTermsQuery extends MultiTermQuery
{
    private final Term term;

    PrefixMultiTermsQuery( Term term )
    {
        super( term.field() );
        this.term = term;
    }

    @Override
    protected TermsEnum getTermsEnum( Terms terms, AttributeSource atts ) throws IOException
    {
        // An empty prefix matches every term, so no filtering enum is required.
        return term.bytes().length == 0 ? terms.iterator()
                                        : new PrefixTermsEnum( terms.iterator(), term.bytes() );
    }

    @Override
    public String toString( String field )
    {
        return getClass().getSimpleName() + ", term:" + term + ", field:" + field;
    }

    @Override
    public boolean equals( Object o )
    {
        // MultiTermQuery#equals compares only class, boost, field and rewrite method; include
        // the term so queries for different prefixes on the same field are never considered
        // equal (e.g. by query caches). super.equals guarantees o is a PrefixMultiTermsQuery.
        return super.equals( o ) && term.equals( ((PrefixMultiTermsQuery) o).term );
    }

    @Override
    public int hashCode()
    {
        return 31 * super.hashCode() + term.hashCode();
    }

    /**
     * Accepts every term starting with the given prefix. Terms are enumerated in sorted
     * order, so after the initial seek the first non-matching term ends the enumeration.
     */
    private static class PrefixTermsEnum extends FilteredTermsEnum
    {
        private final BytesRef prefix;

        PrefixTermsEnum( TermsEnum termEnum, BytesRef prefix )
        {
            super( termEnum );
            this.prefix = prefix;
            // Skip directly to the first candidate term instead of scanning from the start.
            setInitialSeekTerm( this.prefix );
        }

        @Override
        protected AcceptStatus accept( BytesRef term ) throws IOException
        {
            return StringHelper.startsWith( term, prefix ) ? AcceptStatus.YES : AcceptStatus.END;
        }
    }
}

private static class DocWithId
{
private final Document document;
Expand Down
Expand Up @@ -42,8 +42,6 @@
import java.util.Map;
import java.util.function.Function;

import static com.sun.corba.se.spi.activation.IIOP_CLEAR_TEXT.value;

public class IndexReaderStub extends LeafReader
{

Expand Down

0 comments on commit 306c259

Please sign in to comment.