Skip to content

Commit

Permalink
Better lucene index writer config for index population
Browse files Browse the repository at this point in the history
This commit makes the IndexWriterConfig configurable when a LuceneSchemaIndex is
created. During population an improved version of the config is then used: it
disables flushing by number of documents and enables flushing by RAM buffer
size. The buffer size is also increased from the default 16 MB to 50 MB so that
flushes of newly added documents happen less frequently.

Each IndexWriterConfig is attached to a single IndexWriter and can't be
reused to create other writers. That is why a Supplier<IndexWriterConfig>
is passed around everywhere until the actual writer is created.
  • Loading branch information
lutovich committed Feb 15, 2016
1 parent 702cb71 commit 35f416d
Show file tree
Hide file tree
Showing 12 changed files with 64 additions and 18 deletions.
Expand Up @@ -21,6 +21,7 @@


import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;


import java.io.Closeable; import java.io.Closeable;
Expand All @@ -33,6 +34,7 @@
import java.util.Map; import java.util.Map;
import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Supplier;


import org.neo4j.graphdb.ResourceIterator; import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.helpers.ArrayUtil; import org.neo4j.helpers.ArrayUtil;
Expand All @@ -58,12 +60,14 @@ public abstract class AbstractLuceneIndex implements Closeable
protected final ReentrantLock partitionsLock = new ReentrantLock(); protected final ReentrantLock partitionsLock = new ReentrantLock();


protected final PartitionedIndexStorage indexStorage; protected final PartitionedIndexStorage indexStorage;
private final Supplier<IndexWriterConfig> writerConfigSupplier;
private List<IndexPartition> partitions = new CopyOnWriteArrayList<>(); private List<IndexPartition> partitions = new CopyOnWriteArrayList<>();
private volatile boolean open; private volatile boolean open;


public AbstractLuceneIndex( PartitionedIndexStorage indexStorage ) public AbstractLuceneIndex( PartitionedIndexStorage indexStorage, Supplier<IndexWriterConfig> writerConfigSupplier )
{ {
this.indexStorage = indexStorage; this.indexStorage = indexStorage;
this.writerConfigSupplier = writerConfigSupplier;
} }


/** /**
Expand Down Expand Up @@ -93,7 +97,8 @@ public void open() throws IOException
Map<File,Directory> indexDirectories = indexStorage.openIndexDirectories(); Map<File,Directory> indexDirectories = indexStorage.openIndexDirectories();
for ( Map.Entry<File,Directory> indexDirectory : indexDirectories.entrySet() ) for ( Map.Entry<File,Directory> indexDirectory : indexDirectories.entrySet() )
{ {
partitions.add( new IndexPartition( indexDirectory.getKey(), indexDirectory.getValue() ) ); partitions.add( new IndexPartition( indexDirectory.getKey(), indexDirectory.getValue(),
writerConfigSupplier.get() ) );
} }
open = true; open = true;
} }
Expand Down Expand Up @@ -363,8 +368,8 @@ public IndexPartition addNewPartition() throws IOException
try try
{ {
File partitionFolder = createNewPartitionFolder(); File partitionFolder = createNewPartitionFolder();
IndexPartition indexPartition = new IndexPartition( partitionFolder, Directory directory = indexStorage.openDirectory( partitionFolder );
indexStorage.openDirectory( partitionFolder ) ); IndexPartition indexPartition = new IndexPartition( partitionFolder, directory, writerConfigSupplier.get() );
partitions.add( indexPartition ); partitions.add( indexPartition );
return indexPartition; return indexPartition;
} }
Expand Down
Expand Up @@ -40,12 +40,15 @@ public final class IndexWriterConfigs
private static final double MERGE_POLICY_MIN_MERGE_MB = private static final double MERGE_POLICY_MIN_MERGE_MB =
FeatureToggles.getDouble( IndexWriterConfigs.class, "min.merge", 0.1 ); FeatureToggles.getDouble( IndexWriterConfigs.class, "min.merge", 0.1 );


private static final int POPULATION_RAM_BUFFER_SIZE_MB =
FeatureToggles.getInteger( IndexWriterConfigs.class, "population.ram.buffer.size", 50 );

private IndexWriterConfigs() private IndexWriterConfigs()
{ {
throw new AssertionError( "Not for instantiation!" ); throw new AssertionError( "Not for instantiation!" );
} }


public static IndexWriterConfig standardConfig() public static IndexWriterConfig standard()
{ {
IndexWriterConfig writerConfig = new IndexWriterConfig( LuceneDataSource.KEYWORD_ANALYZER ); IndexWriterConfig writerConfig = new IndexWriterConfig( LuceneDataSource.KEYWORD_ANALYZER );


Expand All @@ -61,4 +64,12 @@ public static IndexWriterConfig standardConfig()


return writerConfig; return writerConfig;
} }

/**
 * Create the writer config that is used while an index is being populated.
 * <p>
 * The config starts out as {@link #standard()}; flushing triggered by the number of buffered
 * documents is then switched off and the RAM buffer size is set to
 * {@code POPULATION_RAM_BUFFER_SIZE_MB} megabytes.
 *
 * @return writer config for index population
 */
public static IndexWriterConfig population()
{
    // Setter order is kept as-is: max buffered docs first, RAM buffer size second.
    return standard()
            .setMaxBufferedDocs( IndexWriterConfig.DISABLE_AUTO_FLUSH )
            .setRAMBufferSizeMB( POPULATION_RAM_BUFFER_SIZE_MB );
}
} }
Expand Up @@ -20,6 +20,7 @@
package org.neo4j.kernel.api.impl.index.partition; package org.neo4j.kernel.api.impl.index.partition;


import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.SearcherFactory; import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
Expand All @@ -30,7 +31,6 @@


import org.neo4j.graphdb.ResourceIterator; import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.io.IOUtils; import org.neo4j.io.IOUtils;
import org.neo4j.kernel.api.impl.index.IndexWriterConfigs;
import org.neo4j.kernel.api.impl.index.backup.LuceneIndexSnapshotFileIterator; import org.neo4j.kernel.api.impl.index.backup.LuceneIndexSnapshotFileIterator;


/** /**
Expand All @@ -45,11 +45,12 @@ public class IndexPartition implements Closeable
private final SearcherManager searcherManager; private final SearcherManager searcherManager;
private final File indexFolder; private final File indexFolder;


public IndexPartition( File partitionFolder, Directory directory ) throws IOException public IndexPartition( File partitionFolder, Directory directory, IndexWriterConfig writerConfig )
throws IOException
{ {
this.indexFolder = partitionFolder; this.indexFolder = partitionFolder;
this.directory = directory; this.directory = directory;
this.indexWriter = new IndexWriter( directory, IndexWriterConfigs.standardConfig() ); this.indexWriter = new IndexWriter( directory, writerConfig );
this.searcherManager = new SearcherManager( indexWriter, true, new SearcherFactory() ); this.searcherManager = new SearcherManager( indexWriter, true, new SearcherFactory() );
} }


Expand Down
Expand Up @@ -24,6 +24,7 @@
import java.util.List; import java.util.List;


import org.neo4j.kernel.api.impl.index.AbstractLuceneIndex; import org.neo4j.kernel.api.impl.index.AbstractLuceneIndex;
import org.neo4j.kernel.api.impl.index.IndexWriterConfigs;
import org.neo4j.kernel.api.impl.index.partition.IndexPartition; import org.neo4j.kernel.api.impl.index.partition.IndexPartition;
import org.neo4j.kernel.api.impl.index.partition.PartitionSearcher; import org.neo4j.kernel.api.impl.index.partition.PartitionSearcher;
import org.neo4j.kernel.api.impl.index.storage.PartitionedIndexStorage; import org.neo4j.kernel.api.impl.index.storage.PartitionedIndexStorage;
Expand Down Expand Up @@ -51,7 +52,7 @@ public class LuceneLabelScanIndex extends AbstractLuceneIndex


public LuceneLabelScanIndex( BitmapDocumentFormat format, PartitionedIndexStorage indexStorage ) public LuceneLabelScanIndex( BitmapDocumentFormat format, PartitionedIndexStorage indexStorage )
{ {
super( indexStorage ); super( indexStorage, IndexWriterConfigs::standard );
this.format = format; this.format = format;
this.storageStrategy = new NodeRangeDocumentLabelScanStorageStrategy( format ); this.storageStrategy = new NodeRangeDocumentLabelScanStorageStrategy( format );
} }
Expand Down
Expand Up @@ -21,12 +21,14 @@


import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;


import java.io.IOException; import java.io.IOException;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;


import org.neo4j.helpers.TaskCoordinator; import org.neo4j.helpers.TaskCoordinator;
import org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException; import org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException;
Expand Down Expand Up @@ -63,9 +65,9 @@ public class LuceneSchemaIndex extends AbstractLuceneIndex
private final TaskCoordinator taskCoordinator = new TaskCoordinator( 10, TimeUnit.MILLISECONDS ); private final TaskCoordinator taskCoordinator = new TaskCoordinator( 10, TimeUnit.MILLISECONDS );


public LuceneSchemaIndex( PartitionedIndexStorage indexStorage, IndexConfiguration config, public LuceneSchemaIndex( PartitionedIndexStorage indexStorage, IndexConfiguration config,
IndexSamplingConfig samplingConfig ) IndexSamplingConfig samplingConfig, Supplier<IndexWriterConfig> writerConfigSupplier )
{ {
super( indexStorage ); super( indexStorage, writerConfigSupplier );
this.config = config; this.config = config;
this.samplingConfig = samplingConfig; this.samplingConfig = samplingConfig;
} }
Expand Down
Expand Up @@ -19,9 +19,14 @@
*/ */
package org.neo4j.kernel.api.impl.schema; package org.neo4j.kernel.api.impl.schema;


import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;

import java.util.Map; import java.util.Map;
import java.util.function.Supplier;


import org.neo4j.graphdb.factory.GraphDatabaseSettings; import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.kernel.api.impl.index.IndexWriterConfigs;
import org.neo4j.kernel.api.impl.index.builder.AbstractLuceneIndexBuilder; import org.neo4j.kernel.api.impl.index.builder.AbstractLuceneIndexBuilder;
import org.neo4j.kernel.api.index.IndexConfiguration; import org.neo4j.kernel.api.index.IndexConfiguration;
import org.neo4j.kernel.configuration.Config; import org.neo4j.kernel.configuration.Config;
Expand All @@ -41,13 +46,15 @@ public class LuceneSchemaIndexBuilder extends AbstractLuceneIndexBuilder<LuceneS
{ {
private IndexSamplingConfig samplingConfig = new IndexSamplingConfig( new Config() ); private IndexSamplingConfig samplingConfig = new IndexSamplingConfig( new Config() );
private IndexConfiguration indexConfig = IndexConfiguration.NON_UNIQUE; private IndexConfiguration indexConfig = IndexConfiguration.NON_UNIQUE;
private Supplier<IndexWriterConfig> writerConfigSupplier = IndexWriterConfigs::standard;


private LuceneSchemaIndexBuilder() private LuceneSchemaIndexBuilder()
{ {
} }


/** /**
* Create new lucene schema index builder. * Create new lucene schema index builder.
*
* @return new LuceneSchemaIndexBuilder * @return new LuceneSchemaIndexBuilder
*/ */
public static LuceneSchemaIndexBuilder create() public static LuceneSchemaIndexBuilder create()
Expand All @@ -57,6 +64,7 @@ public static LuceneSchemaIndexBuilder create()


/** /**
* Specify lucene schema index sampling config * Specify lucene schema index sampling config
*
* @param samplingConfig sampling config * @param samplingConfig sampling config
* @return index builder * @return index builder
*/ */
Expand All @@ -68,6 +76,7 @@ public LuceneSchemaIndexBuilder withSamplingConfig( IndexSamplingConfig sampling


/** /**
* Specify lucene schema index sampling buffer size * Specify lucene schema index sampling buffer size
*
* @param size sampling buffer size * @param size sampling buffer size
* @return index builder * @return index builder
*/ */
Expand All @@ -81,6 +90,7 @@ public LuceneSchemaIndexBuilder withSamplingBufferSize( int size )


/** /**
* Specify lucene schema index config * Specify lucene schema index config
*
* @param indexConfig index config * @param indexConfig index config
* @return index builder * @return index builder
*/ */
Expand All @@ -90,8 +100,21 @@ public LuceneSchemaIndexBuilder withIndexConfig( IndexConfiguration indexConfig
return this; return this;
} }


/**
 * Specify {@link Supplier} of lucene {@link IndexWriterConfig} to create {@link IndexWriter}s.
 * <p>
 * A supplier is taken instead of a single config because each {@link IndexWriterConfig} is attached
 * to one {@link IndexWriter} and can't be reused to create other writers.
 *
 * @param writerConfigSupplier the supplier of writer configs, must not be {@code null}
 * @return index builder
 * @throws NullPointerException if {@code writerConfigSupplier} is {@code null}
 */
public LuceneSchemaIndexBuilder withWriterConfig( Supplier<IndexWriterConfig> writerConfigSupplier )
{
    // Fail fast here rather than later, when the built index first asks the supplier for a config.
    this.writerConfigSupplier =
            java.util.Objects.requireNonNull( writerConfigSupplier, "writerConfigSupplier" );
    return this;
}

/** /**
* Transform builder to build unique index * Transform builder to build unique index
*
* @return index builder * @return index builder
*/ */
public LuceneSchemaIndexBuilder uniqueIndex() public LuceneSchemaIndexBuilder uniqueIndex()
Expand All @@ -102,11 +125,12 @@ public LuceneSchemaIndexBuilder uniqueIndex()


/** /**
* Build lucene schema index with specified configuration * Build lucene schema index with specified configuration
*
* @return lucene schema index * @return lucene schema index
*/ */
public LuceneSchemaIndex build() public LuceneSchemaIndex build()
{ {
return new LuceneSchemaIndex( storageBuilder.build(), indexConfig, samplingConfig ); return new LuceneSchemaIndex( storageBuilder.build(), indexConfig, samplingConfig, writerConfigSupplier );
} }


} }
Expand Up @@ -30,6 +30,7 @@


import org.neo4j.io.fs.FileSystemAbstraction; import org.neo4j.io.fs.FileSystemAbstraction;
import org.neo4j.io.pagecache.PageCache; import org.neo4j.io.pagecache.PageCache;
import org.neo4j.kernel.api.impl.index.IndexWriterConfigs;
import org.neo4j.kernel.api.impl.index.storage.DirectoryFactory; import org.neo4j.kernel.api.impl.index.storage.DirectoryFactory;
import org.neo4j.kernel.api.impl.index.storage.IndexStorageFactory; import org.neo4j.kernel.api.impl.index.storage.IndexStorageFactory;
import org.neo4j.kernel.api.impl.index.storage.PartitionedIndexStorage; import org.neo4j.kernel.api.impl.index.storage.PartitionedIndexStorage;
Expand Down Expand Up @@ -76,6 +77,7 @@ public IndexPopulator getPopulator( long indexId, IndexDescriptor descriptor,
.withIndexConfig( config ) .withIndexConfig( config )
.withSamplingConfig( samplingConfig ) .withSamplingConfig( samplingConfig )
.withIndexStorage( getIndexStorage( indexId ) ) .withIndexStorage( getIndexStorage( indexId ) )
.withWriterConfig( IndexWriterConfigs::population )
.build(); .build();
if ( config.isUnique() ) if ( config.isUnique() )
{ {
Expand Down
Expand Up @@ -181,7 +181,7 @@ private static class TestLuceneIndex extends AbstractLuceneIndex
{ {
TestLuceneIndex( PartitionedIndexStorage indexStorage ) TestLuceneIndex( PartitionedIndexStorage indexStorage )
{ {
super( indexStorage ); super( indexStorage, IndexWriterConfigs::standard );
} }
} }


Expand Down
Expand Up @@ -59,7 +59,7 @@ public void initializeLuceneResources() throws IOException
{ {
indexDir = testDir.directory(); indexDir = testDir.directory();
dir = new RAMDirectory(); dir = new RAMDirectory();
writer = new IndexWriter( dir, IndexWriterConfigs.standardConfig() ); writer = new IndexWriter( dir, IndexWriterConfigs.standard() );
} }


@After @After
Expand Down
Expand Up @@ -186,7 +186,7 @@ private Directory createRandomLuceneDir( File rootFolder ) throws IOException
File folder = createRandomFolder( rootFolder ); File folder = createRandomFolder( rootFolder );
DirectoryFactory directoryFactory = getOrCreateDirFactory( fs ); DirectoryFactory directoryFactory = getOrCreateDirFactory( fs );
Directory directory = directoryFactory.open( folder ); Directory directory = directoryFactory.open( folder );
try ( IndexWriter writer = new IndexWriter( directory, IndexWriterConfigs.standardConfig() ) ) try ( IndexWriter writer = new IndexWriter( directory, IndexWriterConfigs.standard() ) )
{ {
writer.addDocument( randomDocument() ); writer.addDocument( randomDocument() );
writer.commit(); writer.commit();
Expand Down
Expand Up @@ -75,7 +75,7 @@ public void initLuceneResources() throws Exception
{ {
dirFactory = new DirectoryFactory.InMemoryDirectoryFactory(); dirFactory = new DirectoryFactory.InMemoryDirectoryFactory();
Directory dir = dirFactory.open( testDir.directory( "test" ) ); Directory dir = dirFactory.open( testDir.directory( "test" ) );
writer = new IndexWriter( dir, IndexWriterConfigs.standardConfig() ); writer = new IndexWriter( dir, IndexWriterConfigs.standard() );
searcherManager = new SearcherManager( writer, true, new SearcherFactory() ); searcherManager = new SearcherManager( writer, true, new SearcherFactory() );
} }


Expand Down
Expand Up @@ -51,10 +51,10 @@
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.locks.Lock;
import java.util.stream.Stream; import java.util.stream.Stream;


import org.neo4j.kernel.api.impl.index.IndexReaderStub; import org.neo4j.kernel.api.impl.index.IndexReaderStub;
import org.neo4j.kernel.api.impl.index.IndexWriterConfigs;
import org.neo4j.kernel.api.impl.index.partition.IndexPartition; import org.neo4j.kernel.api.impl.index.partition.IndexPartition;
import org.neo4j.kernel.api.impl.index.partition.PartitionSearcher; import org.neo4j.kernel.api.impl.index.partition.PartitionSearcher;
import org.neo4j.kernel.api.impl.index.storage.DirectoryFactory; import org.neo4j.kernel.api.impl.index.storage.DirectoryFactory;
Expand Down Expand Up @@ -344,7 +344,7 @@ private static class StubIndexPartition extends IndexPartition


StubIndexPartition( File folder, Directory directory ) throws IOException StubIndexPartition( File folder, Directory directory ) throws IOException
{ {
super( folder, directory ); super( folder, directory, IndexWriterConfigs.standard() );
this.directory = directory; this.directory = directory;
} }


Expand Down

0 comments on commit 35f416d

Please sign in to comment.