Skip to content

Commit

Permalink
Exposes --high-io option in neo4j-admin import
Browse files Browse the repository at this point in the history
Because it's critical performance-wise for specific use cases
  • Loading branch information
tinwelint committed Mar 9, 2018
1 parent bce708a commit 27b66c8
Show file tree
Hide file tree
Showing 8 changed files with 24 additions and 15 deletions.
Expand Up @@ -70,6 +70,7 @@ class CsvImporter implements Importer
private final boolean ignoreBadRelationships;
private final boolean ignoreDuplicateNodes;
private final boolean ignoreExtraColumns;
private final Boolean highIO;

CsvImporter( Args args, Config databaseConfig, OutsideWorld outsideWorld ) throws IncorrectUsage
{
Expand All @@ -94,6 +95,7 @@ class CsvImporter implements Importer
idType = args.interpretOption( "id-type", withDefault( IdType.STRING ),
from -> IdType.valueOf( from.toUpperCase() ) );
inputEncoding = Charset.forName( args.get( "input-encoding", defaultCharset().name() ) );
highIO = args.getBoolean( "high-io", null, true ); // intentionally left as null if not specified
this.databaseConfig = databaseConfig;
}

Expand All @@ -110,7 +112,7 @@ public void doImport() throws IOException
collect( ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns ) );

Configuration configuration = new WrappedBatchImporterConfigurationForNeo4jAdmin( importConfiguration(
null, false, databaseConfig, storeDir ) );
null, false, databaseConfig, storeDir, highIO ) );

// Extract the default time zone from the database configuration
LogTimeZone dbTimeZone = databaseConfig.get( GraphDatabaseSettings.db_timezone );
Expand Down
Expand Up @@ -156,7 +156,11 @@ private static void includeCsvArguments( Arguments arguments )
"File containing all arguments, used as an alternative to supplying all arguments on the command line directly."
+ "Each argument can be on a separate line or multiple arguments per line separated by space."
+ "Arguments containing spaces needs to be quoted."
+ "Supplying other arguments in addition to this file argument is not supported." ) );
+ "Supplying other arguments in addition to this file argument is not supported." ) )
.withArgument( new OptionalNamedArg( "high-io",
"true/false",
null,
"Ignore environment-based heuristics, and assume that the target storage subsystem can support parallel IO with high throughput." ) );
}

static
Expand Down
Expand Up @@ -215,6 +215,7 @@ public void shouldPrintNiceHelp() throws Throwable
" [--quote=<quotation-character>]%n" +
" [--max-memory=<max-memory-that-importer-can-use>]%n" +
" [--f=<File containing all arguments to this import>]%n" +
" [--high-io=<true/false>]%n" +
"usage: neo4j-admin import --mode=database [--database=<name>]%n" +
" [--additional-config=<config-file-path>]%n" +
" [--from=<source-directory>]%n" +
Expand Down Expand Up @@ -294,7 +295,10 @@ public void shouldPrintNiceHelp() throws Throwable
" arguments on the command line directly.Each argument can be on a separate%n" +
" line or multiple arguments per line separated by space.Arguments%n" +
" containing spaces needs to be quoted.Supplying other arguments in addition%n" +
" to this file argument is not supported. [default:]%n" ),
" to this file argument is not supported. [default:]%n" +
" --high-io=<true/false>%n" +
" Ignore environment-based heuristics, and assume that the target storage%n" +
" subsystem can support parallel IO with high throughput. [default:null]%n" ),
baos.toString() );
}
}
Expand Down
Expand Up @@ -716,11 +716,11 @@ public static void validateInputFiles( Collection<Option<File[]>> nodesFiles,
}

/**
 * Convenience overload that forwards to the longer {@code importConfiguration} overload, passing
 * {@code null} as its fourth argument and {@code DEFAULT.allowCacheAllocationOnHeap()} as its sixth.
 *
 * NOTE(review): this is a diff rendering, so both sides of the change appear below. Presumably the first
 * parameter line and the first trailing-argument line are the removed versions and the second of each is
 * the added version, in which the caller supplies the high-IO default instead of it being read from
 * {@code Options.HIGH_IO.defaultValue()}.
 *
 * @param processors forwarded unchanged to the longer overload
 * @param defaultSettingsSuitableForTests forwarded unchanged to the longer overload
 * @param dbConfig forwarded unchanged to the longer overload
 * @param storeDir forwarded unchanged to the longer overload
 * @param defaultHighIO forwarded as the last argument; presumably {@code null} means "decide from the
 * storage device" -- confirm against the longer overload
 * @return whatever the longer overload returns
 */
public static org.neo4j.unsafe.impl.batchimport.Configuration importConfiguration(
Number processors, boolean defaultSettingsSuitableForTests, Config dbConfig, File storeDir )
Number processors, boolean defaultSettingsSuitableForTests, Config dbConfig, File storeDir, Boolean defaultHighIO )
{
return importConfiguration(
processors, defaultSettingsSuitableForTests, dbConfig, null, storeDir,
DEFAULT.allowCacheAllocationOnHeap(), (Boolean)Options.HIGH_IO.defaultValue() );
DEFAULT.allowCacheAllocationOnHeap(), defaultHighIO );
}

public static org.neo4j.unsafe.impl.batchimport.Configuration importConfiguration(
Expand Down Expand Up @@ -754,7 +754,7 @@ public long maxMemoryUsage()
}

// Diff view: two signature lines are shown; presumably the first is the old method name and the
// second is the name it was renamed to in this commit.
@Override
public boolean parallelRecordReadsWhenWriting()
public boolean highIO()
{
// An explicitly supplied value wins; only when none was given (null) is the answer taken from
// FileUtils.highIODevice for the store directory.
// NOTE(review): the trailing 'false' is presumably the fallback answer when the device cannot be
// classified -- confirm in FileUtils.
return defaultHighIO != null ? defaultHighIO : FileUtils.highIODevice( storeDir.toPath(), false );
}
Expand Down
Expand Up @@ -126,7 +126,7 @@ public int denseNodeThreshold()
}

// Diff view: two signature lines are shown; presumably the first is the old method name and the
// second is the name it was renamed to in this commit.
@Override
public boolean parallelRecordReadsWhenWriting()
public boolean highIO()
{
// Returns the stored highIo value as-is; no device detection happens here.
return highIo;
}
Expand Down
Expand Up @@ -362,9 +362,9 @@ private void migrateWithBatchImporter( File storeDir, File migrationDir, long la
// Wraps 'config' so that the high-IO question is answered by FileUtils.highIODevice for storeDir,
// with the wrapped configuration's own answer passed along as the second argument.
// NOTE(review): that second argument is presumably the fallback used when the device cannot be
// classified -- confirm in FileUtils.
// Diff view: the paired signature and return lines are presumably the removed and added sides of
// the rename to highIO().
Configuration importConfig = new Configuration.Overridden( config )
{
@Override
public boolean parallelRecordReadsWhenWriting()
public boolean highIO()
{
return FileUtils.highIODevice( storeDir.toPath(), super.parallelRecordReadsWhenWriting() );
return FileUtils.highIODevice( storeDir.toPath(), super.highIO() );
}
};
AdditionalInitialIds additionalInitialIds =
Expand Down
Expand Up @@ -20,15 +20,14 @@
package org.neo4j.unsafe.impl.batchimport;

import org.neo4j.io.ByteUnit;
import org.neo4j.io.os.OsBeanUtil;
import org.neo4j.kernel.configuration.Config;
import org.neo4j.kernel.impl.pagecache.ConfiguringPageCacheFactory;
import org.neo4j.io.os.OsBeanUtil;
import org.neo4j.unsafe.impl.batchimport.staging.Stage;
import org.neo4j.unsafe.impl.batchimport.staging.Step;

import static java.lang.Math.min;
import static java.lang.Math.round;

import static org.neo4j.graphdb.factory.GraphDatabaseSettings.dense_node_threshold;
import static org.neo4j.graphdb.factory.GraphDatabaseSettings.pagecache_memory;
import static org.neo4j.io.ByteUnit.gibiBytes;
Expand Down Expand Up @@ -157,7 +156,7 @@ default boolean parallelRecordReads()
* Enabling this will probably increase concurrent I/O to the point of reducing performance if the underlying
* storage isn't great at concurrent I/O, especially if {@link #parallelRecordWrites()} is also enabled.
*/
default boolean parallelRecordReadsWhenWriting()
default boolean highIO()
{
// Defaults to false since some environments see lower performance with this enabled
return false;
Expand Down Expand Up @@ -253,9 +252,9 @@ public boolean parallelRecordReads()
}

// Pure delegation: the answer comes from the wrapped 'defaults' configuration.
// Diff view: the paired signature and return lines are presumably the removed and added sides of
// the rename to highIO().
@Override
public boolean parallelRecordReadsWhenWriting()
public boolean highIO()
{
return defaults.parallelRecordReadsWhenWriting();
return defaults.highIO();
}

@Override
Expand Down
Expand Up @@ -56,7 +56,7 @@ public ReadRecordsStep( StageControl control, Configuration config, boolean inRe

/**
 * Decides whether records should be read in parallel for the given stage.
 * In the record-writing stage the decision follows the configuration's high-IO setting; in any other
 * stage it follows {@code config.parallelRecordReads()}.
 *
 * NOTE(review): this is a diff rendering, so two {@code return} lines are shown; presumably the first
 * (calling {@code parallelRecordReadsWhenWriting()}) is the removed line and the second (calling
 * {@code highIO()}) is its replacement.
 *
 * @param config configuration that supplies the two settings
 * @param inRecordWritingStage whether the step runs in a stage that also writes records
 * @return {@code true} if the setting that applies to the stage is enabled
 */
private static boolean parallelReading( Configuration config, boolean inRecordWritingStage )
{
return (inRecordWritingStage && config.parallelRecordReadsWhenWriting())
return (inRecordWritingStage && config.highIO())
|| (!inRecordWritingStage && config.parallelRecordReads());
}

Expand Down

0 comments on commit 27b66c8

Please sign in to comment.