Skip to content

Commit

Permalink
implemented missing defaults as wrappers to neo4j-admin import
Browse files Browse the repository at this point in the history
  • Loading branch information
praveenag committed May 17, 2017
1 parent 4da5424 commit f039131
Show file tree
Hide file tree
Showing 6 changed files with 186 additions and 15 deletions.
Expand Up @@ -69,8 +69,8 @@ default boolean legacyStyleQuoting()
{ {
return DEFAULT_LEGACY_STYLE_QUOTING; return DEFAULT_LEGACY_STYLE_QUOTING;
} }

int KB = 1024, MB = KB * KB; int KB = 1024, MB = KB * KB;
int DEFAULT_BUFFER_SIZE_4MB = 4 * MB;


class Default implements Configuration class Default implements Configuration
{ {
Expand All @@ -83,7 +83,7 @@ public char quotationCharacter()
@Override @Override
public int bufferSize() public int bufferSize()
{ {
return 4 * MB; return DEFAULT_BUFFER_SIZE_4MB;
} }


@Override @Override
Expand Down
Expand Up @@ -28,6 +28,8 @@


import org.neo4j.commandline.admin.IncorrectUsage; import org.neo4j.commandline.admin.IncorrectUsage;
import org.neo4j.commandline.admin.OutsideWorld; import org.neo4j.commandline.admin.OutsideWorld;
import org.neo4j.commandline.dbms.config.WrappedBatchImporterConfigurationForNeo4jAdmin;
import org.neo4j.commandline.dbms.config.WrappedCsvInputConfigurationForNeo4jAdmin;
import org.neo4j.dbms.DatabaseManagementSystemSettings; import org.neo4j.dbms.DatabaseManagementSystemSettings;
import org.neo4j.graphdb.factory.GraphDatabaseSettings; import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.helpers.Args; import org.neo4j.helpers.Args;
Expand All @@ -41,6 +43,7 @@
import org.neo4j.unsafe.impl.batchimport.input.csv.IdType; import org.neo4j.unsafe.impl.batchimport.input.csv.IdType;


import static java.nio.charset.Charset.defaultCharset; import static java.nio.charset.Charset.defaultCharset;

import static org.neo4j.kernel.impl.util.Converters.withDefault; import static org.neo4j.kernel.impl.util.Converters.withDefault;
import static org.neo4j.tooling.ImportTool.csvConfiguration; import static org.neo4j.tooling.ImportTool.csvConfiguration;
import static org.neo4j.tooling.ImportTool.extractInputFiles; import static org.neo4j.tooling.ImportTool.extractInputFiles;
Expand All @@ -58,15 +61,15 @@ class CsvImporter implements Importer
private final Collection<Args.Option<File[]>> nodesFiles, relationshipsFiles; private final Collection<Args.Option<File[]>> nodesFiles, relationshipsFiles;
private final IdType idType; private final IdType idType;
private final Charset inputEncoding; private final Charset inputEncoding;
private final Config config; private final Config databaseConfig;
private final Args args; private final Args args;
private final OutsideWorld outsideWorld; private final OutsideWorld outsideWorld;
private final String reportFileName; private final String reportFileName;
private final boolean ignoreBadRelationships; private final boolean ignoreBadRelationships;
private final boolean ignoreDuplicateNodes; private final boolean ignoreDuplicateNodes;
private final boolean ignoreExtraColumns; private final boolean ignoreExtraColumns;


CsvImporter( Args args, Config config, OutsideWorld outsideWorld ) throws IncorrectUsage CsvImporter( Args args, Config databaseConfig, OutsideWorld outsideWorld ) throws IncorrectUsage
{ {
this.args = args; this.args = args;
this.outsideWorld = outsideWorld; this.outsideWorld = outsideWorld;
Expand All @@ -89,28 +92,33 @@ class CsvImporter implements Importer
idType = args.interpretOption( "id-type", withDefault( IdType.STRING ), idType = args.interpretOption( "id-type", withDefault( IdType.STRING ),
from -> IdType.valueOf( from.toUpperCase() ) ); from -> IdType.valueOf( from.toUpperCase() ) );
inputEncoding = Charset.forName( args.get( "input-encoding", defaultCharset().name() ) ); inputEncoding = Charset.forName( args.get( "input-encoding", defaultCharset().name() ) );
this.config = config; this.databaseConfig = databaseConfig;
} }


@Override @Override
public void doImport() throws IOException public void doImport() throws IOException
{ {
FileSystemAbstraction fs = outsideWorld.fileSystem(); FileSystemAbstraction fs = outsideWorld.fileSystem();
File storeDir = config.get( DatabaseManagementSystemSettings.database_path ); File storeDir = databaseConfig.get( DatabaseManagementSystemSettings.database_path );
File logsDir = config.get( GraphDatabaseSettings.logs_directory ); File logsDir = databaseConfig.get( GraphDatabaseSettings.logs_directory );
File reportFile = new File( reportFileName ); File reportFile = new File( reportFileName );


OutputStream badOutput = new BufferedOutputStream( fs.openAsOutputStream( reportFile, false ) ); OutputStream badOutput = new BufferedOutputStream( fs.openAsOutputStream( reportFile, false ) );
Collector badCollector = badCollector( badOutput, isIgnoringSomething() ? BadCollector.UNLIMITED_TOLERANCE : 0, Collector badCollector = badCollector( badOutput, isIgnoringSomething() ? BadCollector.UNLIMITED_TOLERANCE : 0,
collect( ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns ) ); collect( ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns ) );


Configuration configuration = importConfiguration( null, false, config ); Configuration configuration = new WrappedBatchImporterConfigurationForNeo4jAdmin( importConfiguration( null, false,
CsvInput input = new CsvInput( nodeData( inputEncoding, nodesFiles ), defaultFormatNodeFileHeader(), databaseConfig ) );
relationshipData( inputEncoding, relationshipsFiles ), defaultFormatRelationshipFileHeader(), idType, CsvInput input = new CsvInput(
csvConfiguration( args, false ), badCollector, configuration.maxNumberOfProcessors() ); nodeData( inputEncoding, nodesFiles ), defaultFormatNodeFileHeader(),
relationshipData( inputEncoding, relationshipsFiles ), defaultFormatRelationshipFileHeader(),
idType,
new WrappedCsvInputConfigurationForNeo4jAdmin( csvConfiguration( args, false ) ),
badCollector,
configuration.maxNumberOfProcessors() );


ImportTool.doImport( outsideWorld.errorStream(), outsideWorld.errorStream(), storeDir, logsDir, reportFile, fs, ImportTool.doImport( outsideWorld.errorStream(), outsideWorld.errorStream(), storeDir, logsDir, reportFile, fs,
nodesFiles, relationshipsFiles, false, input, config, badOutput, configuration ); nodesFiles, relationshipsFiles, false, input, this.databaseConfig, badOutput, configuration );
} }


private boolean isIgnoringSomething() private boolean isIgnoringSomething()
Expand Down
@@ -0,0 +1,77 @@
/*
* Copyright (c) 2002-2017 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.commandline.dbms.config;

import org.neo4j.unsafe.impl.batchimport.Configuration;
/**
* Provides a wrapper around {@link Configuration} with overridden defaults for neo4j-admin import
* Use all available processors
*/
public class WrappedBatchImporterConfigurationForNeo4jAdmin implements Configuration
{
private Configuration defaults;

public WrappedBatchImporterConfigurationForNeo4jAdmin( Configuration defaults)
{
this.defaults = defaults;
}

@Override
public int batchSize()
{
return defaults.batchSize();
}

@Override
public int movingAverageSize()
{
return defaults.movingAverageSize();
}

@Override
public int maxNumberOfProcessors()
{
return Configuration.allAvailableProcessors();
}

@Override
public int denseNodeThreshold()
{
return defaults.denseNodeThreshold();
}

@Override
public long pageCacheMemory()
{
return defaults.pageCacheMemory();
}

@Override
public long maxMemoryUsage()
{
return defaults.maxMemoryUsage();
}

@Override
public boolean sequentialBackgroundFlushing()
{
return defaults.sequentialBackgroundFlushing();
}
}
@@ -0,0 +1,80 @@
/*
* Copyright (c) 2002-2017 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.commandline.dbms.config;

import org.neo4j.unsafe.impl.batchimport.input.csv.Configuration;
/**
* Provides a wrapper around {@link Configuration} with overridden defaults for neo4j-admin import
* Always trim strings
* Import emptyQuotedStrings as empty Strings
* Buffer size is set to 4MB
*/

public class WrappedCsvInputConfigurationForNeo4jAdmin implements Configuration
{
private Configuration defaults;

public WrappedCsvInputConfigurationForNeo4jAdmin( Configuration defaults)
{
this.defaults = defaults;
}

@Override
public char delimiter()
{
return defaults.delimiter();
}

@Override
public char arrayDelimiter()
{
return defaults.arrayDelimiter();
}

@Override
public char quotationCharacter()
{
return defaults.quotationCharacter();
}

@Override
public int bufferSize()
{
return DEFAULT_BUFFER_SIZE_4MB;
}

@Override
public boolean multilineFields()
{
return defaults.multilineFields();
}

@Override
public boolean trimStrings()
{
return true;
}

@Override
public boolean emptyQuotedStringsAsNull()
{
return false;
}
}
Expand Up @@ -19,16 +19,16 @@
*/ */
package org.neo4j.commandline.dbms; package org.neo4j.commandline.dbms;


import org.junit.Rule;
import org.junit.Test;

import java.io.File; import java.io.File;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.nio.file.Files; import java.nio.file.Files;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;


import org.junit.Rule;
import org.junit.Test;

import org.neo4j.commandline.admin.RealOutsideWorld; import org.neo4j.commandline.admin.RealOutsideWorld;
import org.neo4j.dbms.DatabaseManagementSystemSettings; import org.neo4j.dbms.DatabaseManagementSystemSettings;
import org.neo4j.graphdb.factory.GraphDatabaseSettings; import org.neo4j.graphdb.factory.GraphDatabaseSettings;
Expand All @@ -38,6 +38,7 @@
import org.neo4j.test.rule.TestDirectory; import org.neo4j.test.rule.TestDirectory;


import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;

import static org.neo4j.helpers.collection.MapUtil.stringMap; import static org.neo4j.helpers.collection.MapUtil.stringMap;


public class CsvImporterTest public class CsvImporterTest
Expand Down
Expand Up @@ -77,6 +77,11 @@ default int movingAverageSize()
* of a processor. * of a processor.
*/ */
default int maxNumberOfProcessors() default int maxNumberOfProcessors()
{
return allAvailableProcessors();
}

static int allAvailableProcessors()
{ {
return Runtime.getRuntime().availableProcessors(); return Runtime.getRuntime().availableProcessors();
} }
Expand Down

0 comments on commit f039131

Please sign in to comment.