Skip to content

Commit

Permalink
Adds ability to specify disjoint id groups in import tool
Browse files Browse the repository at this point in the history
Using the input headers, group membership is specified by appending a pair
of parentheses containing the name of the group, e.g. :ID(MyGroup).
Likewise, relationships can specify, individually for the start and end ids,
which group each of those ids should be looked up in.

This commit also changes the delimiter between multiple files from space to
comma, since the space delimiter was broken: it made it impossible to specify files whose names contain spaces.
  • Loading branch information
tinwelint committed Jan 21, 2015
1 parent d5adb82 commit 8828c45
Show file tree
Hide file tree
Showing 24 changed files with 668 additions and 222 deletions.
Expand Up @@ -28,6 +28,7 @@
import java.util.Map.Entry;

import org.neo4j.function.Function;
import org.neo4j.function.Function2;
import org.neo4j.function.Functions;
import org.neo4j.helpers.Args;
import org.neo4j.helpers.Args.Option;
Expand Down Expand Up @@ -71,15 +72,6 @@
*/
public class ImportTool
{
/**
 * Converts the textual value given for the id-type option into an {@link IdType},
 * ignoring the case of the supplied value.
 *
 * The upper-casing is done with {@link java.util.Locale#ROOT} so that the result does not
 * depend on the JVM's default locale (e.g. under a Turkish locale "string" would otherwise
 * become "STRİNG" and fail to match any constant).
 *
 * Values that match no constant are rejected by {@code IdType.valueOf}
 * (presumably with an {@link IllegalArgumentException}, as for any enum).
 */
private static final Function<String,IdType> TO_ID_TYPE = new Function<String,IdType>()
{
    @Override
    public IdType apply( String from )
    {
        return IdType.valueOf( from.toUpperCase( java.util.Locale.ROOT ) );
    }
};

enum Options
{
STORE_DIR( "into", "<store-dir>", "Database directory to import into. " + "Must not contain existing database." ),
Expand Down Expand Up @@ -142,9 +134,14 @@ String key()
return key;
}

/**
 * @return the key of this option as it is written on the command line,
 * i.e. the key prefixed with two dashes.
 */
String argument()
{
    final String prefix = "--";
    return prefix + key();
}

void printUsage( PrintStream out )
{
out.println( "--" + key + " " + usage );
out.println( argument() + " " + usage );
for ( String line : Args.splitLongLine( description.replace( "`", "" ), 80 ) )
{
out.println( "\t" + line );
Expand All @@ -155,7 +152,7 @@ String manPageEntry()
{
String filteredDescription = description.replace( availableProcessorsHint(), "" );
String usageString = (usage.length() > 0) ? " " + usage : "";
return "*--" + key + usageString + "*::\n" + filteredDescription + "\n\n";
return "*" + argument() + usageString + "*::\n" + filteredDescription + "\n\n";
}

private static String availableProcessorsHint()
Expand All @@ -167,7 +164,7 @@ private static String availableProcessorsHint()
/**
* Delimiter used between files in an input group.
*/
static final String MULTI_FILE_DELIMITER = " ";
static final String MULTI_FILE_DELIMITER = ",";

public static void main( String[] incomingArguments )
{
Expand All @@ -180,33 +177,20 @@ public static void main( String[] incomingArguments )

FileSystemAbstraction fs = new DefaultFileSystemAbstraction();
File storeDir;
// The input groups
Collection<Option<File[]>> nodesFiles, relationshipsFiles;
boolean enableStacktrace;
Number processors = null;
Input input = null;
try
{
storeDir =
args.interpretOption( Options.STORE_DIR.key(), Converters.<File> mandatory(), Converters.toFile(),
Validators.DIRECTORY_IS_WRITABLE, Validators.CONTAINS_NO_EXISTING_DATABASE );
nodesFiles =
args.interpretOptionsWithMetadata( Options.NODE_DATA.key(), Converters.<File[]> mandatory(),
Converters.toFiles( MULTI_FILE_DELIMITER ), Validators.FILES_EXISTS,
Validators.<File> atLeast( 1 ) );
relationshipsFiles =
args.interpretOptionsWithMetadata( Options.RELATIONSHIP_DATA.key(),
Converters.<File[]> optional(), Converters.toFiles( MULTI_FILE_DELIMITER ),
Validators.FILES_EXISTS, Validators.<File> atLeast( 1 ) );
storeDir = args.interpretOption( Options.STORE_DIR.key(), Converters.<File>mandatory(),
Converters.toFile(), Validators.DIRECTORY_IS_WRITABLE, Validators.CONTAINS_NO_EXISTING_DATABASE );
nodesFiles = INPUT_FILES_EXTRACTOR.apply( args, Options.NODE_DATA.key() );
relationshipsFiles = INPUT_FILES_EXTRACTOR.apply( args, Options.RELATIONSHIP_DATA.key() );
enableStacktrace = args.getBoolean( Options.STACKTRACE.key(), Boolean.FALSE, Boolean.TRUE );
processors = args.getNumber( Options.PROCESSORS.key(), null );
input = new CsvInput(
nodeData( nodesFiles ),
defaultFormatNodeFileHeader(),
relationshipData( relationshipsFiles ),
defaultFormatRelationshipFileHeader(),
args.interpretOption( Options.ID_TYPE.key(), withDefault( IdType.STRING ), TO_ID_TYPE ),
csvConfiguration( args ) );
IdType idType = args.interpretOption( Options.ID_TYPE.key(), withDefault( IdType.STRING ), TO_ID_TYPE );
input = input( nodesFiles, relationshipsFiles, INPUT_FILES_EXTRACTOR, idType, csvConfiguration( args ) );
}
catch ( IllegalArgumentException e )
{
Expand Down Expand Up @@ -254,6 +238,19 @@ Converters.<File[]> optional(), Converters.toFiles( MULTI_FILE_DELIMITER ),
}
}

/**
 * Assembles the {@link Input} handed to the importer from the node and relationship
 * file groups given on the command line.
 *
 * @param nodesFiles file groups containing node data, wrapped via {@code nodeData}.
 * @param relationshipsFiles file groups containing relationship data, wrapped via
 * {@code relationshipData}.
 * @param inputFilesExtractor accepted but not referenced by this method.
 * @param idType id type passed on to the {@link CsvInput}.
 * @param configuration CSV configuration passed on to the {@link CsvInput}.
 * @return a {@link CsvInput} using the default-format node and relationship file headers.
 */
private static Input input( Collection<Option<File[]>> nodesFiles, Collection<Option<File[]>> relationshipsFiles,
        Function2<Args,String,Collection<Option<File[]>>> inputFilesExtractor,
        IdType idType, Configuration configuration )
{
    // Wrap the raw file groups first, in the same order as before, then build the input.
    Iterable<DataFactory<InputNode>> nodes = nodeData( nodesFiles );
    Iterable<DataFactory<InputRelationship>> relationships = relationshipData( relationshipsFiles );

    Input csvInput = new CsvInput(
            nodes, defaultFormatNodeFileHeader(),
            relationships, defaultFormatRelationshipFileHeader(),
            idType, configuration );
    return csvInput;
}

private static org.neo4j.unsafe.impl.batchimport.Configuration importConfiguration( final Number processors )
{
return new org.neo4j.unsafe.impl.batchimport.Configuration.Default()
Expand Down Expand Up @@ -307,17 +304,17 @@ protected DataFactory<InputRelationship> underlyingObjectToObject( Option<File[]
};
}

private static Iterable<DataFactory<InputNode>> nodeData( Collection<Option<File[]>> files )
private static Iterable<DataFactory<InputNode>> nodeData( Collection<Option<File[]>> nodesFiles )
{
return new IterableWrapper<DataFactory<InputNode>,Option<File[]>>( files )
return new IterableWrapper<DataFactory<InputNode>,Option<File[]>>( nodesFiles )
{
@Override
protected DataFactory<InputNode> underlyingObjectToObject( Option<File[]> group )
protected DataFactory<InputNode> underlyingObjectToObject( Option<File[]> input )
{
Function<InputNode,InputNode> decorator = group.metadata() != null
? additiveLabels( group.metadata().split( ":" ) )
Function<InputNode,InputNode> decorator = input.metadata() != null
? additiveLabels( input.metadata().split( ":" ) )
: Functions.<InputNode>identity();
return data( decorator, group.value() );
return data( decorator, input.value() );
}
};
}
Expand Down Expand Up @@ -401,6 +398,15 @@ public char quotationCharacter()
};
}

/**
 * Converts the textual value given for the id-type option into an {@link IdType},
 * ignoring the case of the supplied value.
 *
 * The upper-casing is done with {@link java.util.Locale#ROOT} so that the result does not
 * depend on the JVM's default locale (e.g. under a Turkish locale "string" would otherwise
 * become "STRİNG" and fail to match any constant).
 *
 * Values that match no constant are rejected by {@code IdType.valueOf}
 * (presumably with an {@link IllegalArgumentException}, as for any enum).
 */
private static final Function<String,IdType> TO_ID_TYPE = new Function<String,IdType>()
{
    @Override
    public IdType apply( String from )
    {
        return IdType.valueOf( from.toUpperCase( java.util.Locale.ROOT ) );
    }
};

private static final Function<String,Character> DELIMITER_CONVERTER = new Function<String,Character>()
{
private final Function<String,Character> fallback = Converters.toCharacter();
Expand All @@ -415,4 +421,16 @@ public Character apply( String value ) throws RuntimeException
return fallback.apply( value );
}
};

/**
 * Extracts the file groups supplied for a given command line option.
 *
 * Delegates to {@code Args.interpretOptionsWithMetadata} for the given key, using
 * {@code Converters.toFiles} with {@link #MULTI_FILE_DELIMITER} as the value converter and
 * {@code Validators.FILES_EXISTS} plus {@code Validators.atLeast( 1 )} as validators.
 *
 * NOTE(review): the option is passed as {@code optional()} regardless of key, so this
 * extractor itself does not appear to require any particular option (e.g. node data)
 * to be present — confirm that callers check for missing input where needed.
 */
private static final Function2<Args,String,Collection<Option<File[]>>> INPUT_FILES_EXTRACTOR =
new Function2<Args,String,Collection<Option<File[]>>>()
{
@Override
public Collection<Option<File[]>> apply( Args args, String key )
{
// Each returned Option carries the files for one occurrence of the option, plus its metadata.
return args.interpretOptionsWithMetadata( key, Converters.<File[]>optional(),
Converters.toFiles( MULTI_FILE_DELIMITER ), Validators.FILES_EXISTS,
Validators.<File>atLeast( 1 ) );
}
};
}
Expand Up @@ -212,16 +212,16 @@ public static void main( String[] arguments ) throws IOException
Configuration config = Configuration.COMMAS;
Extractors extractors = new Extractors( config.arrayDelimiter() );
Header nodeHeader = new Header( new Entry[] {
new Entry( null, Type.ID, extractors.string() ),
new Entry( "name", Type.PROPERTY, extractors.string() ),
new Entry( "age", Type.PROPERTY, extractors.int_() ),
new Entry( "something", Type.PROPERTY, extractors.string() ),
new Entry( null, Type.LABEL, extractors.stringArray() ),
new Entry( null, Type.ID, null, extractors.string() ),
new Entry( "name", Type.PROPERTY, null, extractors.string() ),
new Entry( "age", Type.PROPERTY, null, extractors.int_() ),
new Entry( "something", Type.PROPERTY, null, extractors.string() ),
new Entry( null, Type.LABEL, null, extractors.stringArray() ),
} );
Header relationshipHeader = new Header( new Entry[] {
new Entry( null, Type.START_ID, extractors.string() ),
new Entry( null, Type.END_ID, extractors.string() ),
new Entry( null, Type.TYPE, extractors.string() )
new Entry( null, Type.START_ID, null, extractors.string() ),
new Entry( null, Type.END_ID, null, extractors.string() ),
new Entry( null, Type.TYPE, null, extractors.string() )
} );

ProgressListener progress = textual( System.out ).singlePart( "Generating", nodeCount + relationshipCount );
Expand Down
Expand Up @@ -238,7 +238,7 @@ public void printOptionsForManpage() throws Exception
{
try (PrintStream out = new PrintStream( file( "man", "options.adoc" ) ))
{
for ( Options option : ImportTool.Options.values() )
for ( Options option : Options.values() )
{
out.print( option.manPageEntry() );
}
Expand Down

0 comments on commit 8828c45

Please sign in to comment.