Skip to content

Commit

Permalink
CsvInput verifies id spaces before starting import
Browse files Browse the repository at this point in the history
such that it will fail fast if there are relationship headers specifying
id spaces which no node header has will fail explaining just that instead of
treating all those relationships as bad.
  • Loading branch information
tinwelint committed Oct 11, 2017
1 parent 9bf63fb commit 9263a35
Show file tree
Hide file tree
Showing 8 changed files with 429 additions and 58 deletions.
Expand Up @@ -19,6 +19,7 @@
*/
package org.neo4j.unsafe.impl.batchimport.input;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

Expand Down Expand Up @@ -49,8 +50,7 @@ public synchronized Group getOrCreate( String name )
{
if ( global != globalMode.booleanValue() )
{
throw new IllegalStateException( "Mixing specified and unspecified group belongings " +
"in a single import isn't supported" );
throw mixingOfGroupModesException();
}
}

Expand All @@ -66,4 +66,36 @@ public synchronized Group getOrCreate( String name )
}
return group;
}

private IllegalStateException mixingOfGroupModesException()
{
return new IllegalStateException( "Mixing specified and unspecified group belongings " +
"in a single import isn't supported" );
}

public synchronized Group get( String name )
{
boolean global = name == null;
if ( globalMode != null && global != globalMode.booleanValue() )
{
throw mixingOfGroupModesException();
}

if ( name == null )
{
return Group.GLOBAL;
}

Group group = byName.get( name );
if ( group == null )
{
throw new HeaderException( "Group '" + name + "' not found. Available groups are: " + groupNames() );
}
return group;
}

private String groupNames()
{
return Arrays.toString( byName.keySet().toArray( new String[byName.keySet().size()] ) );
}
}
Expand Up @@ -25,4 +25,9 @@ public HeaderException( String message )
{
super( message );
}

public HeaderException( String message, Throwable cause )
{
super( message, cause );
}
}
Expand Up @@ -19,8 +19,11 @@
*/
package org.neo4j.unsafe.impl.batchimport.input.csv;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.HashMap;
import java.util.Map;

import org.neo4j.csv.reader.CharSeeker;
import org.neo4j.kernel.impl.util.Validators;
import org.neo4j.unsafe.impl.batchimport.InputIterable;
Expand All @@ -29,13 +32,18 @@
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdGenerator;
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper;
import org.neo4j.unsafe.impl.batchimport.input.Collector;
import org.neo4j.unsafe.impl.batchimport.input.Group;
import org.neo4j.unsafe.impl.batchimport.input.Groups;
import org.neo4j.unsafe.impl.batchimport.input.HeaderException;
import org.neo4j.unsafe.impl.batchimport.input.Input;
import org.neo4j.unsafe.impl.batchimport.input.InputNode;
import org.neo4j.unsafe.impl.batchimport.input.InputRelationship;
import org.neo4j.unsafe.impl.batchimport.input.MissingRelationshipDataException;
import org.neo4j.unsafe.impl.batchimport.input.csv.InputGroupsDeserializer.DeserializerFactory;

import static java.lang.String.format;

import static org.neo4j.csv.reader.CharSeekers.charSeeker;
import static org.neo4j.unsafe.impl.batchimport.input.csv.DeserializerFactories.defaultNodeDeserializer;
import static org.neo4j.unsafe.impl.batchimport.input.csv.DeserializerFactories.defaultRelationshipDeserializer;

Expand Down Expand Up @@ -90,17 +98,74 @@ public CsvInput(
this.config = config;
this.badCollector = badCollector;
this.validateRelationshipData = validateRelationshipData;

verifyHeaders();
}

/**
* Verifies so that all headers in input files looks sane:
* <ul>
* <li>node/relationship headers can be parsed correctly</li>
* <li>relationship headers uses ID spaces previously defined in node headers</li>
* </ul>
*/
private void verifyHeaders()
{
try
{
// parse all node headers and remember all ID spaces
for ( DataFactory<InputNode> dataFactory : nodeDataFactory )
{
try ( CharSeeker dataStream = charSeeker( dataFactory.create( config ).stream(), config, true ) )
{
Header header = nodeHeaderFactory.create( dataStream, config, idType );
Header.Entry idHeader = header.entry( Type.ID );
if ( idHeader != null )
{
// will create this group inside groups, so no need to do something with the result of it right now
groups.getOrCreate( idHeader.groupName() );
}
}
}

// parse all relationship headers and verify all ID spaces
for ( DataFactory<InputRelationship> dataFactory : relationshipDataFactory )
{
try ( CharSeeker dataStream = charSeeker( dataFactory.create( config ).stream(), config, true ) )
{
Header header = relationshipHeaderFactory.create( dataStream, config, idType );
verifyRelationshipHeader( header, Type.START_ID, dataStream.sourceDescription() );
verifyRelationshipHeader( header, Type.END_ID, dataStream.sourceDescription() );
}
}
}
catch ( IOException e )
{
throw new UncheckedIOException( e );
}
}

private void verifyRelationshipHeader( Header header, Type type, String source )
{
Header.Entry entry = header.entry( type );
String groupName = entry.groupName();
if ( groups.get( groupName ) == null )
{
throw new HeaderException(
format( "Relationship header %s in %s refers to ID space %s which no node header specifies",
header, source, groupName != null ? groupName : Group.GLOBAL.name() ) );
}
}

private void assertSaneConfiguration( Configuration config )
private static void assertSaneConfiguration( Configuration config )
{
Map<Character,String> delimiters = new HashMap<>();
delimiters.put( config.delimiter(), "delimiter" );
checkUniqueCharacter( delimiters, config.arrayDelimiter(), "array delimiter" );
checkUniqueCharacter( delimiters, config.quotationCharacter(), "quotation character" );
}

private void checkUniqueCharacter( Map<Character,String> characters, char character, String characterDescription )
private static void checkUniqueCharacter( Map<Character,String> characters, char character, String characterDescription )
{
String conflict = characters.put( character, characterDescription );
if ( conflict != null )
Expand Down
Expand Up @@ -314,7 +314,7 @@ else if ( isRecognizedType( typeSpec ) )
else
{
type = Type.PROPERTY;
extractor = extractors.valueOf( typeSpec );
extractor = parsePropertyType( typeSpec, extractors );
}

return new Header.Entry( name, type, groupName, extractor );
Expand Down Expand Up @@ -362,11 +362,24 @@ else if ( isRecognizedType( typeSpec ) )
else
{
type = Type.PROPERTY;
extractor = extractors.valueOf( typeSpec );
extractor = parsePropertyType( typeSpec, extractors );
}

return new Header.Entry( name, type, groupName, extractor );
}

}

private static Extractor<?> parsePropertyType( String typeSpec, Extractors extractors )
{
try
{
return extractors.valueOf( typeSpec );
}
catch ( IllegalArgumentException e )
{
throw new HeaderException( "Unable to parse header", e );
}
}

@SafeVarargs
Expand Down
Expand Up @@ -49,8 +49,8 @@ public InputRelationshipDeserialization( Header header, SourceTraceability sourc
@Override
public void initialize()
{
this.startNodeGroup = groups.getOrCreate( header.entry( Type.START_ID ).groupName() );
this.endNodeGroup = groups.getOrCreate( header.entry( Type.END_ID ).groupName() );
this.startNodeGroup = groups.get( header.entry( Type.START_ID ).groupName() );
this.endNodeGroup = groups.get( header.entry( Type.END_ID ).groupName() );
}

@Override
Expand Down
Expand Up @@ -24,6 +24,8 @@
import org.neo4j.test.Race;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.fail;

public class GroupsTest
{
Expand All @@ -50,4 +52,109 @@ public void shouldHandleConcurrentGetOrCreate() throws Throwable
Group otherGroup = groups.getOrCreate( "MyOtherGroup" );
assertEquals( 1, otherGroup.id() );
}

@Test
public void shouldFailOnMixedGroupModeInGetOrCreate() throws Exception
{
// given
Groups groups = new Groups();
groups.getOrCreate( null );

try
{
// when
groups.getOrCreate( "Something" );
fail( "Should fail" );
}
catch ( IllegalStateException e )
{
// then OK
}
}

@Test
public void shouldFailOnMixedGroupModeInGetOrCreate2() throws Exception
{
// given
Groups groups = new Groups();
groups.getOrCreate( "Something" );

try
{
// when
groups.getOrCreate( null );
fail( "Should fail" );
}
catch ( IllegalStateException e )
{
// then OK
}
}

@Test
public void shouldGetCreatedGroup() throws Exception
{
// given
Groups groups = new Groups();
String name = "Something";
Group createdGroup = groups.getOrCreate( name );

// when
Group gottenGroup = groups.get( name );

// then
assertSame( createdGroup, gottenGroup );
}

@Test
public void shouldGetGlobalGroup() throws Exception
{
// given
Groups groups = new Groups();
groups.getOrCreate( null );

// when
Group group = groups.get( null );

// then
assertSame( Group.GLOBAL, group );
}

@Test
public void shouldFailOnMixedGroupModeInGet() throws Exception
{
// given
Groups groups = new Groups();
groups.getOrCreate( "Something" );

try
{
// when
groups.get( null );
fail( "Should fail" );
}
catch ( IllegalStateException e )
{
// then OK
}
}

@Test
public void shouldFailOnMixedGroupModeInGet2() throws Exception
{
// given
Groups groups = new Groups();
groups.getOrCreate( null );

try
{
// when
groups.get( "Something" );
fail( "Should fail" );
}
catch ( IllegalStateException e )
{
// then OK
}
}
}

0 comments on commit 9263a35

Please sign in to comment.