Skip to content

Commit

Permalink
Handles anonymous nodes explicitly in IdMapper
Browse files Browse the repository at this point in the history
Previously, anonymous nodes (i.e. nodes without an :ID) were not put into the
IdMapper, so the EncodingIdMapper would have those data indexes set to
-1. Sometimes those -1 values could affect sorting or lookup and potentially
collide with other encoded values by mistake.

This commit explicitly sets these anonymous values to 0 and treats that
value specially during collision detection. Also, an Encoder is not allowed
to return the value 0.
  • Loading branch information
tinwelint committed Apr 28, 2015
1 parent b10d9ca commit 4063751
Show file tree
Hide file tree
Showing 7 changed files with 233 additions and 175 deletions.
Expand Up @@ -74,6 +74,7 @@
import static org.neo4j.helpers.Exceptions.withMessage;
import static org.neo4j.helpers.collection.Iterables.filter;
import static org.neo4j.helpers.collection.IteratorUtil.count;
import static org.neo4j.helpers.collection.IteratorUtil.single;
import static org.neo4j.helpers.collection.IteratorUtil.singleOrNull;
import static org.neo4j.tooling.ImportTool.MULTI_FILE_DELIMITER;

Expand Down Expand Up @@ -559,6 +560,55 @@ public void shouldDisallowImportWithoutNodesInput() throws Exception
}
}

@Test
public void shouldBeAbleToImportAnonymousNodes() throws Exception
{
    // GIVEN node input where only "1", "3" and "5" carry an id and every other entry is anonymous (empty id),
    // plus a single relationship between two of the identified nodes
    Configuration config = Configuration.COMMAS;
    List<String> nodeIds = asList( "1", "", "", "", "3", "", "", "", "", "", "5" );
    List<RelationshipDataLine> relationships = asList( relationship( "1", "3", "KNOWS" ) );

    // WHEN running the import tool against the generated input files
    String storeDir = dbRule.getStoreDir().getAbsolutePath();
    String nodesFile = nodeData( true, config, nodeIds, alwaysTrue() ).getAbsolutePath();
    String relationshipsFile = relationshipData( true, config, relationships.iterator(),
            alwaysTrue(), true ).getAbsolutePath();
    importTool(
            "--into", storeDir,
            "--nodes", nodesFile,
            "--relationships", relationshipsFile );

    // THEN each identified node exists exactly once and the remaining nodes add up to the anonymous ones
    GraphDatabaseService db = dbRule.getGraphDatabaseService();
    try ( Transaction tx = db.beginTx() )
    {
        Iterable<Node> allNodes = GlobalGraphOperations.at( db ).getAllNodes();
        int expectedAnonymous = 0;
        for ( final String nodeId : nodeIds )
        {
            if ( !nodeId.isEmpty() )
            {
                // single(...) is expected to yield the one node carrying this id
                assertNotNull( single( filter( nodeFilter( nodeId ), allNodes.iterator() ) ) );
                continue;
            }
            expectedAnonymous++;
        }

        // Anonymous nodes are those that match the empty id in nodeFilter
        assertEquals( expectedAnonymous, count( filter( nodeFilter( "" ), allNodes.iterator() ) ) );
        tx.success();
    }
}

/**
 * Creates a predicate accepting nodes whose {@code "id"} property equals the given id.
 * The property is read with {@code ""} as its default value, so passing an empty id
 * also accepts nodes that have no {@code "id"} property.
 *
 * @param id the id value to compare each node's {@code "id"} property against.
 * @return a {@link Predicate} accepting only nodes with a matching id.
 */
private Predicate<Node> nodeFilter( final String id )
{
    Predicate<Node> matchesId = new Predicate<Node>()
    {
        @Override
        public boolean accept( Node candidate )
        {
            // Fall back to the empty string when the node has no "id" property
            Object actualId = candidate.getProperty( "id", "" );
            return actualId.equals( id );
        }
    };
    return matchesId;
}

protected void assertNodeHasLabels( Node node, String[] names )
{
for ( String name : names )
Expand Down
Expand Up @@ -20,7 +20,9 @@
package org.neo4j.unsafe.impl.batchimport.cache.idmapping.string;

/**
* Encodes a {@link Object} into a long.
* Encodes a {@link Object} into a long. The implementation should be designed to return as few collisions
* as possible, i.e. different incoming values being encoded into the same long.
* The returned encoded value must not be {@code 0} since that is a reserved value for {@link EncodingIdMapper}.
*/
public interface Encoder
{
Expand Down

0 comments on commit 4063751

Please sign in to comment.