Make TokenHolder.getOrCreateIds cope with duplicate names.
chrisvest committed May 22, 2018
1 parent 7e87d88 commit b111f11
Showing 2 changed files with 101 additions and 19 deletions.
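The contract this commit establishes: when the batch passed to TokenHolder.getOrCreateIds contains the same unresolved name more than once, only one token id is created for that name, and every occurrence of the name receives that id (the new test below expects ids [2, 2, 42, 1, 42, 3] for the names {"b", "b", "X", "a", "X", "c"}). A minimal stand-alone sketch of that contract, using plain Java collections and hypothetical names rather than the Neo4j classes:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical illustration only -- not the TokenHolder implementation.
public class DuplicateNameContractSketch
{
    public static void main( String[] args )
    {
        Map<String,Integer> tokens = new HashMap<>();   // pre-existing tokens, as in the test fixture: a=1, b=2, c=3
        tokens.put( "a", 1 );
        tokens.put( "b", 2 );
        tokens.put( "c", 3 );
        AtomicInteger nextId = new AtomicInteger( 42 );

        String[] names = {"b", "b", "X", "a", "X", "c"};    // 'X' is unresolved and appears twice
        int[] ids = new int[names.length];
        for ( int i = 0; i < names.length; i++ )
        {
            // One id per distinct name: both occurrences of 'X' share the single freshly assigned id.
            ids[i] = tokens.computeIfAbsent( names[i], name -> nextId.getAndIncrement() );
        }

        System.out.println( Arrays.toString( ids ) );   // [2, 2, 42, 1, 42, 3]
        System.out.println( nextId.get() );             // 43 -- only one new id was consumed
    }
}

The first changed file is the token holder itself: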
@@ -21,6 +21,8 @@

 import org.eclipse.collections.api.set.primitive.IntSet;
 import org.eclipse.collections.api.set.primitive.MutableIntSet;
+import org.eclipse.collections.impl.map.mutable.primitive.IntIntHashMap;
+import org.eclipse.collections.impl.map.mutable.primitive.ObjectIntHashMap;
 import org.eclipse.collections.impl.set.mutable.primitive.IntHashSet;

 import java.util.List;
@@ -169,17 +171,60 @@ private synchronized void createMissingTokens( String[] names, int[] ids )
         if ( !unresolvedIndexes.isEmpty() )
         {
             // We still have unresolved ids to create.
-            createUnresolvedTokens( unresolvedIndexes, names, ids );
-            unresolvedIndexes.forEach( i ->
-                    tokenCache.put( tokenFactory.newToken( names[i], ids[i] ) ) );
+            ObjectIntHashMap<String> createdTokens = createUnresolvedTokens( unresolvedIndexes, names, ids );
+            createdTokens.forEachKeyValue( ( name, index ) ->
+                    tokenCache.put( tokenFactory.newToken( name, ids[index] ) ) );
         }
     }

-    private void createUnresolvedTokens( IntSet unresolvedIndexes, String[] names, int[] ids )
+    private ObjectIntHashMap<String> createUnresolvedTokens( IntSet unresolvedIndexes, String[] names, int[] ids )
     {
         try
         {
-            tokenCreator.createTokens( names, ids, unresolvedIndexes::contains );
+            // First, we need to filter out all of the tokens that are already resolved, so we only create tokens for
+            // indexes that are in the unresolvedIndexes set.
+            // However, we also need to deal with duplicate token names. For any token index we decide needs to have a
+            // token created, we will add a mapping from the token name to the ids-index into which the token id will
+            // be written. This is the 'createdTokens' map. It maps token names to indexes into the 'ids' array.
+            // If we find that the 'createdTokens' map already has an entry for a given name, then that name is a
+            // duplicate, and we will need to "remap" it later, by reading the token id from the correct index in the
+            // 'ids' array, and storing it at the indexes of the duplicates. This is what the 'remappingIndexes' map is
+            // for. This is a map from 'a' to 'b', where both 'a' and 'b' are indexes into the 'ids' array, and where
+            // the corresponding name for 'a' is a duplicate of the name for 'b', and where we have already decided
+            // that we will create a token id for index 'b'. After the token ids have been created, we go through the
+            // 'remappingIndexes' map, and for every '(a,b)' entry, we store the token id created for 'b' at 'ids'
+            // index 'a'.
+            ObjectIntHashMap<String> createdTokens = new ObjectIntHashMap<>();
+            IntIntHashMap remappingIndexes = new IntIntHashMap();
+            IntPredicate tokenCreateFilter = index ->
+            {
+                boolean needsCreate = unresolvedIndexes.contains( index );
+                if ( needsCreate )
+                {
+                    // The name at this index is unresolved.
+                    String name = names[index];
+                    int creatingIndex = createdTokens.getIfAbsentPut( name, index );
+                    if ( creatingIndex != index )
+                    {
+                        // This entry has a duplicate name, so we need to remap this entry instead of creating a token
+                        // for it.
+                        remappingIndexes.put( index, creatingIndex );
+                        needsCreate = false;
+                    }
+                }
+                return needsCreate;
+            };
+
+            // Create tokens for all the indexes that we don't filter out.
+            tokenCreator.createTokens( names, ids, tokenCreateFilter );
+
+            // Remap duplicate tokens to the token id we created for the first instance of any duplicate token name.
+            if ( remappingIndexes.notEmpty() )
+            {
+                remappingIndexes.forEachKeyValue( ( index, creatingIndex ) -> ids[index] = ids[creatingIndex] );
+            }
+
+            return createdTokens;
         }
         catch ( ReadOnlyDbException e )
         {
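The filter-and-remap scheme described in the comment block above can be seen in isolation in the sketch below. It re-implements the idea with plain java.util maps instead of the Eclipse Collections primitives, and uses a hand-rolled loop as a stand-in for tokenCreator.createTokens, so the class name and the creator loop are assumptions for illustration, not the committed code:

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.IntPredicate;

// Hypothetical illustration of the two-map bookkeeping -- not the Neo4j implementation.
public class FilterAndRemapSketch
{
    public static void main( String[] args )
    {
        String[] names = {"b", "b", "X", "a", "X", "c"};
        int[] ids = {2, 2, -1, 1, -1, 3};               // -1 marks the unresolved entries
        Set<Integer> unresolvedIndexes = Set.of( 2, 4 );

        // name -> first ids-index that will receive the newly created id
        Map<String,Integer> createdTokens = new HashMap<>();
        // duplicate ids-index -> the index whose id it must copy afterwards
        Map<Integer,Integer> remappingIndexes = new HashMap<>();

        IntPredicate tokenCreateFilter = index ->
        {
            if ( !unresolvedIndexes.contains( index ) )
            {
                return false;                           // already resolved, nothing to create
            }
            Integer creatingIndex = createdTokens.putIfAbsent( names[index], index );
            if ( creatingIndex != null )
            {
                // Duplicate name: remember to copy the id created at 'creatingIndex' later.
                remappingIndexes.put( index, creatingIndex );
                return false;
            }
            return true;                                // first occurrence of an unresolved name
        };

        // Stand-in for tokenCreator.createTokens( names, ids, filter ):
        // assign fresh ids only at the indexes the filter lets through.
        int nextId = 42;
        for ( int i = 0; i < names.length; i++ )
        {
            if ( tokenCreateFilter.test( i ) )
            {
                ids[i] = nextId++;
            }
        }

        // Remap duplicates to the id created for the first occurrence of their name.
        remappingIndexes.forEach( ( index, creatingIndex ) -> ids[index] = ids[creatingIndex] );

        System.out.println( Arrays.toString( ids ) );   // [2, 2, 42, 1, 42, 3]
    }
}

Routing everything through the IntPredicate means the creator only ever sees the first occurrence of each unresolved name, so duplicate handling adds no work on the creation path itself. The second changed file is the corresponding test: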
@@ -27,6 +27,7 @@
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.IntPredicate;

+import org.neo4j.internal.kernel.api.exceptions.KernelException;
 import org.neo4j.storageengine.api.Token;

 import static java.util.Arrays.asList;
@@ -99,14 +100,29 @@ public void batchTokenGetMustReturnWhetherThereWereUnresolvedTokens()
     @Test
     public void batchTokenCreateMustIgnoreExistingTokens() throws Exception
     {
-        holder.setInitialTokens( asList(
-                token( "a", 1 ),
-                token( "b", 2 ) ) );
-
-        when( creator.createToken( "c" ) ).thenReturn( 3 );
-        assertThat( holder.getOrCreateId( "c" ), is( 3 ) );
+        initialTokensABC();

         AtomicInteger nextId = new AtomicInteger( 42 );
+        mockAssignNewTokenIdsInBatch( nextId );
+
+        String[] names = new String[]{"b", "X", "a", "Y", "c"};
+        int[] ids = new int[names.length];
+        holder.getOrCreateIds( names, ids );
+        assertThat( ids.length, is( 5 ) );
+        assertThat( ids[0], is( 2 ) );
+        assertThat( ids[1], isOneOf( 42, 43 ) );
+        assertThat( ids[2], is( 1 ) );
+        assertThat( ids[3], isOneOf( 42, 43 ) );
+        assertThat( ids[4], is( 3 ) );
+        assertThat( nextId.get(), is( 44 ) );
+
+        // And these should not throw.
+        holder.getTokenById( 42 );
+        holder.getTokenById( 43 );
+    }
+
+    private void mockAssignNewTokenIdsInBatch( AtomicInteger nextId ) throws KernelException
+    {
         doAnswer( inv ->
         {
             int[] ids = inv.getArgument( 1 );
@@ -120,21 +136,42 @@ public void batchTokenCreateMustIgnoreExistingTokens() throws Exception
             }
             return null;
         } ).when( creator ).createTokens( any( String[].class ), any( int[].class ), any( IntPredicate.class ) );
+    }

-        String[] names = new String[]{"b", "X", "a", "Y", "c"};
+    private void initialTokensABC() throws KernelException
+    {
+        holder.setInitialTokens( asList(
+                token( "a", 1 ),
+                token( "b", 2 ) ) );
+
+        when( creator.createToken( "c" ) ).thenReturn( 3 );
+        assertThat( holder.getOrCreateId( "c" ), is( 3 ) );
+    }
+
+    @Test
+    public void batchTokenCreateMustDeduplicateTokenCreates() throws Exception
+    {
+        initialTokensABC();
+
+        AtomicInteger nextId = new AtomicInteger( 42 );
+        mockAssignNewTokenIdsInBatch( nextId );
+
+        // NOTE: the existing 'b', and the missing 'X', tokens are in here twice:
+        String[] names = new String[]{"b", "b", "X", "a", "X", "c"};
         int[] ids = new int[names.length];
         holder.getOrCreateIds( names, ids );
-        assertThat( ids.length, is( 5 ) );
+        assertThat( ids.length, is( 6 ) );
         assertThat( ids[0], is( 2 ) );
-        assertThat( ids[1], isOneOf( 42, 43 ) );
-        assertThat( ids[2], is( 1 ) );
-        assertThat( ids[3], isOneOf( 42, 43 ) );
-        assertThat( ids[4], is( 3 ) );
-        assertThat( nextId.get(), is( 44 ) );
+        assertThat( ids[1], is( 2 ) );
+        assertThat( ids[2], is( 42 ) );
+        assertThat( ids[3], is( 1 ) );
+        assertThat( ids[4], is( 42 ) );
+        assertThat( ids[5], is( 3 ) );
+        assertThat( nextId.get(), is( 43 ) );

-        // And these should not throw.
+        // And this should not throw.
         holder.getTokenById( 42 );
-        holder.getTokenById( 43 );
     }

     @Test( expected = IllegalArgumentException.class )
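The middle of the doAnswer stub above falls in the collapsed region between the two hunks, so the committed loop body is not shown here. For orientation only, a stub of the shape that the visible lines and the assertions imply -- assign nextId.getAndIncrement() to every index the IntPredicate lets through -- might look like the following; the loop body is an assumption, not the actual committed code:

    // Hypothetical reconstruction for illustration; 'creator' is the mocked token creator used
    // throughout this test class, and the loop body is assumed rather than taken from the commit.
    private void mockAssignNewTokenIdsInBatch( AtomicInteger nextId ) throws KernelException
    {
        doAnswer( inv ->
        {
            int[] ids = inv.getArgument( 1 );
            IntPredicate filter = inv.getArgument( 2 );
            for ( int i = 0; i < ids.length; i++ )
            {
                if ( filter.test( i ) )
                {
                    ids[i] = nextId.getAndIncrement();
                }
            }
            return null;
        } ).when( creator ).createTokens( any( String[].class ), any( int[].class ), any( IntPredicate.class ) );
    }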
