diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/store/format/standard/PropertyRecordFormat.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/store/format/standard/PropertyRecordFormat.java index e17d3343801ee..6af8b3850dfc1 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/store/format/standard/PropertyRecordFormat.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/store/format/standard/PropertyRecordFormat.java @@ -157,6 +157,11 @@ public boolean isInUse( PageCursor cursor ) for ( int i = 0; i < blocks; i++ ) { long block = cursor.getLong(); + // Since there's no inUse byte we have to check the special case of first block == 0, which will mean that it's deleted + if ( i == 0 && block == 0 ) + { + return false; + } if ( PropertyType.getPropertyTypeOrNull( block ) != null ) { return true; diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/transaction/state/PropertyDeleter.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/transaction/state/PropertyDeleter.java index 07d7ffcd02e3e..7f6757970a83a 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/transaction/state/PropertyDeleter.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/transaction/state/PropertyDeleter.java @@ -46,24 +46,28 @@ public void deletePropertyChain( PrimitiveRecord primitive, // TODO forChanging/forReading piggy-backing PropertyRecord propRecord = propertyChange.forChangingData(); - for ( PropertyBlock block : propRecord ) - { - for ( DynamicRecord valueRecord : block.getValueRecords() ) - { - assert valueRecord.inUse(); - valueRecord.setInUse( false ); - propRecord.addDeletedRecord( valueRecord ); - } - } + deletePropertyRecordIncludingValueRecords( propRecord ); nextProp = propRecord.getNextProp(); - propRecord.setInUse( false ); propRecord.setChanged( primitive ); - // We do not remove them individually, but all together here - propRecord.clearPropertyBlocks(); } primitive.setNextProp( Record.NO_NEXT_PROPERTY.intValue() ); } + public static void deletePropertyRecordIncludingValueRecords( PropertyRecord record ) + { + for ( PropertyBlock block : record ) + { + for ( DynamicRecord valueRecord : block.getValueRecords() ) + { + assert valueRecord.inUse(); + valueRecord.setInUse( false ); + record.addDeletedRecord( valueRecord ); + } + } + record.clearPropertyBlocks(); + record.setInUse( false ); + } + /** * Removes property with given {@code propertyKey} from property chain owner by the primitive found in * {@code primitiveProxy} if it exists. diff --git a/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DataImporter.java b/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DataImporter.java index 4107e4a74384b..d2881894ca5fe 100644 --- a/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DataImporter.java +++ b/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DataImporter.java @@ -100,6 +100,26 @@ public void propertiesImported( long properties ) this.properties.add( properties ); } + public void propertiesRemoved( long properties ) + { + this.properties.add( -properties ); + } + + public long nodesImported() + { + return this.nodes.sum(); + } + + public long propertiesImported() + { + return this.properties.sum(); + } + + public long relationshipsImported() + { + return this.relationships.sum(); + } + @Override public String toString() { diff --git a/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStage.java b/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStage.java index 9b94ac54517a7..a3ff3836dd7f5 100644 --- a/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStage.java +++ b/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStage.java @@ -39,6 +39,7 @@ public DeleteDuplicateNodesStage( Configuration config, PrimitiveLongIterator du BatchingNeoStores neoStore, DataImporter.Monitor storeMonitor ) { super( "DEDUP", null, config, 0 ); - add( new DeleteDuplicateNodesStep( control(), config, duplicateNodeIds, neoStore.getNodeStore(), storeMonitor ) ); + add( new DeleteDuplicateNodesStep( control(), config, duplicateNodeIds, neoStore.getNodeStore(), neoStore.getPropertyStore(), + storeMonitor ) ); } } diff --git a/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStep.java b/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStep.java index 8f2029303a85d..f79fa97f14508 100644 --- a/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStep.java +++ b/community/kernel/src/main/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStep.java @@ -22,26 +22,34 @@ import java.io.IOException; import org.neo4j.collection.primitive.PrimitiveLongIterator; import org.neo4j.kernel.impl.store.NodeStore; +import org.neo4j.kernel.impl.store.PropertyStore; import org.neo4j.kernel.impl.store.RecordCursor; +import org.neo4j.kernel.impl.store.record.DynamicRecord; import org.neo4j.kernel.impl.store.record.NodeRecord; +import org.neo4j.kernel.impl.store.record.PropertyRecord; +import org.neo4j.kernel.impl.store.record.Record; import org.neo4j.unsafe.impl.batchimport.staging.LonelyProcessingStep; import org.neo4j.unsafe.impl.batchimport.staging.StageControl; import static org.neo4j.kernel.impl.store.record.RecordLoad.NORMAL; +import static org.neo4j.kernel.impl.transaction.state.PropertyDeleter.deletePropertyRecordIncludingValueRecords; public class DeleteDuplicateNodesStep extends LonelyProcessingStep { private final NodeStore nodeStore; + private final PropertyStore propertyStore; private final PrimitiveLongIterator nodeIds; private final DataImporter.Monitor storeMonitor; - private int nodesRemoved; + private long nodesRemoved; + private long propertiesRemoved; public DeleteDuplicateNodesStep( StageControl control, Configuration config, PrimitiveLongIterator nodeIds, NodeStore nodeStore, - DataImporter.Monitor storeMonitor ) + PropertyStore propertyStore, DataImporter.Monitor storeMonitor ) { super( control, "DEDUP", config ); this.nodeStore = nodeStore; + this.propertyStore = propertyStore; this.nodeIds = nodeIds; this.storeMonitor = storeMonitor; } @@ -49,15 +57,41 @@ public DeleteDuplicateNodesStep( StageControl control, Configuration config, Pri @Override protected void process() throws IOException { - NodeRecord record = nodeStore.newRecord(); - RecordCursor cursor = nodeStore.newRecordCursor( record ).acquire( 0, NORMAL ); - while ( nodeIds.hasNext() ) + NodeRecord nodeRecord = nodeStore.newRecord(); + PropertyRecord propertyRecord = propertyStore.newRecord(); + try ( RecordCursor cursor = nodeStore.newRecordCursor( nodeRecord ).acquire( 0, NORMAL ); + RecordCursor propertyCursor = propertyStore.newRecordCursor( propertyRecord ).acquire( 0, NORMAL ) ) { - long duplicateNodeId = nodeIds.next(); - cursor.next( duplicateNodeId ); - record.setInUse( false ); - nodeStore.updateRecord( record ); - nodesRemoved++; + while ( nodeIds.hasNext() ) + { + long duplicateNodeId = nodeIds.next(); + cursor.next( duplicateNodeId ); + assert nodeRecord.inUse() : nodeRecord; + // Ensure heavy so that the dynamic label records gets loaded (and then deleted) too + nodeStore.ensureHeavy( nodeRecord ); + + // Delete property records + long nextProp = nodeRecord.getNextProp(); + while ( !Record.NULL_REFERENCE.is( nextProp ) ) + { + propertyCursor.next( nextProp ); + assert propertyRecord.inUse() : propertyRecord + " for " + nodeRecord; + propertyStore.ensureHeavy( propertyRecord ); + propertiesRemoved += propertyRecord.numberOfProperties(); + nextProp = propertyRecord.getNextProp(); + deletePropertyRecordIncludingValueRecords( propertyRecord ); + propertyStore.updateRecord( propertyRecord ); + } + + // Delete node (and dynamic label records, if any) + nodeRecord.setInUse( false ); + for ( DynamicRecord labelRecord : nodeRecord.getDynamicLabelRecords() ) + { + labelRecord.setInUse( false ); + } + nodeStore.updateRecord( nodeRecord ); + nodesRemoved++; + } } } @@ -66,5 +100,6 @@ public void close() throws Exception { super.close(); storeMonitor.nodesRemoved( nodesRemoved ); + storeMonitor.propertiesRemoved( propertiesRemoved ); } } diff --git a/community/kernel/src/test/java/org/neo4j/test/rule/NeoStoresRule.java b/community/kernel/src/test/java/org/neo4j/test/rule/NeoStoresRule.java index a49e5ec8ea5c1..5557c41682baf 100644 --- a/community/kernel/src/test/java/org/neo4j/test/rule/NeoStoresRule.java +++ b/community/kernel/src/test/java/org/neo4j/test/rule/NeoStoresRule.java @@ -95,6 +95,7 @@ private static Config configOf( String... config ) protected void after( boolean successful ) throws Throwable { IOUtils.closeAll( neoStores, rulePageCache ); + neoStores = null; if ( ruleFs != null ) { ruleFs.close(); diff --git a/community/kernel/src/test/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStepTest.java b/community/kernel/src/test/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStepTest.java new file mode 100644 index 0000000000000..97867f5469ece --- /dev/null +++ b/community/kernel/src/test/java/org/neo4j/unsafe/impl/batchimport/DeleteDuplicateNodesStepTest.java @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2002-2018 "Neo Technology," + * Network Engine for Objects in Lund AB [http://neotechnology.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.unsafe.impl.batchimport; + +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.RuleChain; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import org.neo4j.collection.primitive.PrimitiveLongCollections; +import org.neo4j.helpers.collection.PrefetchingIterator; +import org.neo4j.kernel.impl.store.AbstractDynamicStore; +import org.neo4j.kernel.impl.store.NeoStores; +import org.neo4j.kernel.impl.store.NodeLabelsField; +import org.neo4j.kernel.impl.store.NodeStore; +import org.neo4j.kernel.impl.store.PropertyStore; +import org.neo4j.kernel.impl.store.record.DynamicRecord; +import org.neo4j.kernel.impl.store.record.NodeRecord; +import org.neo4j.kernel.impl.store.record.PrimitiveRecord; +import org.neo4j.kernel.impl.store.record.PropertyBlock; +import org.neo4j.kernel.impl.store.record.PropertyRecord; +import org.neo4j.kernel.impl.store.record.Record; +import org.neo4j.kernel.impl.transaction.state.Loaders; +import org.neo4j.kernel.impl.transaction.state.PropertyCreator; +import org.neo4j.kernel.impl.transaction.state.PropertyTraverser; +import org.neo4j.kernel.impl.transaction.state.RecordAccess; +import org.neo4j.test.Randoms; +import org.neo4j.test.rule.NeoStoresRule; +import org.neo4j.test.rule.RandomRule; +import org.neo4j.test.rule.RepeatRule; +import org.neo4j.test.rule.RepeatRule.Repeat; +import org.neo4j.unsafe.batchinsert.internal.DirectRecordAccess; +import org.neo4j.unsafe.impl.batchimport.staging.SimpleStageControl; +import org.neo4j.values.storable.Values; + +import static org.junit.Assert.assertEquals; + +public class DeleteDuplicateNodesStepTest +{ + private final RandomRule random = new RandomRule().withConfiguration( new Randoms.Default() + { + @Override + public int stringMaxLength() + { + return 200; + } + + @Override + public int arrayMaxLength() + { + return 200; + } + } ); + private final NeoStoresRule neoStoresRule = new NeoStoresRule( getClass() ); + private final RepeatRule repeater = new RepeatRule(); + + @Rule + public final RuleChain rules = RuleChain.outerRule( repeater ).around( random ).around( neoStoresRule ); + + @Repeat( times = 10 ) + @Test + public void shouldDeleteEverythingAboutTheDuplicatedNodes() throws Exception + { + // given + NeoStores neoStores = neoStoresRule.builder().build(); + Ids[] ids = new Ids[9]; + DataImporter.Monitor monitor = new DataImporter.Monitor(); + ids[0] = createNode( monitor, neoStores, 10, 10 ); // node with many properties and many labels + ids[1] = createNode( monitor, neoStores, 10, 1 ); // node with many properties and few labels + ids[2] = createNode( monitor, neoStores, 10, 0 ); // node with many properties and no labels + ids[3] = createNode( monitor, neoStores, 1, 10 ); // node with few properties and many labels + ids[4] = createNode( monitor, neoStores, 1, 1 ); // node with few properties and few labels + ids[5] = createNode( monitor, neoStores, 1, 0 ); // node with few properties and no labels + ids[6] = createNode( monitor, neoStores, 0, 10 ); // node with no properties and many labels + ids[7] = createNode( monitor, neoStores, 0, 1 ); // node with no properties and few labels + ids[8] = createNode( monitor, neoStores, 0, 0 ); // node with no properties and no labels + + // when + long[] duplicateNodeIds = randomNodes( ids ); + SimpleStageControl control = new SimpleStageControl(); + try ( DeleteDuplicateNodesStep step = new DeleteDuplicateNodesStep( control, Configuration.DEFAULT, + PrimitiveLongCollections.iterator( duplicateNodeIds ), neoStores.getNodeStore(), neoStores.getPropertyStore(), monitor ) ) + { + control.steps( step ); + startAndAwaitCompletionOf( step ); + } + control.assertHealthy(); + + // then + int expectedNodes = 0; + int expectedProperties = 0; + for ( Ids entity : ids ) + { + boolean expectedToBeInUse = !PrimitiveLongCollections.contains( duplicateNodeIds, entity.node.getId() ); + int stride = expectedToBeInUse ? 1 : 0; + expectedNodes += stride; + + // Verify node record + assertEquals( expectedToBeInUse, neoStores.getNodeStore().isInUse( entity.node.getId() ) ); + + // Verify label records + for ( DynamicRecord labelRecord : entity.node.getDynamicLabelRecords() ) + { + assertEquals( expectedToBeInUse, neoStores.getNodeStore().getDynamicLabelStore().isInUse( labelRecord.getId() ) ); + } + + // Verify property records + for ( PropertyRecord propertyRecord : entity.properties ) + { + assertEquals( expectedToBeInUse, neoStores.getPropertyStore().isInUse( propertyRecord.getId() ) ); + for ( PropertyBlock property : propertyRecord ) + { + // Verify property dynamic value records + for ( DynamicRecord valueRecord : property.getValueRecords() ) + { + AbstractDynamicStore valueStore; + switch ( property.getType() ) + { + case STRING: + valueStore = neoStores.getPropertyStore().getStringStore(); + break; + case ARRAY: + valueStore = neoStores.getPropertyStore().getArrayStore(); + break; + default: throw new IllegalArgumentException( propertyRecord + " " + property ); + } + assertEquals( expectedToBeInUse, valueStore.isInUse( valueRecord.getId() ) ); + } + expectedProperties += stride; + } + } + } + + assertEquals( expectedNodes, monitor.nodesImported() ); + assertEquals( expectedProperties, monitor.propertiesImported() ); + } + + private long[] randomNodes( Ids[] ids ) + { + long[] nodeIds = new long[ids.length]; + int cursor = 0; + for ( int i = 0; i < ids.length; i++ ) + { + if ( random.nextBoolean() ) + { + nodeIds[cursor++] = ids[i].node.getId(); + } + } + + // If none was selected, then pick just one + if ( cursor == 0 ) + { + nodeIds[cursor++] = random.among( ids ).node.getId(); + } + return Arrays.copyOf( nodeIds, cursor ); + } + + private static class Ids + { + private final NodeRecord node; + private final PropertyRecord[] properties; + + Ids( NodeRecord node, PropertyRecord[] properties ) + { + this.node = node; + this.properties = properties; + } + } + + private Ids createNode( DataImporter.Monitor monitor, NeoStores neoStores, int propertyCount, int labelCount ) + { + PropertyStore propertyStore = neoStores.getPropertyStore(); + RecordAccess propertyRecordAccess = + new DirectRecordAccess<>( propertyStore, new Loaders( neoStores ).propertyLoader() ); + NodeStore nodeStore = neoStores.getNodeStore(); + NodeRecord nodeRecord = nodeStore.newRecord(); + nodeRecord.setId( nodeStore.nextId() ); + nodeRecord.setInUse( true ); + NodeLabelsField.parseLabelsField( nodeRecord ).put( labelIds( labelCount ), nodeStore, nodeStore.getDynamicLabelStore() ); + long nextProp = new PropertyCreator( propertyStore, new PropertyTraverser() ) + .createPropertyChain( nodeRecord, properties( propertyStore, propertyCount ), propertyRecordAccess ); + nodeRecord.setNextProp( nextProp ); + nodeStore.updateRecord( nodeRecord ); + PropertyRecord[] propertyRecords = extractPropertyRecords( propertyRecordAccess, nextProp ); + propertyRecordAccess.close(); + monitor.nodesImported( 1 ); + monitor.propertiesImported( propertyCount ); + return new Ids( nodeRecord, propertyRecords ); + } + + private static PropertyRecord[] extractPropertyRecords( RecordAccess propertyRecordAccess, + long nextProp ) + { + List result = new ArrayList<>(); + while ( !Record.NULL_REFERENCE.is( nextProp ) ) + { + PropertyRecord record = propertyRecordAccess.getIfLoaded( nextProp ).forReadingLinkage(); + result.add( record ); + nextProp = record.getNextProp(); + } + return result.toArray( new PropertyRecord[result.size()] ); + } + + private Iterator properties( PropertyStore propertyStore, int propertyCount ) + { + return new PrefetchingIterator() + { + private int i; + + @Override + protected PropertyBlock fetchNextOrNull() + { + if ( i >= propertyCount ) + { + return null; + } + PropertyBlock block = new PropertyBlock(); + propertyStore.encodeValue( block, i, Values.of( random.propertyValue() ) ); + i++; + return block; + } + }; + } + + private static long[] labelIds( int labelCount ) + { + long[] result = new long[labelCount]; + for ( int i = 0; i < labelCount; i++ ) + { + result[i] = i; + } + return result; + } + + private static void startAndAwaitCompletionOf( DeleteDuplicateNodesStep step ) throws InterruptedException + { + step.start( 0 ); + step.receive( 0, null ); + while ( !step.isCompleted() ) + { + Thread.sleep( 10 ); + } + } +} diff --git a/community/primitive-collections/src/main/java/org/neo4j/collection/primitive/PrimitiveLongCollections.java b/community/primitive-collections/src/main/java/org/neo4j/collection/primitive/PrimitiveLongCollections.java index d5ceae7332424..c41c03b8f8e63 100644 --- a/community/primitive-collections/src/main/java/org/neo4j/collection/primitive/PrimitiveLongCollections.java +++ b/community/primitive-collections/src/main/java/org/neo4j/collection/primitive/PrimitiveLongCollections.java @@ -643,6 +643,18 @@ public static int count( PrimitiveLongIterator iterator ) return count; } + public static boolean contains( long[] values, long candidate ) + { + for ( int i = 0; i < values.length; i++ ) + { + if ( values[i] == candidate ) + { + return true; + } + } + return false; + } + public static long[] asArray( PrimitiveLongIterator iterator ) { long[] array = new long[8];