Skip to content

Commit

Permalink
Proper progress even for double record units
Browse files Browse the repository at this point in the history
  • Loading branch information
tinwelint committed Dec 4, 2017
1 parent cddfa8b commit 6e328e9
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 14 deletions.
Expand Up @@ -36,8 +36,6 @@ public interface Input
{
interface Estimates
{
long UNKNOWN = -1;

/**
* @return estimated number of nodes for the entire input.
*/
Expand Down
Expand Up @@ -215,7 +215,6 @@ ESTIMATED_REQUIRED_MEMORY_USAGE, bytes(
private void initializeRelationshipImport( Estimates estimates, IdMapper idMapper, BatchingNeoStores neoStores )
{
long numberOfRelationships = estimates.numberOfRelationships();
// TODO how to handle UNKNOWN?
printStageHeader( "(2/4) Relationship import",
ESTIMATED_NUMBER_OF_RELATIONSHIPS, count( numberOfRelationships ),
ESTIMATED_DISK_SPACE_USAGE, bytes(
Expand All @@ -234,28 +233,34 @@ private void initializeLinking( BatchingNeoStores neoStores,
ESTIMATED_REQUIRED_MEMORY_USAGE, bytes(
baselineMemoryRequirement( neoStores ) +
defensivelyPadMemoryEstimate( nodeRelationshipCache.calculateMemoryUsage( distribution.getNodeCount() ) ) ) );
// The reason the highId of the relationship store is used, as opposed to actual number of imported relationships
// is that the stages underneath operate on id ranges, not knowing which records are actually in use.
long relationshipRecordIdCount = neoStores.getRelationshipStore().getHighId();
// The progress counting of linking stages is special anyway, in that it uses the "progress" stats key,
// which is based on actual number of relationships, not relationship ids.
long actualRelationshipCount = distribution.getRelationshipCount();
initializeProgress(
actualRelationshipCount + // node degrees
relationshipRecordIdCount + // node degrees
actualRelationshipCount * 2 + // start/end forwards, see RelationshipLinkingProgress
actualRelationshipCount * 2 // start/end backwards, see RelationshipLinkingProgress
);
}

private void initializeMisc( BatchingNeoStores stores, DataStatistics distribution )
private void initializeMisc( BatchingNeoStores neoStores, DataStatistics distribution )
{
printStageHeader( "(4/4) Post processing",
ESTIMATED_REQUIRED_MEMORY_USAGE, bytes( baselineMemoryRequirement( stores ) ) );
// written groups + node counts + relationship counts
ESTIMATED_REQUIRED_MEMORY_USAGE, bytes( baselineMemoryRequirement( neoStores ) ) );
long actualNodeCount = distribution.getNodeCount();
long actualRelationshipCount = distribution.getRelationshipCount();
long groupCount = stores.getTemporaryRelationshipGroupStore().getHighId();
// The reason the highId of the relationship store is used, as opposed to actual number of imported relationships
// is that the stages underneath operate on id ranges, not knowing which records are actually in use.
long relationshipRecordIdCount = neoStores.getRelationshipStore().getHighId();
long groupCount = neoStores.getTemporaryRelationshipGroupStore().getHighId();
initializeProgress(
groupCount + // Count groups
groupCount + // Write groups
groupCount + // Node --> Group
actualNodeCount + // Node counts
actualRelationshipCount ); // Relationship counts
groupCount + // Count groups
groupCount + // Write groups
groupCount + // Node --> Group
actualNodeCount + // Node counts
relationshipRecordIdCount ); // Relationship counts
}

private static long defensivelyPadMemoryEstimate( long bytes )
Expand Down

0 comments on commit 6e328e9

Please sign in to comment.