Skip to content

Commit

Permalink
A bit of tuning for the default check-point settings
Browse files Browse the repository at this point in the history
When a check-point is running, it consumes IO, and this may negatively affect
the performance of the database, especially on platforms where IO performance
is low (like rotating hard disk drives) or throttled (like AWS EBS volumes).
Therefore, I have tried to modify the defaults to minimise this influence:

* Tripled the check-point interval so it runs every 15 minutes instead of every
  5 minutes. I have left the 100.000 transaction threshold the same, though.
* Cut the default IOPS limit by two thirds. I observed that we never appeared
  to reach the 1.000 IOPS default on AWS EBS anyway. Instead we were only
  reaching about 600 to 700 IOPS. The new default is half of this.
* Add a random offset to the initial check-point time, so instances that start
  in a cluster simultaneously will attempt to schedule their check-point cycles
  out of phase.
  • Loading branch information
chrisvest committed Dec 5, 2016
1 parent 913d665 commit 66e6a65
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,55 @@
*/
package org.neo4j.graphdb.factory;

import java.io.File;
import java.util.List;
import java.util.stream.Collectors;

import org.neo4j.graphdb.config.Setting;
import org.neo4j.helpers.AdvertisedSocketAddress;
import org.neo4j.helpers.ListenSocketAddress;
import org.neo4j.io.ByteUnit;
import org.neo4j.kernel.configuration.*;
import org.neo4j.kernel.configuration.Config;
import org.neo4j.kernel.configuration.ConfigurationMigrator;
import org.neo4j.kernel.configuration.GraphDatabaseConfigurationMigrator;
import org.neo4j.kernel.configuration.Group;
import org.neo4j.kernel.configuration.GroupSettingSupport;
import org.neo4j.kernel.configuration.Internal;
import org.neo4j.kernel.configuration.Migrator;
import org.neo4j.kernel.configuration.Settings;
import org.neo4j.kernel.configuration.Title;
import org.neo4j.kernel.impl.cache.MonitorGc;
import org.neo4j.logging.Level;

import java.io.File;
import java.util.List;
import java.util.stream.Collectors;

import static org.neo4j.graphdb.factory.GraphDatabaseSettings.BoltConnector.EncryptionLevel.OPTIONAL;
import static org.neo4j.graphdb.factory.GraphDatabaseSettings.Connector.ConnectorType.BOLT;
import static org.neo4j.kernel.configuration.GroupSettingSupport.enumerate;
import static org.neo4j.kernel.configuration.Settings.*;
import static org.neo4j.kernel.configuration.Settings.ANY;
import static org.neo4j.kernel.configuration.Settings.BOOLEAN;
import static org.neo4j.kernel.configuration.Settings.BYTES;
import static org.neo4j.kernel.configuration.Settings.DEFAULT;
import static org.neo4j.kernel.configuration.Settings.DOUBLE;
import static org.neo4j.kernel.configuration.Settings.DURATION;
import static org.neo4j.kernel.configuration.Settings.FALSE;
import static org.neo4j.kernel.configuration.Settings.INTEGER;
import static org.neo4j.kernel.configuration.Settings.LONG;
import static org.neo4j.kernel.configuration.Settings.NO_DEFAULT;
import static org.neo4j.kernel.configuration.Settings.PATH;
import static org.neo4j.kernel.configuration.Settings.STRING;
import static org.neo4j.kernel.configuration.Settings.STRING_LIST;
import static org.neo4j.kernel.configuration.Settings.TRUE;
import static org.neo4j.kernel.configuration.Settings.advertisedAddress;
import static org.neo4j.kernel.configuration.Settings.derivedSetting;
import static org.neo4j.kernel.configuration.Settings.illegalValueMessage;
import static org.neo4j.kernel.configuration.Settings.legacyFallback;
import static org.neo4j.kernel.configuration.Settings.list;
import static org.neo4j.kernel.configuration.Settings.listenAddress;
import static org.neo4j.kernel.configuration.Settings.matches;
import static org.neo4j.kernel.configuration.Settings.max;
import static org.neo4j.kernel.configuration.Settings.min;
import static org.neo4j.kernel.configuration.Settings.options;
import static org.neo4j.kernel.configuration.Settings.pathSetting;
import static org.neo4j.kernel.configuration.Settings.setting;

/**
* Settings for Neo4j. Use this with {@link GraphDatabaseBuilder}.
Expand Down Expand Up @@ -241,7 +274,7 @@ public abstract class GraphDatabaseSettings
"of a crash. On the other hand, a longer check-point interval can also reduce the I/O load that " +
"the database places on the system, as each check-point implies a flushing and forcing of all the " +
"store files." )
public static final Setting<Long> check_point_interval_time = setting( "dbms.checkpoint.interval.time", DURATION, "5m" );
public static final Setting<Long> check_point_interval_time = setting( "dbms.checkpoint.interval.time", DURATION, "15m" );

@Description( "Limit the number of IOs the background checkpoint process will consume per second. " +
"This setting is advisory, is ignored in Neo4j Community Edition, and is followed to " +
Expand All @@ -255,7 +288,7 @@ public abstract class GraphDatabaseSettings
"The configuration can also be commented out to remove the limitation entirely, and " +
"let the checkpointer flush data as fast as the hardware will go. " +
"Set this to -1 to disable the IOPS limit.")
public static final Setting<Integer> check_point_iops_limit = setting( "dbms.checkpoint.iops.limit", INTEGER, "1000" );
public static final Setting<Integer> check_point_iops_limit = setting( "dbms.checkpoint.iops.limit", INTEGER, "300" );

// Auto Indexing
@Description("Controls the auto indexing feature for nodes. Setting it to `false` shuts it down, " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.neo4j.kernel.impl.transaction.log.checkpoint;

import java.time.Clock;
import java.util.concurrent.ThreadLocalRandom;

public class TimeCheckPointThreshold extends AbstractCheckPointThreshold
{
Expand All @@ -33,7 +34,9 @@ public TimeCheckPointThreshold( long timeMillisThreshold, Clock clock )
{
this.timeMillisThreshold = timeMillisThreshold;
this.clock = clock;
this.nextCheckPointTime = clock.millis() + timeMillisThreshold;
// The random start offset means databases in a cluster will not all check-point at the same time.
long randomStartOffset = ThreadLocalRandom.current().nextLong( timeMillisThreshold );
this.nextCheckPointTime = clock.millis() + timeMillisThreshold + randomStartOffset;

}

Expand Down

0 comments on commit 66e6a65

Please sign in to comment.