From 487d25b4af2c4d3c52336ed06093989299a2a4df Mon Sep 17 00:00:00 2001
From: Lawrence Jones <lawrjone@gmail.com>
Date: Fri, 17 May 2019 11:20:08 +0100
Subject: [PATCH] Force checkpoint before rewind

Prior to running pg_rewind, force a checkpoint on the source database to
ensure the pg_control file used by pg_rewind is up-to-date. Failure to
do so can result in pg_rewind not detecting a timeline fork and exiting
with no action, leading stolon to fall back on pg_basebackup (which
itself will checkpoint) to ensure a resync is successful.
---
 cmd/keeper/cmd/keeper.go          |  3 ++-
 internal/postgresql/postgresql.go | 28 +++++++++++++++++++++++++++-
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/cmd/keeper/cmd/keeper.go b/cmd/keeper/cmd/keeper.go
index f354e91f3..9b49f81ec 100644
--- a/cmd/keeper/cmd/keeper.go
+++ b/cmd/keeper/cmd/keeper.go
@@ -811,7 +811,8 @@ func (p *PostgresKeeper) resync(db, followedDB *cluster.DB, tryPgrewind bool) er
 	if tryPgrewind && p.usePgrewind(db) {
 		connParams := p.getSUConnParams(db, followedDB)
 		log.Infow("syncing using pg_rewind", "followedDB", followedDB.UID, "keeper", followedDB.Spec.KeeperUID)
-		if err := pgm.SyncFromFollowedPGRewind(connParams, p.pgSUPassword); err != nil {
+		// TODO: Make the forceCheckpoint parameter use cluster specification
+		if err := pgm.SyncFromFollowedPGRewind(connParams, p.pgSUPassword, true); err != nil {
 			// log pg_rewind error and fallback to pg_basebackup
 			log.Errorw("error syncing with pg_rewind", zap.Error(err))
 		} else {
diff --git a/internal/postgresql/postgresql.go b/internal/postgresql/postgresql.go
index c7e0ab4a4..4ec54bbaf 100644
--- a/internal/postgresql/postgresql.go
+++ b/internal/postgresql/postgresql.go
@@ -754,7 +754,7 @@ func (p *Manager) createPostgresqlAutoConf() error {
 	return nil
 }
 
-func (p *Manager) SyncFromFollowedPGRewind(followedConnParams ConnParams, password string) error {
+func (p *Manager) SyncFromFollowedPGRewind(followedConnParams ConnParams, password string, forceCheckpoint bool) error {
 	// Remove postgresql.auto.conf since pg_rewind will error if it's a symlink to /dev/null
 	pgAutoConfPath := filepath.Join(p.dataDir, postgresAutoConf)
 	if err := os.Remove(pgAutoConfPath); err != nil && !os.IsNotExist(err) {
@@ -780,6 +780,32 @@ func (p *Manager) SyncFromFollowedPGRewind(followedConnParams ConnParams, passwo
 	followedConnParams.Set("options", "-c synchronous_commit=off")
 	followedConnString := followedConnParams.ConnString()
 
+	// TODO: Follow up with tests. We need to issue a checkpoint on the primary prior to us
+	// starting our recovery, as until the primary checkpoints the global/pg_control file
+	// won't contain up-to-date information about which timeline the primary is on.
+	//
+	// Imagine everyone is on timeline 1, then we promote a node to timeline 2. Standbys
+	// attempt to replicate from the newly promoted node but fail due to diverged timelines.
+	// pg_rewind is then used to resync the standbys, but if the new primary hasn't yet
+	// checkpointed, the pg_control file will tell us we're both on the same timeline (1)
+	// and pg_rewind will exit without performing any action.
+	//
+	// If we checkpoint before invoking pg_rewind we will avoid this problem, at the slight
+	// cost of forcing a checkpoint on a newly promoted node, which might hurt performance.
+	// We (GoCardless) can't afford a rewind that silently does nothing, so we take the
+	// performance penalty to avoid hours of downtime.
+	if forceCheckpoint {
+		log.Infow("issuing checkpoint on primary")
+		psqlName := filepath.Join(p.pgBinPath, "psql")
+		cmd := exec.Command(psqlName, followedConnString, "-c", "CHECKPOINT;")
+		cmd.Env = append(os.Environ(), fmt.Sprintf("PGPASSFILE=%s", pgpass.Name()))
+		cmd.Stdout = os.Stdout
+		cmd.Stderr = os.Stderr
+		if err := cmd.Run(); err != nil {
+			return fmt.Errorf("error: %v", err)
+		}
+	}
+
 	log.Infow("running pg_rewind")
 	name := filepath.Join(p.pgBinPath, "pg_rewind")
 	cmd := exec.Command(name, "--debug", "-D", p.dataDir, "--source-server="+followedConnString)