-
Notifications
You must be signed in to change notification settings - Fork 224
/
failover.go
130 lines (108 loc) · 3.21 KB
/
failover.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
package postgres
import (
"context"
"fmt"
"time"
"github.com/avast/retry-go/v4"
"github.com/spf13/cobra"
"github.com/superfly/flyctl/agent"
"github.com/superfly/flyctl/client"
"github.com/superfly/flyctl/flaps"
"github.com/superfly/flyctl/flypg"
"github.com/superfly/flyctl/internal/app"
"github.com/superfly/flyctl/internal/command"
"github.com/superfly/flyctl/internal/command/apps"
"github.com/superfly/flyctl/internal/flag"
mach "github.com/superfly/flyctl/internal/machine"
"github.com/superfly/flyctl/internal/watch"
"github.com/superfly/flyctl/iostreams"
)
// newFailover builds the "failover" command. The command runs runFailover,
// is created with the command.RequireSession and command.RequireAppName
// preparers, and has the flag.App and flag.AppConfig flags attached.
func newFailover() *cobra.Command {
	// The long description is the short one followed by a newline.
	const summary = "Failover to a new primary"

	failoverCmd := command.New(
		"failover",
		summary,
		summary+"\n",
		runFailover,
		command.RequireSession,
		command.RequireAppName,
	)

	flag.Add(failoverCmd, flag.App(), flag.AppConfig())

	return failoverCmd
}
// runFailover triggers a manual failover for the Postgres app named in ctx.
//
// The steps, in order:
//  1. Fetch the app and reject it unless it is a Postgres app on the
//     "machines" platform.
//  2. Acquire leases on the app's machines and verify the minimum image
//     versions via hasRequiredVersionOnMachines.
//  3. Refuse to continue when there is at most one machine, or when the
//     leader runs the flyio/postgres-flex image.
//  4. Ask the leader to fail over, then poll the leader machine until it no
//     longer reports the "leader" role.
//  5. Wait for machine health checks to pass.
//
// It returns nil on success and the first error encountered otherwise.
func runFailover(ctx context.Context) (err error) {
	const (
		minPostgresHaVersion         = "0.0.20"
		minPostgresFlexVersion       = "0.0.3"
		minPostgresStandaloneVersion = "0.0.7"
	)

	var (
		io        = iostreams.FromContext(ctx)
		apiClient = client.FromContext(ctx).API()
		appName   = app.NameFromContext(ctx)
	)

	pgApp, err := apiClient.GetAppCompact(ctx, appName)
	if err != nil {
		return fmt.Errorf("get app: %w", err)
	}

	if !pgApp.IsPostgresApp() {
		return fmt.Errorf("app %s is not a Postgres app", pgApp.Name)
	}

	if pgApp.PlatformVersion != "machines" {
		return fmt.Errorf("failover is only supported for machines apps")
	}

	ctx, err = apps.BuildContext(ctx, pgApp)
	if err != nil {
		return err
	}

	machines, releaseFunc, err := mach.AcquireAllLeases(ctx)
	// NOTE(review): the release is deferred before the error check, as in the
	// original code. This presumably lets leases acquired before a failure be
	// released too; it assumes releaseFunc is non-nil on error - confirm
	// against mach.AcquireAllLeases.
	defer releaseFunc(ctx, machines)
	if err != nil {
		return fmt.Errorf("machines could not be retrieved %w", err)
	}

	if err := hasRequiredVersionOnMachines(machines, minPostgresHaVersion, minPostgresFlexVersion, minPostgresStandaloneVersion); err != nil {
		return err
	}

	// You cannot fail over a single-node Postgres.
	if len(machines) <= 1 {
		return fmt.Errorf("failover is not available for standalone postgres")
	}

	leader, err := pickLeader(ctx, machines)
	if err != nil {
		return err
	}

	if leader.ImageRef.Repository == "flyio/postgres-flex" {
		return fmt.Errorf("the 'flyio/postgres-flex' image does not currently support manual failovers")
	}

	flapsClient := flaps.FromContext(ctx)
	dialer := agent.DialerFromContext(ctx)
	pgclient := flypg.NewFromInstance(leader.PrivateIP, dialer)

	fmt.Fprintf(io.Out, "Performing a failover\n")

	if err := pgclient.Failover(ctx); err != nil {
		return fmt.Errorf("failed to trigger failover %w", err)
	}

	// Capture the ID once. The polling closure must not overwrite leader:
	// if a Get call failed and yielded a nil machine, the next attempt would
	// dereference it while reading the ID.
	leaderID := leader.ID

	// Wait until the leader has lost its role: up to 30 attempts, one second
	// apart, stopping early if ctx is cancelled.
	if err := retry.Do(
		func() error {
			current, err := flapsClient.Get(ctx, leaderID)
			if err != nil {
				return err
			}
			if machineRole(current) == "leader" {
				return fmt.Errorf("%s hasn't lost its leader role", leaderID)
			}
			return nil
		},
		retry.Context(ctx), retry.Attempts(30), retry.Delay(time.Second), retry.DelayType(retry.FixedDelay),
	); err != nil {
		return err
	}

	// Wait for health checks to pass.
	if err := watch.MachinesChecks(ctx, machines); err != nil {
		return fmt.Errorf("failed to wait for health checks to pass: %w", err)
	}

	fmt.Fprintf(io.Out, "Failover complete\n")

	return nil
}