@@ -58,6 +58,11 @@ type Etcd struct {
58
58
59
59
args []string
60
60
client * etcd.Client
61
+
62
+ // if the new member was added as a learner during the service start, its ID is kept here
63
+ learnerMemberID uint64
64
+
65
+ promoteCtxCancel context.CancelFunc
61
66
}
62
67
63
68
// ID implements the Service interface.
@@ -95,6 +100,9 @@ func (e *Etcd) PreFunc(ctx context.Context, r runtime.Runtime) (err error) {
95
100
return fmt .Errorf ("failed to pull image %q: %w" , r .Config ().Cluster ().Etcd ().Image (), err )
96
101
}
97
102
103
+ // Clear any previously set learner member ID
104
+ e .learnerMemberID = 0
105
+
98
106
switch t := r .Config ().Machine ().Type (); t {
99
107
case machine .TypeInit :
100
108
return e .argsForInit (ctx , r )
@@ -111,6 +119,10 @@ func (e *Etcd) PreFunc(ctx context.Context, r runtime.Runtime) (err error) {
111
119
112
120
// PostFunc implements the Service interface.
113
121
func (e * Etcd ) PostFunc (r runtime.Runtime , state events.ServiceState ) (err error ) {
122
+ if e .promoteCtxCancel != nil {
123
+ e .promoteCtxCancel ()
124
+ }
125
+
114
126
if e .client != nil {
115
127
e .client .Close () //nolint:errcheck
116
128
}
@@ -157,6 +169,20 @@ func (e *Etcd) Runner(r runtime.Runtime) (runner.Runner, error) {
157
169
158
170
env = append (env , "ETCD_CIPHER_SUITES=TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305" ) //nolint:lll
159
171
172
+ if e .learnerMemberID != 0 {
173
+ var promoteCtx context.Context
174
+
175
+ promoteCtx , e .promoteCtxCancel = context .WithCancel (context .Background ())
176
+
177
+ go func () {
178
+ if err := promoteMember (promoteCtx , r , e .learnerMemberID ); err != nil && ! errors .Is (err , context .Canceled ) {
179
+ log .Printf ("failed promoting member: %s" , err )
180
+ } else if err == nil {
181
+ log .Printf ("successfully promoted etcd member" )
182
+ }
183
+ }()
184
+ }
185
+
160
186
return restart .New (containerd .NewRunner (
161
187
r .Config ().Debug (),
162
188
& args ,
@@ -304,7 +330,7 @@ func addMember(ctx context.Context, r runtime.Runtime, addrs []string, name stri
304
330
}
305
331
}
306
332
307
- add , err := client .MemberAdd (ctx , addrs )
333
+ add , err := client .MemberAddAsLearner (ctx , addrs )
308
334
if err != nil {
309
335
return nil , 0 , fmt .Errorf ("error adding member: %w" , err )
310
336
}
@@ -317,7 +343,9 @@ func addMember(ctx context.Context, r runtime.Runtime, addrs []string, name stri
317
343
return list , add .Member .ID , nil
318
344
}
319
345
320
- func buildInitialCluster (ctx context.Context , r runtime.Runtime , name , ip string ) (initial string , err error ) {
346
+ func buildInitialCluster (ctx context.Context , r runtime.Runtime , name , ip string ) (initial string , learnerMemberID uint64 , err error ) {
347
+ var id uint64
348
+
321
349
err = retry .Constant (10 * time .Minute ,
322
350
retry .WithUnits (3 * time .Second ),
323
351
retry .WithJitter (time .Second ),
@@ -326,7 +354,6 @@ func buildInitialCluster(ctx context.Context, r runtime.Runtime, name, ip string
326
354
var (
327
355
peerAddrs = []string {"https://" + net .FormatAddress (ip ) + ":2380" }
328
356
resp * clientv3.MemberListResponse
329
- id uint64
330
357
)
331
358
332
359
attemptCtx , attemptCtxCancel := context .WithTimeout (ctx , 30 * time .Second )
@@ -362,10 +389,10 @@ func buildInitialCluster(ctx context.Context, r runtime.Runtime, name, ip string
362
389
})
363
390
364
391
if err != nil {
365
- return "" , fmt .Errorf ("failed to build cluster arguments: %w" , err )
392
+ return "" , 0 , fmt .Errorf ("failed to build cluster arguments: %w" , err )
366
393
}
367
394
368
- return initial , nil
395
+ return initial , id , nil
369
396
}
370
397
371
398
//nolint:gocyclo
@@ -441,7 +468,7 @@ func (e *Etcd) argsForInit(ctx context.Context, r runtime.Runtime) error {
441
468
if upgraded {
442
469
denyListArgs .Set ("initial-cluster-state" , "existing" )
443
470
444
- initialCluster , err = buildInitialCluster (ctx , r , hostname , primaryAddr )
471
+ initialCluster , e . learnerMemberID , err = buildInitialCluster (ctx , r , hostname , primaryAddr )
445
472
if err != nil {
446
473
return err
447
474
}
@@ -534,7 +561,7 @@ func (e *Etcd) argsForControlPlane(ctx context.Context, r runtime.Runtime) error
534
561
if e .Bootstrap {
535
562
initialCluster = fmt .Sprintf ("%s=https://%s:2380" , hostname , net .FormatAddress (primaryAddr ))
536
563
} else {
537
- initialCluster , err = buildInitialCluster (ctx , r , hostname , primaryAddr )
564
+ initialCluster , e . learnerMemberID , err = buildInitialCluster (ctx , r , hostname , primaryAddr )
538
565
if err != nil {
539
566
return fmt .Errorf ("failed to build initial etcd cluster: %w" , err )
540
567
}
@@ -591,6 +618,27 @@ func (e *Etcd) recoverFromSnapshot(hostname, primaryAddr string) error {
591
618
return nil
592
619
}
593
620
621
+ func promoteMember (ctx context.Context , r runtime.Runtime , memberID uint64 ) error {
622
+ // try to promote a member until it succeeds (call might fail until the member catches up with the leader)
623
+ // promote member call will fail until member catches up with the master
624
+ return retry .Constant (10 * time .Minute ,
625
+ retry .WithUnits (10 * time .Second ),
626
+ retry .WithJitter (time .Second ),
627
+ retry .WithErrorLogging (true ),
628
+ ).RetryWithContext (ctx , func (ctx context.Context ) error {
629
+ client , err := etcd .NewClientFromControlPlaneIPs (ctx , r .Config ().Cluster ().CA (), r .Config ().Cluster ().Endpoint ())
630
+ if err != nil {
631
+ return retry .ExpectedError (err )
632
+ }
633
+
634
+ defer client .Close () //nolint:errcheck
635
+
636
+ _ , err = client .MemberPromote (ctx , memberID )
637
+
638
+ return retry .ExpectedError (err )
639
+ })
640
+ }
641
+
594
642
// IsDirEmpty checks if a directory is empty or not.
595
643
func IsDirEmpty (name string ) (bool , error ) {
596
644
f , err := os .Open (name )
0 commit comments