Skip to content

Commit 3e23f10

Browse files
authored
[cherry-pick] Add stopBalancer timeout; Retry setBalancerState; Improve logging (#1929) (#1940)
/cherry-pick Signed-off-by: Arnob kumar saha <arnob@appscode.com>
1 parent 48c97d7 commit 3e23f10

File tree

1 file changed

+71
-9
lines changed

1 file changed

+71
-9
lines changed

pkg/backup.go

Lines changed: 71 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -607,6 +607,7 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err
607607
}
608608

609609
if secHost != primary {
610+
klog.Infof("Primary %s & Secondary %s found for mongoDSN %s \n", primary, secHost, mongoDSN)
610611
return primary, secHost, nil
611612
}
612613
}
@@ -623,10 +624,18 @@ func disabelBalancer(mongosHost string) error {
623624
"config",
624625
"--host", mongosHost,
625626
"--quiet",
626-
"--eval", "JSON.stringify(sh.stopBalancer())",
627+
"--eval", "JSON.stringify(sh.stopBalancer(600000,1000))",
627628
}, mongoCreds...)
628629
// disable balancer
629-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
630+
output, err := sh.Command(MongoCMD, args...).Output()
631+
if err != nil {
632+
klog.Errorf("Error while stopping balancer : %s ; output : %s \n", err.Error(), output)
633+
return err
634+
}
635+
636+
err = json.Unmarshal(output, &v)
637+
if err != nil {
638+
klog.Errorf("Unmarshal error while stopping balancer : %s ; output = %s \n", err.Error(), output)
630639
return err
631640
}
632641

@@ -642,8 +651,10 @@ func disabelBalancer(mongosHost string) error {
642651
"--eval", "while(sh.isBalancerRunning()){ print('waiting for balancer to stop...'); sleep(1000);}",
643652
}, mongoCreds...)
644653
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").Run(); err != nil {
654+
klog.Errorf("Error while waiting for the balancer to stop : %s \n", err.Error())
645655
return err
646656
}
657+
klog.Info("Balancer successfully Disabled.")
647658
return nil
648659
}
649660

@@ -659,14 +670,33 @@ func enableBalancer(mongosHost string) error {
659670
"--quiet",
660671
"--eval", "JSON.stringify(sh.setBalancerState(true))",
661672
}, mongoCreds...)
662-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
673+
674+
var (
675+
output []byte
676+
err error
677+
)
678+
cmd := sh.Command(MongoCMD, args...)
679+
for i := 0; i < 10; i++ {
680+
output, err = cmd.Output()
681+
if err != nil {
682+
klog.Errorf("Try #%d : Error on setBalancerState command : %s, output : %s .\n", i, err.Error(), output)
683+
time.Sleep(time.Second)
684+
} else {
685+
break
686+
}
687+
}
688+
689+
err = json.Unmarshal(output, &v)
690+
if err != nil {
691+
klog.Errorf("Unmarshal error while enabling balancer : %+v , output : %s \n", err.Error(), output)
663692
return err
664693
}
665694

666695
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
667-
return fmt.Errorf("unable to disable balancer. got response: %v", v)
696+
return fmt.Errorf("unable to enable balancer. got response: %v", v)
668697
}
669698

699+
klog.Info("Balancer successfully re-enabled.")
670700
return nil
671701
}
672702

@@ -685,7 +715,22 @@ func lockConfigServer(configSVRDSN, secondaryHost string) error {
685715
"--quiet",
686716
"--eval", "db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }});",
687717
}, mongoCreds...)
688-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
718+
719+
output, err := sh.Command(MongoCMD, args...).Output()
720+
if err != nil {
721+
klog.Errorf("Error while running findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
722+
return err
723+
}
724+
s := fmt.Sprintf(`/bin/echo '%s' | /usr/bin/tail -1`, strings.TrimSuffix(string(output), "\n"))
725+
output, err = sh.Command("/bin/sh", "-c", s).Output()
726+
if err != nil {
727+
klog.Errorf("Error while running tail in findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
728+
return err
729+
}
730+
731+
err = json.Unmarshal(output, &v)
732+
if err != nil {
733+
klog.Errorf("Unmarshal error while running findAndModify to lock configServer : %s \n", err.Error())
689734
return err
690735
}
691736
val, ok := v["counter"].(float64)
@@ -739,14 +784,23 @@ func lockSecondaryMember(mongohost string) error {
739784
"--quiet",
740785
"--eval", "JSON.stringify(db.fsyncLock())",
741786
}, mongoCreds...)
742-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
787+
788+
output, err := sh.Command(MongoCMD, args...).Output()
789+
if err != nil {
790+
klog.Errorf("Error while running fsyncLock on secondary : %s ; output : %s \n", err.Error(), output)
791+
return err
792+
}
793+
794+
err = json.Unmarshal(output, &v)
795+
if err != nil {
796+
klog.Errorf("Unmarshal error while running fsyncLock on secondary : %s \n", err.Error())
743797
return err
744798
}
745799

746800
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
747801
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
748802
}
749-
803+
klog.Infof("secondary %s locked.", mongohost)
750804
return nil
751805
}
752806

@@ -765,13 +819,21 @@ func unlockSecondaryMember(mongohost string) error {
765819
"--quiet",
766820
"--eval", "JSON.stringify(db.fsyncUnlock())",
767821
}, mongoCreds...)
768-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
822+
823+
output, err := sh.Command(MongoCMD, args...).Output()
824+
if err != nil {
825+
klog.Errorf("Error while running fsyncUnlock on secondary : %s ; output : %s \n", err.Error(), output)
826+
return err
827+
}
828+
err = json.Unmarshal(output, &v)
829+
if err != nil {
830+
klog.Errorf("Unmarshal error while running fsyncUnlock on secondary : %s \n", err.Error())
769831
return err
770832
}
771833

772834
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
773835
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
774836
}
775-
837+
klog.Infof("secondary %s unlocked.", mongohost)
776838
return nil
777839
}

0 commit comments

Comments
 (0)