Skip to content

Commit 5f2f88e

Browse files
authored
Add stopBalancer timeout; Retry setBalancerState; Improve logging (#1929) (#1944)
/cherry-pick Signed-off-by: Arnob kumar saha <arnob@appscode.com>
1 parent 33a86b5 commit 5f2f88e

File tree

1 file changed

+65
-9
lines changed

1 file changed

+65
-9
lines changed

pkg/backup.go

Lines changed: 65 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -667,6 +667,7 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err
667667
}
668668

669669
if secHost != primary {
670+
klog.Infof("Primary %s & Secondary %s found for mongoDSN %s \n", primary, secHost, mongoDSN)
670671
return primary, secHost, nil
671672
}
672673
}
@@ -683,10 +684,18 @@ func disabelBalancer(mongosHost string) error {
683684
"config",
684685
"--host", mongosHost,
685686
"--quiet",
686-
"--eval", "JSON.stringify(sh.stopBalancer())",
687+
"--eval", "JSON.stringify(sh.stopBalancer(600000,1000))",
687688
}, mongoCreds...)
688689
// disable balancer
689-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
690+
output, err := sh.Command(MongoCMD, args...).Output()
691+
if err != nil {
692+
klog.Errorf("Error while stopping balancer : %s ; output : %s \n", err.Error(), output)
693+
return err
694+
}
695+
696+
err = json.Unmarshal(output, &v)
697+
if err != nil {
698+
klog.Errorf("Unmarshal error while stopping balancer : %s ; output = %s \n", err.Error(), output)
690699
return err
691700
}
692701

@@ -702,8 +711,10 @@ func disabelBalancer(mongosHost string) error {
702711
"--eval", "while(sh.isBalancerRunning().mode != 'off'){ print('waiting for balancer to stop...'); sleep(1000);}",
703712
}, mongoCreds...)
704713
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").Run(); err != nil {
714+
klog.Errorf("Error while waiting for the balancer to stop : %s \n", err.Error())
705715
return err
706716
}
717+
klog.Info("Balancer successfully Disabled.")
707718
return nil
708719
}
709720

@@ -719,14 +730,33 @@ func enableBalancer(mongosHost string) error {
719730
"--quiet",
720731
"--eval", "JSON.stringify(sh.setBalancerState(true))",
721732
}, mongoCreds...)
722-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
733+
734+
var (
735+
output []byte
736+
err error
737+
)
738+
cmd := sh.Command(MongoCMD, args...)
739+
for i := 0; i < 10; i++ {
740+
output, err = cmd.Output()
741+
if err != nil {
742+
klog.Errorf("Try #%d : Error on setBalancerState command : %s, output : %s .\n", i, err.Error(), output)
743+
time.Sleep(time.Second)
744+
} else {
745+
break
746+
}
747+
}
748+
749+
err = json.Unmarshal(output, &v)
750+
if err != nil {
751+
klog.Errorf("Unmarshal error while enabling balancer : %+v , output : %s \n", err.Error(), output)
723752
return err
724753
}
725754

726755
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
727-
return fmt.Errorf("unable to disable balancer. got response: %v", v)
756+
return fmt.Errorf("unable to enable balancer. got response: %v", v)
728757
}
729758

759+
klog.Info("Balancer successfully re-enabled.")
730760
return nil
731761
}
732762

@@ -745,7 +775,16 @@ func lockConfigServer(configSVRDSN, secondaryHost string) error {
745775
"--quiet",
746776
"--eval", "JSON.stringify(db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }}));",
747777
}, mongoCreds...)
748-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
778+
779+
output, err := sh.Command(MongoCMD, args...).Output()
780+
if err != nil {
781+
klog.Errorf("Error while running findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
782+
return err
783+
}
784+
785+
err = json.Unmarshal(output, &v)
786+
if err != nil {
787+
klog.Errorf("Unmarshal error while running findAndModify to lock configServer : %s \n", err.Error())
749788
return err
750789
}
751790
val, ok := v["counter"].(float64)
@@ -801,14 +840,23 @@ func lockSecondaryMember(mongohost string) error {
801840
"--quiet",
802841
"--eval", "JSON.stringify(db.fsyncLock())",
803842
}, mongoCreds...)
804-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
843+
844+
output, err := sh.Command(MongoCMD, args...).Output()
845+
if err != nil {
846+
klog.Errorf("Error while running fsyncLock on secondary : %s ; output : %s \n", err.Error(), output)
847+
return err
848+
}
849+
850+
err = json.Unmarshal(output, &v)
851+
if err != nil {
852+
klog.Errorf("Unmarshal error while running fsyncLock on secondary : %s \n", err.Error())
805853
return err
806854
}
807855

808856
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
809857
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
810858
}
811-
859+
klog.Infof("secondary %s locked.", mongohost)
812860
return nil
813861
}
814862

@@ -827,14 +875,22 @@ func unlockSecondaryMember(mongohost string) error {
827875
"--quiet",
828876
"--eval", "JSON.stringify(db.fsyncUnlock())",
829877
}, mongoCreds...)
830-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
878+
879+
output, err := sh.Command(MongoCMD, args...).Output()
880+
if err != nil {
881+
klog.Errorf("Error while running fsyncUnlock on secondary : %s ; output : %s \n", err.Error(), output)
882+
return err
883+
}
884+
err = json.Unmarshal(output, &v)
885+
if err != nil {
886+
klog.Errorf("Unmarshal error while running fsyncUnlock on secondary : %s \n", err.Error())
831887
return err
832888
}
833889

834890
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
835891
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
836892
}
837-
893+
klog.Infof("secondary %s unlocked.", mongohost)
838894
return nil
839895
}
840896

0 commit comments

Comments
 (0)