Skip to content

Commit b20054b

Browse files
Unlock and sync secondary (#1962)
Signed-off-by: sayedppqq <sayed@appscode.com>
1 parent f935717 commit b20054b

File tree

3 files changed

+321
-149
lines changed

3 files changed

+321
-149
lines changed

pkg/backup.go

Lines changed: 36 additions & 149 deletions
Original file line number | Diff line number | Diff line change
@@ -495,7 +495,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
495495
}
496496

497497
// sharded cluster. so disable the balancer first. then perform the 'usual' tasks.
498-
primary, secondary, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
498+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
499499
if err != nil {
500500
return nil, err
501501
}
@@ -517,7 +517,19 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
517517
backupHost = secondary
518518
}
519519

520-
err = lockConfigServer(parameters.ConfigServer, secondary)
520+
// Check if secondary is already locked before locking it.
521+
// If yes, unlock it and sync with primary
522+
for _, secondary := range secondaryMembers {
523+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
524+
return nil, err
525+
}
526+
}
527+
528+
if err := setupConfigServer(parameters.ConfigServer, secondary); err != nil {
529+
return nil, err
530+
}
531+
532+
err = lockSecondaryMember(secondary)
521533

522534
cleanupFuncs = append(cleanupFuncs, func() error {
523535
// even if error occurs, try to unlock the server
@@ -543,7 +555,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
543555
}
544556

545557
// do the task
546-
primary, secondary, err := getPrimaryNSecondaryMember(host)
558+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(host)
547559
if err != nil {
548560
klog.Errorf("error while getting primary and secondary member of %v. error: %v", host, err)
549561
return nil, err
@@ -556,6 +568,14 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
556568
backupHost = secondary
557569
}
558570

571+
// Check if secondary is already locked before locking it.
572+
// If yes, unlock it and sync with primary
573+
for _, secondary := range secondaryMembers {
574+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
575+
return nil, err
576+
}
577+
}
578+
559579
err = lockSecondaryMember(secondary)
560580
cleanupFuncs = append(cleanupFuncs, func() error {
561581
// even if error occurs, try to unlock the server
@@ -633,11 +653,10 @@ func getSSLUser(path string) (string, error) {
633653
return strings.TrimSpace(user), nil
634654
}
635655

636-
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err error) {
656+
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, secondaryMembers []string, err error) {
637657
klog.Infoln("finding primary and secondary instances of", mongoDSN)
638658
v := make(map[string]interface{})
639659

640-
// stop balancer
641660
args := append([]interface{}{
642661
"config",
643662
"--host", mongoDSN,
@@ -646,33 +665,35 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err
646665
}, mongoCreds...)
647666
// Even with --quiet the shell still prints the replica set PrimaryConnection log, so keep only the last line. Issue tracker: https://jira.mongodb.org/browse/SERVER-27159
648667
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
649-
return "", "", err
668+
return "", "", secondaryMembers, err
650669
}
651670

652671
primary, ok := v["primary"].(string)
653672
if !ok || primary == "" {
654-
return "", "", fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
673+
return "", "", secondaryMembers, fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
655674
}
656675

657676
hosts, ok := v["hosts"].([]interface{})
658677
if !ok {
659-
return "", "", fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
678+
return "", "", secondaryMembers, fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
660679
}
661680

662681
for _, host := range hosts {
663-
secHost, ok := host.(string)
664-
if !ok || secHost == "" {
682+
curHost, ok := host.(string)
683+
684+
if !ok || curHost == "" {
665685
err = fmt.Errorf("unable to get secondary instance using rs.isMaster(). got response: %v", v)
666686
continue
667687
}
668-
669-
if secHost != primary {
670-
klog.Infof("Primary %s & Secondary %s found for mongoDSN %s \n", primary, secHost, mongoDSN)
671-
return primary, secHost, nil
688+
if curHost != primary {
689+
secondaryMembers = append(secondaryMembers, curHost)
672690
}
673691
}
692+
if len(secondaryMembers) > 0 {
693+
return primary, secondaryMembers[0], secondaryMembers, err
694+
}
674695

675-
return primary, "", err
696+
return primary, "", secondaryMembers, err
676697
}
677698

678699
// run from mongos instance
@@ -760,140 +781,6 @@ func enableBalancer(mongosHost string) error {
760781
return nil
761782
}
762783

763-
func lockConfigServer(configSVRDSN, secondaryHost string) error {
764-
klog.Infoln("Attempting to lock configserver", configSVRDSN)
765-
766-
if secondaryHost == "" {
767-
klog.Warningln("locking configserver is skipped. secondary host is empty")
768-
return nil
769-
}
770-
v := make(map[string]interface{})
771-
// findAndModify BackupControlDocument. skip single quote inside single quote: https://stackoverflow.com/a/28786747/4628962
772-
args := append([]interface{}{
773-
"config",
774-
"--host", configSVRDSN,
775-
"--quiet",
776-
"--eval", "JSON.stringify(db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }}));",
777-
}, mongoCreds...)
778-
779-
output, err := sh.Command(MongoCMD, args...).Output()
780-
if err != nil {
781-
klog.Errorf("Error while running findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
782-
return err
783-
}
784-
785-
err = json.Unmarshal(output, &v)
786-
if err != nil {
787-
klog.Errorf("Unmarshal error while running findAndModify to lock configServer : %s \n", err.Error())
788-
return err
789-
}
790-
val, ok := v["counter"].(float64)
791-
if !ok || int(val) == 0 {
792-
return fmt.Errorf("unable to modify BackupControlDocument. got response: %v", v)
793-
}
794-
val2 := float64(0)
795-
timer := 0 // wait approximately 5 minutes.
796-
v2 := make([]map[string]interface{}, 0)
797-
for timer < 60 && (int(val2) == 0 || int(val) != int(val2)) {
798-
timer++
799-
// find backupDocument from secondary configServer
800-
args = append([]interface{}{
801-
"config",
802-
"--host", secondaryHost,
803-
"--quiet",
804-
"--eval", "rs.secondaryOk(); JSON.stringify(db.BackupControl.find({ '_id' : 'BackupControlDocument' }).readConcern('majority').toArray());",
805-
}, mongoCreds...)
806-
807-
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v2); err != nil {
808-
return err
809-
}
810-
if len(v2) > 0 {
811-
val2, ok = v2[0]["counter"].(float64)
812-
if !ok {
813-
return fmt.Errorf("unable to get BackupControlDocument. got response: %v", v)
814-
}
815-
}
816-
if int(val) != int(val2) {
817-
klog.V(5).Infof("BackupDocument counter in secondary is not same. Expected %v, but got %v. Full response: %v", val, val2, v)
818-
time.Sleep(time.Second * 5)
819-
}
820-
}
821-
if timer >= 60 {
822-
return fmt.Errorf("timeout while waiting for BackupDocument counter in secondary to be same as primary. Expected %v, but got %v. Full response: %v", val, val2, v)
823-
}
824-
// lock secondary
825-
return lockSecondaryMember(secondaryHost)
826-
}
827-
828-
func lockSecondaryMember(mongohost string) error {
829-
klog.Infoln("Attempting to lock secondary member", mongohost)
830-
if mongohost == "" {
831-
klog.Warningln("locking secondary member is skipped. secondary host is empty")
832-
return nil
833-
}
834-
v := make(map[string]interface{})
835-
836-
// lock file
837-
args := append([]interface{}{
838-
"config",
839-
"--host", mongohost,
840-
"--quiet",
841-
"--eval", "JSON.stringify(db.fsyncLock())",
842-
}, mongoCreds...)
843-
844-
output, err := sh.Command(MongoCMD, args...).Output()
845-
if err != nil {
846-
klog.Errorf("Error while running fsyncLock on secondary : %s ; output : %s \n", err.Error(), output)
847-
return err
848-
}
849-
850-
err = json.Unmarshal(output, &v)
851-
if err != nil {
852-
klog.Errorf("Unmarshal error while running fsyncLock on secondary : %s \n", err.Error())
853-
return err
854-
}
855-
856-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
857-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
858-
}
859-
klog.Infof("secondary %s locked.", mongohost)
860-
return nil
861-
}
862-
863-
func unlockSecondaryMember(mongohost string) error {
864-
klog.Infoln("Attempting to unlock secondary member", mongohost)
865-
if mongohost == "" {
866-
klog.Warningln("skipped unlocking secondary member. secondary host is empty")
867-
return nil
868-
}
869-
v := make(map[string]interface{})
870-
871-
// unlock file
872-
args := append([]interface{}{
873-
"config",
874-
"--host", mongohost,
875-
"--quiet",
876-
"--eval", "JSON.stringify(db.fsyncUnlock())",
877-
}, mongoCreds...)
878-
879-
output, err := sh.Command(MongoCMD, args...).Output()
880-
if err != nil {
881-
klog.Errorf("Error while running fsyncUnlock on secondary : %s ; output : %s \n", err.Error(), output)
882-
return err
883-
}
884-
err = json.Unmarshal(output, &v)
885-
if err != nil {
886-
klog.Errorf("Unmarshal error while running fsyncUnlock on secondary : %s \n", err.Error())
887-
return err
888-
}
889-
890-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
891-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
892-
}
893-
klog.Infof("secondary %s unlocked.", mongohost)
894-
return nil
895-
}
896-
897784
func checkRoleExists(mongoDSN string) (bool, error) {
898785
v := make(map[string]interface{})
899786
args := append([]interface{}{

0 commit comments

Comments
 (0)