Skip to content

Commit 95f520d

Browse files
Unlock and sync secondary (#1963)
Signed-off-by: sayedppqq <sayed@appscode.com>
1 parent ff442bc commit 95f520d

File tree

3 files changed

+321
-147
lines changed

3 files changed

+321
-147
lines changed

pkg/backup.go

Lines changed: 36 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
494494
}
495495

496496
// sharded cluster. so disable the balancer first. then perform the 'usual' tasks.
497-
primary, secondary, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
497+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
498498
if err != nil {
499499
return nil, err
500500
}
@@ -516,7 +516,19 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
516516
backupHost = secondary
517517
}
518518

519-
err = lockConfigServer(parameters.ConfigServer, secondary)
519+
// Check if secondary is already locked before locking it.
520+
// If yes, unlock it and sync with primary
521+
for _, secondary := range secondaryMembers {
522+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
523+
return nil, err
524+
}
525+
}
526+
527+
if err := setupConfigServer(parameters.ConfigServer, secondary); err != nil {
528+
return nil, err
529+
}
530+
531+
err = lockSecondaryMember(secondary)
520532

521533
cleanupFuncs = append(cleanupFuncs, func() error {
522534
// even if error occurs, try to unlock the server
@@ -541,7 +553,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
541553
}
542554
}
543555
// do the task
544-
primary, secondary, err := getPrimaryNSecondaryMember(host)
556+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(host)
545557
if err != nil {
546558
klog.Errorf("error while getting primary and secondary member of %v. error: %v", host, err)
547559
return nil, err
@@ -554,6 +566,14 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
554566
backupHost = secondary
555567
}
556568

569+
// Check if secondary is already locked before locking it.
570+
// If yes, unlock it and sync with primary
571+
for _, secondary := range secondaryMembers {
572+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
573+
return nil, err
574+
}
575+
}
576+
557577
err = lockSecondaryMember(secondary)
558578
cleanupFuncs = append(cleanupFuncs, func() error {
559579
// even if error occurs, try to unlock the server
@@ -635,11 +655,10 @@ func getSSLUser(path string) (string, error) {
635655
return strings.TrimSpace(user), nil
636656
}
637657

638-
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err error) {
658+
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, secondaryMembers []string, err error) {
639659
klog.Infoln("finding primary and secondary instances of", mongoDSN)
640660
v := make(map[string]interface{})
641661

642-
// stop balancer
643662
args := append([]interface{}{
644663
"config",
645664
"--host", mongoDSN,
@@ -648,33 +667,35 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err
648667
}, mongoCreds...)
649668
// even --quiet doesn't skip replicaset PrimaryConnection log. so take the last line. issue tracker: https://jira.mongodb.org/browse/SERVER-27159
650669
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
651-
return "", "", err
670+
return "", "", secondaryMembers, err
652671
}
653672

654673
primary, ok := v["primary"].(string)
655674
if !ok || primary == "" {
656-
return "", "", fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
675+
return "", "", secondaryMembers, fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
657676
}
658677

659678
hosts, ok := v["hosts"].([]interface{})
660679
if !ok {
661-
return "", "", fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
680+
return "", "", secondaryMembers, fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
662681
}
663682

664683
for _, host := range hosts {
665-
secHost, ok := host.(string)
666-
if !ok || secHost == "" {
684+
curHost, ok := host.(string)
685+
686+
if !ok || curHost == "" {
667687
err = fmt.Errorf("unable to get secondary instance using rs.isMaster(). got response: %v", v)
668688
continue
669689
}
670-
671-
if secHost != primary {
672-
klog.Infof("Primary %s & Secondary %s found for mongoDSN %s \n", primary, secHost, mongoDSN)
673-
return primary, secHost, nil
690+
if curHost != primary {
691+
secondaryMembers = append(secondaryMembers, curHost)
674692
}
675693
}
694+
if len(secondaryMembers) > 0 {
695+
return primary, secondaryMembers[0], secondaryMembers, err
696+
}
676697

677-
return primary, "", err
698+
return primary, "", secondaryMembers, err
678699
}
679700

680701
// run from mongos instance
@@ -762,138 +783,6 @@ func enableBalancer(mongosHost string) error {
762783
return nil
763784
}
764785

765-
func lockConfigServer(configSVRDSN, secondaryHost string) error {
766-
klog.Infoln("Attempting to lock configserver", configSVRDSN)
767-
768-
if secondaryHost == "" {
769-
klog.Warningln("locking configserver is skipped. secondary host is empty")
770-
return nil
771-
}
772-
v := make(map[string]interface{})
773-
// findAndModify BackupControlDocument. skip single quote inside single quote: https://stackoverflow.com/a/28786747/4628962
774-
args := append([]interface{}{
775-
"config",
776-
"--host", configSVRDSN,
777-
"--quiet",
778-
"--eval", "db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }});",
779-
}, mongoCreds...)
780-
781-
output, err := sh.Command(MongoCMD, args...).Output()
782-
if err != nil {
783-
klog.Errorf("Error while running findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
784-
return err
785-
}
786-
787-
err = json.Unmarshal(output, &v)
788-
if err != nil {
789-
klog.Errorf("Unmarshal error while running findAndModify to lock configServer : %s \n", err.Error())
790-
return err
791-
}
792-
val, ok := v["counter"].(float64)
793-
if !ok || int(val) == 0 {
794-
return fmt.Errorf("unable to modify BackupControlDocument. got response: %v", v)
795-
}
796-
val2 := float64(0)
797-
timer := 0 // wait approximately 5 minutes.
798-
for timer < 60 && (int(val2) == 0 || int(val) != int(val2)) {
799-
timer++
800-
// find backupDocument from secondary configServer
801-
args = append([]interface{}{
802-
"config",
803-
"--host", secondaryHost,
804-
"--quiet",
805-
"--eval", "rs.secondaryOk(); db.BackupControl.find({ '_id' : 'BackupControlDocument' }).readConcern('majority');",
806-
}, mongoCreds...)
807-
808-
if err := sh.Command(MongoCMD, args...).UnmarshalJSON(&v); err != nil {
809-
return err
810-
}
811-
812-
val2, ok = v["counter"].(float64)
813-
if !ok {
814-
return fmt.Errorf("unable to get BackupControlDocument. got response: %v", v)
815-
}
816-
if int(val) != int(val2) {
817-
klog.V(5).Infof("BackupDocument counter in secondary is not same. Expected %v, but got %v. Full response: %v", val, val2, v)
818-
time.Sleep(time.Second * 5)
819-
}
820-
}
821-
if timer >= 60 {
822-
return fmt.Errorf("timeout while waiting for BackupDocument counter in secondary to be same as primary. Expected %v, but got %v. Full response: %v", val, val2, v)
823-
}
824-
// lock secondary
825-
return lockSecondaryMember(secondaryHost)
826-
}
827-
828-
func lockSecondaryMember(mongohost string) error {
829-
klog.Infoln("Attempting to lock secondary member", mongohost)
830-
if mongohost == "" {
831-
klog.Warningln("locking secondary member is skipped. secondary host is empty")
832-
return nil
833-
}
834-
v := make(map[string]interface{})
835-
836-
// lock file
837-
args := append([]interface{}{
838-
"config",
839-
"--host", mongohost,
840-
"--quiet",
841-
"--eval", "JSON.stringify(db.fsyncLock())",
842-
}, mongoCreds...)
843-
844-
output, err := sh.Command(MongoCMD, args...).Output()
845-
if err != nil {
846-
klog.Errorf("Error while running fsyncLock on secondary : %s ; output : %s \n", err.Error(), output)
847-
return err
848-
}
849-
850-
err = json.Unmarshal(output, &v)
851-
if err != nil {
852-
klog.Errorf("Unmarshal error while running fsyncLock on secondary : %s \n", err.Error())
853-
return err
854-
}
855-
856-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
857-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
858-
}
859-
klog.Infof("secondary %s locked.", mongohost)
860-
return nil
861-
}
862-
863-
func unlockSecondaryMember(mongohost string) error {
864-
klog.Infoln("Attempting to unlock secondary member", mongohost)
865-
if mongohost == "" {
866-
klog.Warningln("skipped unlocking secondary member. secondary host is empty")
867-
return nil
868-
}
869-
v := make(map[string]interface{})
870-
871-
// unlock file
872-
args := append([]interface{}{
873-
"config",
874-
"--host", mongohost,
875-
"--quiet",
876-
"--eval", "JSON.stringify(db.fsyncUnlock())",
877-
}, mongoCreds...)
878-
879-
output, err := sh.Command(MongoCMD, args...).Output()
880-
if err != nil {
881-
klog.Errorf("Error while running fsyncUnlock on secondary : %s ; output : %s \n", err.Error(), output)
882-
return err
883-
}
884-
err = json.Unmarshal(output, &v)
885-
if err != nil {
886-
klog.Errorf("Unmarshal error while running fsyncUnlock on secondary : %s \n", err.Error())
887-
return err
888-
}
889-
890-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
891-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
892-
}
893-
klog.Infof("secondary %s unlocked.", mongohost)
894-
return nil
895-
}
896-
897786
func checkRoleExists(mongoDSN string) (bool, error) {
898787
v := make(map[string]interface{})
899788
args := append([]interface{}{

0 commit comments

Comments
 (0)