Skip to content

Commit 70f9d53

Browse files
Unlock and sync secondary (#1964)
Signed-off-by: sayedppqq <sayed@appscode.com>
1 parent be10f0b commit 70f9d53

File tree

3 files changed

+321
-147
lines changed

3 files changed

+321
-147
lines changed

pkg/backup.go

Lines changed: 36 additions & 147 deletions
Original file line numberDiff line numberDiff line change
@@ -494,7 +494,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
494494
}
495495

496496
// sharded cluster. so disable the balancer first. then perform the 'usual' tasks.
497-
primary, secondary, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
497+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
498498
if err != nil {
499499
return nil, err
500500
}
@@ -516,7 +516,19 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
516516
backupHost = secondary
517517
}
518518

519-
err = lockConfigServer(parameters.ConfigServer, secondary)
519+
// Check if secondary is already locked before locking it.
520+
// If yes, unlock it and sync with primary
521+
for _, secondary := range secondaryMembers {
522+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
523+
return nil, err
524+
}
525+
}
526+
527+
if err := setupConfigServer(parameters.ConfigServer, secondary); err != nil {
528+
return nil, err
529+
}
530+
531+
err = lockSecondaryMember(secondary)
520532

521533
cleanupFuncs = append(cleanupFuncs, func() error {
522534
// even if error occurs, try to unlock the server
@@ -548,7 +560,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
548560
}
549561
}
550562
// do the task
551-
primary, secondary, err := getPrimaryNSecondaryMember(host)
563+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(host)
552564
if err != nil {
553565
klog.Errorf("error while getting primary and secondary member of %v. error: %v", host, err)
554566
return nil, err
@@ -561,6 +573,14 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
561573
backupHost = secondary
562574
}
563575

576+
// Check if secondary is already locked before locking it.
577+
// If yes, unlock it and sync with primary
578+
for _, secondary := range secondaryMembers {
579+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
580+
return nil, err
581+
}
582+
}
583+
564584
err = lockSecondaryMember(secondary)
565585
cleanupFuncs = append(cleanupFuncs, func() error {
566586
// even if error occurs, try to unlock the server
@@ -648,11 +668,10 @@ func getSSLUser(path string) (string, error) {
648668
return strings.TrimSpace(user), nil
649669
}
650670

651-
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err error) {
671+
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, secondaryMembers []string, err error) {
652672
klog.Infoln("finding primary and secondary instances of", mongoDSN)
653673
v := make(map[string]interface{})
654674

655-
// stop balancer
656675
args := append([]interface{}{
657676
"config",
658677
"--host", mongoDSN,
@@ -661,33 +680,35 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err
661680
}, mongoCreds...)
662681
// even --quiet doesn't skip the replica set PrimaryConnection log, so take the last line. issue tracker: https://jira.mongodb.org/browse/SERVER-27159
663682
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
664-
return "", "", err
683+
return "", "", secondaryMembers, err
665684
}
666685

667686
primary, ok := v["primary"].(string)
668687
if !ok || primary == "" {
669-
return "", "", fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
688+
return "", "", secondaryMembers, fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
670689
}
671690

672691
hosts, ok := v["hosts"].([]interface{})
673692
if !ok {
674-
return "", "", fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
693+
return "", "", secondaryMembers, fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
675694
}
676695

677696
for _, host := range hosts {
678-
secHost, ok := host.(string)
679-
if !ok || secHost == "" {
697+
curHost, ok := host.(string)
698+
699+
if !ok || curHost == "" {
680700
err = fmt.Errorf("unable to get secondary instance using rs.isMaster(). got response: %v", v)
681701
continue
682702
}
683-
684-
if secHost != primary {
685-
klog.Infof("Primary %s & Secondary %s found for mongoDSN %s \n", primary, secHost, mongoDSN)
686-
return primary, secHost, nil
703+
if curHost != primary {
704+
secondaryMembers = append(secondaryMembers, curHost)
687705
}
688706
}
707+
if len(secondaryMembers) > 0 {
708+
return primary, secondaryMembers[0], secondaryMembers, err
709+
}
689710

690-
return primary, "", err
711+
return primary, "", secondaryMembers, err
691712
}
692713

693714
// run from mongos instance
@@ -775,138 +796,6 @@ func enableBalancer(mongosHost string) error {
775796
return nil
776797
}
777798

778-
func lockConfigServer(configSVRDSN, secondaryHost string) error {
779-
klog.Infoln("Attempting to lock configserver", configSVRDSN)
780-
781-
if secondaryHost == "" {
782-
klog.Warningln("locking configserver is skipped. secondary host is empty")
783-
return nil
784-
}
785-
v := make(map[string]interface{})
786-
// findAndModify BackupControlDocument. skip single quote inside single quote: https://stackoverflow.com/a/28786747/4628962
787-
args := append([]interface{}{
788-
"config",
789-
"--host", configSVRDSN,
790-
"--quiet",
791-
"--eval", "db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }});",
792-
}, mongoCreds...)
793-
794-
output, err := sh.Command(MongoCMD, args...).Output()
795-
if err != nil {
796-
klog.Errorf("Error while running findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
797-
return err
798-
}
799-
800-
err = json.Unmarshal(output, &v)
801-
if err != nil {
802-
klog.Errorf("Unmarshal error while running findAndModify to lock configServer : %s \n", err.Error())
803-
return err
804-
}
805-
val, ok := v["counter"].(float64)
806-
if !ok || int(val) == 0 {
807-
return fmt.Errorf("unable to modify BackupControlDocument. got response: %v", v)
808-
}
809-
val2 := float64(0)
810-
timer := 0 // wait approximately 5 minutes.
811-
for timer < 60 && (int(val2) == 0 || int(val) != int(val2)) {
812-
timer++
813-
// find backupDocument from secondary configServer
814-
args = append([]interface{}{
815-
"config",
816-
"--host", secondaryHost,
817-
"--quiet",
818-
"--eval", "rs.secondaryOk(); db.BackupControl.find({ '_id' : 'BackupControlDocument' }).readConcern('majority');",
819-
}, mongoCreds...)
820-
821-
if err := sh.Command(MongoCMD, args...).UnmarshalJSON(&v); err != nil {
822-
return err
823-
}
824-
825-
val2, ok = v["counter"].(float64)
826-
if !ok {
827-
return fmt.Errorf("unable to get BackupControlDocument. got response: %v", v)
828-
}
829-
if int(val) != int(val2) {
830-
klog.V(5).Infof("BackupDocument counter in secondary is not same. Expected %v, but got %v. Full response: %v", val, val2, v)
831-
time.Sleep(time.Second * 5)
832-
}
833-
}
834-
if timer >= 60 {
835-
return fmt.Errorf("timeout while waiting for BackupDocument counter in secondary to be same as primary. Expected %v, but got %v. Full response: %v", val, val2, v)
836-
}
837-
// lock secondary
838-
return lockSecondaryMember(secondaryHost)
839-
}
840-
841-
func lockSecondaryMember(mongohost string) error {
842-
klog.Infoln("Attempting to lock secondary member", mongohost)
843-
if mongohost == "" {
844-
klog.Warningln("locking secondary member is skipped. secondary host is empty")
845-
return nil
846-
}
847-
v := make(map[string]interface{})
848-
849-
// lock file
850-
args := append([]interface{}{
851-
"config",
852-
"--host", mongohost,
853-
"--quiet",
854-
"--eval", "JSON.stringify(db.fsyncLock())",
855-
}, mongoCreds...)
856-
857-
output, err := sh.Command(MongoCMD, args...).Output()
858-
if err != nil {
859-
klog.Errorf("Error while running fsyncLock on secondary : %s ; output : %s \n", err.Error(), output)
860-
return err
861-
}
862-
863-
err = json.Unmarshal(output, &v)
864-
if err != nil {
865-
klog.Errorf("Unmarshal error while running fsyncLock on secondary : %s \n", err.Error())
866-
return err
867-
}
868-
869-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
870-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
871-
}
872-
klog.Infof("secondary %s locked.", mongohost)
873-
return nil
874-
}
875-
876-
func unlockSecondaryMember(mongohost string) error {
877-
klog.Infoln("Attempting to unlock secondary member", mongohost)
878-
if mongohost == "" {
879-
klog.Warningln("skipped unlocking secondary member. secondary host is empty")
880-
return nil
881-
}
882-
v := make(map[string]interface{})
883-
884-
// unlock file
885-
args := append([]interface{}{
886-
"config",
887-
"--host", mongohost,
888-
"--quiet",
889-
"--eval", "JSON.stringify(db.fsyncUnlock())",
890-
}, mongoCreds...)
891-
892-
output, err := sh.Command(MongoCMD, args...).Output()
893-
if err != nil {
894-
klog.Errorf("Error while running fsyncUnlock on secondary : %s ; output : %s \n", err.Error(), output)
895-
return err
896-
}
897-
err = json.Unmarshal(output, &v)
898-
if err != nil {
899-
klog.Errorf("Unmarshal error while running fsyncUnlock on secondary : %s \n", err.Error())
900-
return err
901-
}
902-
903-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
904-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
905-
}
906-
klog.Infof("secondary %s unlocked.", mongohost)
907-
return nil
908-
}
909-
910799
func checkRoleExists(mongoDSN string, roleName string) (bool, error) {
911800
v := make(map[string]interface{})
912801
args := append([]interface{}{

0 commit comments

Comments
 (0)