Skip to content

Commit 7f588f2

Browse files
authored
Unlock and sync secondary (#1972)
Signed-off-by: sayedppqq <sayed@appscode.com>
1 parent 704053f commit 7f588f2

File tree

3 files changed

+328
-153
lines changed

3 files changed

+328
-153
lines changed

pkg/backup.go

Lines changed: 36 additions & 153 deletions
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
442442

443443
if parameters.ConfigServer != "" {
444444
// sharded cluster. so disable the balancer first. then perform the 'usual' tasks.
445-
primary, secondary, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
445+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(parameters.ConfigServer)
446446
if err != nil {
447447
return nil, err
448448
}
@@ -464,7 +464,19 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
464464
backupHost = secondary
465465
}
466466

467-
err = lockConfigServer(parameters.ConfigServer, secondary)
467+
// Check if secondary is already locked before locking it.
468+
// If yes, unlock it and sync with primary
469+
for _, secondary := range secondaryMembers {
470+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
471+
return nil, err
472+
}
473+
}
474+
475+
if err := setupConfigServer(parameters.ConfigServer, secondary); err != nil {
476+
return nil, err
477+
}
478+
479+
err = lockSecondaryMember(secondary)
468480

469481
cleanupFuncs = append(cleanupFuncs, func() error {
470482
// even if error occurs, try to unlock the server
@@ -479,7 +491,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
479491

480492
for key, host := range parameters.ReplicaSets {
481493
// do the task
482-
primary, secondary, err := getPrimaryNSecondaryMember(host)
494+
primary, secondary, secondaryMembers, err := getPrimaryNSecondaryMember(host)
483495
if err != nil {
484496
klog.Errorf("error while getting primary and secondary member of %v. error: %v", host, err)
485497
return nil, err
@@ -492,6 +504,14 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
492504
backupHost = secondary
493505
}
494506

507+
// Check if secondary is already locked before locking it.
508+
// If yes, unlock it and sync with primary
509+
for _, secondary := range secondaryMembers {
510+
if err := checkIfSecondaryLockedAndSync(secondary); err != nil {
511+
return nil, err
512+
}
513+
}
514+
495515
err = lockSecondaryMember(secondary)
496516
cleanupFuncs = append(cleanupFuncs, func() error {
497517
// even if error occurs, try to unlock the server
@@ -573,11 +593,10 @@ func getSSLUser(path string) (string, error) {
573593
return strings.TrimSpace(user), nil
574594
}
575595

576-
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err error) {
596+
func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, secondaryMembers []string, err error) {
577597
klog.Infoln("finding primary and secondary instances of", mongoDSN)
578598
v := make(map[string]interface{})
579599

580-
// stop balancer
581600
args := append([]interface{}{
582601
"config",
583602
"--host", mongoDSN,
@@ -586,33 +605,35 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err
586605
}, mongoCreds...)
587606
// even --quiet doesn't skip replicaset PrimaryConnection log. so take the last line. issue tracker: https://jira.mongodb.org/browse/SERVER-27159
588607
if err := sh.Command(MongoCMD, args...).Command("/usr/bin/tail", "-1").UnmarshalJSON(&v); err != nil {
589-
return "", "", err
608+
return "", "", secondaryMembers, err
590609
}
591610

592611
primary, ok := v["primary"].(string)
593612
if !ok || primary == "" {
594-
return "", "", fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
613+
return "", "", secondaryMembers, fmt.Errorf("unable to get primary instance using rs.isMaster(). got response: %v", v)
595614
}
596615

597616
hosts, ok := v["hosts"].([]interface{})
598617
if !ok {
599-
return "", "", fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
618+
return "", "", secondaryMembers, fmt.Errorf("unable to get hosts using rs.isMaster(). got response: %v", v)
600619
}
601620

602621
for _, host := range hosts {
603-
secHost, ok := host.(string)
604-
if !ok || secHost == "" {
622+
curHost, ok := host.(string)
623+
624+
if !ok || curHost == "" {
605625
err = fmt.Errorf("unable to get secondary instance using rs.isMaster(). got response: %v", v)
606626
continue
607627
}
608-
609-
if secHost != primary {
610-
klog.Infof("Primary %s & Secondary %s found for mongoDSN %s \n", primary, secHost, mongoDSN)
611-
return primary, secHost, nil
628+
if curHost != primary {
629+
secondaryMembers = append(secondaryMembers, curHost)
612630
}
613631
}
632+
if len(secondaryMembers) > 0 {
633+
return primary, secondaryMembers[0], secondaryMembers, err
634+
}
614635

615-
return primary, "", err
636+
return primary, "", secondaryMembers, err
616637
}
617638

618639
// run from mongos instance
@@ -699,141 +720,3 @@ func enableBalancer(mongosHost string) error {
699720
klog.Info("Balancer successfully re-enabled.")
700721
return nil
701722
}
702-
703-
func lockConfigServer(configSVRDSN, secondaryHost string) error {
704-
klog.Infoln("Attempting to lock configserver", configSVRDSN)
705-
706-
if secondaryHost == "" {
707-
klog.Warningln("locking configserver is skipped. secondary host is empty")
708-
return nil
709-
}
710-
v := make(map[string]interface{})
711-
// findAndModify BackupControlDocument. skip single quote inside single quote: https://stackoverflow.com/a/28786747/4628962
712-
args := append([]interface{}{
713-
"config",
714-
"--host", configSVRDSN,
715-
"--quiet",
716-
"--eval", "db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }});",
717-
}, mongoCreds...)
718-
719-
output, err := sh.Command(MongoCMD, args...).Output()
720-
if err != nil {
721-
klog.Errorf("Error while running findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
722-
return err
723-
}
724-
s := fmt.Sprintf(`/bin/echo '%s' | /usr/bin/tail -1`, strings.TrimSuffix(string(output), "\n"))
725-
output, err = sh.Command("/bin/sh", "-c", s).Output()
726-
if err != nil {
727-
klog.Errorf("Error while running tail in findAndModify to lock configServer : %s ; output : %s \n", err.Error(), output)
728-
return err
729-
}
730-
731-
err = json.Unmarshal(output, &v)
732-
if err != nil {
733-
klog.Errorf("Unmarshal error while running findAndModify to lock configServer : %s \n", err.Error())
734-
return err
735-
}
736-
val, ok := v["counter"].(float64)
737-
if !ok || int(val) == 0 {
738-
return fmt.Errorf("unable to modify BackupControlDocument. got response: %v", v)
739-
}
740-
val2 := float64(0)
741-
timer := 0 // wait approximately 5 minutes.
742-
for timer < 60 && (int(val2) == 0 || int(val) != int(val2)) {
743-
timer++
744-
// find backupDocument from secondary configServer
745-
args = append([]interface{}{
746-
"config",
747-
"--host", secondaryHost,
748-
"--quiet",
749-
"--eval", "rs.slaveOk(); db.BackupControl.find({ '_id' : 'BackupControlDocument' }).readConcern('majority');",
750-
}, mongoCreds...)
751-
752-
if err := sh.Command(MongoCMD, args...).UnmarshalJSON(&v); err != nil {
753-
return err
754-
}
755-
756-
val2, ok = v["counter"].(float64)
757-
if !ok {
758-
return fmt.Errorf("unable to get BackupControlDocument. got response: %v", v)
759-
}
760-
if int(val) != int(val2) {
761-
klog.V(5).Infof("BackupDocument counter in secondary is not same. Expected %v, but got %v. Full response: %v", val, val2, v)
762-
time.Sleep(time.Second * 5)
763-
}
764-
}
765-
if timer >= 60 {
766-
return fmt.Errorf("timeout while waiting for BackupDocument counter in secondary to be same as primary. Expected %v, but got %v. Full response: %v", val, val2, v)
767-
}
768-
// lock secondary
769-
return lockSecondaryMember(secondaryHost)
770-
}
771-
772-
func lockSecondaryMember(mongohost string) error {
773-
klog.Infoln("Attempting to lock secondary member", mongohost)
774-
if mongohost == "" {
775-
klog.Warningln("locking secondary member is skipped. secondary host is empty")
776-
return nil
777-
}
778-
v := make(map[string]interface{})
779-
780-
// lock file
781-
args := append([]interface{}{
782-
"config",
783-
"--host", mongohost,
784-
"--quiet",
785-
"--eval", "JSON.stringify(db.fsyncLock())",
786-
}, mongoCreds...)
787-
788-
output, err := sh.Command(MongoCMD, args...).Output()
789-
if err != nil {
790-
klog.Errorf("Error while running fsyncLock on secondary : %s ; output : %s \n", err.Error(), output)
791-
return err
792-
}
793-
794-
err = json.Unmarshal(output, &v)
795-
if err != nil {
796-
klog.Errorf("Unmarshal error while running fsyncLock on secondary : %s \n", err.Error())
797-
return err
798-
}
799-
800-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
801-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
802-
}
803-
klog.Infof("secondary %s locked.", mongohost)
804-
return nil
805-
}
806-
807-
func unlockSecondaryMember(mongohost string) error {
808-
klog.Infoln("Attempting to unlock secondary member", mongohost)
809-
if mongohost == "" {
810-
klog.Warningln("skipped unlocking secondary member. secondary host is empty")
811-
return nil
812-
}
813-
v := make(map[string]interface{})
814-
815-
// unlock file
816-
args := append([]interface{}{
817-
"config",
818-
"--host", mongohost,
819-
"--quiet",
820-
"--eval", "JSON.stringify(db.fsyncUnlock())",
821-
}, mongoCreds...)
822-
823-
output, err := sh.Command(MongoCMD, args...).Output()
824-
if err != nil {
825-
klog.Errorf("Error while running fsyncUnlock on secondary : %s ; output : %s \n", err.Error(), output)
826-
return err
827-
}
828-
err = json.Unmarshal(output, &v)
829-
if err != nil {
830-
klog.Errorf("Unmarshal error while running fsyncUnlock on secondary : %s \n", err.Error())
831-
return err
832-
}
833-
834-
if val, ok := v["ok"].(float64); !ok || int(val) != 1 {
835-
return fmt.Errorf("unable to lock the secondary host. got response: %v", v)
836-
}
837-
klog.Infof("secondary %s unlocked.", mongohost)
838-
return nil
839-
}

0 commit comments

Comments
 (0)