@@ -442,7 +442,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
442
442
443
443
if parameters .ConfigServer != "" {
444
444
// sharded cluster. so disable the balancer first. then perform the 'usual' tasks.
445
- primary , secondary , err := getPrimaryNSecondaryMember (parameters .ConfigServer )
445
+ primary , secondary , secondaryMembers , err := getPrimaryNSecondaryMember (parameters .ConfigServer )
446
446
if err != nil {
447
447
return nil , err
448
448
}
@@ -464,7 +464,19 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
464
464
backupHost = secondary
465
465
}
466
466
467
- err = lockConfigServer (parameters .ConfigServer , secondary )
467
+ // Check if secondary is already locked before locking it.
468
+ // If yes, unlock it and sync with primary
469
+ for _ , secondary := range secondaryMembers {
470
+ if err := checkIfSecondaryLockedAndSync (secondary ); err != nil {
471
+ return nil , err
472
+ }
473
+ }
474
+
475
+ if err := setupConfigServer (parameters .ConfigServer , secondary ); err != nil {
476
+ return nil , err
477
+ }
478
+
479
+ err = lockSecondaryMember (secondary )
468
480
469
481
cleanupFuncs = append (cleanupFuncs , func () error {
470
482
// even if error occurs, try to unlock the server
@@ -479,7 +491,7 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
479
491
480
492
for key , host := range parameters .ReplicaSets {
481
493
// do the task
482
- primary , secondary , err := getPrimaryNSecondaryMember (host )
494
+ primary , secondary , secondaryMembers , err := getPrimaryNSecondaryMember (host )
483
495
if err != nil {
484
496
klog .Errorf ("error while getting primary and secondary member of %v. error: %v" , host , err )
485
497
return nil , err
@@ -492,6 +504,14 @@ func (opt *mongoOptions) backupMongoDB(targetRef api_v1beta1.TargetRef) (*restic
492
504
backupHost = secondary
493
505
}
494
506
507
+ // Check if secondary is already locked before locking it.
508
+ // If yes, unlock it and sync with primary
509
+ for _ , secondary := range secondaryMembers {
510
+ if err := checkIfSecondaryLockedAndSync (secondary ); err != nil {
511
+ return nil , err
512
+ }
513
+ }
514
+
495
515
err = lockSecondaryMember (secondary )
496
516
cleanupFuncs = append (cleanupFuncs , func () error {
497
517
// even if error occurs, try to unlock the server
@@ -573,11 +593,10 @@ func getSSLUser(path string) (string, error) {
573
593
return strings .TrimSpace (user ), nil
574
594
}
575
595
576
- func getPrimaryNSecondaryMember (mongoDSN string ) (primary , secondary string , err error ) {
596
+ func getPrimaryNSecondaryMember (mongoDSN string ) (primary , secondary string , secondaryMembers [] string , err error ) {
577
597
klog .Infoln ("finding primary and secondary instances of" , mongoDSN )
578
598
v := make (map [string ]interface {})
579
599
580
- // stop balancer
581
600
args := append ([]interface {}{
582
601
"config" ,
583
602
"--host" , mongoDSN ,
@@ -586,33 +605,35 @@ func getPrimaryNSecondaryMember(mongoDSN string) (primary, secondary string, err
586
605
}, mongoCreds ... )
587
606
// even --quiet doesn't skip replicaset PrimaryConnection log. so take the last line. issue tracker: https://jira.mongodb.org/browse/SERVER-27159
588
607
if err := sh .Command (MongoCMD , args ... ).Command ("/usr/bin/tail" , "-1" ).UnmarshalJSON (& v ); err != nil {
589
- return "" , "" , err
608
+ return "" , "" , secondaryMembers , err
590
609
}
591
610
592
611
primary , ok := v ["primary" ].(string )
593
612
if ! ok || primary == "" {
594
- return "" , "" , fmt .Errorf ("unable to get primary instance using rs.isMaster(). got response: %v" , v )
613
+ return "" , "" , secondaryMembers , fmt .Errorf ("unable to get primary instance using rs.isMaster(). got response: %v" , v )
595
614
}
596
615
597
616
hosts , ok := v ["hosts" ].([]interface {})
598
617
if ! ok {
599
- return "" , "" , fmt .Errorf ("unable to get hosts using rs.isMaster(). got response: %v" , v )
618
+ return "" , "" , secondaryMembers , fmt .Errorf ("unable to get hosts using rs.isMaster(). got response: %v" , v )
600
619
}
601
620
602
621
for _ , host := range hosts {
603
- secHost , ok := host .(string )
604
- if ! ok || secHost == "" {
622
+ curHost , ok := host .(string )
623
+
624
+ if ! ok || curHost == "" {
605
625
err = fmt .Errorf ("unable to get secondary instance using rs.isMaster(). got response: %v" , v )
606
626
continue
607
627
}
608
-
609
- if secHost != primary {
610
- klog .Infof ("Primary %s & Secondary %s found for mongoDSN %s \n " , primary , secHost , mongoDSN )
611
- return primary , secHost , nil
628
+ if curHost != primary {
629
+ secondaryMembers = append (secondaryMembers , curHost )
612
630
}
613
631
}
632
+ if len (secondaryMembers ) > 0 {
633
+ return primary , secondaryMembers [0 ], secondaryMembers , err
634
+ }
614
635
615
- return primary , "" , err
636
+ return primary , "" , secondaryMembers , err
616
637
}
617
638
618
639
// run from mongos instance
@@ -699,141 +720,3 @@ func enableBalancer(mongosHost string) error {
699
720
klog .Info ("Balancer successfully re-enabled." )
700
721
return nil
701
722
}
702
-
703
- func lockConfigServer (configSVRDSN , secondaryHost string ) error {
704
- klog .Infoln ("Attempting to lock configserver" , configSVRDSN )
705
-
706
- if secondaryHost == "" {
707
- klog .Warningln ("locking configserver is skipped. secondary host is empty" )
708
- return nil
709
- }
710
- v := make (map [string ]interface {})
711
- // findAndModify BackupControlDocument. skip single quote inside single quote: https://stackoverflow.com/a/28786747/4628962
712
- args := append ([]interface {}{
713
- "config" ,
714
- "--host" , configSVRDSN ,
715
- "--quiet" ,
716
- "--eval" , "db.BackupControl.findAndModify({query: { _id: 'BackupControlDocument' }, update: { $inc: { counter : 1 } }, new: true, upsert: true, writeConcern: { w: 'majority', wtimeout: 15000 }});" ,
717
- }, mongoCreds ... )
718
-
719
- output , err := sh .Command (MongoCMD , args ... ).Output ()
720
- if err != nil {
721
- klog .Errorf ("Error while running findAndModify to lock configServer : %s ; output : %s \n " , err .Error (), output )
722
- return err
723
- }
724
- s := fmt .Sprintf (`/bin/echo '%s' | /usr/bin/tail -1` , strings .TrimSuffix (string (output ), "\n " ))
725
- output , err = sh .Command ("/bin/sh" , "-c" , s ).Output ()
726
- if err != nil {
727
- klog .Errorf ("Error while running tail in findAndModify to lock configServer : %s ; output : %s \n " , err .Error (), output )
728
- return err
729
- }
730
-
731
- err = json .Unmarshal (output , & v )
732
- if err != nil {
733
- klog .Errorf ("Unmarshal error while running findAndModify to lock configServer : %s \n " , err .Error ())
734
- return err
735
- }
736
- val , ok := v ["counter" ].(float64 )
737
- if ! ok || int (val ) == 0 {
738
- return fmt .Errorf ("unable to modify BackupControlDocument. got response: %v" , v )
739
- }
740
- val2 := float64 (0 )
741
- timer := 0 // wait approximately 5 minutes.
742
- for timer < 60 && (int (val2 ) == 0 || int (val ) != int (val2 )) {
743
- timer ++
744
- // find backupDocument from secondary configServer
745
- args = append ([]interface {}{
746
- "config" ,
747
- "--host" , secondaryHost ,
748
- "--quiet" ,
749
- "--eval" , "rs.slaveOk(); db.BackupControl.find({ '_id' : 'BackupControlDocument' }).readConcern('majority');" ,
750
- }, mongoCreds ... )
751
-
752
- if err := sh .Command (MongoCMD , args ... ).UnmarshalJSON (& v ); err != nil {
753
- return err
754
- }
755
-
756
- val2 , ok = v ["counter" ].(float64 )
757
- if ! ok {
758
- return fmt .Errorf ("unable to get BackupControlDocument. got response: %v" , v )
759
- }
760
- if int (val ) != int (val2 ) {
761
- klog .V (5 ).Infof ("BackupDocument counter in secondary is not same. Expected %v, but got %v. Full response: %v" , val , val2 , v )
762
- time .Sleep (time .Second * 5 )
763
- }
764
- }
765
- if timer >= 60 {
766
- return fmt .Errorf ("timeout while waiting for BackupDocument counter in secondary to be same as primary. Expected %v, but got %v. Full response: %v" , val , val2 , v )
767
- }
768
- // lock secondary
769
- return lockSecondaryMember (secondaryHost )
770
- }
771
-
772
- func lockSecondaryMember (mongohost string ) error {
773
- klog .Infoln ("Attempting to lock secondary member" , mongohost )
774
- if mongohost == "" {
775
- klog .Warningln ("locking secondary member is skipped. secondary host is empty" )
776
- return nil
777
- }
778
- v := make (map [string ]interface {})
779
-
780
- // lock file
781
- args := append ([]interface {}{
782
- "config" ,
783
- "--host" , mongohost ,
784
- "--quiet" ,
785
- "--eval" , "JSON.stringify(db.fsyncLock())" ,
786
- }, mongoCreds ... )
787
-
788
- output , err := sh .Command (MongoCMD , args ... ).Output ()
789
- if err != nil {
790
- klog .Errorf ("Error while running fsyncLock on secondary : %s ; output : %s \n " , err .Error (), output )
791
- return err
792
- }
793
-
794
- err = json .Unmarshal (output , & v )
795
- if err != nil {
796
- klog .Errorf ("Unmarshal error while running fsyncLock on secondary : %s \n " , err .Error ())
797
- return err
798
- }
799
-
800
- if val , ok := v ["ok" ].(float64 ); ! ok || int (val ) != 1 {
801
- return fmt .Errorf ("unable to lock the secondary host. got response: %v" , v )
802
- }
803
- klog .Infof ("secondary %s locked." , mongohost )
804
- return nil
805
- }
806
-
807
- func unlockSecondaryMember (mongohost string ) error {
808
- klog .Infoln ("Attempting to unlock secondary member" , mongohost )
809
- if mongohost == "" {
810
- klog .Warningln ("skipped unlocking secondary member. secondary host is empty" )
811
- return nil
812
- }
813
- v := make (map [string ]interface {})
814
-
815
- // unlock file
816
- args := append ([]interface {}{
817
- "config" ,
818
- "--host" , mongohost ,
819
- "--quiet" ,
820
- "--eval" , "JSON.stringify(db.fsyncUnlock())" ,
821
- }, mongoCreds ... )
822
-
823
- output , err := sh .Command (MongoCMD , args ... ).Output ()
824
- if err != nil {
825
- klog .Errorf ("Error while running fsyncUnlock on secondary : %s ; output : %s \n " , err .Error (), output )
826
- return err
827
- }
828
- err = json .Unmarshal (output , & v )
829
- if err != nil {
830
- klog .Errorf ("Unmarshal error while running fsyncUnlock on secondary : %s \n " , err .Error ())
831
- return err
832
- }
833
-
834
- if val , ok := v ["ok" ].(float64 ); ! ok || int (val ) != 1 {
835
- return fmt .Errorf ("unable to lock the secondary host. got response: %v" , v )
836
- }
837
- klog .Infof ("secondary %s unlocked." , mongohost )
838
- return nil
839
- }
0 commit comments