Skip to content

Commit

Permalink
Merge pull request #29 from retroflexer/archive-data-dir-if-unstarted
Browse files Browse the repository at this point in the history
Bug 1806751: Archive data-dir if target member is unstarted
  • Loading branch information
openshift-merge-robot committed Feb 25, 2020
2 parents 5aff605 + b75ec24 commit 29304dc
Showing 1 changed file with 37 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,20 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error {
var memberDir string
if strings.HasSuffix(o.DataDir, "member") {
memberDir = o.DataDir
o.DataDir = filepath.Dir(o.DataDir)
} else {
memberDir = filepath.Join(o.DataDir, "member")
}

memberDirExists := false
_, err := os.Stat(memberDir)
switch {
case os.IsNotExist(err):
// do nothing. This just means we fall through to the polling logic

case err == nil:
fmt.Fprintf(os.Stderr, "data-dir exists for %v\n",o.TargetName)
return nil
fmt.Fprintf(os.Stderr, "memberDir %s is present on %s\n", memberDir, o.TargetName)
memberDirExists = true

case err != nil:
return err
Expand Down Expand Up @@ -158,11 +160,26 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error {
fmt.Fprintf(os.Stderr, "#### sleeping...\n")
time.Sleep(1 * time.Second)
}
if err != nil {

switch {
case err != nil:
return err
}
if targetMember == nil {

case targetMember == nil && memberDirExists:
// we weren't able to locate other members and need to return based previous memberDir so we can restart. This is the off and on again flow.
fmt.Printf(o.TargetName)
return nil

case targetMember == nil && !memberDirExists:
// our member has not been added to the cluster and we have no previous data to start based on.
return fmt.Errorf("timed out")

case targetMember != nil && len(targetMember.Name) == 0 && memberDirExists:
// our member has been added to the cluster and has never been started before, but a data directory exists. This means that we have dirty data we must remove
archiveDataDir(memberDir)

default:
// a target member was found, but no exception circumstances.
}

etcdInitialClusterEntries := []string{}
Expand All @@ -175,11 +192,26 @@ func (o *DiscoverEtcdInitialClusterOptions) Run() error {
if len(targetMember.Name) == 0 {
etcdInitialClusterEntries = append(etcdInitialClusterEntries, fmt.Sprintf("%s=%s", o.TargetName, targetMember.PeerURLs[0]))
}

fmt.Printf(strings.Join(etcdInitialClusterEntries, ","))

return nil
}

// TO DO: instead of archiving, we should remove the directory to avoid any confusion with the backups.
func archiveDataDir(sourceDir string) error {
targetDir := filepath.Join(sourceDir+"-removed-archive", time.Now().Format(time.RFC3339))

// If dir already exists, add seconds to the dir name
if _, err := os.Stat(targetDir); err == nil {
targetDir = filepath.Join(sourceDir+"-removed-archive", time.Now().Add(time.Second).Format(time.RFC3339))
}
if err := os.Rename(sourceDir, targetDir); err != nil && !os.IsNotExist(err) {
return err
}
return nil
}

func stringifyMember(member *etcdserverpb.Member) string {
if member == nil {
return "nil"
Expand Down

0 comments on commit 29304dc

Please sign in to comment.