Skip to content

Commit

Permalink
Keep bootstrapper pod alive when an error occurs (#921)
Browse files Browse the repository at this point in the history
* keep bootstrapper pod alive when running inside k8s

* Edit message when an error happens

* handle review feedback
  • Loading branch information
kunmingg authored and k8s-ci-robot committed Jun 6, 2018
1 parent d90ffcc commit 9dfda4f
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 13 deletions.
30 changes: 18 additions & 12 deletions bootstrap/cmd/bootstrap/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ func setupNamespace(namespaces type_v1.NamespaceInterface, name_space string) er
return err
}

func createComponent(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, args []string) {
func createComponent(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, args []string) error {
componentName := args[1]
componentPath := filepath.Join(opt.AppDir, "components", componentName+".jsonnet")

Expand All @@ -266,18 +266,19 @@ func createComponent(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, a
actions.OptionArguments: args,
})
if err != nil {
log.Fatalf("There was a problem creating protoype package kubeflow-core; error %v", err)
return errors.New(fmt.Sprintf("There was a problem creating component %v: %v", componentName, err))
}
} else {
log.Infof("Component %v already exists", componentName)
}
return nil
}

func appGenerate(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, bootConfig *BootConfig) {
func appGenerate(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, bootConfig *BootConfig) error {
libs, err := (*kfApp).Libraries()

if err != nil {
log.Fatalf("Could not list libraries for app; error %v", err)
return errors.New(fmt.Sprintf("Could not list libraries for app; error %v", err))
}

regUris := make(map[string]string)
Expand All @@ -289,7 +290,7 @@ func appGenerate(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, bootC
pkgName := p.Name
_, err = (*fs).Stat(path.Join(regUris[p.Registry], pkgName))
if err != nil {
log.Fatalf("Package %v didn't exist in registry %v", pkgName, regUris[p.Registry])
return errors.New(fmt.Sprintf("Package %v didn't exist in registry %v", pkgName, regUris[p.Registry]))
}
full := fmt.Sprintf("kubeflow/%v", pkgName)
log.Infof("Installing package %v", full)
Expand All @@ -305,7 +306,7 @@ func appGenerate(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, bootC
})

if err != nil {
log.Fatalf("There was a problem installing package %v; error %v", full, err)
return errors.New(fmt.Sprintf("There was a problem installing package %v; error %v", full, err))
}
}

Expand All @@ -325,7 +326,9 @@ func appGenerate(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, bootC
if val, ok := paramMapping[c.Name]; ok {
params = append(params, val...)
}
createComponent(opt, kfApp, fs, params)
if err = createComponent(opt, kfApp, fs, params); err != nil {
return err
}
}
// Apply Params
for _, p := range bootConfig.App.Parameters {
Expand All @@ -336,9 +339,10 @@ func appGenerate(opt *options.ServerOption, kfApp *kApp.App, fs *afero.Fs, bootC
actions.OptionValue: p.Value,
})
if err != nil {
log.Fatalf("Error when setting Parameters %v for Component %v: %v", p.Name, p.Component, err)
return errors.New(fmt.Sprintf("Error when setting Parameters %v for Component %v: %v", p.Name, p.Component, err))
}
}
return err
}

// Run the tool.
Expand Down Expand Up @@ -431,7 +435,7 @@ func Run(opt *options.ServerOption) error {
err := actions.RunInit(options)

if err != nil {
log.Fatalf("There was a problem initializing the app: %v", err)
return errors.New(fmt.Sprintf("There was a problem initializing the app: %v", err))
}

log.Infof("Successfully initialized the app %v.", opt.AppDir)
Expand All @@ -443,7 +447,7 @@ func Run(opt *options.ServerOption) error {
kfApp, err := kApp.Load(fs, opt.AppDir, true)

if err != nil {
log.Fatalf("There was a problem loading the app: %v", err)
return errors.New(fmt.Sprintf("There was a problem loading the app: %v", err))
}

for idx, registry := range bootConfig.Registries {
Expand All @@ -470,13 +474,15 @@ func Run(opt *options.ServerOption) error {

err = actions.RunRegistryAdd(options)
if err != nil {
log.Fatalf("There was a problem adding the registry: %v", err)
return errors.New(fmt.Sprintf("There was a problem adding registry %v: %v", registry.Name, err))
}
}
}

// Load default kubeflow apps
appGenerate(opt, &kfApp, &fs, bootConfig)
if err = appGenerate(opt, &kfApp, &fs, bootConfig); err != nil {
return err
}

// Component customization
for _, component := range bootConfig.App.Components {
Expand Down
11 changes: 10 additions & 1 deletion bootstrap/cmd/bootstrap/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/kubeflow/kubeflow/bootstrap/cmd/bootstrap/app"
"github.com/kubeflow/kubeflow/bootstrap/cmd/bootstrap/app/options"
"time"
)

func init() {
Expand All @@ -43,6 +44,14 @@ func main() {
}

if err := app.Run(s); err != nil {
log.Fatalf("%v\n", err)
if s.InCluster && s.KeepAlive {
log.Errorf("Bootstrapper failed with error: %v\n", err)
log.Infof("Keeping pod alive so user can ssh in and check error status.")
for {
time.Sleep(time.Minute)
}
} else {
log.Fatalf("%v\n", err)
}
}
}

0 comments on commit 9dfda4f

Please sign in to comment.