Skip to content

Commit

Permalink
fix: Fix docker image pull hanging forever (#994)
Browse files Browse the repository at this point in the history
## Description:
<!-- Describe this change, how it works, and the motivation behind it.
-->

## Is this change user facing?
NO
<!-- If yes, please add the "user facing" label to the PR -->
<!-- If yes, don't forget to include docs changes where relevant -->

## References (if applicable):
<!-- Add relevant Github Issues, Discord threads, or other helpful
information. -->
  • Loading branch information
Guillaume Bouvignies committed Jul 26, 2023
1 parent 06ec80b commit fd00d79
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 12 deletions.
Expand Up @@ -18,7 +18,7 @@ import (
)

const (
dockerClientTimeout = 30 * time.Second
dockerClientTimeout = 5 * time.Minute
)

// TODO Delete this when we split up KurtosisBackend into various parts
Expand Down
Expand Up @@ -1096,7 +1096,7 @@ func (manager *DockerManager) isImageAvailableLocally(ctx context.Context, image
}

func (manager *DockerManager) pullImage(context context.Context, imageName string) (err error) {
logrus.Infof("Pulling image '%s'...", imageName)
logrus.Infof("Pulling image '%s'", imageName)
err, retryWithLinuxAmd64 := pullImage(context, manager.dockerClient, imageName, defaultPlatform)
if err == nil {
return nil
Expand All @@ -1105,10 +1105,12 @@ func (manager *DockerManager) pullImage(context context.Context, imageName strin
return stacktrace.Propagate(err, "Tried pulling image '%v' but failed", imageName)
}
// we retry with linux/amd64
logrus.Debugf("Retrying pulling image '%s' for '%s'", imageName, linuxAmd64)
err, _ = pullImage(context, manager.dockerClient, imageName, linuxAmd64)
if err != nil {
return stacktrace.Propagate(err, "Had previously failed with a manifest error so tried pulling image '%v' for platform '%v' but failed", imageName, linuxAmd64)
}
logrus.Warnf("Image '%s' successfully pulled for '%s' which is not the architecture this OS is running on.", imageName, linuxAmd64)
return nil
}

Expand Down Expand Up @@ -1719,6 +1721,7 @@ func getEndpointSettingsForIpAddress(ipAddress string, alias string) *network.En
}

func pullImage(ctx context.Context, dockerClient *client.Client, imageName string, platform string) (error, bool) {
logrus.Tracef("Starting pulling '%s' for platform '%s'", imageName, platform)
out, err := dockerClient.ImagePull(ctx, imageName, types.ImagePullOptions{
All: false,
RegistryAuth: "",
Expand All @@ -1729,18 +1732,21 @@ func pullImage(ctx context.Context, dockerClient *client.Client, imageName strin
return stacktrace.Propagate(err, "Tried pulling image '%v' with platform '%v' but failed", imageName, platform), false
}
defer out.Close()
logrus.Tracef("Finished pulling '%s' for platform '%s'. Analyzing response to check for errors", imageName, platform)
responseDecoder := json.NewDecoder(out)
var jsonMessage *jsonmessage.JSONMessage
for {
if err = responseDecoder.Decode(&jsonMessage); err != nil {
if err == io.EOF {
break
}
jsonMessage := new(jsonmessage.JSONMessage)
err = responseDecoder.Decode(&jsonMessage)
if err == io.EOF {
break
}
if jsonMessage.Error == nil {
continue
if err != nil {
return stacktrace.Propagate(err, "ImagePull for '%s' on platform '%s' failed with an unexpected error", imageName, platform), false
}
if jsonMessage.Error != nil {
return stacktrace.NewError("ImagePull failed with the following error '%v'", jsonMessage.Error.Message), strings.HasPrefix(jsonMessage.Error.Message, architectureErrorString)
}
return stacktrace.NewError("ImagePull failed with the following error '%v'", jsonMessage.Error.Message), strings.HasPrefix(jsonMessage.Error.Message, architectureErrorString)
}
logrus.Tracef("No error pulling '%s' for platform '%s'. Returning", imageName, platform)
return nil, false
}
Expand Up @@ -45,15 +45,14 @@ func (validator *DockerImagesValidator) Validate(ctx context.Context, environmen
wg := &sync.WaitGroup{}
for image := range environment.requiredDockerImages {
wg.Add(1)
logrus.Debugf("Starting the download of image: '%s'", image)
go fetchImageFromBackend(ctx, wg, imageCurrentlyDownloading, validator.kurtosisBackend, image, pullErrors, imageDownloadStarted, imageDownloadFinished)
}
wg.Wait()

logrus.Debug("All image validation submitted, currently in progress.")
}

func fetchImageFromBackend(ctx context.Context, wg *sync.WaitGroup, imageCurrentlyDownloading chan bool, backend *backend_interface.KurtosisBackend, image string, pullErrors chan<- error, imageDownloadStarted chan<- string, imageDownloadFinished chan<- string) {
logrus.Debugf("Requesting the download of image: '%s'", image)
defer wg.Done()
imageCurrentlyDownloading <- true
imageDownloadStarted <- image
Expand All @@ -62,8 +61,10 @@ func fetchImageFromBackend(ctx context.Context, wg *sync.WaitGroup, imageCurrent
imageDownloadFinished <- image
}()

logrus.Debugf("Starting the download of image: '%s'", image)
err := (*backend).FetchImage(ctx, image)
if err != nil {
logrus.Warnf("Container image '%s' download failed. Error was: '%s'", image, err.Error())
pullErrors <- startosis_errors.NewValidationError("Failed fetching the required image '%v', make sure that the image exists and is public", image)
}
logrus.Debugf("Container image '%s' successfully downloaded", image)
Expand Down

0 comments on commit fd00d79

Please sign in to comment.