Skip to content

Commit

Permalink
Delete corrupt existing layers when pulling an image with the overlay2 storage driver
Browse files Browse the repository at this point in the history
If lower layers are corrupted by a full disk, a host or daemon crash, or some other
cause, the layer's link and lower files may be left empty on disk. A newly pulled
image that reuses such a layer then also gets an invalid lower file; typically the
lower file's content ends with a colon. Although the image pull succeeds, the image
cannot be inspected and fails with the error message "Error response from daemon: readlink /var/lib/docker/overlay2/l: invalid argument".
So, when an image pull checks for an existing layer, also check that the layer is
valid. If the layer is corrupt, delete it, then pull and register it again.

Signed-off-by: Jeff Zvier <zvier20@gmail.com>
  • Loading branch information
zvier authored and liuzekun committed Dec 26, 2021
1 parent 229bc1e commit 87a2fbb
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 6 deletions.
36 changes: 33 additions & 3 deletions daemon/graphdriver/overlay2/overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,10 +663,40 @@ func (d *Driver) Put(id string) error {
return nil
}

// Exists reports whether the layer directory for id is present and its
// metadata files look intact.
//
// A layer is treated as valid only when:
//   - its "link" file can be read and is non-empty, and
//   - its lower file is either absent or readable and non-empty.
//
// Every other outcome (missing directory, unreadable file, empty file) yields
// false, so that a layer whose metadata was left empty on disk is not reported
// as existing.
func (d *Driver) Exists(id string) bool {
	dir := d.dir(id)

	// The link file must exist and have content. Reading it also fails when
	// dir itself is missing, so no separate stat of the directory is needed.
	link, err := os.ReadFile(path.Join(dir, "link"))
	if err != nil || len(link) == 0 {
		return false
	}

	// The lower file is optional, but must have content when it is present.
	// Reading it directly (instead of stat-then-read) avoids a second syscall
	// and the window in which the file could change between the two calls.
	lower, err := os.ReadFile(path.Join(dir, lowerFile))
	if os.IsNotExist(err) {
		return true
	}
	return err == nil && len(lower) > 0
}

// isParent determines whether the given parent is the direct parent of the
Expand Down
26 changes: 23 additions & 3 deletions layer/layer_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,14 @@ func (ls *layerStore) registerWithDescriptor(ts io.Reader, parent ChainID, descr
defer ls.layerL.Unlock()

if existingLayer := ls.getWithoutLock(layer.chainID); existingLayer != nil {
// Set error for cleanup, but do not return the error
err = errors.New("layer already exists")
return existingLayer.getReference(), nil
if ls.driver.String() != "overlay2" || ls.driver.Exists(existingLayer.cacheID) {
// Set error for cleanup, but do not return the error
err = errors.New("layer already exists")
return existingLayer.getReference(), nil
}
if err := ls.deleteCorruptLayer(existingLayer); err != nil {
logrus.Errorf("delete corrupt layer failed when register with cached id %s, chain id %s, error %s", existingLayer.cacheID, existingLayer.chainID, err.Error())
}
}

if err = tx.Commit(layer.chainID); err != nil {
Expand Down Expand Up @@ -398,10 +403,25 @@ func (ls *layerStore) Get(l ChainID) (Layer, error) {
if layer == nil {
return nil, ErrLayerDoesNotExist
}
if ls.driver.String() == "overlay2" && !ls.driver.Exists(layer.cacheID) {
if err := ls.deleteCorruptLayer(layer); err != nil {
logrus.Errorf("delete corrupt layer failed when get with cached id %s, chain id %s, error %s", layer.cacheID, layer.chainID, err.Error())
return nil, err
}
return nil, ErrLayerDoesNotExist
}

return layer.getReference(), nil
}

// deleteCorruptLayer forgets l in the in-memory layer map and then asks the
// graph driver to remove the data stored under the layer's cache ID. The
// driver's error, if any, is returned unchanged.
//
// NOTE(review): the map is modified without taking a lock here; callers
// presumably already hold ls.layerL — confirm.
func (ls *layerStore) deleteCorruptLayer(l *roLayer) error {
	cacheID, chainID := l.cacheID, l.chainID
	logrus.Infof("delete corrupt layer with cached id %s, chain id %s", cacheID, chainID)

	// The map entry is dropped before the driver call, so the layer is no
	// longer reachable through layerMap even when the removal below fails.
	delete(ls.layerMap, chainID)
	return ls.driver.Remove(cacheID)
}

func (ls *layerStore) Map() map[ChainID]Layer {
ls.layerL.Lock()
defer ls.layerL.Unlock()
Expand Down

0 comments on commit 87a2fbb

Please sign in to comment.