Skip to content

Commit

Permalink
Delete corrupt existing layers when pulling an image with the overlay2 storage driver
Browse files Browse the repository at this point in the history
If lower layers are corrupted by a full disk, a host or daemon crash, or other causes,
the layer's link and lower files may be left empty on disk, which can cause a newly
pulled image to get an invalid lower file as well, typically one whose
content ends with a colon. Although the image pull succeeds, the image cannot be inspected;
it fails with the error message "Error response from daemon: readlink /var/lib/docker/overlay2/l: invalid argument".
So, when an image pull checks for an existing layer, also check that the layer is valid. If
a corrupt layer is found, delete it, then pull and register it again.

Signed-off-by: Jeff Zvier <zvier20@gmail.com>
  • Loading branch information
zvier committed Feb 23, 2022
1 parent 229bc1e commit f2d4cbb
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 6 deletions.
36 changes: 33 additions & 3 deletions daemon/graphdriver/overlay2/overlay.go
Original file line number Diff line number Diff line change
Expand Up @@ -663,10 +663,40 @@ func (d *Driver) Put(id string) error {
return nil
}

// Exists reports whether the layer directory for id is present and its
// overlay2 metadata looks intact.
//
// A layer counts as existing only when:
//   - its directory can be stat'ed,
//   - its "link" file can be read and is non-empty, and
//   - its lower file is either absent, or readable and non-empty.
//
// An empty link or lower file is treated as a corrupt layer, so the layer is
// reported as not existing.
func (d *Driver) Exists(id string) bool {
	dir := d.dir(id)
	if _, err := os.Stat(dir); err != nil {
		return false
	}

	// The link file must exist and have content.
	link, err := os.ReadFile(path.Join(dir, "link"))
	if err != nil || len(link) == 0 {
		return false
	}

	// The lower file is optional, but must have content when present.
	// Read it directly instead of stat-then-read so that a file which
	// disappears in between is treated like one that was never there.
	lower, err := os.ReadFile(path.Join(dir, lowerFile))
	if os.IsNotExist(err) {
		return true
	}
	return err == nil && len(lower) > 0
}

// isParent determines whether the given parent is the direct parent of the
Expand Down
3 changes: 3 additions & 0 deletions layer/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ var (
// attempted on a layer which does not exist.
ErrLayerDoesNotExist = errors.New("layer does not exist")

// ErrLayerCorrupt is used when the layer cache metadata is corrupt.
ErrLayerCorrupt = errors.New("layer cache metadata corrupt")

// ErrLayerNotRetained is used when a release is
// attempted on a layer which is not retained.
ErrLayerNotRetained = errors.New("layer not retained")
Expand Down
31 changes: 28 additions & 3 deletions layer/layer_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,11 @@ func (ls *layerStore) registerWithDescriptor(ts io.Reader, parent ChainID, descr
return nil, err
}

// If storage driver is overlay2, check the layer cache metadata before register
if ls.driver.String() == "overlay2" && !ls.driver.Exists(layer.cacheID) {
return nil, ErrLayerCorrupt
}

if layer.parent == nil {
layer.chainID = ChainID(layer.diffID)
} else {
Expand All @@ -359,9 +364,14 @@ func (ls *layerStore) registerWithDescriptor(ts io.Reader, parent ChainID, descr
defer ls.layerL.Unlock()

if existingLayer := ls.getWithoutLock(layer.chainID); existingLayer != nil {
// Set error for cleanup, but do not return the error
err = errors.New("layer already exists")
return existingLayer.getReference(), nil
if ls.driver.String() != "overlay2" || ls.driver.Exists(existingLayer.cacheID) {
// Set error for cleanup, but do not return the error
err = errors.New("layer already exists")
return existingLayer.getReference(), nil
}
if err := ls.deleteCorruptLayer(existingLayer); err != nil {
logrus.Errorf("delete corrupt layer failed when register with cached id %s, chain id %s, error %s", existingLayer.cacheID, existingLayer.chainID, err.Error())
}
}

if err = tx.Commit(layer.chainID); err != nil {
Expand Down Expand Up @@ -398,10 +408,25 @@ func (ls *layerStore) Get(l ChainID) (Layer, error) {
if layer == nil {
return nil, ErrLayerDoesNotExist
}
if ls.driver.String() == "overlay2" && !ls.driver.Exists(layer.cacheID) {
if err := ls.deleteCorruptLayer(layer); err != nil {
logrus.Errorf("delete corrupt layer failed when get with cached id %s, chain id %s, error %s", layer.cacheID, layer.chainID, err.Error())
return nil, err
}
return nil, ErrLayerDoesNotExist
}

return layer.getReference(), nil
}

// deleteCorruptLayer drops l from the in-memory layer map and then asks the
// graph driver to remove the data stored under l's cache ID. The driver's
// error, if any, is returned unchanged; the map entry is removed regardless
// of whether the driver call succeeds.
//
// NOTE(review): ls.layerMap is modified here without taking a lock;
// presumably every caller already holds ls.layerL — confirm.
func (ls *layerStore) deleteCorruptLayer(l *roLayer) error {
	cacheID, chainID := l.cacheID, l.chainID
	logrus.Infof("delete corrupt layer with cached id %s, chain id %s", cacheID, chainID)

	delete(ls.layerMap, chainID)
	return ls.driver.Remove(cacheID)
}

func (ls *layerStore) Map() map[ChainID]Layer {
ls.layerL.Lock()
defer ls.layerL.Unlock()
Expand Down

0 comments on commit f2d4cbb

Please sign in to comment.