Skip to content

Commit

Permalink
Resize cache as needed (#1925)
Browse files Browse the repository at this point in the history
Fixes #1781
  • Loading branch information
muhamadazmy committed Mar 20, 2023
1 parent 27800f5 commit 885bdea
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 21 deletions.
2 changes: 1 addition & 1 deletion cmds/modules/storaged/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func action(cli *cli.Context) error {
workerNr uint = cli.Uint("workers")
)

storageModule, err := storage.New()
storageModule, err := storage.New(cli.Context)
if err != nil {
return errors.Wrap(err, "failed to initialize storage module")
}
Expand Down
12 changes: 7 additions & 5 deletions pkg/storage/filesystem/btrfs.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,17 +249,19 @@ func (p *btrfsPool) Usage() (usage Usage, err error) {
return usage, errors.Wrapf(err, "failed to list pool '%s' volumes", mnt)
}

var used uint64
usage.Size = p.device.Size

for _, volume := range volumes {
usage, err := volume.Usage()
vol, err := volume.Usage()
if err != nil {
return Usage{}, errors.Wrapf(err, "failed to calculate volume '%s' usage", volume.Path())
}

used += usage.Used
usage.Used += vol.Used
usage.Excl += vol.Excl
}

return Usage{Size: p.device.Size, Used: used}, nil
return
}

func (p *btrfsPool) maintenance() error {
Expand Down Expand Up @@ -374,7 +376,7 @@ func (v *btrfsVolume) Usage() (usage Usage, err error) {
}
}

return Usage{Used: used, Size: group.MaxRfer}, nil
return Usage{Used: used, Size: group.MaxRfer, Excl: group.Excl}, nil
}

// Limit size of volume, setting size to 0 means unlimited
Expand Down
24 changes: 24 additions & 0 deletions pkg/storage/filesystem/filesystem.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,32 @@ import (

// Usage reports the space accounting of a pool or a volume. All values are in bytes.
type Usage struct {
// Size is the space allocated for this volume.
// If it is 0 the volume has no limit. In that case
// the Used attribute is the total size of the actual
// files inside the volume, which also means Used can
// keep growing up to the maximum possible, which is
// the size of the pool.
Size uint64
// Used can be one of 2 things:
// - If Size is not zero (the volume is limited), Used always equals Size,
//   because that is the total space reserved for that volume.
// - If Size is zero (no limit), Used is the total actual size of all
//   files in that volume.
// Used is defined this way to make it easier to compute the total
// allocated space in a pool: go over all volumes and add up the Used
// of each. It does not matter whether the space is reserved but not
// yet consumed, because it cannot be allocated to anything else either way.
//
// NOTE: Special case: if this is a `zdb` volume, Used instead holds
// the total size of the reserved namespaces in that volume.
Used uint64

// Excl is, for a `limited` volume (one with a quota), a "guessed"
// value of the total space used by files. This value is not accurate,
// and Used should be used instead for all capacity planning.
Excl uint64
}

// Volume represents a logical volume in the pool. Volumes can be nested
Expand Down
75 changes: 60 additions & 15 deletions pkg/storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@ const (
// CacheTarget is the path where the cache disk is mounted
CacheTarget = "/var/cache"
// cacheLabel is the name of the cache
cacheLabel = "zos-cache"
gib = 1024 * 1024 * 1024
cacheSize = 100 * gib
cacheLabel = "zos-cache"
gib = 1024 * 1024 * 1024
cacheSize = 5 * gib
cacheGrowPercent = 80
cacheShrinkPercent = 20
cacheCheckDuration = 30 * time.Minute
)

var (
Expand Down Expand Up @@ -84,7 +87,7 @@ func (t *TypeCache) Get(name string) (pkg.DeviceType, bool) {
}

// New create a new storage module service
func New() (*Module, error) {
func New(ctx context.Context) (*Module, error) {
m := filesystem.DefaultDeviceManager()
cache, err := cache.VolatileDir("storage", 1*cache.Megabyte)
if err != nil && !os.IsExist(err) {
Expand All @@ -99,7 +102,7 @@ func New() (*Module, error) {
}

// go for a simple linear setup right now
return s, s.initialize()
return s, s.initialize(ctx)
}

// Total gives the total amount of storage available for a device type
Expand Down Expand Up @@ -164,7 +167,7 @@ What Initialize will do is the following:
*
*/
func (s *Module) initialize() error {
func (s *Module) initialize(ctx context.Context) error {
// lock for the entire initialization method, so other code which relies
// on this observes this as an atomic operation
s.mu.Lock()
Expand All @@ -175,10 +178,10 @@ func (s *Module) initialize() error {
log.Debug().Bool("is-vm", vm).Msg("debugging virtualization detection")

// Make sure we finish in 1 minute
ctx, cancel := context.WithTimeout(context.Background(), time.Minute*1)
subCtx, cancel := context.WithTimeout(context.Background(), time.Minute*1)
defer cancel()

devices, err := s.devices.Devices(ctx)
devices, err := s.devices.Devices(subCtx)
if err != nil {
return err
}
Expand Down Expand Up @@ -278,11 +281,11 @@ func (s *Module) initialize() error {
s.dump()

// just in case
if err := filesystem.Partprobe(ctx); err != nil {
if err := filesystem.Partprobe(subCtx); err != nil {
return err
}

if err := s.ensureCache(); err != nil {
if err := s.ensureCache(ctx); err != nil {
log.Error().Err(err).Msg("Error ensuring cache")
return err
}
Expand Down Expand Up @@ -561,7 +564,7 @@ func (s *Module) Cache() (pkg.Volume, error) {
}

// ensureCache creates a "cache" subvolume and mounts it in /var
func (s *Module) ensureCache() error {
func (s *Module) ensureCache(ctx context.Context) error {
log.Info().Msgf("Setting up cache")

log.Debug().Msgf("Checking pools for existing cache")
Expand Down Expand Up @@ -620,10 +623,7 @@ func (s *Module) ensureCache() error {

_ = app.DeleteFlag(app.LimitedCache)

log.Info().Msgf("set cache quota to %d GiB", cacheSize/gib)
if err := cacheFs.Limit(cacheSize); err != nil {
log.Error().Err(err).Msg("failed to set cache quota")
}
go s.watchCache(ctx, cacheFs)

if !filesystem.IsMountPoint(CacheTarget) {
log.Debug().Msgf("Mounting cache partition in %s", CacheTarget)
Expand All @@ -634,6 +634,51 @@ func (s *Module) ensureCache() error {
return nil
}

// checkAndResizeCache inspects the current usage of the cache volume and
// adjusts its quota in steps of sizeMultiplier:
//
//   - the volume has no quota yet (usage.Size is 0): an initial quota of
//     roughly double the current usage is set;
//   - usage is at or above cacheGrowPercent of the quota: the quota grows by
//     one sizeMultiplier;
//   - usage is below cacheShrinkPercent of the quota (and the quota is larger
//     than sizeMultiplier): the quota shrinks to roughly double the current
//     usage;
//   - otherwise the quota is left untouched.
//
// usage.Excl is taken as the amount of space occupied by files. The quota is
// never set below sizeMultiplier, and Limit is only called when the computed
// size differs from the current one.
//
// It returns an error if sizeMultiplier is zero, if the usage cannot be read,
// or if applying the new limit fails.
func (s *Module) checkAndResizeCache(cache filesystem.Volume, sizeMultiplier uint64) error {
	// sizeMultiplier is used as a divisor below; a zero value would panic.
	if sizeMultiplier == 0 {
		return errors.New("cache size multiplier must be greater than zero")
	}

	usage, err := cache.Usage()
	if err != nil {
		return errors.Wrap(err, "failed to check cache usage")
	}
	log.Debug().Msgf("cache usage %+v", usage)

	// comfortable is double the space currently needed, rounded down to a
	// multiple of sizeMultiplier and then bumped by one more sizeMultiplier.
	comfortable := ((usage.Excl*2)/sizeMultiplier)*sizeMultiplier + sizeMultiplier

	size := usage.Size
	if usage.Size == 0 {
		// No quota is set on the volume, so there is nothing to compute a
		// percentage against (and dividing by Size would panic). Start from
		// a comfortable initial quota instead.
		size = comfortable
	} else {
		percent := usage.Excl * 100 / usage.Size
		switch {
		case percent >= cacheGrowPercent:
			size += sizeMultiplier
		case percent < cacheShrinkPercent && size > sizeMultiplier:
			// if we go below the shrink threshold we can assume the cache
			// can sit comfortably at around double the needed space
			size = comfortable
		}
	}

	// never go below a single step
	if size < sizeMultiplier {
		size = sizeMultiplier
	}
	if usage.Size == size {
		return nil
	}

	log.Info().Uint64("size", size).Msg("setting cache size")
	return cache.Limit(size)
}

// watchCache periodically checks the system cache and grows (or shrinks)
// its size as needed. The first check runs immediately, then one check
// runs every cacheCheckDuration until ctx is cancelled.
func (s *Module) watchCache(ctx context.Context, cache filesystem.Volume) {
	for {
		err := s.checkAndResizeCache(cache, cacheSize)
		if err != nil {
			// a failed check is not fatal, try again on the next round
			log.Error().Err(err).Msg("error while checking cache size")
		}

		select {
		case <-ctx.Done():
			return
		case <-time.After(cacheCheckDuration):
			// time for the next check
		}
	}
}

// createSubvolWithQuota creates a subvolume with the given name and limits it to the given size
// if the requested disk type does not have a storage pool with enough free size available, an error is returned
// this method sets a quota limit equal to size on the created volume
Expand Down
68 changes: 68 additions & 0 deletions pkg/storage/storage_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,3 +399,71 @@ func TestVDiskFindCandidatesOverProvision(t *testing.T) {
require.Error(err)

}

// TestCacheResize checks the quota that checkAndResizeCache picks for a cache
// volume of size 100 at different usage levels, using a step (cacheSize) of 5.
//
// Every case verifies the mock expectations, so a case that expects a resize
// fails if Limit is never called.
// NOTE(review): AssertExpectations assumes testVolume embeds testify's
// mock.Mock (it already exposes On) — confirm against the testVolume definition.
func TestCacheResize(t *testing.T) {
	const cacheSize = uint64(5)

	cases := []struct {
		name string
		// excl is the (guessed) space used by files in the volume
		excl uint64
		// resize tells whether a call to Limit is expected at all
		resize bool
		// limit is the expected argument of Limit when resize is true
		limit uint64
	}{
		{
			// 1% usage: double the usage is below one step, so the
			// size falls back to a single cacheSize
			name:   "resize down to minimum",
			excl:   1,
			resize: true,
			limit:  cacheSize,
		},
		{
			// the limit is set to double the 19
			// = 19 * 2 = 38
			// this is then rounded to a multiple of cacheSize
			// so (38/5) * 5 = 35
			// then 35 + 5 = 40
			name:   "resize down to double the usage",
			excl:   19,
			resize: true,
			limit:  40,
		},
		{
			// no files at all
			name:   "resize down when empty",
			excl:   0,
			resize: true,
			limit:  cacheSize,
		},
		{
			// 91% usage: grow by one step
			name:   "resize up",
			excl:   91,
			resize: true,
			limit:  100 + cacheSize,
		},
		{
			// 50% usage: no Limit expectation is registered, so any
			// call to Limit is reported by the mock
			name: "leave as is",
			excl: 50,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			var m Module

			vol := testVolume{
				usage: filesystem.Usage{
					Size: 100,
					Used: 100,
					Excl: tc.excl,
				},
			}
			if tc.resize {
				vol.On("Limit", tc.limit).Return(nil)
			}

			err := m.checkAndResizeCache(&vol, cacheSize)
			require.NoError(t, err)

			// make sure the expected Limit call actually happened
			vol.AssertExpectations(t)
		})
	}
}

0 comments on commit 885bdea

Please sign in to comment.