Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more size intervals to obj size histogram #18772

Merged
merged 1 commit into from Jan 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
99 changes: 92 additions & 7 deletions cmd/data-usage-cache.go
@@ -1,4 +1,4 @@
// Copyright (c) 2015-2021 MinIO, Inc.
// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
Expand Down Expand Up @@ -32,6 +32,7 @@ import (
"time"

"github.com/cespare/xxhash/v2"
"github.com/dustin/go-humanize"
"github.com/klauspost/compress/zstd"
"github.com/minio/madmin-go/v3"
"github.com/minio/minio/internal/bucket/lifecycle"
Expand All @@ -46,6 +47,10 @@ import (
// dataUsageHash is the hash type used.
type dataUsageHash string

// sizeHistogramV1 is size histogram V1, which has fewer intervals esp. between
// 1024B and 1MiB.
type sizeHistogramV1 [dataUsageBucketLenV1]uint64

// sizeHistogram is a size histogram.
type sizeHistogram [dataUsageBucketLen]uint64

Expand Down Expand Up @@ -204,8 +209,8 @@ func (r *replicationAllStats) clone() *replicationAllStats {
return &dst
}

//msgp:encode ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6
//msgp:marshal ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6
//msgp:encode ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6 dataUsageEntryV7
//msgp:marshal ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6 dataUsageEntryV7

//msgp:tuple dataUsageEntryV2
type dataUsageEntryV2 struct {
Expand Down Expand Up @@ -263,14 +268,28 @@ type dataUsageEntryV6 struct {
Compacted bool
}

type dataUsageEntryV7 struct {
Children dataUsageHashMap `msg:"ch"`
// These fields do no include any children.
Size int64 `msg:"sz"`
Objects uint64 `msg:"os"`
Versions uint64 `msg:"vs"` // Versions that are not delete markers.
DeleteMarkers uint64 `msg:"dms"`
ObjSizes sizeHistogramV1 `msg:"szs"`
ObjVersions versionsHistogram `msg:"vh"`
ReplicationStats *replicationAllStats `msg:"rs,omitempty"`
AllTierStats *allTierStats `msg:"ats,omitempty"`
Compacted bool `msg:"c"`
}

// dataUsageCache contains a cache of data usage entries latest version.
type dataUsageCache struct {
Info dataUsageCacheInfo
Cache map[string]dataUsageEntry
}

//msgp:encode ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6
//msgp:marshal ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6
//msgp:encode ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6 dataUsageCacheV7
//msgp:marshal ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6 dataUsageCacheV7

// dataUsageCacheV2 contains a cache of data usage entries version 2.
type dataUsageCacheV2 struct {
Expand Down Expand Up @@ -302,6 +321,12 @@ type dataUsageCacheV6 struct {
Cache map[string]dataUsageEntryV6
}

// dataUsageCacheV7 contains a cache of data usage entries version 7.
type dataUsageCacheV7 struct {
Info dataUsageCacheInfo
Cache map[string]dataUsageEntryV7
}

//msgp:ignore dataUsageEntryInfo
type dataUsageEntryInfo struct {
Name string
Expand Down Expand Up @@ -750,11 +775,42 @@ func (h *sizeHistogram) add(size int64) {
}
}

// mergeV1 is used to migrate data usage cache from sizeHistogramV1 to
// sizeHistogram
func (h *sizeHistogram) mergeV1(v sizeHistogramV1) {
var oidx, nidx int
for oidx < len(v) {
intOld, intNew := ObjectsHistogramIntervalsV1[oidx], ObjectsHistogramIntervals[nidx]
// skip intervals that aren't common to both histograms
if intOld.start != intNew.start || intOld.end != intNew.end {
nidx++
continue
}
h[nidx] += v[oidx]
oidx++
nidx++
}
}

// toMap returns the map to a map[string]uint64.
func (h *sizeHistogram) toMap() map[string]uint64 {
res := make(map[string]uint64, dataUsageBucketLen)
var splCount uint64
for i, count := range h {
res[ObjectsHistogramIntervals[i].name] = count
szInt := ObjectsHistogramIntervals[i]
switch {
case humanize.KiByte == szInt.start && szInt.end == humanize.MiByte-1:
// spl interval: [1024B, 1MiB)
res[szInt.name] = splCount
case humanize.KiByte <= szInt.start && szInt.end <= humanize.MiByte-1:
// intervals that fall within the spl interval above; they
// appear earlier in this array of intervals, see
// ObjectsHistogramIntervals
splCount += count
fallthrough
default:
res[szInt.name] = count
}
}
return res
}
Expand Down Expand Up @@ -1027,7 +1083,8 @@ func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string)
// Bumping the cache version will drop data from previous versions
// and write new data with the new version.
const (
dataUsageCacheVerCurrent = 7
dataUsageCacheVerCurrent = 8
dataUsageCacheVerV7 = 7
dataUsageCacheVerV6 = 6
dataUsageCacheVerV5 = 5
dataUsageCacheVerV4 = 4
Expand Down Expand Up @@ -1275,6 +1332,34 @@ func (d *dataUsageCache) deserialize(r io.Reader) error {
}
d.Cache[k] = due
}
return nil
case dataUsageCacheVerV7:
// Zstd compressed.
dec, err := zstd.NewReader(r, zstd.WithDecoderConcurrency(2))
if err != nil {
return err
}
defer dec.Close()
dold := &dataUsageCacheV7{}
if err = dold.DecodeMsg(msgp.NewReader(dec)); err != nil {
return err
}
d.Info = dold.Info
d.Cache = make(map[string]dataUsageEntry, len(dold.Cache))
for k, v := range dold.Cache {
var szHist sizeHistogram
szHist.mergeV1(v.ObjSizes)
d.Cache[k] = dataUsageEntry{
Children: v.Children,
Size: v.Size,
Objects: v.Objects,
Versions: v.Versions,
ObjSizes: szHist,
ReplicationStats: v.ReplicationStats,
Compacted: v.Compacted,
}
}

return nil
case dataUsageCacheVerCurrent:
// Zstd compressed.
Expand Down