Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(repository): Add support to configure metadata compression algorithm #550

Closed
wants to merge 9 commits into from
6 changes: 5 additions & 1 deletion cli/command_policy_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,5 +309,9 @@ func supportedCompressionAlgorithms() []string {

sort.Strings(res)

return append([]string{inheritPolicyString, "none"}, res...)
return append([]string{"none"}, res...)
}

// withInherit returns a new slice that starts with the inherit-policy marker
// and is followed by every entry of algos in its original order.
func withInherit(algos []string) []string {
	choices := make([]string, 0, len(algos)+1)
	choices = append(choices, inheritPolicyString)
	choices = append(choices, algos...)

	return choices
}
2 changes: 1 addition & 1 deletion cli/command_policy_set_compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ type policyCompressionFlags struct {

func (c *policyCompressionFlags) setup(cmd *kingpin.CmdClause) {
// Name of compression algorithm.
cmd.Flag("compression", "Compression algorithm").EnumVar(&c.policySetCompressionAlgorithm, supportedCompressionAlgorithms()...)
cmd.Flag("compression", "Compression algorithm").EnumVar(&c.policySetCompressionAlgorithm, withInherit(supportedCompressionAlgorithms())...)
cmd.Flag("compression-min-size", "Min size of file to attempt compression for").StringVar(&c.policySetCompressionMinSize)
cmd.Flag("compression-max-size", "Max size of file to attempt compression for").StringVar(&c.policySetCompressionMaxSize)

Expand Down
3 changes: 2 additions & 1 deletion cli/command_repository_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ func (c *commandRepositoryCreate) newRepositoryOptionsFromFlags() *repo.NewRepos
return &repo.NewRepositoryOptions{
BlockFormat: format.ContentFormat{
MutableParameters: format.MutableParameters{
Version: format.Version(c.createFormatVersion),
Version: format.Version(c.createFormatVersion),
MetadataCompression: format.DefaultMetadataCompressionAlgorithmName,
},
Hash: c.createBlockHashFormat,
Encryption: c.createBlockEncryptionFormat,
Expand Down
18 changes: 18 additions & 0 deletions cli/command_repository_set_parameters.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/kopia/kopia/internal/units"
"github.com/kopia/kopia/repo"
"github.com/kopia/kopia/repo/blob"
"github.com/kopia/kopia/repo/compression"
"github.com/kopia/kopia/repo/format"
"github.com/kopia/kopia/repo/maintenance"
)
Expand All @@ -35,6 +36,8 @@ type commandRepositorySetParameters struct {
removeRequiredFeature string
warnOnMissingRequiredFeature bool

metadataCompression string

svc appServices
}

Expand All @@ -56,6 +59,8 @@ func (c *commandRepositorySetParameters) setup(svc appServices, parent commandPa
cmd.Flag("epoch-delete-parallelism", "Epoch delete parallelism").IntVar(&c.epochDeleteParallelism)
cmd.Flag("epoch-checkpoint-frequency", "Checkpoint frequency").IntVar(&c.epochCheckpointFrequency)

cmd.Flag("metadata-compression", "Metadata Compression algorithm").EnumVar(&c.metadataCompression, supportedCompressionAlgorithms()...)

if svc.enableTestOnlyFlags() {
cmd.Flag("add-required-feature", "Add required feature which must be present to open the repository").Hidden().StringVar(&c.addRequiredFeature)
cmd.Flag("remove-required-feature", "Remove required feature").Hidden().StringVar(&c.removeRequiredFeature)
Expand Down Expand Up @@ -100,6 +105,17 @@ func (c *commandRepositorySetParameters) setIntParameter(ctx context.Context, v
log(ctx).Infof(" - setting %v to %v.\n", desc, v)
}

// setMetadataCompressionParameter writes v, converted to a compression.Name,
// into *dst, sets *anyChange to true and logs the change using desc as the
// label. An empty v leaves *dst and *anyChange untouched and logs nothing.
func (c *commandRepositorySetParameters) setMetadataCompressionParameter(ctx context.Context, v string, desc string, dst *compression.Name, anyChange *bool) {
	if len(v) == 0 {
		return
	}

	// Store the new value and flag the change before reporting it.
	*dst, *anyChange = compression.Name(v), true

	log(ctx).Infof(" - setting %v to %v.\n", desc, v)
}

PrasadG193 marked this conversation as resolved.
Show resolved Hide resolved
func (c *commandRepositorySetParameters) setDurationParameter(ctx context.Context, v time.Duration, desc string, dst *time.Duration, anyChange *bool) {
if v == 0 {
return
Expand Down Expand Up @@ -225,6 +241,8 @@ func (c *commandRepositorySetParameters) run(ctx context.Context, rep repo.Direc
c.setIntParameter(ctx, c.epochDeleteParallelism, "epoch delete parallelism", &mp.EpochParameters.DeleteParallelism, &anyChange)
c.setIntParameter(ctx, c.epochCheckpointFrequency, "epoch checkpoint frequency", &mp.EpochParameters.FullCheckpointFrequency, &anyChange)

c.setMetadataCompressionParameter(ctx, c.metadataCompression, "metadata compression algorithm", &mp.MetadataCompression, &anyChange)

requiredFeatures = c.addRemoveUpdateRequiredFeatures(requiredFeatures, &anyChange)

if !anyChange {
Expand Down
18 changes: 18 additions & 0 deletions cli/command_repository_set_parameters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,3 +281,21 @@ func (s *formatSpecificTestSuite) TestRepositorySetParametersRequiredFeatures_Se
// the server will soon notice the new required feature and shut down.
require.ErrorContains(t, wait(), "no-such-feature")
}

// TestRepositorySetMetadataCompressionParameter checks the metadata compression
// line printed by "repository status": first for a freshly set up repository,
// then after each "repository set-parameters --metadata-compression" call.
func (s *formatSpecificTestSuite) TestRepositorySetMetadataCompressionParameter(t *testing.T) {
	env := s.setupInMemoryRepo(t)

	// default metadata compression
	require.Contains(t, env.RunAndExpectSuccess(t, "repository", "status"), "Metadata compression: zstd-fastest")

	steps := []struct {
		algorithm  string // value passed to --metadata-compression
		wantStatus string // text expected in the status output afterwards
	}{
		// disable metadata compression
		{algorithm: "none", wantStatus: "Metadata compression: disabled"},
		// set metadata compression
		{algorithm: "zstd-best-compression", wantStatus: "Metadata compression: zstd-best-compression"},
	}

	for _, step := range steps {
		env.RunAndExpectSuccess(t, "repository", "set-parameters", "--metadata-compression", step.algorithm)

		status := env.RunAndExpectSuccess(t, "repository", "status")
		require.Contains(t, status, step.wantStatus)
	}
}
11 changes: 11 additions & 0 deletions cli/command_repository_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,17 @@ func (c *commandRepositoryStatus) run(ctx context.Context, rep repo.Repository)
c.out.printStdout("Max pack length: %v\n", units.BytesString(int64(mp.MaxPackSize)))
c.out.printStdout("Index Format: v%v\n", mp.IndexVersion)

c.out.printStdout("\n")
switch {
case mp.MetadataCompression == "none":
c.out.printStdout("Metadata compression: disabled\n")
case mp.MetadataCompression != "":
c.out.printStdout("Metadata compression: %v\n", mp.MetadataCompression)
default:
// For older repo where MetadataCompression is not set, use ZstdFastest algorithm by default
c.out.printStdout("Metadata compression: %s\n", format.DefaultMetadataCompressionAlgorithmName)
}

emgr, epochMgrEnabled, emerr := dr.ContentReader().EpochManager(ctx)
if emerr != nil {
return errors.Wrap(emerr, "epoch manager")
Expand Down
15 changes: 12 additions & 3 deletions repo/content/content_manager_lock_free.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,18 @@ func (sm *SharedManager) maybeCompressAndEncryptDataForPacking(data gather.Bytes

// If the content is prefixed (which represents Kopia's own metadata as opposed to user data),
// and we're on V2 format or greater, enable internal compression even when not requested.
if contentID.HasPrefix() && comp == NoCompression && mp.IndexVersion >= index.Version2 {
// 'zstd-fastest' has a good mix of being fast, low memory usage and high compression for JSON.
comp = compression.HeaderZstdFastest
// comp will be overridden by the configured metadata compression value in repository params.
if contentID.HasPrefix() && mp.IndexVersion >= index.Version2 {
mp := sm.format.GetCachedMutableParameters()
switch {
case mp.MetadataCompression == "none":
comp = NoCompression
case mp.MetadataCompression != "":
comp = compression.ByName[mp.MetadataCompression].HeaderID()
default:
// For older repo where MetadataCompression is not set, use ZstdFastest algorithm by default
comp = compression.ByName[format.DefaultMetadataCompressionAlgorithmName].HeaderID()
}
plar marked this conversation as resolved.
Show resolved Hide resolved
}

//nolint:nestif
Expand Down
66 changes: 49 additions & 17 deletions repo/content/content_manager_test.go

Large diffs are not rendered by default.

13 changes: 9 additions & 4 deletions repo/format/content_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,13 @@ import (

"github.com/kopia/kopia/internal/epoch"
"github.com/kopia/kopia/internal/units"
"github.com/kopia/kopia/repo/compression"
"github.com/kopia/kopia/repo/content/index"
)

// DefaultMetadataCompressionAlgorithmName is the name of the default metadata compression algorithm.
// 'zstd-fastest' has a good mix of being fast, low memory usage and high compression for JSON.
const DefaultMetadataCompressionAlgorithmName = "zstd-fastest"

// ContentFormat describes the rules for formatting contents in repository.
type ContentFormat struct {
Hash string `json:"hash,omitempty"` // identifier of the hash algorithm used
Expand Down Expand Up @@ -63,10 +67,11 @@ func (f *ContentFormat) SupportsPasswordChange() bool {
// MutableParameters represents parameters of the content manager that can be mutated after the repository
// is created.
type MutableParameters struct {
Version Version `json:"version,omitempty"` // version number, must be "1", "2" or "3"
MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object
IndexVersion int `json:"indexVersion,omitempty"` // force particular index format version (1,2,..)
EpochParameters epoch.Parameters `json:"epochParameters,omitempty"` // epoch manager parameters
Version Version `json:"version,omitempty"` // version number, must be "1", "2" or "3"
MaxPackSize int `json:"maxPackSize,omitempty"` // maximum size of a pack object
IndexVersion int `json:"indexVersion,omitempty"` // force particular index format version (1,2,..)
EpochParameters epoch.Parameters `json:"epochParameters,omitempty"` // epoch manager parameters
MetadataCompression compression.Name `json:"metadataCompression,omitempty"` // metadata compression algorithm name

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The approach implemented in this PR is to have a repo-wide setting in MutableParameters.

Does it make sense to make this a policy setting instead?
Are there any tradeoffs in doing so, and if so, which ones?

Copy link
Collaborator Author

@PrasadG193 PrasadG193 Jun 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main disadvantage of a policy setting was the implementation and testing complexity. Since this feature is not expected to be used frequently (it applies only to special cases), we decided to go with the simplest option, which is also quicker to implement. We don't need to configure metadata compression per file; the setting applies to the whole repository rather than at the file level.

}

// Validate validates the parameters.
Expand Down
Loading