Skip to content

Commit

Permalink
init: Add --copy-chunker-params option
Browse files Browse the repository at this point in the history
This allows creating multiple repositories with identical chunker
parameters which is required for working deduplication when copying
snapshots between different repositories.
  • Loading branch information
MichaelEischer committed Sep 19, 2020
1 parent 51465eb commit 270b82c
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 6 deletions.
7 changes: 7 additions & 0 deletions changelog/unreleased/issue-323
Expand Up @@ -9,5 +9,12 @@ and destination repository. Also, the transferred files are not re-chunked,
which may break deduplication between files already stored in the
destination repo and files copied there using this command.

To fully support deduplication between repositories when the copy command is
used, the init command now supports the `--copy-chunker-params` option, which
initializes the new repository with the same parameters for splitting files
into chunks as an already existing repository. This allows copied snapshots
to be deduplicated equally well in both repositories.

https://github.com/restic/restic/issues/323
https://github.com/restic/restic/pull/2606
https://github.com/restic/restic/pull/2928
46 changes: 43 additions & 3 deletions cmd/restic/cmd_init.go
@@ -1,6 +1,7 @@
package main

import (
"github.com/restic/chunker"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/repository"

Expand All @@ -20,19 +21,36 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
`,
DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error {
return runInit(globalOptions, args)
return runInit(initOptions, globalOptions, args)
},
}

// InitOptions bundles all options for the init command.
type InitOptions struct {
// secondaryRepoOptions is embedded so that the secondary repository
// settings (such as Repo and password) are available directly on
// InitOptions; they identify the repository to read chunker parameters from.
secondaryRepoOptions
// CopyChunkerParameters is bound to the --copy-chunker-params flag. When
// set, the chunker polynomial is read from the secondary repository and
// handed to the repository initialization.
CopyChunkerParameters bool
}

// initOptions holds the flag values of the init command. The flags registered
// in init write into it, and cmdInit passes it to runInit.
var initOptions InitOptions

// init attaches the init command to the root command and registers its
// flags: the secondary repository options and --copy-chunker-params, all of
// which are stored in the package-level initOptions.
func init() {
	cmdRoot.AddCommand(cmdInit)

	flags := cmdInit.Flags()
	initSecondaryRepoOptions(flags, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameter from")
	flags.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository, useful in combination with the copy command")
}

func runInit(gopts GlobalOptions, args []string) error {
func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
if gopts.Repo == "" {
return errors.Fatal("Please specify repository location (-r)")
}

chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts)
if err != nil {
return err
}

be, err := create(gopts.Repo, gopts.extended)
if err != nil {
return errors.Fatalf("create repository at %s failed: %v\n", gopts.Repo, err)
Expand All @@ -47,7 +65,7 @@ func runInit(gopts GlobalOptions, args []string) error {

s := repository.New(be)

err = s.Init(gopts.ctx, gopts.password)
err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial)
if err != nil {
return errors.Fatalf("create key in repository at %s failed: %v\n", gopts.Repo, err)
}
Expand All @@ -60,3 +78,25 @@ func runInit(gopts GlobalOptions, args []string) error {

return nil
}

// maybeReadChunkerPolynomial determines the chunker polynomial to hand to the
// repository initialization.
//
// If opts.CopyChunkerParameters is not set, it returns a nil polynomial; in
// that case specifying a secondary repository is rejected with a fatal error.
// If it is set, the secondary repository is opened and a pointer to a copy of
// its configured chunker polynomial is returned. Errors from resolving the
// secondary options or from opening the repository are returned unchanged.
func maybeReadChunkerPolynomial(opts InitOptions, gopts GlobalOptions) (*chunker.Pol, error) {
	if !opts.CopyChunkerParameters {
		// A secondary repository is only meaningful together with
		// --copy-chunker-params, so reject it instead of ignoring it.
		if opts.Repo != "" {
			return nil, errors.Fatal("Secondary repository must only be specified when copying the chunker parameters")
		}
		return nil, nil
	}

	secondaryGopts, err := fillSecondaryGlobalOpts(opts.secondaryRepoOptions, gopts, "secondary")
	if err != nil {
		return nil, err
	}

	secondaryRepo, err := OpenRepository(secondaryGopts)
	if err != nil {
		return nil, err
	}

	// Copy the value into a local so the returned pointer does not alias the
	// secondary repository's config.
	polynomial := secondaryRepo.Config().ChunkerPolynomial
	return &polynomial, nil
}
32 changes: 31 additions & 1 deletion cmd/restic/integration_test.go
Expand Up @@ -51,7 +51,7 @@ func testRunInit(t testing.TB, opts GlobalOptions) {
restic.TestDisableCheckPolynomial(t)
restic.TestSetLockTimeout(t, 0)

rtest.OK(t, runInit(opts, nil))
rtest.OK(t, runInit(InitOptions{}, opts, nil))
t.Logf("repository initialized at %v", opts.Repo)
}

Expand Down Expand Up @@ -731,6 +731,36 @@ func TestCopyIncremental(t *testing.T) {
len(copiedSnapshotIDs), len(snapshotIDs))
}

// TestInitCopyChunkerParams checks two things about init: naming a secondary
// repository without CopyChunkerParameters must fail, and with
// CopyChunkerParameters set the new repository must end up with the same
// chunker polynomial as the secondary repository.
func TestInitCopyChunkerParams(t *testing.T) {
	dstEnv, dstCleanup := withTestEnvironment(t)
	defer dstCleanup()
	srcEnv, srcCleanup := withTestEnvironment(t)
	defer srcCleanup()

	// The secondary repository has to exist before its parameters can be copied.
	testRunInit(t, srcEnv.gopts)

	var initOpts InitOptions
	initOpts.Repo = srcEnv.gopts.Repo
	initOpts.password = srcEnv.gopts.password

	// Secondary repository given, but copying not requested: must be rejected.
	err := runInit(initOpts, dstEnv.gopts, nil)
	rtest.Assert(t, err != nil, "expected invalid init options to fail")

	initOpts.CopyChunkerParameters = true
	rtest.OK(t, runInit(initOpts, dstEnv.gopts, nil))

	dstRepo, err := OpenRepository(dstEnv.gopts)
	rtest.OK(t, err)

	srcRepo, err := OpenRepository(srcEnv.gopts)
	rtest.OK(t, err)

	got := dstRepo.Config().ChunkerPolynomial
	want := srcRepo.Config().ChunkerPolynomial
	rtest.Assert(t, got == want,
		"expected equal chunker polynomials, got %v expected %v", got, want)
}

// testRunTag invokes runTag with the given tag and global options and an
// empty argument list, and hands the resulting error to rtest.OK.
func testRunTag(t testing.TB, opts TagOptions, gopts GlobalOptions) {
	err := runTag(opts, gopts, []string{})
	rtest.OK(t, err)
}
Expand Down
25 changes: 24 additions & 1 deletion doc/045_working_with_repos.rst
Expand Up @@ -110,7 +110,8 @@ be skipped by later copy runs.
entire snapshot(s) due to the different encryption keys used in the source and
destination repository. Also, the transferred files are not re-chunked, which
may break deduplication between files already stored in the destination repo
and files copied there using this command.
and files copied there using this command. See the next section for how to avoid
this problem.

For the destination repository ``--repo2`` the password can be read from
a file ``--password-file2`` or from a command ``--password-command2``.
Expand Down Expand Up @@ -142,6 +143,28 @@ which case only these instead of all snapshots will be copied:
$ restic -r /srv/restic-repo copy --repo2 /srv/restic-repo-copy 410b18a2 4e5d5487 latest
Ensuring deduplication for copied snapshots
-------------------------------------------

Even though the copy command can transfer snapshots between arbitrary repositories,
deduplication between snapshots from the source and destination repository may not work.
To ensure proper deduplication, both repositories have to use the same parameters for
splitting large files into smaller chunks, which requires additional setup steps. With
the same parameters, restic will split identical files into identical chunks in both
repositories, and therefore deduplication also works for snapshots copied between
these repositories.

The chunker parameters are generated once when creating a new (destination) repository.
That is, for a copy destination repository we have to instruct restic to initialize it
using the same chunker parameters as the source repository:

.. code-block:: console

    $ restic -r /srv/restic-repo-copy init --repo2 /srv/restic-repo --copy-chunker-params

Note that it is not possible to change the chunker parameters of an existing repository.


Checking integrity and consistency
==================================

Expand Down
6 changes: 5 additions & 1 deletion internal/repository/repository.go
Expand Up @@ -8,6 +8,7 @@ import (
"io"
"os"

"github.com/restic/chunker"
"github.com/restic/restic/internal/cache"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug"
Expand Down Expand Up @@ -614,7 +615,7 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int

// Init creates a new master key with the supplied password, initializes and
// saves the repository config.
func (r *Repository) Init(ctx context.Context, password string) error {
func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error {
has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile})
if err != nil {
return err
Expand All @@ -627,6 +628,9 @@ func (r *Repository) Init(ctx context.Context, password string) error {
if err != nil {
return err
}
if chunkerPolynomial != nil {
cfg.ChunkerPolynomial = *chunkerPolynomial
}

return r.init(ctx, password, cfg)
}
Expand Down

0 comments on commit 270b82c

Please sign in to comment.