Skip to content

Commit

Permalink
Merge pull request #4006 from MichaelEischer/deviceID-only-for-hardlinks
Browse files Browse the repository at this point in the history
archiver: only store deviceID for hardlinks
  • Loading branch information
MichaelEischer committed Mar 28, 2024
2 parents 870904d + cf81f8c commit 71b6284
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 8 deletions.
16 changes: 16 additions & 0 deletions changelog/unreleased/pull-4006
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Enhancement: (alpha) Store deviceID only for hardlinks

Set `RESTIC_FEATURES=device-id-for-hardlinks` to enable this alpha feature.
The feature flag will be removed after repository format version 3 becomes
available or be replaced with a different solution.

When creating backups from a filesystem snapshot, for example created using
btrfs subvolumes, the deviceID of the filesystem changes compared to previous
snapshots. This prevented restic from deduplicating the directory metadata of
a snapshot.

When this alpha feature is enabled, then the deviceID is only stored for
hardlinks. This significantly reduces the metadata duplication for most
backups.

https://github.com/restic/restic/pull/4006
1 change: 1 addition & 0 deletions changelog/unreleased/pull-4503
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ If files on different devices had the same inode id, then the `stats` command
did not correctly calculate the snapshot size. This has been fixed.

https://github.com/restic/restic/pull/4503
https://github.com/restic/restic/pull/4006
https://forum.restic.net/t/possible-bug-in-stats/6461/8
11 changes: 7 additions & 4 deletions cmd/restic/cmd_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,11 +270,14 @@ func statsWalkTree(repo restic.Loader, opts StatsOptions, stats *statsContainer,
// will still be restored
stats.TotalFileCount++

// if inodes are present, only count each inode once
// (hard links do not increase restore size)
if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 {
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
if node.Links == 1 || node.Type == "dir" {
stats.TotalSize += node.Size
} else {
// if hardlinks are present only count each deviceID+inode once
if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 {
hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{})
stats.TotalSize += node.Size
}
}
}

Expand Down
9 changes: 9 additions & 0 deletions internal/archiver/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/feature"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
"golang.org/x/sync/errgroup"
Expand Down Expand Up @@ -188,6 +189,14 @@ func (arch *Archiver) nodeFromFileInfo(snPath, filename string, fi os.FileInfo)
if !arch.WithAtime {
node.AccessTime = node.ModTime
}
if feature.Flag.Enabled(feature.DeviceIDForHardlinks) {
if node.Links == 1 || node.Type == "dir" {
// the DeviceID is only necessary for hardlinked files
// when using subvolumes or snapshots their deviceIDs tend to change which causes
// restic to upload new tree blobs
node.DeviceID = 0
}
}
// overwrite name to match that within the snapshot
node.Name = path.Base(snPath)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions internal/archiver/archiver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/restic/restic/internal/backend/mem"
"github.com/restic/restic/internal/checker"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/feature"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
Expand Down Expand Up @@ -2125,6 +2126,8 @@ const (
)

func TestMetadataChanged(t *testing.T) {
defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)()

files := TestDir{
"testfile": TestFile{
Content: "foo bar test file",
Expand Down Expand Up @@ -2153,6 +2156,7 @@ func TestMetadataChanged(t *testing.T) {
sn, node2 := snapshot(t, repo, fs, nil, "testfile")

// set some values so we can then compare the nodes
want.DeviceID = 0
want.Content = node2.Content
want.Path = ""
if len(want.ExtendedAttributes) == 0 {
Expand Down
48 changes: 48 additions & 0 deletions internal/archiver/archiver_unix_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ package archiver
import (
"os"
"syscall"
"testing"

"github.com/restic/restic/internal/feature"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
restictest "github.com/restic/restic/internal/test"
)

type wrappedFileInfo struct {
Expand Down Expand Up @@ -39,3 +45,45 @@ func wrapFileInfo(fi os.FileInfo) os.FileInfo {

return res
}

func statAndSnapshot(t *testing.T, repo restic.Repository, name string) (*restic.Node, *restic.Node) {
fi := lstat(t, name)
want, err := restic.NodeFromFileInfo(name, fi)
restictest.OK(t, err)

_, node := snapshot(t, repo, fs.Local{}, nil, name)
return want, node
}

func TestHardlinkMetadata(t *testing.T) {
defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)()

files := TestDir{
"testfile": TestFile{
Content: "foo bar test file",
},
"linktarget": TestFile{
Content: "test file",
},
"testlink": TestHardlink{
Target: "./linktarget",
},
"testdir": TestDir{},
}

tempdir, repo := prepareTempdirRepoSrc(t, files)

back := restictest.Chdir(t, tempdir)
defer back()

want, node := statAndSnapshot(t, repo, "testlink")
restictest.Assert(t, node.DeviceID == want.DeviceID, "device id mismatch expected %v got %v", want.DeviceID, node.DeviceID)
restictest.Assert(t, node.Links == want.Links, "link count mismatch expected %v got %v", want.Links, node.Links)
restictest.Assert(t, node.Inode == want.Inode, "inode mismatch expected %v got %v", want.Inode, node.Inode)

_, node = statAndSnapshot(t, repo, "testfile")
restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testfile expected %v got %v", 0, node.DeviceID)

_, node = statAndSnapshot(t, repo, "testdir")
restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testdir expected %v got %v", 0, node.DeviceID)
}
26 changes: 25 additions & 1 deletion internal/archiver/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"path"
"path/filepath"
"runtime"
"sort"
"strings"
"testing"
"time"
Expand Down Expand Up @@ -63,11 +64,29 @@ func (s TestSymlink) String() string {
return "<Symlink>"
}

// TestHardlink describes a hardlink created for a test.
type TestHardlink struct {
Target string
}

func (s TestHardlink) String() string {
return "<Hardlink>"
}

// TestCreateFiles creates a directory structure described by dir at target,
// which must already exist. On Windows, symlinks aren't created.
func TestCreateFiles(t testing.TB, target string, dir TestDir) {
t.Helper()
for name, item := range dir {

// ensure a stable order such that it can be guaranteed that a hardlink target already exists
var names []string
for name := range dir {
names = append(names, name)
}
sort.Strings(names)

for _, name := range names {
item := dir[name]
targetPath := filepath.Join(target, name)

switch it := item.(type) {
Expand All @@ -81,6 +100,11 @@ func TestCreateFiles(t testing.TB, target string, dir TestDir) {
if err != nil {
t.Fatal(err)
}
case TestHardlink:
err := fs.Link(filepath.Join(target, filepath.FromSlash(it.Target)), targetPath)
if err != nil {
t.Fatal(err)
}
case TestDir:
err := fs.Mkdir(targetPath, 0755)
if err != nil {
Expand Down
4 changes: 2 additions & 2 deletions internal/feature/registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@ var Flag = New()

// flag names are written in kebab-case
const (
ExampleFeature FlagName = "example-feature"
DeprecateLegacyIndex FlagName = "deprecate-legacy-index"
DeviceIDForHardlinks FlagName = "device-id-for-hardlinks"
)

func init() {
Flag.SetFlags(map[FlagName]FlagDesc{
ExampleFeature: {Type: Alpha, Description: "just for testing"},
DeprecateLegacyIndex: {Type: Beta, Description: "disable support for index format used by restic 0.1.0. Use `restic repair index` to update the index if necessary."},
DeviceIDForHardlinks: {Type: Alpha, Description: "store deviceID only for hardlinks to reduce metadata changes for example when using btrfs subvolumes. Will be removed in a future restic version after repository format 3 is available"},
})
}
2 changes: 1 addition & 1 deletion internal/restic/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ type Node struct {
User string `json:"user,omitempty"`
Group string `json:"group,omitempty"`
Inode uint64 `json:"inode,omitempty"`
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev
DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev, only stored for hardlinks
Size uint64 `json:"size,omitempty"`
Links uint64 `json:"links,omitempty"`
LinkTarget string `json:"linktarget,omitempty"`
Expand Down

0 comments on commit 71b6284

Please sign in to comment.