/
reptar.go
120 lines (106 loc) · 2.89 KB
/
reptar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package reptar
import (
"archive/tar"
"compress/gzip"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"time"
)
// zeroTime is the zero time.Time value. Reptar assigns it to every header
// timestamp (modification, access and change time) so that archive contents
// do not depend on the timestamps of the files on disk.
var zeroTime time.Time
// References:
// http://h2.jaguarpaw.co.uk/posts/reproducible-tar/
// https://reproducible-builds.org/docs/archives/
// Reptar writes a tar archive of everything below location to out. Reptar
// stands for reproducible tar and is intended to replicate the following GNU
// tar command:
//
//	tar \
//	  --sort=name \
//	  --mtime="1970-01-01 00:00:00Z" \
//	  --owner=0 --group=0 --numeric-owner \
//	  --pax-option=exthdr.name=%d/PaxHeaders/%f,delete=atime,delete=ctime \
//	  -cf
//
// Every entry is written in PAX format with zeroed modification, access and
// change times, uid and gid 0, and empty user and group names. The root
// itself is not archived. Entry names are the walked path with the cleaned
// location prefix removed (so they normally keep the leading separator), and
// directory names end in "/".
//
// The first error hit while walking, reading a link, writing a header or
// copying file contents is returned and the archive is left unfinished. out
// is never closed.
//
// This command is currently not complete and only works on very basic test
// cases. GNU Tar also adds padding to outputted files, which is not
// replicated here.
func Reptar(location string, out io.Writer) (err error) {
	// TODO: add our own null padding to match GNU Tar
	// TODO: test with hardlinks
	// TODO: confirm name sorting is identical in all cases
	// TODO: disallow absolute paths
	tw := tar.NewWriter(out)
	location = filepath.Clean(location)

	// filepath.Walk visits entries in lexical order, which is what provides
	// the name sorting.
	err = filepath.Walk(location, func(path string, fi os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		// Only the contents of location are archived, not location itself.
		if path == location {
			return nil
		}

		var linkTarget string
		if isSymlink(fi) {
			linkTarget, err = os.Readlink(path)
			if err != nil {
				return fmt.Errorf("%s: readlink: %w", fi.Name(), err)
			}
			// TODO: convert from absolute to relative
		}

		hdr, err := tar.FileInfoHeader(fi, filepath.ToSlash(linkTarget))
		if err != nil {
			return err
		}

		// Setting an explicit unix epoch using time.Date(1970, time.January..)
		// resulted in zeros in the timestamp and not null, so we explicitly use
		// a null time
		hdr.ModTime = zeroTime
		hdr.AccessTime = zeroTime
		hdr.ChangeTime = zeroTime

		// It seems that both setting these to 0 and using empty strings for
		// Gname and Uname is required
		hdr.Uid = 0
		hdr.Gid = 0
		hdr.Gname = ""
		hdr.Uname = ""

		// pax format
		hdr.Format = tar.FormatPAX

		name := strings.TrimPrefix(path, location)
		// GNU Tar adds a slash to the end of directories, but Go removes them
		if fi.IsDir() {
			name += "/"
		}
		// Tar member names are slash separated whatever the host OS uses; the
		// link target above already gets the same treatment.
		hdr.Name = filepath.ToSlash(name)

		if err = tw.WriteHeader(hdr); err != nil {
			return fmt.Errorf("%s: writing header: %w", hdr.Name, err)
		}

		// Directories, symlinks and other special entries have no contents.
		if hdr.Typeflag != tar.TypeReg {
			return nil
		}
		return writeFileContents(tw, path, fi.Name())
	})
	if err != nil {
		return err
	}
	return tw.Close()
}

// writeFileContents streams the file at path into tw as the body of the most
// recently written header. name is only used to label a copy error.
func writeFileContents(tw *tar.Writer, path, name string) error {
	file, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("%s: opening: %w", path, err)
	}
	// The handle is read-only, so a Close error cannot lose data and is
	// ignored; deferring guarantees the descriptor is released even when the
	// copy fails.
	defer file.Close()

	if _, err := io.Copy(tw, file); err != nil {
		return fmt.Errorf("%s: copying contents: %w", name, err)
	}
	return nil
}
// GzipReptar wraps Reptar in gzip: the archive of location produced by Reptar
// is compressed and written to out.
//
// The gzip stream is always closed before returning. An error from Reptar
// takes precedence; otherwise the error from closing the gzip writer is
// returned, since Close is what flushes the remaining compressed data and
// writes the gzip footer. out itself is not closed.
func GzipReptar(location string, out io.Writer) (err error) {
	zw := gzip.NewWriter(out)
	defer func() {
		// Do not let a successful Reptar hide a failed flush of the stream.
		if closeErr := zw.Close(); err == nil {
			err = closeErr
		}
	}()
	return Reptar(location, zw)
}
// isSymlink reports whether fi's mode has the symbolic-link bit set.
func isSymlink(fi os.FileInfo) bool {
	mode := fi.Mode()
	return mode&os.ModeSymlink == os.ModeSymlink
}