// Parameters used to recognize Zstandard skippable frames.
const (
	// zstdMagicSkippableStart is the lowest skippable-frame magic number;
	// the valid range is 0x184D2A50 through 0x184D2A5F.
	zstdMagicSkippableStart = 0x184D2A50
	// zstdMagicSkippableMask clears the low nibble of a magic number so that
	// every value in the range above compares equal to zstdMagicSkippableStart.
	zstdMagicSkippableMask = 0xFFFFFFF0
	// zstdSkippableHeaderLen is the minimum number of bytes required before a
	// skippable frame is recognized: a 4-byte magic number followed by a
	// 4-byte frame size.
	zstdSkippableHeaderLen = 8
)

// Leading byte signatures of the supported compression formats.
var (
	bzip2Magic = []byte{0x42, 0x5A, 0x68}
	gzipMagic  = []byte{0x1F, 0x8B, 0x08}
	xzMagic    = []byte{0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}
	zstdMagic  = []byte{0x28, 0xb5, 0x2f, 0xfd}
)

// matcher reports whether the given leading bytes of a stream identify a
// particular compression format.
type matcher = func([]byte) bool

// magicNumberMatcher returns a matcher that succeeds when the source starts
// with the byte sequence m.
func magicNumberMatcher(m []byte) matcher {
	return func(source []byte) bool {
		return bytes.HasPrefix(source, m)
	}
}

// zstdMatcher detects zstd compression algorithm.
// Zstandard compressed data is made of one or more frames.
// There are two frame formats defined by Zstandard: Zstandard frames and Skippable frames.
// See https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2
// (since published as RFC 8878, Section 3.1) for more details.
//
// The returned matcher accepts a source that either starts with the
// Zstandard frame magic number, or is at least zstdSkippableHeaderLen bytes
// long and starts with a little-endian skippable-frame magic number.
func zstdMatcher() matcher {
	return func(source []byte) bool {
		if bytes.HasPrefix(source, zstdMagic) {
			// Zstandard frame
			return true
		}
		// Skippable frame: the magic number and frame size must both be
		// present; this length check also guards the 4-byte read below.
		if len(source) < zstdSkippableHeaderLen {
			return false
		}
		// Magic number from 0x184D2A50 to 0x184D2A5F, stored little-endian.
		return binary.LittleEndian.Uint32(source[:4])&zstdMagicSkippableMask == zstdMagicSkippableStart
	}
}

// DetectCompression detects the compression algorithm of the source.
func DetectCompression(source []byte) Compression { - for compression, m := range map[Compression][]byte{ - Bzip2: {0x42, 0x5A, 0x68}, - Gzip: {0x1F, 0x8B, 0x08}, - Xz: {0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00}, - Zstd: {0x28, 0xb5, 0x2f, 0xfd}, - } { - if bytes.HasPrefix(source, m) { + compressionMap := map[Compression]matcher{ + Bzip2: magicNumberMatcher(bzip2Magic), + Gzip: magicNumberMatcher(gzipMagic), + Xz: magicNumberMatcher(xzMagic), + Zstd: zstdMatcher(), + } + for _, compression := range []Compression{Bzip2, Gzip, Xz, Zstd} { + fn := compressionMap[compression] + if fn(source) { return compression } } diff --git a/pkg/archive/archive_test.go b/pkg/archive/archive_test.go index 0331c825baefb..a1b894be3c643 100644 --- a/pkg/archive/archive_test.go +++ b/pkg/archive/archive_test.go @@ -700,6 +700,34 @@ func tarUntar(t *testing.T, origin string, options *TarOptions) ([]Change, error return ChangesDirs(origin, tmp) } +func TestDetectCompressionZstd(t *testing.T) { + // test zstd compression without skippable frames. + compressedData := []byte{ + 0x28, 0xb5, 0x2f, 0xfd, // magic number of Zstandard frame: 0xFD2FB528 + 0x04, 0x00, 0x31, 0x00, 0x00, // frame header + 0x64, 0x6f, 0x63, 0x6b, 0x65, 0x72, // data block "docker" + 0x16, 0x0e, 0x21, 0xc3, // content checksum + } + compression := DetectCompression(compressedData) + if compression != Zstd { + t.Fatal("Unexpected compression") + } + // test zstd compression with skippable frames. 
+ hex := []byte{ + 0x50, 0x2a, 0x4d, 0x18, // magic number of skippable frame: 0x184D2A50 to 0x184D2A5F + 0x04, 0x00, 0x00, 0x00, // frame size + 0x5d, 0x00, 0x00, 0x00, // user data + 0x28, 0xb5, 0x2f, 0xfd, // magic number of Zstandard frame: 0xFD2FB528 + 0x04, 0x00, 0x31, 0x00, 0x00, // frame header + 0x64, 0x6f, 0x63, 0x6b, 0x65, 0x72, // data block "docker" + 0x16, 0x0e, 0x21, 0xc3, // content checksum + } + compression = DetectCompression(hex) + if compression != Zstd { + t.Fatal("Unexpected compression") + } +} + func TestTarUntar(t *testing.T) { origin, err := os.MkdirTemp("", "docker-test-untar-origin") if err != nil {