From a0c475671c4e13fd3092ac77c17cf15621820d77 Mon Sep 17 00:00:00 2001 From: Victor Farazdagi Date: Tue, 15 Dec 2020 06:18:15 -0800 Subject: [PATCH] Add extra methods to shared/fileutil (#8117) * add extra methods to fileutil * Shay's suggestion * Update shared/fileutil/fileutil.go Co-authored-by: Shay Zluf * Adds reference to the original implementation Co-authored-by: Shay Zluf --- shared/fileutil/fileutil.go | 110 ++++++++++++++++++++++ shared/fileutil/fileutil_test.go | 151 +++++++++++++++++++++++++++++++ 2 files changed, 261 insertions(+) diff --git a/shared/fileutil/fileutil.go b/shared/fileutil/fileutil.go index ba36f9263de..cd2bafc251e 100644 --- a/shared/fileutil/fileutil.go +++ b/shared/fileutil/fileutil.go @@ -1,11 +1,16 @@ package fileutil import ( + "crypto/sha256" + "encoding/base64" + "fmt" + "io" "io/ioutil" "os" "os/user" "path" "path/filepath" + "sort" "strings" "github.com/pkg/errors" @@ -139,3 +144,108 @@ func CopyFile(src, dst string) error { } return nil } + +// CopyDir copies contents of one directory into another, recursively. +func CopyDir(src, dst string) error { + dstExists, err := HasDir(dst) + if err != nil { + return err + } + if dstExists { + return errors.New("destination directory already exists") + } + fds, err := ioutil.ReadDir(src) + if err != nil { + return err + } + if err := MkdirAll(dst); err != nil { + return errors.Wrapf(err, "error creating directory: %s", dst) + } + for _, fd := range fds { + srcPath := path.Join(src, fd.Name()) + dstPath := path.Join(dst, fd.Name()) + if fd.IsDir() { + if err = CopyDir(srcPath, dstPath); err != nil { + return errors.Wrapf(err, "error copying directory %s -> %s", srcPath, dstPath) + } + } else { + if err = CopyFile(srcPath, dstPath); err != nil { + return errors.Wrapf(err, "error copying file %s -> %s", srcPath, dstPath) + } + } + } + return nil +} + +// DirsEqual checks whether two directories have the same content. +func DirsEqual(src, dst string) bool { + hash1, err := HashDir(src) + if err != nil { + return false + } + + hash2, err := HashDir(dst) + if err != nil { + return false + } + + return hash1 == hash2 +} + +// HashDir calculates and returns hash of directory: each file's hash is calculated and saved along +// with the file name into the list, after which list is hashed to produce the final signature. +// Implementation is based on https://github.com/golang/mod/blob/release-branch.go1.15/sumdb/dirhash/hash.go +func HashDir(dir string) (string, error) { + files, err := DirFiles(dir) + if err != nil { + return "", err + } + + h := sha256.New() + files = append([]string(nil), files...) + sort.Strings(files) + for _, file := range files { + fd, err := os.Open(filepath.Join(dir, file)) + if err != nil { + return "", err + } + hf := sha256.New() + _, err = io.Copy(hf, fd) + if err != nil { + return "", err + } + if err := fd.Close(); err != nil { + return "", err + } + if _, err := fmt.Fprintf(h, "%x %s\n", hf.Sum(nil), file); err != nil { + return "", err + } + } + return "hashdir:" + base64.StdEncoding.EncodeToString(h.Sum(nil)), nil +} + +// DirFiles returns list of files found within a given directory and its sub-directories. +// Directory prefix will not be included as a part of returned file string i.e. for a file located +// in "dir/foo/bar" only "foo/bar" part will be returned. +func DirFiles(dir string) ([]string, error) { + var files []string + dir = filepath.Clean(dir) + err := filepath.Walk(dir, func(file string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } + relFile := file + if dir != "." { + relFile = file[len(dir)+1:] + } + files = append(files, filepath.ToSlash(relFile)) + return nil + }) + if err != nil { + return nil, err + } + return files, nil +} diff --git a/shared/fileutil/fileutil_test.go b/shared/fileutil/fileutil_test.go index b242520a7e1..aa8dabc3da6 100644 --- a/shared/fileutil/fileutil_test.go +++ b/shared/fileutil/fileutil_test.go @@ -22,6 +22,7 @@ import ( "os" "os/user" "path/filepath" + "sort" "testing" "github.com/prysmaticlabs/prysm/shared/fileutil" @@ -114,6 +115,131 @@ func TestCopyFile(t *testing.T) { assert.Equal(t, true, deepCompare(t, fName, fName+"copy")) } +func TestCopyDir(t *testing.T) { + tmpDir1 := t.TempDir() + tmpDir2 := filepath.Join(t.TempDir(), "copyfolder") + type fileDesc struct { + path string + content []byte + } + fds := []fileDesc{ + { + path: "testfile1", + content: []byte{1, 2, 3}, + }, + { + path: "subfolder1/testfile1", + content: []byte{4, 5, 6}, + }, + { + path: "subfolder1/testfile2", + content: []byte{7, 8, 9}, + }, + { + path: "subfolder2/testfile1", + content: []byte{10, 11, 12}, + }, + { + path: "testfile2", + content: []byte{13, 14, 15}, + }, + } + require.NoError(t, os.MkdirAll(filepath.Join(tmpDir1, "subfolder1"), 0777)) + require.NoError(t, os.MkdirAll(filepath.Join(tmpDir1, "subfolder2"), 0777)) + for _, fd := range fds { + require.NoError(t, fileutil.WriteFile(filepath.Join(tmpDir1, fd.path), fd.content)) + assert.Equal(t, true, fileutil.FileExists(filepath.Join(tmpDir1, fd.path))) + assert.Equal(t, false, fileutil.FileExists(filepath.Join(tmpDir2, fd.path))) + } + + // Make sure that files are copied into non-existent directory only. If directory exists function exits. + assert.ErrorContains(t, "destination directory already exists", fileutil.CopyDir(tmpDir1, t.TempDir())) + require.NoError(t, fileutil.CopyDir(tmpDir1, tmpDir2)) + + // Now, all files should have been copied. + for _, fd := range fds { + assert.Equal(t, true, fileutil.FileExists(filepath.Join(tmpDir2, fd.path))) + assert.Equal(t, true, deepCompare(t, filepath.Join(tmpDir1, fd.path), filepath.Join(tmpDir2, fd.path))) + } + assert.Equal(t, true, fileutil.DirsEqual(tmpDir1, tmpDir2)) +} + +func TestDirsEqual(t *testing.T) { + t.Run("non-existent source directory", func(t *testing.T) { + assert.Equal(t, false, fileutil.DirsEqual(filepath.Join(t.TempDir(), "nonexistent"), t.TempDir())) + }) + + t.Run("non-existent dest directory", func(t *testing.T) { + assert.Equal(t, false, fileutil.DirsEqual(t.TempDir(), filepath.Join(t.TempDir(), "nonexistent"))) + }) + + t.Run("non-empty directory", func(t *testing.T) { + // Start with directories that do not have the same contents. + tmpDir1, tmpFileNames := tmpDirWithContents(t) + tmpDir2 := filepath.Join(t.TempDir(), "newfolder") + assert.Equal(t, false, fileutil.DirsEqual(tmpDir1, tmpDir2)) + + // Copy dir, and retest (hashes should match now). + require.NoError(t, fileutil.CopyDir(tmpDir1, tmpDir2)) + assert.Equal(t, true, fileutil.DirsEqual(tmpDir1, tmpDir2)) + + // Tamper the data, make sure that hashes do not match anymore. + require.NoError(t, os.Remove(filepath.Join(tmpDir1, tmpFileNames[2]))) + assert.Equal(t, false, fileutil.DirsEqual(tmpDir1, tmpDir2)) + }) +} + +func TestHashDir(t *testing.T) { + t.Run("non-existent directory", func(t *testing.T) { + hash, err := fileutil.HashDir(filepath.Join(t.TempDir(), "nonexistent")) + assert.ErrorContains(t, "no such file or directory", err) + assert.Equal(t, "", hash) + }) + + t.Run("empty directory", func(t *testing.T) { + hash, err := fileutil.HashDir(t.TempDir()) + assert.NoError(t, err) + assert.Equal(t, "hashdir:47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=", hash) + }) + + t.Run("non-empty directory", func(t *testing.T) { + tmpDir, _ := tmpDirWithContents(t) + hash, err := fileutil.HashDir(tmpDir) + assert.NoError(t, err) + assert.Equal(t, "hashdir:oSp9wRacwTIrnbgJWcwTvihHfv4B2zRbLYa0GZ7DDk0=", hash) + }) +} + +func TestDirFiles(t *testing.T) { + tmpDir, tmpDirFnames := tmpDirWithContents(t) + tests := []struct { + name string + path string + outFiles []string + }{ + { + name: "dot path", + path: filepath.Join(tmpDir, "/./"), + outFiles: tmpDirFnames, + }, + { + name: "non-empty folder", + path: tmpDir, + outFiles: tmpDirFnames, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + outFiles, err := fileutil.DirFiles(tt.path) + require.NoError(t, err) + + sort.Strings(outFiles) + assert.DeepEqual(t, tt.outFiles, outFiles) + }) + } +} + func deepCompare(t *testing.T, file1, file2 string) bool { sf, err := os.Open(file1) assert.NoError(t, err) @@ -129,3 +255,28 @@ func deepCompare(t *testing.T, file1, file2 string) bool { } return true } + +// tmpDirWithContents returns path to temporary directory having some folders/files in it. +// Directory is automatically removed by internal testing cleanup methods. +func tmpDirWithContents(t *testing.T) (string, []string) { + dir := t.TempDir() + fnames := []string{ + "file1", + "file2", + "subfolder1/file1", + "subfolder1/file2", + "subfolder1/subfolder11/file1", + "subfolder1/subfolder11/file2", + "subfolder1/subfolder12/file1", + "subfolder1/subfolder12/file2", + "subfolder2/file1", + } + require.NoError(t, os.MkdirAll(filepath.Join(dir, "subfolder1", "subfolder11"), 0777)) + require.NoError(t, os.MkdirAll(filepath.Join(dir, "subfolder1", "subfolder12"), 0777)) + require.NoError(t, os.MkdirAll(filepath.Join(dir, "subfolder2"), 0777)) + for _, fname := range fnames { + require.NoError(t, ioutil.WriteFile(filepath.Join(dir, fname), []byte(fname), 0777)) + } + sort.Strings(fnames) + return dir, fnames +}