-
Notifications
You must be signed in to change notification settings - Fork 568
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3713 from pachyderm/fileset-serialization-mvp
Fileset Serialization MVP
- Loading branch information
Showing 11 changed files with 306 additions and 64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
package fileset | ||
|
||
import ( | ||
"archive/tar" | ||
"bytes" | ||
"context" | ||
"fmt" | ||
"io" | ||
"math/rand" | ||
"strconv" | ||
"testing" | ||
"time" | ||
|
||
"github.com/pachyderm/pachyderm/src/client/pkg/require" | ||
"github.com/pachyderm/pachyderm/src/server/pkg/storage/chunk" | ||
"github.com/pachyderm/pachyderm/src/server/pkg/storage/fileset/index" | ||
) | ||
|
||
const ( | ||
max = 20 * chunk.MB | ||
) | ||
|
||
// file is the test's in-memory record of one file stored in a file set.
type file struct {
	// hashes holds the data reference hashes observed the last time the file
	// was read back. It is left empty for a freshly generated file so the next
	// read records them.
	hashes []string
	// data is the content written for the file and expected on read.
	data []byte
}
|
||
func dataRefsToHashes(dataRefs []*chunk.DataRef) []string { | ||
var hashes []string | ||
for _, dataRef := range dataRefs { | ||
if dataRef.Hash == "" { | ||
hashes = append(hashes, dataRef.Chunk.Hash) | ||
continue | ||
} | ||
hashes = append(hashes, dataRef.Hash) | ||
} | ||
return hashes | ||
} | ||
|
||
// seedStr renders the random seed as "seed: <decimal>" so it can be attached
// to assertion messages and used to reproduce a failing run.
func seedStr(seed int64) string {
	digits := strconv.FormatInt(seed, 10)
	return fmt.Sprintf("seed: %s", digits)
}
|
||
func TestWriteThenRead(t *testing.T) { | ||
objC, chunks := chunk.LocalStorage(t) | ||
defer func() { | ||
chunks.DeleteAll(context.Background()) | ||
objC.Delete(context.Background(), chunk.Prefix) | ||
}() | ||
fileNames := index.Generate("abc") | ||
files := make(map[string]*file) | ||
seed := time.Now().UTC().UnixNano() | ||
rand.Seed(seed) | ||
for _, fileName := range fileNames { | ||
files[fileName] = &file{ | ||
data: chunk.RandSeq(rand.Intn(max)), | ||
} | ||
} | ||
// Write out ten filesets where each subsequent fileset has the content of one random file changed. | ||
// Confirm that all of the content and hashes other than the changed file remain the same. | ||
// (bryce) we are going to want a dedupe test somewhere, not sure if it makes sense here or in the chunk | ||
// storage layer (probably in the chunk storage layer). | ||
for i := 0; i < 10; i++ { | ||
// Write files to file set. | ||
w := NewWriter(context.Background(), chunks) | ||
for _, fileName := range fileNames { | ||
hdr := &index.Header{ | ||
Hdr: &tar.Header{ | ||
Name: fileName, | ||
Size: int64(len(files[fileName].data)), | ||
}, | ||
} | ||
require.NoError(t, w.WriteHeader(hdr), seedStr(seed)) | ||
_, err := w.Write(files[fileName].data) | ||
require.NoError(t, err, seedStr(seed)) | ||
} | ||
idx, err := w.Close() | ||
require.NoError(t, err, seedStr(seed)) | ||
// Read files from file set, checking against recorded data and hashes. | ||
r := NewReader(context.Background(), chunks, idx, "") | ||
for _, fileName := range fileNames { | ||
hdr, err := r.Next() | ||
require.NoError(t, err, seedStr(seed)) | ||
actualHashes := dataRefsToHashes(hdr.Idx.DataOp.DataRefs) | ||
// If no hashes are recorded (first iteration or changed file), | ||
// then set them based on what was read. | ||
if len(files[fileName].hashes) == 0 { | ||
files[fileName].hashes = actualHashes | ||
} | ||
require.Equal(t, files[fileName].hashes, actualHashes, seedStr(seed)) | ||
actualData := &bytes.Buffer{} | ||
_, err = io.Copy(actualData, r) | ||
require.NoError(t, err, seedStr(seed)) | ||
require.Equal(t, files[fileName].data, actualData.Bytes(), seedStr(seed)) | ||
} | ||
// Change one random file | ||
for fileName := range files { | ||
files[fileName] = &file{ | ||
data: chunk.RandSeq(rand.Intn(max)), | ||
} | ||
break | ||
} | ||
require.NoError(t, chunks.DeleteAll(context.Background()), seedStr(seed)) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package index | ||
|
||
import "sort" | ||
|
||
// Perm calls f with each permutation of a. | ||
func Perm(a []rune, f func([]rune)) { | ||
perm(a, f, 0) | ||
} | ||
|
||
// perm permutes the values at index i to len(a)-1, calling f with a once for
// each arrangement. Elements before index i are left untouched.
//
// The swaps are done in place and undone after each recursive call, so a holds
// its original order again when perm returns. f receives a itself, not a copy.
func perm(a []rune, f func([]rune), i int) {
	// Base case: nothing left to permute once i reaches the end of the slice.
	// Using >= rather than > avoids a redundant recursion level at i == len(a).
	if i >= len(a) {
		f(a)
		return
	}
	// Arrangements that keep a[i] where it is.
	perm(a, f, i+1)
	// Arrangements that place each later element at position i in turn.
	for j := i + 1; j < len(a); j++ {
		a[i], a[j] = a[j], a[i]
		perm(a, f, i+1)
		a[i], a[j] = a[j], a[i] // restore before trying the next candidate
	}
}
|
||
// Generate generates the permutations of the passed in string and returns them sorted. | ||
func Generate(s string) []string { | ||
fileNames := []string{} | ||
Perm([]rune(s), func(fileName []rune) { | ||
fileNames = append(fileNames, string(fileName)) | ||
}) | ||
sort.Strings(fileNames) | ||
return fileNames | ||
} |
Oops, something went wrong.