-
Notifications
You must be signed in to change notification settings - Fork 386
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
cmd/tools: add is-valid-sj1-blob utility
Takes a list of filenames, and evaluates each to see if it appears to be a valid sj1 blob. (Any arbitrary blob is a valid FormatV0 blob, and we can't tell anything helpful about the blob in that case, so we don't bother checking for that.) If the blob does not appear to be valid, output diagnostic information. If it does appear to be valid, show the blob's correct filename inside a storage node blobs directory. Change-Id: I2c1cdde16c3b8c1a9953f4a0a2df816869f27409
- Loading branch information
Showing
1 changed file
with
212 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
// Copyright (C) 2023 Storj Labs, Inc. | ||
// See LICENSE for copying information. | ||
|
||
package main | ||
|
||
import (
	"bufio"
	"bytes"
	"crypto/sha256"
	"encoding/base32"
	"encoding/binary"
	"errors"
	"flag"
	"fmt"
	"io"
	"os"
	"time"

	"github.com/zeebo/blake3"

	"storj.io/common/pb"
	"storj.io/storj/storagenode/pieces"
)
|
||
// Command-line flags.
var (
	// doFindCorrectLength enables a (potentially slow) byte-by-byte search for
	// the truncation point at which the piece data matches the hash claimed
	// in the blob header. Only consulted when the full-file hash check fails.
	doFindCorrectLength = flag.Bool("find-length", false,
		"If set, when a file with invalid length is detected, try to determine the correct length by evaluating "+
			"the hash after every byte. This may be slow.")
)
|
||
const (
	// v1PieceHeaderFramingSize is the size in bytes of the big-endian length
	// prefix at the start of the reserved header area of an sj1 blob.
	v1PieceHeaderFramingSize = 2
	// Rather than look up all the details of how a signature digest is encoded with its salt and how big a salt can be,
	// I'll establish some too-loose bounds based purely on observation.
	minSignatureSize = sha256.Size
	maxSignatureSize = 80
)
|
||
var (
	// Plausibility bounds for the order-creation timestamp: anything before
	// 2013 or more than one day in the future is treated as invalid.
	wayTooEarly = time.Date(2013, 1, 1, 0, 0, 0, 0, time.UTC)
	wayTooLate  = time.Now().AddDate(0, 0, 1)

	// pathEncoding is the lowercase, unpadded base32 alphabet used to encode
	// satellite and piece IDs into storage node blob directory/file names.
	pathEncoding = base32.NewEncoding("abcdefghijklmnopqrstuvwxyz234567").WithPadding(base32.NoPadding)
)
|
||
func readBlobHeader(r io.Reader) (*pb.PieceHeader, error) { | ||
var headerBytes [pieces.V1PieceHeaderReservedArea]byte | ||
framingBytes := headerBytes[:v1PieceHeaderFramingSize] | ||
n, err := io.ReadFull(r, framingBytes) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if n != v1PieceHeaderFramingSize { | ||
return nil, errors.New("could not read whole PieceHeader framing field") | ||
} | ||
headerSize := binary.BigEndian.Uint16(framingBytes) | ||
if headerSize > (pieces.V1PieceHeaderReservedArea - v1PieceHeaderFramingSize) { | ||
return nil, fmt.Errorf("blob PieceHeader framing field claims impossible size of %d bytes", headerSize) | ||
} | ||
|
||
// Now we can read the actual serialized header. | ||
pieceHeaderBytes := headerBytes[v1PieceHeaderFramingSize : v1PieceHeaderFramingSize+headerSize] | ||
_, err = io.ReadFull(r, pieceHeaderBytes) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
// Deserialize and return. | ||
header := &pb.PieceHeader{} | ||
if err := pb.Unmarshal(pieceHeaderBytes, header); err != nil { | ||
return nil, fmt.Errorf("deserializing piece header: %w", err) | ||
} | ||
return header, nil | ||
} | ||
|
||
func checkHash(r io.Reader, claimedHash []byte, hashAlgo pb.PieceHashAlgorithm) (matches bool, err error) { | ||
hasher := pb.NewHashFromAlgorithm(hashAlgo) | ||
if _, err := io.Copy(hasher, r); err != nil { | ||
return false, err | ||
} | ||
calculatedHash := hasher.Sum(nil) | ||
return bytes.Equal(claimedHash, calculatedHash), nil | ||
} | ||
|
||
func checkSanity(header *pb.PieceHeader, fileSize int64) error { | ||
var expectHashSize int | ||
switch header.HashAlgorithm { | ||
case pb.PieceHashAlgorithm_SHA256: | ||
expectHashSize = sha256.Size | ||
case pb.PieceHashAlgorithm_BLAKE3: | ||
expectHashSize = blake3.New().Size() | ||
default: | ||
return fmt.Errorf("invalid PieceHashAlgorithm %d", header.HashAlgorithm) | ||
} | ||
sig := header.Signature | ||
if len(sig) < minSignatureSize || len(sig) > maxSignatureSize { | ||
return fmt.Errorf("signature field has invalid size %d", len(sig)) | ||
} | ||
sig2 := header.OrderLimit.SatelliteSignature | ||
if len(sig2) < minSignatureSize || len(sig2) > maxSignatureSize { | ||
return fmt.Errorf("satellite signature field has invalid size %d", len(sig2)) | ||
} | ||
if header.OrderLimit.Limit < (fileSize - int64(pieces.V1PieceHeaderReservedArea)) { | ||
return fmt.Errorf("order limit size %d is too small for file size %d", header.OrderLimit.Limit, fileSize) | ||
} | ||
if len(header.Hash) != expectHashSize { | ||
return fmt.Errorf("hash field should be %d bytes, but is %d bytes", expectHashSize, len(header.Hash)) | ||
} | ||
if header.OrderLimit.OrderCreation.Before(wayTooEarly) { | ||
return fmt.Errorf("order creation field has improbably early value %s", header.OrderLimit.OrderCreation.String()) | ||
} | ||
if header.OrderLimit.OrderCreation.After(wayTooLate) { | ||
return fmt.Errorf("order creation field has improbably late value %s", header.OrderLimit.OrderCreation.String()) | ||
} | ||
switch header.OrderLimit.Action { | ||
case pb.PieceAction_PUT, pb.PieceAction_GET, pb.PieceAction_GET_AUDIT, pb.PieceAction_GET_REPAIR, pb.PieceAction_PUT_REPAIR, pb.PieceAction_DELETE, pb.PieceAction_PUT_GRACEFUL_EXIT: | ||
default: | ||
return fmt.Errorf("order limit action has invalid value %d", header.OrderLimit.Action) | ||
} | ||
return nil | ||
} | ||
|
||
func findCorrectLength(r io.Reader, claimedHash []byte, hashAlgo pb.PieceHashAlgorithm) (rightLength int64, err error) { | ||
hasher := pb.NewHashFromAlgorithm(hashAlgo) | ||
var readBytes int64 | ||
calculatedHash := make([]byte, hasher.Size()) | ||
for { | ||
cHash := hasher.Sum(calculatedHash[:0]) | ||
if bytes.Equal(claimedHash, cHash) { | ||
return readBytes, nil | ||
} | ||
var buf [1]byte | ||
n, err := r.Read(buf[:]) | ||
if n == 1 { | ||
// we do this even if err != nil; i.e. if err = io.EOF, the read byte is still valid | ||
hasher.Write(buf[:]) | ||
readBytes++ | ||
continue | ||
} | ||
if err != nil { | ||
if errors.Is(err, io.EOF) { | ||
return 0, errors.New("no possible valid length found. (possibly the blob is truncated?)") | ||
} | ||
return 0, err | ||
} | ||
} | ||
} | ||
|
||
func checkFile(fileName string) (report string, realFilename string, err error) { | ||
fh, err := os.Open(fileName) | ||
if err != nil { | ||
return "", "", fmt.Errorf("could not open file %s: %w", fileName, err) | ||
} | ||
header, err := readBlobHeader(fh) | ||
if err != nil { | ||
return fmt.Sprintf("not a valid sj1 blob (%v)", err), "", nil | ||
} | ||
if header.FormatVersion != 1 { | ||
return fmt.Sprintf("not a valid sj1 blob (FormatVersion=%d)", header.FormatVersion), "", nil | ||
} | ||
fileSize, err := fh.Seek(0, io.SeekEnd) | ||
if err != nil { | ||
return "", "", fmt.Errorf("could not seek to end of file: %w", err) | ||
} | ||
_, err = fh.Seek(pieces.V1PieceHeaderReservedArea, io.SeekStart) | ||
if err != nil { | ||
return "", "", fmt.Errorf("could not seek to after header area: %w", err) | ||
} | ||
err = checkSanity(header, fileSize) | ||
if err != nil { | ||
return fmt.Sprintf("not a valid sj1 blob (%v)", err), "", nil | ||
} | ||
matches, err := checkHash(fh, header.Hash, header.HashAlgorithm) | ||
if err != nil { | ||
return "", "", fmt.Errorf("could not read file data to check hash: %w", err) | ||
} | ||
pieceID := pathEncoding.EncodeToString(header.OrderLimit.PieceId[:]) | ||
satelliteID := pathEncoding.EncodeToString(header.OrderLimit.SatelliteId[:]) | ||
realFilename = fmt.Sprintf("%s/%s.sj1", satelliteID, pieceID) | ||
if matches { | ||
return "valid sj1 blob", realFilename, nil | ||
} | ||
if *doFindCorrectLength { | ||
_, err = fh.Seek(pieces.V1PieceHeaderReservedArea, io.SeekStart) | ||
if err != nil { | ||
return "", "", fmt.Errorf("could not seek to after header area: %w", err) | ||
} | ||
rightLength, err := findCorrectLength(fh, header.Hash, header.HashAlgorithm) | ||
if err != nil { | ||
return fmt.Sprintf("appears to be a valid sj1 blob, but could not determine correct length: %v", err), "", nil | ||
} | ||
fileLength := rightLength + pieces.V1PieceHeaderReservedArea | ||
return fmt.Sprintf("valid sj1 blob but should be truncated at %d bytes. Hint:\n truncate -s %d %q", fileLength, fileLength, fileName), realFilename, nil | ||
} | ||
return fmt.Sprintf("valid sj1 blob with incorrect size: %s/%s.sj1", satelliteID, pieceID), realFilename, nil | ||
} | ||
|
||
func main() { | ||
flag.Parse() | ||
|
||
fileNames := flag.Args() | ||
for _, name := range fileNames { | ||
report, realFilename, err := checkFile(name) | ||
if err != nil { | ||
_, _ = fmt.Fprintf(os.Stderr, "%s: %v\n", name, err) | ||
continue | ||
} | ||
fmt.Printf("%s: %s\n", name, report) | ||
if realFilename != "" { | ||
fmt.Printf("%s=%s\n", name, realFilename) | ||
} | ||
} | ||
} |
e83f41e
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This commit has been mentioned on Storj Community Forum (official). There might be relevant details there:
https://forum.storj.io/t/how-to-get-the-name-of-a-piece-having-its-file/19398/12