Skip to content

Commit

Permalink
fusefrontend: get the file ID from the open files table
Browse files Browse the repository at this point in the history
This fixes the problem that a truncate can reset the file
ID without the other open FDs noticing it.
  • Loading branch information
rfjakob committed Nov 17, 2016
1 parent e04dc05 commit 0489d08
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 84 deletions.
110 changes: 65 additions & 45 deletions internal/fusefrontend/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@ type file struct {
contentEnc *contentenc.ContentEnc
// Device and inode number uniquely identify the backing file
devIno DevInoStruct
// File header
header *contentenc.FileHeader
// Entry in the open file map
fileTableEntry *openFileEntryT
// go-fuse nodefs.loopbackFile
loopbackFile nodefs.File
// Store what the last byte was written
// Store where the last byte was written
lastWrittenOffset int64
// The opCount is used to judge whether "lastWrittenOffset" is still
// guaranteed to be correct.
Expand All @@ -60,14 +60,15 @@ func NewFile(fd *os.File, writeOnly bool, contentEnc *contentenc.ContentEnc) (no
return nil, fuse.ToStatus(err)
}
di := DevInoFromStat(&st)
wlock.register(di)
t := openFileMap.register(di)

return &file{
fd: fd,
writeOnly: writeOnly,
contentEnc: contentEnc,
devIno: di,
loopbackFile: nodefs.NewLoopbackFile(fd),
fd: fd,
writeOnly: writeOnly,
contentEnc: contentEnc,
devIno: di,
fileTableEntry: t,
loopbackFile: nodefs.NewLoopbackFile(fd),
}, fuse.OK
}

Expand All @@ -84,44 +85,39 @@ func (f *file) InnerFile() nodefs.File {
func (f *file) SetInode(n *nodefs.Inode) {
}

// readHeader - load the file header from disk
//
// Returns io.EOF if the file is empty
func (f *file) readHeader() error {
// readFileID loads the file header from disk and extracts the file ID.
// Returns io.EOF if the file is empty.
func (f *file) readFileID() ([]byte, error) {
buf := make([]byte, contentenc.HeaderLen)
_, err := f.fd.ReadAt(buf, 0)
if err != nil {
return err
return nil, err
}
h, err := contentenc.ParseHeader(buf)
if err != nil {
return err
return nil, err
}
f.header = h

return nil
return h.ID, nil
}

// createHeader - create a new random header and write it to disk
func (f *file) createHeader() error {
// createHeader creates a new random header and writes it to disk.
// Returns the new file ID.
// The caller must hold fileIDLock.Lock().
func (f *file) createHeader() (fileID []byte, err error) {
h := contentenc.RandomHeader()
buf := h.Pack()

// Prevent partially written (=corrupt) header by preallocating the space beforehand
err := syscallcompat.EnospcPrealloc(int(f.fd.Fd()), 0, contentenc.HeaderLen)
err = syscallcompat.EnospcPrealloc(int(f.fd.Fd()), 0, contentenc.HeaderLen)
if err != nil {
tlog.Warn.Printf("ino%d: createHeader: prealloc failed: %s\n", f.devIno.ino, err.Error())
return err
return nil, err
}

// Actually write header
_, err = f.fd.WriteAt(buf, 0)
if err != nil {
return err
return nil, err
}
f.header = h

return nil
return h.ID, err
}

func (f *file) String() string {
Expand All @@ -137,25 +133,39 @@ func (f *file) String() string {
// Called by Read() for normal reading,
// by Write() and Truncate() for Read-Modify-Write
func (f *file) doRead(off uint64, length uint64) ([]byte, fuse.Status) {

// Read file header
if f.header == nil {
err := f.readHeader()
// Make sure we have the file ID.
f.fileTableEntry.IDLock.RLock()
if f.fileTableEntry.ID == nil {
f.fileTableEntry.IDLock.RUnlock()
// Yes, somebody else may take the lock before we can. This will get
// the header read twice, but causes no harm otherwise.
f.fileTableEntry.IDLock.Lock()
tmpID, err := f.readFileID()
if err == io.EOF {
f.fileTableEntry.IDLock.Unlock()
return nil, fuse.OK
}
if err != nil {
f.fileTableEntry.IDLock.Unlock()
return nil, fuse.ToStatus(err)
}
f.fileTableEntry.ID = tmpID
// Downgrade the lock.
f.fileTableEntry.IDLock.Unlock()
// The file ID may change in here. This does no harm because we
// re-read it after the RLock().
f.fileTableEntry.IDLock.RLock()
}

fileID := f.fileTableEntry.ID
// Read the backing ciphertext in one go
blocks := f.contentEnc.ExplodePlainRange(off, length)
alignedOffset, alignedLength := blocks[0].JointCiphertextRange(blocks)
skip := blocks[0].Skip
tlog.Debug.Printf("JointCiphertextRange(%d, %d) -> %d, %d, %d", off, length, alignedOffset, alignedLength, skip)
ciphertext := make([]byte, int(alignedLength))
n, err := f.fd.ReadAt(ciphertext, int64(alignedOffset))
// We don't care if the file ID changes after we have read the data. Drop the lock.
f.fileTableEntry.IDLock.RUnlock()
if err != nil && err != io.EOF {
tlog.Warn.Printf("read: ReadAt: %s", err.Error())
return nil, fuse.ToStatus(err)
Expand All @@ -167,7 +177,7 @@ func (f *file) doRead(off uint64, length uint64) ([]byte, fuse.Status) {
tlog.Debug.Printf("ReadAt offset=%d bytes (%d blocks), want=%d, got=%d", alignedOffset, firstBlockNo, alignedLength, n)

// Decrypt it
plaintext, err := f.contentEnc.DecryptBlocks(ciphertext, firstBlockNo, f.header.ID)
plaintext, err := f.contentEnc.DecryptBlocks(ciphertext, firstBlockNo, fileID)
if err != nil {
curruptBlockNo := firstBlockNo + f.contentEnc.PlainOffToBlockNo(uint64(len(plaintext)))
tlog.Warn.Printf("ino%d: doRead: corrupt block #%d: %v", f.devIno.ino, curruptBlockNo, err)
Expand Down Expand Up @@ -223,18 +233,28 @@ func (f *file) Read(buf []byte, off int64) (resultData fuse.ReadResult, code fus
//
// Empty writes do nothing and are allowed.
func (f *file) doWrite(data []byte, off int64) (uint32, fuse.Status) {

// Read header from disk, create a new one if the file is empty
if f.header == nil {
err := f.readHeader()
f.fileTableEntry.IDLock.RLock()
if f.fileTableEntry.ID == nil {
f.fileTableEntry.IDLock.RUnlock()
// Somebody else may write the header here, but this would do no harm.
f.fileTableEntry.IDLock.Lock()
tmpID, err := f.readFileID()
if err == io.EOF {
err = f.createHeader()

tmpID, err = f.createHeader()
}
if err != nil {
f.fileTableEntry.IDLock.Unlock()
return 0, fuse.ToStatus(err)
}
f.fileTableEntry.ID = tmpID
f.fileTableEntry.IDLock.Unlock()
// The file ID may change in here. This does no harm because we
// re-read it after the RLock().
f.fileTableEntry.IDLock.RLock()
}
fileID := f.fileTableEntry.ID
defer f.fileTableEntry.IDLock.RUnlock()

var written uint32
status := fuse.OK
Expand All @@ -261,7 +281,7 @@ func (f *file) doWrite(data []byte, off int64) (uint32, fuse.Status) {

// Encrypt
blockOffset := b.BlockCipherOff()
blockData = f.contentEnc.EncryptBlock(blockData, b.BlockNo, f.header.ID)
blockData = f.contentEnc.EncryptBlock(blockData, b.BlockNo, fileID)
tlog.Debug.Printf("ino%d: Writing %d bytes to block #%d",
f.devIno.ino, uint64(len(blockData))-f.contentEnc.BlockOverhead(), b.BlockNo)

Expand Down Expand Up @@ -292,7 +312,7 @@ func (f *file) doWrite(data []byte, off int64) (uint32, fuse.Status) {
// Stat() call is very expensive.
// The caller must "wlock.lock(f.devIno.ino)" otherwise this check would be racy.
func (f *file) isConsecutiveWrite(off int64) bool {
opCount := atomic.LoadUint64(&wlock.opCount)
opCount := atomic.LoadUint64(&openFileMap.opCount)
return opCount == f.lastOpCount+1 && off == f.lastWrittenOffset+1
}

Expand All @@ -309,8 +329,8 @@ func (f *file) Write(data []byte, off int64) (uint32, fuse.Status) {
tlog.Warn.Printf("ino%d fh%d: Write on released file", f.devIno.ino, f.intFd())
return 0, fuse.EBADF
}
wlock.lock(f.devIno)
defer wlock.unlock(f.devIno)
f.fileTableEntry.writeLock.Lock()
defer f.fileTableEntry.writeLock.Unlock()
tlog.Debug.Printf("ino%d: FUSE Write: offset=%d length=%d", f.devIno.ino, off, len(data))
// If the write creates a file hole, we have to zero-pad the last block.
// But if the write directly follows an earlier write, it cannot create a
Expand All @@ -323,7 +343,7 @@ func (f *file) Write(data []byte, off int64) (uint32, fuse.Status) {
}
n, status := f.doWrite(data, off)
if status.Ok() {
f.lastOpCount = atomic.LoadUint64(&wlock.opCount)
f.lastOpCount = atomic.LoadUint64(&openFileMap.opCount)
f.lastWrittenOffset = off + int64(len(data)) - 1
}
return n, status
Expand All @@ -339,7 +359,7 @@ func (f *file) Release() {
f.released = true
f.fdLock.Unlock()

wlock.unregister(f.devIno)
openFileMap.unregister(f.devIno)
}

// Flush - FUSE call
Expand Down
16 changes: 10 additions & 6 deletions internal/fusefrontend/file_allocate_truncate.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ func (f *file) Allocate(off uint64, sz uint64, mode uint32) fuse.Status {
if f.released {
return fuse.EBADF
}
wlock.lock(f.devIno)
defer wlock.unlock(f.devIno)
f.fileTableEntry.writeLock.Lock()
defer f.fileTableEntry.writeLock.Unlock()

blocks := f.contentEnc.ExplodePlainRange(off, sz)
firstBlock := blocks[0]
Expand Down Expand Up @@ -100,8 +100,8 @@ func (f *file) Truncate(newSize uint64) fuse.Status {
tlog.Warn.Printf("ino%d fh%d: Truncate on released file", f.devIno.ino, f.intFd())
return fuse.EBADF
}
wlock.lock(f.devIno)
defer wlock.unlock(f.devIno)
f.fileTableEntry.writeLock.Lock()
defer f.fileTableEntry.writeLock.Unlock()
var err error
// Common case first: Truncate to zero
if newSize == 0 {
Expand All @@ -111,7 +111,9 @@ func (f *file) Truncate(newSize uint64) fuse.Status {
return fuse.ToStatus(err)
}
// Truncate to zero kills the file header
f.header = nil
f.fileTableEntry.IDLock.Lock()
f.fileTableEntry.ID = nil
f.fileTableEntry.IDLock.Unlock()
return fuse.OK
}
// We need the old file size to determine if we are growing or shrinking
Expand Down Expand Up @@ -184,7 +186,9 @@ func (f *file) truncateGrowFile(oldPlainSz uint64, newPlainSz uint64) fuse.Statu
var err error
// File was empty, create new header
if oldPlainSz == 0 {
err = f.createHeader()
f.fileTableEntry.IDLock.Lock()
_, err = f.createHeader()
f.fileTableEntry.IDLock.Unlock()
if err != nil {
return fuse.ToStatus(err)
}
Expand Down
64 changes: 31 additions & 33 deletions internal/fusefrontend/write_lock.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,81 +22,79 @@ func DevInoFromStat(st *syscall.Stat_t) DevInoStruct {
}

func init() {
wlock.inodeLocks = make(map[DevInoStruct]*refCntMutex)
openFileMap.entries = make(map[DevInoStruct]*openFileEntryT)
}

// wlock - serializes write accesses to each file (identified by inode number)
// Writing partial blocks means we have to do read-modify-write cycles. We
// really don't want concurrent writes there.
// Concurrent full-block writes could actually be allowed, but are not to
// keep the locking simple.
var wlock wlockMap
var openFileMap openFileMapT

// wlockMap - usage:
// 1) register
// 2) lock ... unlock ...
// 3) unregister
type wlockMap struct {
// opCount counts lock() calls. As every operation that modifies a file should
type openFileMapT struct {
// opCount counts writeLock.Lock() calls. As every operation that modifies a file should
// call it, this effectively serves as a write-operation counter.
// The variable is accessed without holding any locks so atomic operations
// must be used. It must be the first element of the struct to guarantee
// 64-bit alignment.
opCount uint64
// Protects map access
sync.Mutex
inodeLocks map[DevInoStruct]*refCntMutex
entries map[DevInoStruct]*openFileEntryT
}

// refCntMutex - mutex with reference count
type refCntMutex struct {
// Write lock for this inode
type opCountMutex struct {
sync.Mutex
// Points to the opCount variable of the parent openFileMapT
opCount *uint64
}

func (o *opCountMutex) Lock() {
o.Mutex.Lock()
atomic.AddUint64(o.opCount, 1)
}

// refCntMutex - mutex with reference count
type openFileEntryT struct {
// Reference count
refCnt int
// Write lock for this inode
writeLock *opCountMutex
// ID is the file ID in the file header.
ID []byte
IDLock sync.RWMutex
}

// register creates an entry for "ino", or incrementes the reference count
// if the entry already exists.
func (w *wlockMap) register(di DevInoStruct) {
func (w *openFileMapT) register(di DevInoStruct) *openFileEntryT {
w.Lock()
defer w.Unlock()

r := w.inodeLocks[di]
r := w.entries[di]
if r == nil {
r = &refCntMutex{}
w.inodeLocks[di] = r
o := opCountMutex{opCount: &w.opCount}
r = &openFileEntryT{writeLock: &o}
w.entries[di] = r
}
r.refCnt++
return r
}

// unregister decrements the reference count for "di" and deletes the entry if
// the reference count has reached 0.
func (w *wlockMap) unregister(di DevInoStruct) {
func (w *openFileMapT) unregister(di DevInoStruct) {
w.Lock()
defer w.Unlock()

r := w.inodeLocks[di]
r := w.entries[di]
r.refCnt--
if r.refCnt == 0 {
delete(w.inodeLocks, di)
delete(w.entries, di)
}
}

// lock retrieves the entry for "di" and locks it.
func (w *wlockMap) lock(di DevInoStruct) {
atomic.AddUint64(&w.opCount, 1)
w.Lock()
r := w.inodeLocks[di]
w.Unlock()
// this can take a long time - execute outside the wlockMap lock
r.Lock()
}

// unlock retrieves the entry for "di" and unlocks it.
func (w *wlockMap) unlock(di DevInoStruct) {
w.Lock()
r := w.inodeLocks[di]
w.Unlock()
r.Unlock()
}

0 comments on commit 0489d08

Please sign in to comment.