Skip to content

Commit

Permalink
Fix watching a released transfer
Browse files Browse the repository at this point in the history
Things could go wrong if Watch was called after the last existing
watcher was released. The call to Watch would succeed even though it was
not really adding a watcher, and the corresponding call to Release would
close hasWatchers a second time.

The fix for this is twofold:

1. We allow transfers to gain new watchers after the watcher count has
touched zero. This means that the channel returned by Released should
not be closed until all watchers have been released AND the transfer is
no longer tracked by the transfer manager, meaning it won't be possible
for additional calls to Watch to race with closing the channel returned
by Released.

The Transfer interface has a new method called Close so the transfer can
know when the transfer manager no longer references it.

Remove the Cancel method. It's not used and should not be exported.

2. Even though (1) makes it possible to add watchers after all the
previous watchers have been released, we want to avoid doing this in
practice. A transfer that has had all its watchers released is in the
process of being cancelled, and attaching to one of these will never be
the correct behavior. Add a check if a watcher is attaching to a
cancelled transfer.  In this case, wait for the transfer to be removed
from the map and try again. This will ensure correct behavior when a
watcher tries to attach during the race window.

Either (1) or (2) should be sufficient to fix the race involved here,
but the combination is the most correct approach. (1) fixes the
low-level plumbing to be resilient to the race condition, and (2) avoids
using it in a racy way.

Fixes #19606

Signed-off-by: Aaron Lehmann <aaron.lehmann@docker.com>
  • Loading branch information
aaronlehmann committed Jan 25, 2016
1 parent d02ed72 commit 3e2b50c
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 15 deletions.
75 changes: 60 additions & 15 deletions distribution/xfer/transfer.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package xfer

import (
"runtime"
"sync"

"github.com/docker/docker/pkg/progress"
Expand Down Expand Up @@ -38,7 +39,7 @@ type Transfer interface {
Watch(progressOutput progress.Output) *Watcher
Release(*Watcher)
Context() context.Context
Cancel()
Close()
Done() <-chan struct{}
Released() <-chan struct{}
Broadcast(masterProgressChan <-chan progress.Progress)
Expand All @@ -61,11 +62,14 @@ type transfer struct {

// running remains open as long as the transfer is in progress.
running chan struct{}
// hasWatchers stays open until all watchers release the transfer.
hasWatchers chan struct{}
// released stays open until all watchers release the transfer and
// the transfer is no longer tracked by the transfer manager.
released chan struct{}

// broadcastDone is true if the master progress channel has closed.
broadcastDone bool
// closed is true if Close has been called
closed bool
// broadcastSyncChan allows watchers to "ping" the broadcasting
// goroutine to wait for it for deplete its input channel. This ensures
// a detaching watcher won't miss an event that was sent before it
Expand All @@ -78,7 +82,7 @@ func NewTransfer() Transfer {
t := &transfer{
watchers: make(map[chan struct{}]*Watcher),
running: make(chan struct{}),
hasWatchers: make(chan struct{}),
released: make(chan struct{}),
broadcastSyncChan: make(chan struct{}),
}

Expand Down Expand Up @@ -144,13 +148,13 @@ func (t *transfer) Watch(progressOutput progress.Output) *Watcher {
running: make(chan struct{}),
}

t.watchers[w.releaseChan] = w

if t.broadcastDone {
close(w.running)
return w
}

t.watchers[w.releaseChan] = w

go func() {
defer func() {
close(w.running)
Expand Down Expand Up @@ -202,8 +206,19 @@ func (t *transfer) Release(watcher *Watcher) {
delete(t.watchers, watcher.releaseChan)

if len(t.watchers) == 0 {
close(t.hasWatchers)
t.cancel()
if t.closed {
// released may have been closed already if all
// watchers were released, then another one was added
// while waiting for a previous watcher goroutine to
// finish.
select {
case <-t.released:
default:
close(t.released)
}
} else {
t.cancel()
}
}
t.mu.Unlock()

Expand All @@ -223,19 +238,25 @@ func (t *transfer) Done() <-chan struct{} {
}

// Released returns a channel which is closed once all watchers release the
// transfer.
// transfer AND the transfer is no longer tracked by the transfer manager.
func (t *transfer) Released() <-chan struct{} {
return t.hasWatchers
return t.released
}

// Context returns the context associated with the transfer.
func (t *transfer) Context() context.Context {
return t.ctx
}

// Cancel cancels the context associated with the transfer.
func (t *transfer) Cancel() {
t.cancel()
// Close is called by the transfer manager when the transfer is no longer
// being tracked.
func (t *transfer) Close() {
t.mu.Lock()
t.closed = true
if len(t.watchers) == 0 {
close(t.released)
}
t.mu.Unlock()
}

// DoFunc is a function called by the transfer manager to actually perform
Expand Down Expand Up @@ -280,10 +301,33 @@ func (tm *transferManager) Transfer(key string, xferFunc DoFunc, progressOutput
tm.mu.Lock()
defer tm.mu.Unlock()

if xfer, present := tm.transfers[key]; present {
for {
xfer, present := tm.transfers[key]
if !present {
break
}
// Transfer is already in progress.
watcher := xfer.Watch(progressOutput)
return xfer, watcher

select {
case <-xfer.Context().Done():
// We don't want to watch a transfer that has been cancelled.
// Wait for it to be removed from the map and try again.
xfer.Release(watcher)
tm.mu.Unlock()
// The goroutine that removes this transfer from the
// map is also waiting for xfer.Done(), so yield to it.
// This could be avoided by adding a Closed method
// to Transfer to allow explicitly waiting for it to be
// removed the map, but forcing a scheduling round in
// this very rare case seems better than bloating the
// interface definition.
runtime.Gosched()
<-xfer.Done()
tm.mu.Lock()
default:
return xfer, watcher
}
}

start := make(chan struct{})
Expand Down Expand Up @@ -318,6 +362,7 @@ func (tm *transferManager) Transfer(key string, xferFunc DoFunc, progressOutput
}
delete(tm.transfers, key)
tm.mu.Unlock()
xfer.Close()
return
}
}
Expand Down
38 changes: 38 additions & 0 deletions distribution/xfer/transfer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,44 @@ func TestWatchRelease(t *testing.T) {
}
}

func TestWatchFinishedTransfer(t *testing.T) {
makeXferFunc := func(id string) DoFunc {
return func(progressChan chan<- progress.Progress, start <-chan struct{}, inactive chan<- struct{}) Transfer {
xfer := NewTransfer()
go func() {
// Finish immediately
close(progressChan)
}()
return xfer
}
}

tm := NewTransferManager(5)

// Start a transfer
watchers := make([]*Watcher, 3)
var xfer Transfer
xfer, watchers[0] = tm.Transfer("id1", makeXferFunc("id1"), progress.ChanOutput(make(chan progress.Progress)))

// Give it a watcher immediately
watchers[1] = xfer.Watch(progress.ChanOutput(make(chan progress.Progress)))

// Wait for the transfer to complete
<-xfer.Done()

// Set up another watcher
watchers[2] = xfer.Watch(progress.ChanOutput(make(chan progress.Progress)))

// Release the watchers
for _, w := range watchers {
xfer.Release(w)
}

// Now that all watchers have been released, Released() should
// return a closed channel.
<-xfer.Released()
}

func TestDuplicateTransfer(t *testing.T) {
ready := make(chan struct{})

Expand Down

0 comments on commit 3e2b50c

Please sign in to comment.