Skip to content

Commit

Permalink
Remove blocking wait on container exit for every exec created (#1604)
Browse files Browse the repository at this point in the history
Commit fixes the memory leak seen in the shim.
It removes creation of channel that waits on container exit
for every new exec. Instead, the container wait channel is exposed
through WaitChannel() function which callers can use to decide
if container has exited or not.

Signed-off-by: Kirtana Ashok <Kirtana.Ashok@microsoft.com>
(cherry picked from commit 5fc00c5)
Signed-off-by: Kirtana Ashok <Kirtana.Ashok@microsoft.com>

Signed-off-by: Kirtana Ashok <Kirtana.Ashok@microsoft.com>
Co-authored-by: Kirtana Ashok <Kirtana.Ashok@microsoft.com>
  • Loading branch information
kiashok and Kirtana Ashok committed Dec 12, 2022
1 parent e0b7d33 commit 5d5b210
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 44 deletions.
8 changes: 2 additions & 6 deletions cmd/containerd-shim-runhcs-v1/exec_hcs.go
Expand Up @@ -481,13 +481,9 @@ func (he *hcsExec) waitForContainerExit() {
trace.StringAttribute("tid", he.tid),
trace.StringAttribute("eid", he.id))

cexit := make(chan struct{})
go func() {
_ = he.c.Wait()
close(cexit)
}()
// wait for container or process to exit and ckean up resrources
select {
case <-cexit:
case <-he.c.WaitChannel():
// Container exited first. We need to force the process into the exited
// state and cleanup any resources
he.sl.Lock()
Expand Down
6 changes: 6 additions & 0 deletions internal/cow/cow.go
Expand Up @@ -86,6 +86,12 @@ type Container interface {
// container to be terminated by some error condition (including calling
// Close).
Wait() error
// WaitChannel returns the wait channel of the container
WaitChannel() <-chan struct{}
// WaitError returns the container termination error.
// This function should only be called after the channel in WaitChannel()
// is closed. Otherwise it is not thread safe.
WaitError() error
// Modify sends a request to modify container resources
Modify(ctx context.Context, config interface{}) error
}
50 changes: 34 additions & 16 deletions internal/gcs/container.go
Expand Up @@ -24,6 +24,10 @@ type Container struct {
notifyCh chan struct{}
closeCh chan struct{}
closeOnce sync.Once
// waitBlock is the channel used to wait for container shutdown or termination
waitBlock chan struct{}
// waitError indicates the container termination error if any
waitError error
}

var _ cow.Container = &Container{}
Expand All @@ -37,10 +41,11 @@ func (gc *GuestConnection) CreateContainer(ctx context.Context, cid string, conf
span.AddAttributes(trace.StringAttribute("cid", cid))

c := &Container{
gc: gc,
id: cid,
notifyCh: make(chan struct{}),
closeCh: make(chan struct{}),
gc: gc,
id: cid,
notifyCh: make(chan struct{}),
closeCh: make(chan struct{}),
waitBlock: make(chan struct{}),
}
err = gc.requestNotify(cid, c.notifyCh)
if err != nil {
Expand All @@ -63,10 +68,11 @@ func (gc *GuestConnection) CreateContainer(ctx context.Context, cid string, conf
// container that is already running inside the UVM (after cloning).
func (gc *GuestConnection) CloneContainer(ctx context.Context, cid string) (_ *Container, err error) {
c := &Container{
gc: gc,
id: cid,
notifyCh: make(chan struct{}),
closeCh: make(chan struct{}),
gc: gc,
id: cid,
notifyCh: make(chan struct{}),
closeCh: make(chan struct{}),
waitBlock: make(chan struct{}),
}
err = gc.requestNotify(cid, c.notifyCh)
if err != nil {
Expand All @@ -93,6 +99,8 @@ func (c *Container) Close() error {
_, span := trace.StartSpan(context.Background(), "gcs::Container::Close")
defer span.End()
span.AddAttributes(trace.StringAttribute("cid", c.id))

close(c.closeCh)
})
return nil
}
Expand Down Expand Up @@ -222,23 +230,33 @@ func (c *Container) Terminate(ctx context.Context) (err error) {
return c.shutdown(ctx, rpcShutdownForced)
}

func (c *Container) WaitChannel() <-chan struct{} {
return c.waitBlock
}

func (c *Container) WaitError() error {
return c.waitError
}

// Wait waits for the container to terminate (or Close to be called, or the
// guest connection to terminate).
func (c *Container) Wait() error {
select {
case <-c.notifyCh:
return nil
case <-c.closeCh:
return errors.New("container closed")
}
<-c.WaitChannel()
return c.WaitError()
}

func (c *Container) waitBackground() {
ctx, span := trace.StartSpan(context.Background(), "gcs::Container::waitBackground")
defer span.End()
span.AddAttributes(trace.StringAttribute("cid", c.id))

err := c.Wait()
select {
case <-c.notifyCh:
case <-c.closeCh:
c.waitError = errors.New("container closed")
}
close(c.waitBlock)

log.G(ctx).Debug("container exited")
oc.SetSpanStatus(span, err)
oc.SetSpanStatus(span, c.waitError)
}
12 changes: 10 additions & 2 deletions internal/hcs/system.go
Expand Up @@ -287,11 +287,19 @@ func (computeSystem *System) waitBackground() {
oc.SetSpanStatus(span, err)
}

func (computeSystem *System) WaitChannel() <-chan struct{} {
return computeSystem.waitBlock
}

func (computeSystem *System) WaitError() error {
return computeSystem.waitError
}

// Wait synchronously waits for the compute system to shutdown or terminate. If
// the compute system has already exited returns the previous error (if any).
func (computeSystem *System) Wait() error {
<-computeSystem.waitBlock
return computeSystem.waitError
<-computeSystem.WaitChannel()
return computeSystem.WaitError()
}

// ExitError returns an error describing the reason the compute system terminated.
Expand Down
12 changes: 10 additions & 2 deletions internal/jobcontainers/jobcontainer.go
Expand Up @@ -471,11 +471,19 @@ func (c *JobContainer) Terminate(ctx context.Context) error {
return nil
}

func (c *JobContainer) WaitChannel() <-chan struct{} {
return c.waitBlock
}

func (c *JobContainer) WaitError() error {
return c.waitError
}

// Wait synchronously waits for the container to shutdown or terminate. If
// the container has already exited returns the previous error (if any).
func (c *JobContainer) Wait() error {
<-c.waitBlock
return c.waitError
<-c.WaitChannel()
return c.WaitError()
}

func (c *JobContainer) waitBackground(ctx context.Context) {
Expand Down
6 changes: 6 additions & 0 deletions test/vendor/github.com/Microsoft/hcsshim/internal/cow/cow.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 34 additions & 16 deletions test/vendor/github.com/Microsoft/hcsshim/internal/gcs/container.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions test/vendor/github.com/Microsoft/hcsshim/internal/hcs/system.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 5d5b210

Please sign in to comment.