Skip to content

Commit

Permalink
io_submit(): replace syscall context before returning, hold reference…
Browse files Browse the repository at this point in the history
… per iocb op

The aio code was issuing read and write requests on behalf of the running
syscall context without managing the lifetime of the context or replacing the
cpu's default syscall context. The following scenario would lead to a crash
during the aio test: An aio_complete executes within the saved syscall context
but on a different cpu than the one that issued the requests. The originating
cpu, still having the same syscall context set as default, then serves a
syscall. The low-level syscall entry begins running on the stack of this
context, expecting exclusive access to it, and corruption of the stack ensues.

The solution presented here uses the active syscall context to encompass all
continuations as a result of an io_submit(), despite such operations no longer
being associated with a syscall. The default syscall context is replaced by
calling check_syscall_context_replace() in io_submit(), and each issued I/O
operation holds a reference to the context. The context is finally freed and
recycled once the last operation completes.
  • Loading branch information
wjhun committed Jan 21, 2022
1 parent ab84412 commit 799bed3
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions src/unix/aio.c
Expand Up @@ -136,6 +136,7 @@ closure_function(3, 2, void, aio_eventfd_complete,
u64 *efd_val = bound(efd_val);
deallocate(bound(h), efd_val, sizeof(*efd_val));
fdesc_put(bound(f));
context_release_refcount(get_current_context(current_cpu()));
closure_finish();
}

Expand Down Expand Up @@ -174,11 +175,14 @@ closure_function(5, 2, void, aio_complete,
*efd_val = 1;
io_completion completion = closure(h, aio_eventfd_complete, h, res, efd_val);
apply(res->write, efd_val, sizeof(*efd_val), 0, t, true, completion);
goto wake_and_release;
} else {
fdesc_put(res);
}
}
}
context_release_refcount(get_current_context(current_cpu()));
wake_and_release:
if (bq) {
blockq_wake_one(bq);
blockq_release(bq);
Expand All @@ -196,7 +200,7 @@ static unsigned int aio_avail_events(struct aio *aio)
return avail;
}

static sysreturn iocb_enqueue(struct aio *aio, struct iocb *iocb)
static sysreturn iocb_enqueue(struct aio *aio, struct iocb *iocb, context ctx)
{
if (!validate_user_memory(iocb, sizeof(struct iocb), false)) {
return -EFAULT;
Expand Down Expand Up @@ -226,6 +230,7 @@ static sysreturn iocb_enqueue(struct aio *aio, struct iocb *iocb)
}
io_completion completion = closure(heap_locked(aio->kh), aio_complete, aio, f,
iocb->aio_data, (u64) iocb, res_fd);
context_reserve_refcount(ctx);
refcount_reserve(&aio->refcount);
sysreturn rv;
switch (iocb->aio_lio_opcode) {
Expand Down Expand Up @@ -262,6 +267,7 @@ static sysreturn iocb_enqueue(struct aio *aio, struct iocb *iocb)
aio_unlock(aio);
refcount_release(&aio->refcount);
deallocate_closure(completion);
context_release_refcount(ctx);
fdesc_put(f);
return rv;
}
Expand All @@ -276,16 +282,19 @@ sysreturn io_submit(aio_context_t ctx_id, long nr, struct iocb **iocbpp)
if (!(aio = aio_from_ring(current->p, ctx_id))) {
return -EINVAL;
}
cpuinfo ci = current_cpu();
context ctx = get_current_context(ci);
int io_ops;
for (io_ops = 0; io_ops < nr; io_ops++) {
sysreturn rv = iocb_enqueue(aio, iocbpp[io_ops]);
sysreturn rv = iocb_enqueue(aio, iocbpp[io_ops], ctx);
if (rv) {
if (io_ops == 0) {
io_ops = rv;
}
break;
}
}
check_syscall_context_replace(ci, ctx);
refcount_release(&aio->refcount);
return io_ops;
}
Expand Down

0 comments on commit 799bed3

Please sign in to comment.