Permalink
Browse files

cuda: remove gsync() from cuLaunchGrid() and newly implemented cuCtxSynchronize()
  • Loading branch information...
1 parent 413ed47 commit a2a0c001f9ed789affa61de20c0b9c678eef5ea7 Shinpei Kato committed Dec 20, 2011
Showing with 46 additions and 8 deletions.
  1. +1 −0 common/gdev_nvidia_def.h
  2. +32 −0 cuda/driver_api/context.c
  3. +6 −7 cuda/driver_api/execution.c
  4. +7 −1 cuda/driver_api/gdev_cuda.h
@@ -25,6 +25,7 @@
#ifndef __GDEV_NVIDIA_DEF_H__
#define __GDEV_NVIDIA_DEF_H__
+#include "gdev_list.h"
#ifdef __KERNEL__
#include <linux/types.h>
#else
@@ -151,6 +151,9 @@ CUresult cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev)
gdev_list_add(&gdev_ctx_current->list_entry, &gdev_ctx_list);
}
+ /* initialize context synchronization list. */
+ gdev_list_init(&ctx->sync_list, NULL);
+
/* we will trace size of memory allocated by users and # of kernels. */
ctx->data_size = 0;
ctx->launch_id = 0;
@@ -190,6 +193,10 @@ CUresult cuCtxDestroy(CUcontext ctx)
return CUDA_ERROR_NOT_INITIALIZED;
if (!ctx)
return CUDA_ERROR_INVALID_VALUE;
+
+ /* wait for all on-the-fly kernels. */
+ cuCtxSynchronize();
+
if (gclose(ctx->gdev_handle))
return CUDA_ERROR_INVALID_CONTEXT;
@@ -296,18 +303,43 @@ CUresult cuCtxPopCurrent(CUcontext *pctx)
return CUDA_SUCCESS;
}
+/**
+ * Blocks until the device has completed all preceding requested tasks.
+ * cuCtxSynchronize() returns an error if one of the preceding tasks failed.
+ *
+ * Implementation: calls gsync() once for every launch record linked on the
+ * current context's sync_list, then unlinks and frees all of those records.
+ * If sync_list is empty the function returns immediately.
+ *
+ * Returns:
+ * CUDA_SUCCESS               - sync_list was empty, or every gsync() succeeded.
+ * CUDA_ERROR_NOT_INITIALIZED - gdev_initialized is not set.
+ * CUDA_ERROR_INVALID_CONTEXT - there is no current context.
+ * CUDA_ERROR_UNKNOWN         - gsync() reported a failure for some launch.
+ * (The original list also names CUDA_ERROR_DEINITIALIZED; no path in this
+ * function returns it.)
+ */
CUresult cuCtxSynchronize(void)
{
Ghandle handle;
+ struct gdev_cuda_launch *l;
+ struct gdev_list *p;
if (!gdev_initialized)
return CUDA_ERROR_NOT_INITIALIZED;
if (!gdev_ctx_current)
return CUDA_ERROR_INVALID_CONTEXT;
+ if (gdev_list_empty(&gdev_ctx_current->sync_list))
+ return CUDA_SUCCESS;
+
handle = gdev_ctx_current->gdev_handle;
/* synchronize with all kernels: wait on the ID stored in each launch
   record of the current context's sync_list. */
+ gdev_list_for_each(l, &gdev_ctx_current->sync_list, list_entry) {
+ /* if timeout is required, specify gdev_time value instead of NULL.
+    NOTE(review): this early return skips the cleanup loop below, so every
+    record (including the failing one) stays on sync_list and is not freed
+    here; a later call will gsync() the same IDs again. Confirm this is
+    the intended behavior. */
+ if (gsync(handle, l->id, NULL))
+ return CUDA_ERROR_UNKNOWN;
+ }
+
+ /* all waits succeeded: unlink and free every launch record on the list. */
+ while ((p = gdev_list_head(&gdev_ctx_current->sync_list))) {
+ gdev_list_del(p);
+ l = gdev_list_container(p);
+ FREE(l);
+ }
return CUDA_SUCCESS;
}
@@ -146,15 +146,17 @@ CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height)
struct CUmod_st *mod = func->mod;
struct CUctx_st *ctx = mod->ctx;
struct gdev_kernel *k;
+ struct gdev_cuda_launch *l;
Ghandle handle;
- uint32_t id;
if (!gdev_initialized)
return CUDA_ERROR_NOT_INITIALIZED;
if (!ctx || ctx != gdev_ctx_current)
return CUDA_ERROR_INVALID_CONTEXT;
if (!func || grid_width <= 0 || grid_height <= 0)
return CUDA_ERROR_INVALID_VALUE;
+ if (!(l = MALLOC(sizeof(*l))))
+ return CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES;
k = &func->kernel;
k->grid_x = grid_width;
@@ -167,13 +169,10 @@ CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height)
handle = gdev_ctx_current->gdev_handle;
- if (glaunch(handle, k, &id))
+ if (glaunch(handle, k, &l->id))
return CUDA_ERROR_LAUNCH_FAILED;
-
- /* if timeout is required, specify gdev_time value instead of NULL.
- this sync should be moved to cuCtxSynchronize(). */
- if (gsync(handle, id, NULL))
- return CUDA_ERROR_LAUNCH_TIMEOUT;
+ gdev_list_init(&l->list_entry, l);
+ gdev_list_add(&l->list_entry, &ctx->sync_list);
return CUDA_SUCCESS;
}
@@ -73,9 +73,15 @@ struct gdev_cuda_raw_func {
uint32_t local_size_neg;
};
+struct gdev_cuda_launch { /* record of one kernel launch that has not been synchronized yet. */
+ uint32_t id; /* kernel ID written by glaunch(); passed to gsync() when the context is synchronized. */
+ struct gdev_list list_entry; /* entry linked into the sync_list of the launching context (struct CUctx_st). */
+};
+
struct CUctx_st {
Ghandle gdev_handle;
- struct gdev_list list_entry;
+ struct gdev_list list_entry; /* entry to ctx_list. */
+ struct gdev_list sync_list;
struct gdev_cuda_info cuda_info;
uint64_t data_size;
int launch_id;

0 comments on commit a2a0c00

Please sign in to comment.