
gdev: refactored code

1 parent a2b5971 · commit dd4de83d48beda2afe5dc07695192bb0b25994cd · Shinpei Kato committed Jan 10, 2012
common/gdev_api.c (202 changes)
@@ -108,10 +108,7 @@ static int __f_ctu(void *dst, const void *src, uint32_t size)
* copy host buffer to device memory with pipelining.
* @host_copy is either memcpy() or copy_from_user().
*/
-static int __gmemcpy_to_device_p
-(gdev_ctx_t *ctx, uint64_t dst_addr, const void *src_buf, uint64_t size,
- int async, uint32_t ch_size, int p_count, gdev_mem_t **bmem,
- int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_to_device_p(gdev_ctx_t *ctx, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t ch_size, int p_count, gdev_mem_t **bmem, int (*host_copy)(void*, const void*, uint32_t))
{
uint64_t rest_size = size;
uint64_t offset;
@@ -136,12 +133,11 @@ static int __gmemcpy_to_device_p
/* HtoH */
if (fence[i])
gdev_poll(ctx, fence[i], NULL);
- ret = host_copy(dma_buf[i], src_buf + offset, dma_size);
+ ret = host_copy(dma_buf[i], src_buf+offset, dma_size);
if (ret)
goto end;
/* HtoD */
- fence[i] = gdev_memcpy(ctx, dst_addr + offset, dma_addr[i],
- dma_size, async);
+ fence[i] = gdev_memcpy(ctx, dst_addr+offset, dma_addr[i], dma_size);
if (rest_size == 0) {
/* wait for the last fence, and go out! */
gdev_poll(ctx, fence[i], NULL);
@@ -159,10 +155,7 @@ static int __gmemcpy_to_device_p
* copy host buffer to device memory without pipelining.
* @host_copy is either memcpy() or copy_from_user().
*/
-static int __gmemcpy_to_device_np
-(gdev_ctx_t *ctx, uint64_t dst_addr, const void *src_buf, uint64_t size,
- int async, uint32_t ch_size, gdev_mem_t **bmem,
- int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_to_device_np(gdev_ctx_t *ctx, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t ch_size, gdev_mem_t **bmem, int (*host_copy)(void*, const void*, uint32_t))
{
uint64_t rest_size = size;
uint64_t offset;
@@ -182,8 +175,7 @@ static int __gmemcpy_to_device_np
ret = host_copy(dma_buf[0], src_buf + offset, dma_size);
if (ret)
goto end;
- fence = gdev_memcpy(ctx, dst_addr + offset, dma_addr[0],
- dma_size, async);
+ fence = gdev_memcpy(ctx, dst_addr + offset, dma_addr[0], dma_size);
gdev_poll(ctx, fence, NULL);
rest_size -= dma_size;
offset += dma_size;
@@ -196,26 +188,24 @@ static int __gmemcpy_to_device_np
/**
* copy host DMA buffer to device memory.
*/
-static int __gmemcpy_dma_to_device
-(gdev_ctx_t *ctx, uint64_t dst_addr, uint64_t src_addr, uint64_t size,
- int async)
+static int __gmemcpy_dma_to_device(gdev_ctx_t *ctx, uint64_t dst_addr, uint64_t src_addr, uint64_t size, uint32_t *id)
{
uint32_t fence;
/* we don't break data into chunks if copying directly from dma memory. */
- fence = gdev_memcpy(ctx, dst_addr, src_addr, size, async);
- gdev_poll(ctx, fence, NULL);
+ fence = gdev_memcpy(ctx, dst_addr, src_addr, size);
+ if (!id)
+ gdev_poll(ctx, fence, NULL);
+ else
+ *id = fence;
return 0;
}
/**
* a wrapper function of __gmemcpy_to_device().
*/
-static int __gmemcpy_to_device_locked
-(gdev_ctx_t *ctx, uint64_t dst_addr, const void *src_buf, uint64_t size,
- int async, uint32_t ch_size, int p_count, gdev_vas_t *vas, gdev_mem_t *mem,
- gdev_mem_t **dma_mem, int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_to_device_locked(gdev_ctx_t *ctx, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id, uint32_t ch_size, int p_count, gdev_vas_t *vas, gdev_mem_t *mem, gdev_mem_t **dma_mem, int (*host_copy)(void*, const void*, uint32_t))
{
gdev_mem_t *hmem;
gdev_mem_t **bmem;
@@ -229,7 +219,7 @@ static int __gmemcpy_to_device_locked
ret = gdev_write(mem, dst_addr, src_buf, size);
}
else if ((hmem = gdev_mem_lookup(vas, (uint64_t)src_buf, GDEV_MEM_DMA))) {
- ret = __gmemcpy_dma_to_device(ctx, dst_addr, hmem->addr, size, async);
+ ret = __gmemcpy_dma_to_device(ctx, dst_addr, hmem->addr, size, id);
}
else {
/* prepare bounce buffer memory. */
@@ -243,11 +233,9 @@ static int __gmemcpy_to_device_locked
/* copy memory to device. */
if (p_count > 1 && size > ch_size)
- ret = __gmemcpy_to_device_p(ctx, dst_addr, src_buf, size, async,
- ch_size, p_count, bmem, host_copy);
+ ret = __gmemcpy_to_device_p(ctx, dst_addr, src_buf, size, ch_size, p_count, bmem, host_copy);
else
- ret = __gmemcpy_to_device_np(ctx, dst_addr, src_buf, size, async,
- ch_size, bmem, host_copy);
+ ret = __gmemcpy_to_device_np(ctx, dst_addr, src_buf, size, ch_size, bmem, host_copy);
/* free bounce buffer memory, if necessary. */
if (!dma_mem)
@@ -260,9 +248,7 @@ static int __gmemcpy_to_device_locked
/**
* a wrapper function of gmemcpy_to_device().
*/
-static int __gmemcpy_to_device
-(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size,
- int async, int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_to_device(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id, int (*host_copy)(void*, const void*, uint32_t))
{
gdev_vas_t *vas = h->vas;
gdev_ctx_t *ctx = h->ctx;
@@ -282,22 +268,23 @@ static int __gmemcpy_to_device
gdev_mem_lock(mem);
gdev_shm_evict_conflict(ctx, mem); /* evict conflicting data. */
- ret = __gmemcpy_to_device_locked(ctx, dst_addr, src_buf, size, async,
+ ret = __gmemcpy_to_device_locked(ctx, dst_addr, src_buf, size, id,
ch_size, p_count, vas, mem, dma_mem,
host_copy);
gdev_mem_unlock(mem);
-
+
+#ifndef GDEV_SCHEDULER_DISABLED
+ gdev_select_next_memory(h->gdev); /* memcpy needs to select next itself. */
+#endif
+
return ret;
}
/**
* copy device memory to host buffer with pipelining.
* host_copy() is either memcpy() or copy_to_user().
*/
-static int __gmemcpy_from_device_p
-(gdev_ctx_t *ctx, void *dst_buf, uint64_t src_addr, uint64_t size,
- int async, uint32_t ch_size, int p_count, gdev_mem_t **bmem,
- int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_from_device_p(gdev_ctx_t *ctx, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t ch_size, int p_count, gdev_mem_t **bmem, int (*host_copy)(void*, const void*, uint32_t))
{
uint64_t rest_size = size;
uint64_t offset;
@@ -318,7 +305,7 @@ static int __gmemcpy_from_device_p
dma_size = __min(rest_size, ch_size);
rest_size -= dma_size;
/* DtoH */
- fence[0] = gdev_memcpy(ctx, dma_addr[0], src_addr + 0, dma_size, async);
+ fence[0] = gdev_memcpy(ctx, dma_addr[0], src_addr + 0, dma_size);
for (;;) {
for (i = 0; i < p_count; i++) {
if (rest_size == 0) {
@@ -332,11 +319,9 @@ static int __gmemcpy_from_device_p
offset += dma_size;
/* DtoH */
if (i + 1 == p_count)
- fence[0] = gdev_memcpy(ctx, dma_addr[0], src_addr + offset,
- dma_size, async);
+ fence[0] = gdev_memcpy(ctx, dma_addr[0], src_addr + offset, dma_size);
else
- fence[i+1] = gdev_memcpy(ctx, dma_addr[i+1], src_addr + offset,
- dma_size, async);
+ fence[i+1] = gdev_memcpy(ctx, dma_addr[i+1], src_addr + offset, dma_size);
/* HtoH */
gdev_poll(ctx, fence[i], NULL);
ret = host_copy(dst_buf + offset - dma_size, dma_buf[i], dma_size);
@@ -353,10 +338,7 @@ static int __gmemcpy_from_device_p
* copy device memory to host buffer without pipelining.
* host_copy() is either memcpy() or copy_to_user().
*/
-static int __gmemcpy_from_device_np
-(gdev_ctx_t *ctx, void *dst_buf, uint64_t src_addr, uint64_t size,
- int async, uint32_t ch_size, gdev_mem_t **bmem,
- int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_from_device_np(gdev_ctx_t *ctx, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t ch_size, gdev_mem_t **bmem, int (*host_copy)(void*, const void*, uint32_t))
{
uint64_t rest_size = size;
uint64_t offset;
@@ -373,10 +355,9 @@ static int __gmemcpy_from_device_np
offset = 0;
while (rest_size) {
dma_size = __min(rest_size, ch_size);
- fence = gdev_memcpy(ctx, dma_addr[0], src_addr + offset,
- dma_size, async);
+ fence = gdev_memcpy(ctx, dma_addr[0], src_addr+offset, dma_size);
gdev_poll(ctx, fence, NULL);
- ret = host_copy(dst_buf + offset, dma_buf[0], dma_size);
+ ret = host_copy(dst_buf+offset, dma_buf[0], dma_size);
if (ret)
goto end;
rest_size -= dma_size;
@@ -390,26 +371,24 @@ static int __gmemcpy_from_device_np
/**
* copy device memory to host DMA buffer.
*/
-static int __gmemcpy_dma_from_device
-(gdev_ctx_t *ctx, uint64_t dst_addr, uint64_t src_addr, uint64_t size,
- int async)
+static int __gmemcpy_dma_from_device(gdev_ctx_t *ctx, uint64_t dst_addr, uint64_t src_addr, uint64_t size, uint32_t *id)
{
uint32_t fence;
/* we don't break data into chunks if copying directly from dma memory. */
- fence = gdev_memcpy(ctx, dst_addr, src_addr, size, async);
- gdev_poll(ctx, fence, NULL);
+ fence = gdev_memcpy(ctx, dst_addr, src_addr, size);
+ if (!id)
+ gdev_poll(ctx, fence, NULL);
+ else
+ *id = fence;
return 0;
}
/**
* a wrapper function of __gmemcpy_from_device().
*/
-static int __gmemcpy_from_device_locked
-(gdev_ctx_t *ctx, void *dst_buf, uint64_t src_addr, uint64_t size,
- int async, uint32_t ch_size, int p_count, gdev_vas_t *vas, gdev_mem_t *mem,
- gdev_mem_t **dma_mem, int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_from_device_locked(gdev_ctx_t *ctx, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id, uint32_t ch_size, int p_count, gdev_vas_t *vas, gdev_mem_t *mem, gdev_mem_t **dma_mem, int (*host_copy)(void*, const void*, uint32_t))
{
gdev_mem_t *hmem;
gdev_mem_t **bmem;
@@ -423,7 +402,7 @@ static int __gmemcpy_from_device_locked
ret = gdev_read(mem, dst_buf, src_addr, size);
}
else if ((hmem = gdev_mem_lookup(vas, (uint64_t)dst_buf, GDEV_MEM_DMA))) {
- ret = __gmemcpy_dma_from_device(ctx, hmem->addr, src_addr, size, async);
+ ret = __gmemcpy_dma_from_device(ctx, hmem->addr, src_addr, size, id);
}
else {
/* prepare bounce buffer memory. */
@@ -436,11 +415,9 @@ static int __gmemcpy_from_device_locked
bmem = dma_mem;
if (p_count > 1 && size > ch_size)
- ret = __gmemcpy_from_device_p(ctx, dst_buf, src_addr, size, async,
- ch_size, p_count, bmem, host_copy);
+ ret = __gmemcpy_from_device_p(ctx, dst_buf, src_addr, size, ch_size, p_count, bmem, host_copy);
else
- ret = __gmemcpy_from_device_np(ctx, dst_buf, src_addr, size, async,
- ch_size, bmem, host_copy);
+ ret = __gmemcpy_from_device_np(ctx, dst_buf, src_addr, size, ch_size, bmem, host_copy);
/* free bounce buffer memory, if necessary. */
if (!dma_mem)
@@ -453,9 +430,7 @@ static int __gmemcpy_from_device_locked
/**
* a wrapper function of gmemcpy_from_device().
*/
-static int __gmemcpy_from_device
-(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size,
- int async, int (*host_copy)(void*, const void*, uint32_t))
+static int __gmemcpy_from_device(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id, int (*host_copy)(void*, const void*, uint32_t))
{
gdev_vas_t *vas = h->vas;
gdev_ctx_t *ctx = h->ctx;
@@ -475,19 +450,22 @@ static int __gmemcpy_from_device
gdev_mem_lock(mem);
gdev_shm_retrieve_swap(ctx, mem); /* retrieve data swapped. */
- ret = __gmemcpy_from_device_locked(ctx, dst_buf, src_addr, size, async,
+ ret = __gmemcpy_from_device_locked(ctx, dst_buf, src_addr, size, id,
ch_size, p_count, vas, mem, dma_mem,
host_copy);
gdev_mem_unlock(mem);
+#ifndef GDEV_SCHEDULER_DISABLED
+ gdev_select_next_memory(h->gdev); /* memcpy needs to select next itself. */
+#endif
+
return ret;
}
/**
* this function must be used when saving data to host.
*/
-int gdev_callback_save_to_host
-(void *h, void* dst_buf, uint64_t src_addr, uint64_t size)
+int gdev_callback_save_to_host(void *h, void* dst_buf, uint64_t src_addr, uint64_t size)
{
gdev_vas_t *vas = ((struct gdev_handle*)h)->vas;
gdev_ctx_t *ctx = ((struct gdev_handle*)h)->ctx;
@@ -496,21 +474,18 @@ int gdev_callback_save_to_host
uint32_t ch_size = ((struct gdev_handle*)h)->chunk_size;
int p_count = ((struct gdev_handle*)h)->pipeline_count;
- return __gmemcpy_from_device_locked(ctx, dst_buf, src_addr, size, 1,
- ch_size, p_count, vas, mem, dma_mem,
- __f_memcpy);
+ return __gmemcpy_from_device_locked(ctx, dst_buf, src_addr, size, NULL, ch_size, p_count, vas, mem, dma_mem, __f_memcpy);
}
/**
* this function must be used when saving data to device.
*/
-int gdev_callback_save_to_device
-(void *h, uint64_t dst_addr, uint64_t src_addr, uint64_t size)
+int gdev_callback_save_to_device(void *h, uint64_t dst_addr, uint64_t src_addr, uint64_t size)
{
gdev_ctx_t *ctx = ((struct gdev_handle*)h)->ctx;
uint32_t fence;
- fence = gdev_memcpy(ctx, dst_addr, src_addr, size, 0);
+ fence = gdev_memcpy(ctx, dst_addr, src_addr, size);
gdev_poll(ctx, fence, NULL);
return 0;
@@ -519,8 +494,7 @@ int gdev_callback_save_to_device
/**
* this function must be used when loading data from host.
*/
-int gdev_callback_load_from_host
-(void *h, uint64_t dst_addr, void *src_buf, uint64_t size)
+int gdev_callback_load_from_host(void *h, uint64_t dst_addr, void *src_buf, uint64_t size)
{
gdev_vas_t *vas = ((struct gdev_handle*)h)->vas;
gdev_ctx_t *ctx = ((struct gdev_handle*)h)->ctx;
@@ -529,21 +503,18 @@ int gdev_callback_load_from_host
uint32_t ch_size = ((struct gdev_handle*)h)->chunk_size;
int p_count = ((struct gdev_handle*)h)->pipeline_count;
- return __gmemcpy_to_device_locked(ctx, dst_addr, src_buf, size, 1,
- ch_size, p_count, vas, mem, dma_mem,
- __f_memcpy);
+ return __gmemcpy_to_device_locked(ctx, dst_addr, src_buf, size, NULL, ch_size, p_count, vas, mem, dma_mem, __f_memcpy);
}
/**
* this function must be used when loading data from device.
*/
-int gdev_callback_load_from_device
-(void *h, uint64_t dst_addr, uint64_t src_addr, uint64_t size)
+int gdev_callback_load_from_device(void *h, uint64_t dst_addr, uint64_t src_addr, uint64_t size)
{
gdev_ctx_t *ctx = ((struct gdev_handle*)h)->ctx;
uint32_t fence;
- fence = gdev_memcpy(ctx, dst_addr, src_addr, size, 0);
+ fence = gdev_memcpy(ctx, dst_addr, src_addr, size);
gdev_poll(ctx, fence, NULL);
return 0;
@@ -685,22 +656,24 @@ uint64_t gmalloc(struct gdev_handle *h, uint64_t size)
gdev_vas_t *vas = h->vas;
gdev_mem_t *mem;
- gdev->mem_used += size;
-
- if (gdev->mem_used > gdev->mem_size) {
+ if (gdev->mem_used + size > gdev->mem_size) {
/* try to share memory with someone (only for device memory).
the shared memory must be freed in gdev_mem_free() when
unreferenced by all users. */
- if (!(mem = gdev_mem_share(vas, size)))
+ if (!(mem = gdev_mem_share(vas, size))) {
+ GDEV_PRINT("Failed to share memory with victims\n");
goto fail;
+ }
}
else if (!(mem = gdev_mem_alloc(vas, size, GDEV_MEM_DEVICE)))
goto fail;
+ /* size could have been rounded up. */
+ gdev->mem_used += gdev_mem_get_size(mem);
+
return gdev_mem_get_addr(mem);
fail:
- gdev->mem_used -= size;
return 0;
}
@@ -738,17 +711,17 @@ void *gmalloc_dma(struct gdev_handle *h, uint64_t size)
gdev_vas_t *vas = h->vas;
gdev_mem_t *mem;
- gdev->dma_mem_used += size;
-
- if (gdev->dma_mem_used > gdev->dma_mem_size)
+ if (gdev->dma_mem_used + size > gdev->dma_mem_size)
goto fail;
else if (!(mem = gdev_mem_alloc(vas, size, GDEV_MEM_DMA)))
goto fail;
+ /* size could have been rounded up. */
+ gdev->dma_mem_used += gdev_mem_get_size(mem);
+
return gdev_mem_get_buf(mem);
fail:
- gdev->dma_mem_used -= size;
return 0;
}
@@ -780,66 +753,54 @@ uint64_t gfree_dma(struct gdev_handle *h, void *buf)
* gmemcpy_to_device():
* copy data from @buf to device memory at @addr.
*/
-int gmemcpy_to_device
-(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
+int gmemcpy_to_device(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
{
- /* async = false and host memcpy will use memcpy(). */
- return __gmemcpy_to_device(h, dst_addr, src_buf, size, 0, __f_memcpy);
+ return __gmemcpy_to_device(h, dst_addr, src_buf, size, NULL, __f_memcpy);
}
/**
* gmemcpy_to_device_async():
* asynchronously copy data from @buf to device memory at @addr.
*/
-int gmemcpy_to_device_async
-(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
+int gmemcpy_to_device_async(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id)
{
- /* async = true and host memcpy will use memcpy(). */
- return __gmemcpy_to_device(h, dst_addr, src_buf, size, 1, __f_memcpy);
+ return __gmemcpy_to_device(h, dst_addr, src_buf, size, id, __f_memcpy);
}
/**
* gmemcpy_user_to_device():
* copy data from "user-space" @buf to device memory at @addr.
*/
-int gmemcpy_user_to_device
-(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
+int gmemcpy_user_to_device(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
{
- /* async = false and host memcpy will use copy_from_user(). */
- return __gmemcpy_to_device(h, dst_addr, src_buf, size, 0, __f_cfu);
+ return __gmemcpy_to_device(h, dst_addr, src_buf, size, NULL, __f_cfu);
}
/**
* gmemcpy_user_to_device_async():
 * asynchronously copy data from "user-space" @buf to device memory at @addr.
*/
-int gmemcpy_user_to_device_async
-(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
+int gmemcpy_user_to_device_async(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id)
{
- /* async = true and host memcpy will use copy_from_user(). */
- return __gmemcpy_to_device(h, dst_addr, src_buf, size, 1, __f_cfu);
+ return __gmemcpy_to_device(h, dst_addr, src_buf, size, id, __f_cfu);
}
/**
* gmemcpy_from_device():
* copy data from device memory at @addr to @buf.
*/
-int gmemcpy_from_device
-(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size)
+int gmemcpy_from_device(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size)
{
- /* async = false and host memcpy will use memcpy(). */
- return __gmemcpy_from_device(h, dst_buf, src_addr, size, 0, __f_memcpy);
+ return __gmemcpy_from_device(h, dst_buf, src_addr, size, NULL, __f_memcpy);
}
/**
* gmemcpy_from_device_async():
* asynchronously copy data from device memory at @addr to @buf.
*/
-int gmemcpy_from_device_async
-(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size)
+int gmemcpy_from_device_async(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id)
{
- /* async = true and host memcpy will use memcpy(). */
- return __gmemcpy_from_device(h, dst_buf, src_addr, size, 0, __f_memcpy);
+ return __gmemcpy_from_device(h, dst_buf, src_addr, size, id, __f_memcpy);
}
/**
@@ -849,19 +810,16 @@ int gmemcpy_from_device_async
int gmemcpy_user_from_device
(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size)
{
- /* async = false and host memcpy will use copy_to_user(). */
- return __gmemcpy_from_device(h, dst_buf, src_addr, size, 0, __f_ctu);
+ return __gmemcpy_from_device(h, dst_buf, src_addr, size, NULL, __f_ctu);
}
/**
* gmemcpy_user_from_device_async():
* asynchronously copy data from device memory at @addr to "user-space" @buf.
*/
-int gmemcpy_user_from_device_async
-(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size)
+int gmemcpy_user_from_device_async(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id)
{
- /* async = true and host memcpy will use copy_to_user(). */
- return __gmemcpy_from_device(h, dst_buf, src_addr, size, 1, __f_ctu);
+ return __gmemcpy_from_device(h, dst_buf, src_addr, size, id, __f_ctu);
}
/**
@@ -882,7 +840,7 @@ int gmemcpy_in_device
gdev_mem_lock(dst);
gdev_mem_lock(src);
- fence = gdev_memcpy(ctx, dst_addr, src_addr, size, 0);
+ fence = gdev_memcpy(ctx, dst_addr, src_addr, size);
gdev_poll(ctx, fence, NULL);
gdev_mem_unlock(src);
gdev_mem_unlock(dst);
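
The thread running through all the gdev_api.c hunks above: the boolean `async` argument is gone, replaced by a `uint32_t *id` out-parameter. NULL preserves the old synchronous behavior (the helper polls the fence itself); a non-NULL pointer receives the fence sequence number so the caller can wait later. A minimal sketch of both call patterns against __gmemcpy_dma_to_device() (variable names are ours):

    uint32_t fence_id;

    /* synchronous: id == NULL, so the helper calls gdev_poll() internally. */
    __gmemcpy_dma_to_device(ctx, dst_addr, src_addr, size, NULL);

    /* asynchronous: the fence id comes back; overlap work, then wait. */
    __gmemcpy_dma_to_device(ctx, dst_addr, src_addr, size, &fence_id);
    /* ... other work ... */
    gdev_poll(ctx, fence_id, NULL);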
common/gdev_api.h (8 changes)
@@ -49,13 +49,13 @@ uint64_t gfree(Ghandle h, uint64_t addr);
void *gmalloc_dma(Ghandle h, uint64_t size);
uint64_t gfree_dma(Ghandle h, void *buf);
int gmemcpy_to_device(Ghandle h, uint64_t dst_addr, const void *src_buf, uint64_t size);
-int gmemcpy_to_device_async(Ghandle h, uint64_t dst_addr, const void *src_buf, uint64_t size);
+int gmemcpy_to_device_async(Ghandle h, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id);
int gmemcpy_user_to_device(Ghandle h, uint64_t dst_addr, const void *src_buf, uint64_t size);
-int gmemcpy_user_to_device_async(Ghandle h, uint64_t dst_addr, const void *src_buf, uint64_t size);
+int gmemcpy_user_to_device_async(Ghandle h, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id);
int gmemcpy_from_device(Ghandle h, void *dst_buf, uint64_t src_addr, uint64_t size);
-int gmemcpy_from_device_async(Ghandle h, void *dst_buf, uint64_t src_addr, uint64_t size);
+int gmemcpy_from_device_async(Ghandle h, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id);
int gmemcpy_user_from_device(Ghandle h, void *dst_buf, uint64_t src_addr, uint64_t size);
-int gmemcpy_user_from_device_async(Ghandle h, void *dst_buf, uint64_t src_addr, uint64_t size);
+int gmemcpy_user_from_device_async(Ghandle h, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id);
int gmemcpy_in_device(Ghandle h, uint64_t dst_addr, uint64_t src_addr, uint64_t size);
int glaunch(Ghandle h, struct gdev_kernel *kernel, uint32_t *id);
int gsync(Ghandle h, uint32_t id, struct gdev_time *timeout);
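
From the caller's side, the new asynchronous entry points pair naturally with gsync(), which already takes the id produced by glaunch(). A hedged usage sketch (assuming a handle from the usual gopen()/gmalloc() setup, and that gsync() accepts memcpy fence ids, with a NULL timeout meaning wait indefinitely):

    uint32_t id;
    uint64_t dst = gmalloc(h, size);

    gmemcpy_to_device_async(h, dst, src_buf, size, &id);
    /* ... CPU work overlapped with the DMA transfer ... */
    gsync(h, id, NULL);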
common/gdev_arch.h (2 changes)
@@ -46,7 +46,7 @@ typedef struct gdev_mem gdev_mem_t;
*/
int gdev_compute_setup(struct gdev_device *gdev);
uint32_t gdev_launch(gdev_ctx_t *ctx, struct gdev_kernel *kern);
-uint32_t gdev_memcpy(gdev_ctx_t *ctx, uint64_t dst_addr, uint64_t src_addr, uint32_t size, int async);
+uint32_t gdev_memcpy(gdev_ctx_t *ctx, uint64_t dst_addr, uint64_t src_addr, uint32_t size);
uint32_t gdev_read32(gdev_mem_t *mem, uint64_t addr);
void gdev_write32(gdev_mem_t *mem, uint64_t addr, uint32_t val);
int gdev_read(gdev_mem_t *mem, void *buf, uint64_t addr, uint32_t size);
common/gdev_device.h (3 changes)
@@ -41,9 +41,8 @@
/**
* generic subchannel definitions
*/
-#define GDEV_SUBCH_LAUNCH 1
+#define GDEV_SUBCH_COMPUTE 1
#define GDEV_SUBCH_MEMCPY 2
-#define GDEV_SUBCH_MEMCPY_ASYNC 3
/**
* Gdev device struct:
common/gdev_ioctl_def.h (1 change)
@@ -61,6 +61,7 @@ struct gdev_ioctl_dma {
uint64_t src_addr;
uint64_t dst_addr;
uint64_t size;
+ uint32_t *id;
};
struct gdev_ioctl_launch {
common/gdev_nvidia.h (12 changes)
@@ -35,10 +35,13 @@
#include "gdev_system.h"
#include "gdev_time.h"
-#define GDEV_SUBCH_COMPUTE GDEV_SUBCH_LAUNCH
-#define GDEV_SUBCH_M2MF GDEV_SUBCH_MEMCPY
-#define GDEV_SUBCH_PCOPY0 GDEV_SUBCH_MEMCPY_ASYNC
-#define GDEV_SUBCH_PCOPY1 (GDEV_SUBCH_MEMCPY_ASYNC + 1)
+#define GDEV_SUBCH_NV_COMPUTE GDEV_SUBCH_COMPUTE
+#ifndef GDEV_NVIDIA_MEMCPY_PCOPY
+#define GDEV_SUBCH_NV_M2MF GDEV_SUBCH_MEMCPY
+#else
+#define GDEV_SUBCH_NV_PCOPY0 GDEV_SUBCH_MEMCPY
+#endif
+#define GDEV_SUBCH_NV_PCOPY1 (GDEV_SUBCH_MEMCPY + 1)
#define GDEV_FENCE_BUF_SIZE 0x10000 /* 64KB */
#define GDEV_FENCE_QUERY_SIZE 0x10 /* aligned with nvc0's query */
@@ -175,7 +178,6 @@ struct gdev_compute {
void (*fence_write)(struct gdev_ctx *, int, uint32_t);
void (*fence_reset)(struct gdev_ctx *, uint32_t);
void (*memcpy)(struct gdev_ctx *, uint64_t, uint64_t, uint32_t);
- void (*memcpy_async)(struct gdev_ctx *, uint64_t, uint64_t, uint32_t);
void (*membar)(struct gdev_ctx *);
void (*notify_intr)(struct gdev_ctx *);
void (*init)(struct gdev_ctx *);
common/gdev_nvidia_compute.c (67 changes)
@@ -29,25 +29,6 @@
#include "gdev_api.h"
#include "gdev_device.h"
-/**
- * memcpy functions prototypes
- */
-static uint32_t gdev_memcpy_sync
-(struct gdev_ctx *, uint64_t, uint64_t, uint32_t, uint32_t);
-static uint32_t gdev_memcpy_async
-(struct gdev_ctx *, uint64_t, uint64_t, uint32_t, uint32_t);
-
-/**
- * pointers to memcpy functions.
- * gdev_memcpy_func[0] is synchronous memcpy.
- * gdev_memcpy_func[1] is asynchrounous memcpy.
- */
-static uint32_t (*gdev_memcpy_func[2])
-(struct gdev_ctx*, uint64_t, uint64_t, uint32_t, uint32_t) = {
- gdev_memcpy_sync,
- gdev_memcpy_async
-};
-
/* set up the architecture-dependent compute engine. */
int gdev_compute_setup(struct gdev_device *gdev)
{
@@ -105,62 +86,30 @@ uint32_t gdev_launch(struct gdev_ctx *ctx, struct gdev_kernel *kern)
return seq;
}
-/* synchrounously copy data of @size from @src_addr to @dst_addr. */
-static uint32_t gdev_memcpy_sync
-(struct gdev_ctx *ctx, uint64_t dst_addr, uint64_t src_addr, uint32_t size,
- uint32_t seq)
-{
- struct gdev_vas *vas = ctx->vas;
- struct gdev_device *gdev = vas->gdev;
- struct gdev_compute *compute = gdev->compute;
-
- compute->membar(ctx);
- /* it's important to emit a fence *before* memcpy():
- the EXEC method of the PCOPY and M2MF engines is associated with
- the QUERY method, i.e., if QUERY is set, the sequence will be
- written to the specified address when the data are transfered. */
- compute->fence_reset(ctx, seq);
- compute->fence_write(ctx, GDEV_SUBCH_M2MF, seq);
- compute->memcpy(ctx, dst_addr, src_addr, size);
-
- return seq;
-}
-
/* asynchronously copy data of @size from @src_addr to @dst_addr. */
-static uint32_t gdev_memcpy_async
-(struct gdev_ctx *ctx, uint64_t dst_addr, uint64_t src_addr, uint32_t size,
- uint32_t seq)
+uint32_t gdev_memcpy(struct gdev_ctx *ctx, uint64_t dst_addr, uint64_t src_addr, uint32_t size)
{
struct gdev_vas *vas = ctx->vas;
struct gdev_device *gdev = vas->gdev;
struct gdev_compute *compute = gdev->compute;
+ uint32_t seq;
+
+ if (++ctx->fence.seq == GDEV_FENCE_COUNT)
+ ctx->fence.seq = 1;
+ seq = ctx->fence.seq;
compute->membar(ctx);
/* it's important to emit a fence *before* memcpy():
the EXEC method of the PCOPY and M2MF engines is associated with
the QUERY method, i.e., if QUERY is set, the sequence will be
written to the specified address when the data are transferred. */
compute->fence_reset(ctx, seq);
- compute->fence_write(ctx, GDEV_SUBCH_PCOPY0, seq);
- compute->memcpy_async(ctx, dst_addr, src_addr, size);
+ compute->fence_write(ctx, GDEV_SUBCH_MEMCPY, seq);
+ compute->memcpy(ctx, dst_addr, src_addr, size);
return seq;
}
-/* asynchrounously copy data of @size from @src_addr to @dst_addr. */
-uint32_t gdev_memcpy
-(struct gdev_ctx *ctx, uint64_t dst_addr, uint64_t src_addr, uint32_t size,
- int async)
-{
- uint32_t seq;
-
- if (++ctx->fence.seq == GDEV_FENCE_COUNT)
- ctx->fence.seq = 1;
- seq = ctx->fence.seq;
-
- return gdev_memcpy_func[async](ctx, dst_addr, src_addr, size, seq);
-}
-
/* read 32-bit value from @addr. */
uint32_t gdev_read32(struct gdev_mem *mem, uint64_t addr)
{
common/gdev_sched.c (135 changes)
@@ -143,6 +143,33 @@ static void __gdev_dequeue_compute(struct gdev_sched_entity *se)
}
/**
+ * insert the scheduling entity to the priority-ordered memory list.
+ * gdev->sched_mem_lock must be locked.
+ */
+static void __gdev_enqueue_memory(struct gdev_device *gdev, struct gdev_sched_entity *se)
+{
+ struct gdev_sched_entity *p;
+
+ gdev_list_for_each (p, &gdev->sched_mem_list, list_entry_mem) {
+ if (se->prio > p->prio) {
+ gdev_list_add_prev(&se->list_entry_mem, &p->list_entry_mem);
+ break;
+ }
+ }
+ if (gdev_list_empty(&se->list_entry_mem))
+ gdev_list_add_tail(&se->list_entry_mem, &gdev->sched_mem_list);
+}
+
+/**
+ * delete the scheduling entity from the priority-ordered memory list.
+ * gdev->sched_mem_lock must be locked.
+ */
+static void __gdev_dequeue_memory(struct gdev_sched_entity *se)
+{
+ gdev_list_del(&se->list_entry_mem);
+}
+
+/**
* scheduling policy files.
*/
#include "gdev_vsched_credit.c"
@@ -217,7 +244,6 @@ void gdev_select_next_compute(struct gdev_device *gdev)
gdev_time_sub(&exec, &now, &se->last_tick_com);
se->launch_instances--;
- printk("Gdev#%d instances %d\n", gdev->id, se->launch_instances);
if (se->launch_instances == 0) {
/* account for the credit. */
gdev_time_sub(&gdev->credit_com, &gdev->credit_com, &exec);
@@ -266,10 +292,47 @@ void gdev_select_next_compute(struct gdev_device *gdev)
}
/**
+ * automatically replenish the credit of compute launches.
+ */
+void gdev_replenish_credit_compute(struct gdev_device *gdev)
+{
+ gdev_vsched->replenish_compute(gdev);
+}
+
+/**
* schedule memcpy-copy calls.
*/
void gdev_schedule_memory(struct gdev_sched_entity *se)
{
+ struct gdev_device *gdev = se->gdev;
+
+resched:
+ /* algorithm-specific virtual device scheduler. */
+ gdev_vsched->schedule_memory(se);
+
+ /* local memory scheduler. */
+ gdev_lock(&gdev->sched_mem_lock);
+ if ((gdev->current_mem && gdev->current_mem != se) || se->memcpy_instances >= GDEV_INSTANCES_LIMIT) {
+ /* enqueue the scheduling entity to the memory queue. */
+ __gdev_enqueue_memory(gdev, se);
+ gdev_unlock(&gdev->sched_mem_lock);
+
+ /* now the corresponding task will be suspended until some other task
+ awakens it upon completion of its memory transfer. */
+ gdev_sched_sleep();
+
+ goto resched;
+ }
+ else {
+ /* now, let's get offloaded to the device! */
+ if (se->memcpy_instances == 0) {
+ /* record the start time. */
+ gdev_time_stamp(&se->last_tick_mem);
+ }
+ se->memcpy_instances++;
+ gdev->current_mem = (void*)se;
+ gdev_unlock(&gdev->sched_mem_lock);
+ }
}
/**
@@ -278,19 +341,75 @@ void gdev_schedule_memory(struct gdev_sched_entity *se)
*/
void gdev_select_next_memory(struct gdev_device *gdev)
{
-}
+ struct gdev_sched_entity *se;
+ struct gdev_device *next;
+ struct gdev_time now, exec;
-/**
- * automatically replenish the credit of compute launches.
- */
-void gdev_replenish_credit_compute(struct gdev_device *gdev)
-{
- gdev_vsched->replenish_compute(gdev);
+ gdev_lock(&gdev->sched_mem_lock);
+ se = (struct gdev_sched_entity *)gdev->current_mem;
+ if (!se) {
+ gdev_unlock(&gdev->sched_mem_lock);
+ GDEV_PRINT("Invalid scheduling entity on Gdev#%d\n", gdev->id);
+ return;
+ }
+
+ /* record the end time (update on multiple launches too). */
+ gdev_time_stamp(&now);
+ /* acquire the execution time. */
+ gdev_time_sub(&exec, &now, &se->last_tick_mem);
+
+ se->memcpy_instances--;
+ if (se->memcpy_instances == 0) {
+ /* account for the credit. */
+ gdev_time_sub(&gdev->credit_mem, &gdev->credit_mem, &exec);
+ /* accumulate the memory transfer time. */
+ gdev->mem_time += gdev_time_to_us(&exec);
+
+ /* select the next context to be scheduled.
+ now don't reference the previous entity by se. */
+ se = gdev_list_container(gdev_list_head(&gdev->sched_mem_list));
+ /* setting the next entity here prevents lower-priority contexts
+ arriving in gdev_schedule_memory() from being dispatched onto
+ the device. note that se = NULL could happen. */
+ gdev->current_mem = (void*)se;
+ gdev_unlock(&gdev->sched_mem_lock);
+
+ printk("gdev%d->credit_mem = %s%lu\n", gdev->id,
+ gdev->credit_mem.neg ? "-" : "",
+ gdev_time_to_us(&gdev->credit_mem));
+
+ /* select the next device to be scheduled. */
+ next = gdev_vsched->select_next_memory(gdev);
+ if (!next)
+ return;
+
+ gdev_lock(&next->sched_mem_lock);
+ /* if the virtual device needs to be switched, the next scheduling
+ entity to be scheduled also needs to be changed. */
+ if (next != gdev)
+ se = gdev_list_container(gdev_list_head(&next->sched_mem_list));
+
+ /* now remove the scheduling entity from the waiting list, and wake
+ up the corresponding task. */
+ if (se) {
+ __gdev_dequeue_memory(se);
+ gdev_unlock(&next->sched_mem_lock);
+
+ while (gdev_sched_wakeup(se->task) < 0) {
+ GDEV_PRINT("Failed to wake up context %d\n", se->ctx->cid);
+ }
+ }
+ else
+ gdev_unlock(&next->sched_mem_lock);
+ }
+ else
+ gdev_unlock(&gdev->sched_mem_lock);
}
/**
* automatically replenish the credit of memory copies.
*/
void gdev_replenish_credit_memory(struct gdev_device *gdev)
{
+ gdev_vsched->replenish_memory(gdev);
}
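
The memory scheduler added above mirrors the compute path: __gdev_enqueue_memory() walks the list and inserts the entity just before the first strictly lower-priority entry, so the queue stays priority-ordered and FIFO among equal priorities. A self-contained illustration of that insertion discipline (plain arrays instead of gdev_list; our construction, not Gdev code):

    #include <stdio.h>

    #define N 4

    int main(void)
    {
        int prio[N] = { 2, 5, 5, 1 };  /* arrival order */
        int queue[N], len = 0;

        for (int i = 0; i < N; i++) {
            int pos = len;
            for (int j = 0; j < len; j++) {
                if (prio[i] > queue[j]) { /* before first strictly lower prio */
                    pos = j;
                    break;
                }
            }
            for (int k = len; k > pos; k--)
                queue[k] = queue[k - 1];
            queue[pos] = prio[i];
            len++;
        }
        for (int i = 0; i < len; i++)
            printf("%d ", queue[i]);   /* prints: 5 5 2 1 */
        printf("\n");
        return 0;
    }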
common/gdev_sched.h (5 changes)
@@ -44,7 +44,7 @@
*/
#define GDEV_PERIOD_DEFAULT 100000 /*30000*/ /* microseconds */
#define GDEV_CREDIT_INACTIVE_THRESHOLD GDEV_PERIOD_DEFAULT
-#define GDEV_UPDATE_INTERVAL (GDEV_PERIOD_DEFAULT * 30)
+#define GDEV_UPDATE_INTERVAL (GDEV_PERIOD_DEFAULT * 10)
/**
* scheduling properties.
@@ -69,6 +69,9 @@ struct gdev_vsched_policy {
void (*schedule_compute)(struct gdev_sched_entity *se);
struct gdev_device *(*select_next_compute)(struct gdev_device *gdev);
void (*replenish_compute)(struct gdev_device *gdev);
+ void (*schedule_memory)(struct gdev_sched_entity *se);
+ struct gdev_device *(*select_next_memory)(struct gdev_device *gdev);
+ void (*replenish_memory)(struct gdev_device *gdev);
};
int gdev_init_scheduler(struct gdev_device *gdev);
common/gdev_vsched_credit.c (90 changes)
@@ -43,7 +43,7 @@ static void gdev_vsched_credit_schedule_compute(struct gdev_sched_entity *se)
gdev_unlock_nested(&gdev->sched_com_lock);
gdev_unlock(&phys->sched_com_lock);
- GDEV_PRINT("Gdev#%d Sleep\n", gdev->id);
+ GDEV_PRINT("Gdev#%d Compute Sleep\n", gdev->id);
/* now the corresponding task will be suspended until some other tasks
will awaken it upon completions of their compute launches. */
@@ -55,7 +55,7 @@ static void gdev_vsched_credit_schedule_compute(struct gdev_sched_entity *se)
phys->current_com = (void *)gdev;
gdev_unlock(&phys->sched_com_lock);
- GDEV_PRINT("Gdev#%d Run\n", gdev->id);
+ GDEV_PRINT("Gdev#%d Compute Run\n", gdev->id);
}
}
@@ -109,11 +109,97 @@ static void gdev_vsched_credit_replenish_compute(struct gdev_device *gdev)
gdev_time_us(&gdev->credit_com, 0);
}
+static void gdev_vsched_credit_schedule_memory(struct gdev_sched_entity *se)
+{
+ struct gdev_device *gdev = se->gdev;
+ struct gdev_device *phys = gdev->parent;
+
+ if (!phys)
+ return;
+
+resched:
+ gdev_lock(&phys->sched_mem_lock);
+ if (phys->current_mem && phys->current_mem != gdev) {
+ /* insert the scheduling entity to its local priority-ordered list. */
+ gdev_lock_nested(&gdev->sched_mem_lock);
+ __gdev_enqueue_memory(gdev, se);
+ gdev_unlock_nested(&gdev->sched_mem_lock);
+ gdev_unlock(&phys->sched_mem_lock);
+
+ GDEV_PRINT("Gdev#%d Memory Sleep\n", gdev->id);
+
+ /* now the corresponding task will be suspended until some other task
+ awakens it upon completion of its memory transfer. */
+ gdev_sched_sleep();
+
+ goto resched;
+ }
+ else {
+ phys->current_mem = (void *)gdev;
+ gdev_unlock(&phys->sched_mem_lock);
+
+ GDEV_PRINT("Gdev#%d Memory Run\n", gdev->id);
+ }
+}
+
+static struct gdev_device *gdev_vsched_credit_select_next_memory(struct gdev_device *gdev)
+{
+ struct gdev_device *phys = gdev->parent;
+ struct gdev_device *next;
+ struct gdev_time zero;
+
+ if (!phys)
+ return gdev;
+
+ gdev_lock(&phys->sched_mem_lock);
+
+ /* if the credit is exhausted, reinsert the device. */
+ gdev_time_us(&zero, 0);
+ if (gdev_time_le(&gdev->credit_mem, &zero)) {
+ gdev_list_del(&gdev->list_entry_mem);
+ gdev_list_add_tail(&gdev->list_entry_mem, &phys->sched_mem_list);
+ }
+
+ gdev_list_for_each(next, &phys->sched_mem_list, list_entry_mem) {
+ gdev_lock_nested(&next->sched_mem_lock);
+ if (!gdev_list_empty(&next->sched_mem_list)) {
+ gdev_unlock_nested(&next->sched_mem_lock);
+ goto device_switched;
+ }
+ gdev_unlock_nested(&next->sched_mem_lock);
+ }
+ next = NULL;
+device_switched:
+ phys->current_mem = (void*)next; /* could be null */
+ gdev_unlock(&phys->sched_mem_lock);
+
+ return next;
+}
+
+static void gdev_vsched_credit_replenish_memory(struct gdev_device *gdev)
+{
+ struct gdev_time credit, threshold;
+
+ gdev_time_us(&credit, gdev->period * gdev->mem_bw / 100);
+ gdev_time_add(&gdev->credit_mem, &gdev->credit_mem, &credit);
+ /* when the credit exceeds the threshold, all the credit is taken away. */
+ gdev_time_us(&threshold, GDEV_CREDIT_INACTIVE_THRESHOLD);
+ if (gdev_time_gt(&gdev->credit_mem, &threshold))
+ gdev_time_us(&gdev->credit_mem, 0);
+ /* when the credit falls below the negative threshold, even it out to zero. */
+ threshold.neg = 1;
+ if (gdev_time_lt(&gdev->credit_mem, &threshold))
+ gdev_time_us(&gdev->credit_mem, 0);
+}
+
/**
* the Xen Credit scheduler implementation.
*/
struct gdev_vsched_policy gdev_vsched_credit = {
.schedule_compute = gdev_vsched_credit_schedule_compute,
.select_next_compute = gdev_vsched_credit_select_next_compute,
.replenish_compute = gdev_vsched_credit_replenish_compute,
+ .schedule_memory = gdev_vsched_credit_schedule_memory,
+ .select_next_memory = gdev_vsched_credit_select_next_memory,
+ .replenish_memory = gdev_vsched_credit_replenish_memory,
};
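
The replenish arithmetic mirrors the compute-side credit policy: each period adds period * mem_bw / 100 microseconds of credit, and a balance past ±GDEV_CREDIT_INACTIVE_THRESHOLD is reset to zero so an idle or starved device cannot bank unbounded credit. The same logic in plain integers (the bandwidth share is illustrative, not from the commit):

    static long replenish_mem_credit(long credit_us)
    {
        const long period_us    = 100000;  /* GDEV_PERIOD_DEFAULT */
        const long mem_bw_pct   = 50;      /* hypothetical bandwidth share */
        const long threshold_us = 100000;  /* GDEV_CREDIT_INACTIVE_THRESHOLD */

        credit_us += period_us * mem_bw_pct / 100;
        if (credit_us > threshold_us)      /* over the cap: take it all away */
            credit_us = 0;
        if (credit_us < -threshold_us)     /* too deep in debt: even it out */
            credit_us = 0;
        return credit_us;
    }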
driver/gdev/gdev_drv.c (12 changes)
@@ -69,11 +69,10 @@ static void __gdev_notify_handler(int subc, uint32_t data)
se = sched_entity_ptr[cid];
gdev = se->gdev;
switch (subc) {
- case GDEV_SUBCH_LAUNCH:
+ case GDEV_SUBCH_COMPUTE:
wake_up_process(gdev->sched_com_thread);
break;
case GDEV_SUBCH_MEMCPY:
- case GDEV_SUBCH_MEMCPY_ASYNC:
wake_up_process(gdev->sched_mem_thread);
break;
default:
@@ -161,6 +160,7 @@ static int __gdev_credit_mem_thread(void *__data)
{
struct gdev_device *gdev = (struct gdev_device*)__data;
struct timer_list timer;
+ unsigned long elapsed = 0;
GDEV_PRINT("Gdev#%d memory reserve running\n", gdev->id);
@@ -171,6 +171,14 @@ static int __gdev_credit_mem_thread(void *__data)
mod_timer(&timer, jiffies + usecs_to_jiffies(gdev->period));
set_current_state(TASK_UNINTERRUPTIBLE);
schedule();
+ elapsed += gdev->period;
+ if (elapsed >= GDEV_UPDATE_INTERVAL) {
+ gdev->mem_bw_used = gdev->mem_time * 100 / GDEV_UPDATE_INTERVAL;
+ if (gdev->mem_bw_used > 100)
+ gdev->mem_bw_used = 100;
+ gdev->mem_time = 0;
+ elapsed = 0;
+ }
}
local_irq_enable();
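
For concreteness (our numbers, using the defaults above): GDEV_UPDATE_INTERVAL is now 10 * GDEV_PERIOD_DEFAULT = 10 * 100000 us = 1 s. If a device accumulates mem_time = 250000 us of copy time in that window, the reserve thread computes mem_bw_used = 250000 * 100 / 1000000 = 25 (percent, capped at 100), then clears mem_time and starts the next window.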
driver/gdev/gdev_ioctl.c (16 changes)
@@ -141,6 +141,7 @@ int gdev_ioctl_gmemcpy_to_device_async(Ghandle handle, unsigned long arg)
{
struct gdev_ioctl_dma dma;
int ret;
uint32_t id;
#ifndef GDEV_MEMCPY_USER_DIRECT
void *buf;
#endif
@@ -149,7 +150,7 @@ int gdev_ioctl_gmemcpy_to_device_async(Ghandle handle, unsigned long arg)
return -EFAULT;
#ifdef GDEV_MEMCPY_USER_DIRECT
- ret = gmemcpy_user_to_device_async(handle, dma.dst_addr, dma.src_buf, dma.size);
+ ret = gmemcpy_user_to_device_async(handle, dma.dst_addr, dma.src_buf, dma.size, &id);
if (ret)
return ret;
#else
@@ -164,7 +165,7 @@ int gdev_ioctl_gmemcpy_to_device_async(Ghandle handle, unsigned long arg)
if (copy_from_user(buf, (void __user *)dma.src_buf, dma.size))
return -EFAULT;
- ret = gmemcpy_to_device_async(handle, dma.dst_addr, buf, dma.size);
+ ret = gmemcpy_to_device_async(handle, dma.dst_addr, buf, dma.size, &id);
if (ret)
return ret;
@@ -174,6 +175,9 @@ int gdev_ioctl_gmemcpy_to_device_async(Ghandle handle, unsigned long arg)
kfree(buf);
#endif
+ if (copy_to_user((void __user *)dma.id, &id, sizeof(id)))
+ return -EFAULT;
+
return 0;
}
@@ -221,6 +225,7 @@ int gdev_ioctl_gmemcpy_from_device_async(Ghandle handle, unsigned long arg)
{
struct gdev_ioctl_dma dma;
int ret;
uint32_t id;
#ifndef GDEV_MEMCPY_USER_DIRECT
void *buf;
#endif
@@ -229,7 +234,7 @@ int gdev_ioctl_gmemcpy_from_device_async(Ghandle handle, unsigned long arg)
return -EFAULT;
#ifdef GDEV_MEMCPY_USER_DIRECT
- ret = gmemcpy_user_from_device_async(handle, dma.dst_buf, dma.src_addr, dma.size);
+ ret = gmemcpy_user_from_device_async(handle, dma.dst_buf, dma.src_addr, dma.size, &id);
if (ret)
return ret;
#else
@@ -241,7 +246,7 @@ int gdev_ioctl_gmemcpy_from_device_async(Ghandle handle, unsigned long arg)
if (!buf)
return -ENOMEM;
- ret = gmemcpy_from_device_async(handle, buf, dma.src_addr, dma.size);
+ ret = gmemcpy_from_device_async(handle, buf, dma.src_addr, dma.size, &id);
if (ret)
return ret;
@@ -254,6 +259,9 @@ int gdev_ioctl_gmemcpy_from_device_async(Ghandle handle, unsigned long arg)
kfree(buf);
#endif
+ if (copy_to_user((void __user *)dma.id, &id, sizeof(id)))
+ return -EFAULT;
+
return 0;
}
runtime/kernel/gdev_lib.c (22 changes)
@@ -154,7 +154,7 @@ uint64_t gfree_dma(struct gdev_handle *h, void *buf)
return mem.size;
}
-static int __gmemcpy_to_device(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size, int ioctl_cmd)
+static int __gmemcpy_to_device(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id, int ioctl_cmd)
{
struct gdev_map_bo *bo;
struct gdev_ioctl_dma dma;
@@ -171,23 +171,22 @@ static int __gmemcpy_to_device(struct gdev_handle *h, uint64_t dst_addr, const v
else
dma.src_buf = src_buf;
dma.size = size;
+ dma.id = id;
return ioctl(fd, ioctl_cmd, &dma);
}
int gmemcpy_to_device(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
{
- return __gmemcpy_to_device(h, dst_addr, src_buf, size,
- GDEV_IOCTL_GMEMCPY_TO_DEVICE);
+ return __gmemcpy_to_device(h, dst_addr, src_buf, size, NULL, GDEV_IOCTL_GMEMCPY_TO_DEVICE);
}
-int gmemcpy_to_device_async(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size)
+int gmemcpy_to_device_async(struct gdev_handle *h, uint64_t dst_addr, const void *src_buf, uint64_t size, uint32_t *id)
{
- return __gmemcpy_to_device(h, dst_addr, src_buf, size,
- GDEV_IOCTL_GMEMCPY_TO_DEVICE_ASYNC);
+ return __gmemcpy_to_device(h, dst_addr, src_buf, size, id, GDEV_IOCTL_GMEMCPY_TO_DEVICE_ASYNC);
}
-static int __gmemcpy_from_device(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size, int ioctl_cmd)
+static int __gmemcpy_from_device(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id, int ioctl_cmd)
{
struct gdev_map_bo *bo;
struct gdev_ioctl_dma dma;
@@ -204,20 +203,19 @@ static int __gmemcpy_from_device(struct gdev_handle *h, void *dst_buf, uint64_t
else
dma.dst_buf = dst_buf;
dma.size = size;
+ dma.id = id;
return ioctl(fd, ioctl_cmd, &dma);
}
int gmemcpy_from_device(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size)
{
- return __gmemcpy_from_device(h, dst_buf, src_addr, size,
- GDEV_IOCTL_GMEMCPY_FROM_DEVICE);
+ return __gmemcpy_from_device(h, dst_buf, src_addr, size, NULL, GDEV_IOCTL_GMEMCPY_FROM_DEVICE);
}
-int gmemcpy_from_device_async(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size)
+int gmemcpy_from_device_async(struct gdev_handle *h, void *dst_buf, uint64_t src_addr, uint64_t size, uint32_t *id)
{
- return __gmemcpy_from_device(h, dst_buf, src_addr, size,
- GDEV_IOCTL_GMEMCPY_FROM_DEVICE_ASYNC);
+ return __gmemcpy_from_device(h, dst_buf, src_addr, size, id, GDEV_IOCTL_GMEMCPY_FROM_DEVICE_ASYNC);
}
int gmemcpy_in_device(struct gdev_handle *h, uint64_t dst_addr, uint64_t src_addr, uint64_t size)
test/cuda/common/loop_repeated.c (22 changes)
@@ -25,7 +25,7 @@ static inline void tvsub(struct timeval *x,
}
}
-int cuda_test_loop_repeated(unsigned int n, int count, int id, char *path)
+int cuda_test_loop_repeated(unsigned int n, int sec, int id, char *path)
{
int i, j, idx;
CUresult res;
@@ -37,13 +37,15 @@ int cuda_test_loop_repeated(unsigned int n, int count, int id, char *path)
unsigned int *data = (unsigned int *) malloc (n * sizeof(unsigned int));
int block_x, block_y, grid_x, grid_y;
char fname[256];
- struct timeval tv;
+ struct timeval tv_start, tv_now, tv;
block_x = 1;
block_y = 1;
grid_x = 1;
grid_y = 1;
+ gettimeofday(&tv_start, NULL);
+
res = cuInit(0);
if (res != CUDA_SUCCESS) {
printf("cuInit failed: res = %lu\n", (unsigned long)res);
@@ -90,6 +92,8 @@ int cuda_test_loop_repeated(unsigned int n, int count, int id, char *path)
return -1;
}
+repeat:
+ gettimeofday(&tv_now, NULL);
+ usleep((tv_now.tv_usec % 10) * 100); /* 0-900 us of jitter */
res = cuMemcpyHtoD(d_data, data, n * sizeof(unsigned int));
if (res != CUDA_SUCCESS) {
printf("cuMemcpyHtoD failed: res = %lu\n", (unsigned long)res);
@@ -123,26 +127,26 @@ int cuda_test_loop_repeated(unsigned int n, int count, int id, char *path)
return -1;
}
-repeat:
- usleep(50);
res = cuLaunchGrid(function, grid_x, grid_y);
if (res != CUDA_SUCCESS) {
printf("cuLaunchGrid failed: res = %lu\n", (unsigned long)res);
return -1;
}
cuCtxSynchronize();
- gettimeofday(&tv, NULL);
- printf("%lu:%lu\n", tv.tv_sec, tv.tv_usec);
- count--;
- if (count)
- goto repeat;
res = cuMemcpyDtoH(data, d_data, n * sizeof(unsigned int));
if (res != CUDA_SUCCESS) {
printf("cuMemcpyDtoH failed: res = %lu\n", (unsigned long)res);
return -1;
}
+ gettimeofday(&tv_now, NULL);
+ tvsub(&tv_now, &tv_start, &tv);
+ printf("%lu:%lu\n", tv.tv_sec, tv.tv_usec);
+ if (tv.tv_sec < sec) {
+ goto repeat;
+ }
+
res = cuMemFree(d_data);
if (res != CUDA_SUCCESS) {
printf("cuMemFree failed: res = %lu\n", (unsigned long)res);
test/cuda/user/loop_repeated/main.c (8 changes)
@@ -1,21 +1,21 @@
#include <stdio.h>
-int cuda_test_loop_repeated(unsigned int n, int count, int id, char *path);
+int cuda_test_loop_repeated(unsigned int n, int sec, int id, char *path);
int main(int argc, char *argv[])
{
unsigned int n = 3;
- int count = 10;
+ int sec = 10;
int id = 0;
if (argc > 1)
n = atoi(argv[1]);
if (argc > 2)
- count = atoi(argv[2]);
+ sec = atoi(argv[2]);
if (argc > 3)
id = atoi(argv[3]);
- if (cuda_test_loop_repeated(n, count, id, ".") < 0)
+ if (cuda_test_loop_repeated(n, sec, id, ".") < 0)
printf("Test failed\n");
else
printf("Test passed\n");
