Permalink
Browse files

gdev: changed list operation interface

gdev: changed constant memory space interface
  • Loading branch information...
Shinpei Kato
Shinpei Kato committed Nov 15, 2011
1 parent 0d11ec1 commit d2a083ccc35ca7756d6cf61d992767c76b044cc9
View
@@ -25,13 +25,8 @@
#include "gdev_api.h"
#include "gdev_conf.h"
-static inline uint64_t min_u64(uint64_t x, uint64_t y)
-{
- if (x > y)
- return y;
- else
- return x;
-}
/*
 * Type-generic maximum / minimum of two values.
 *
 * The whole expansion is wrapped in parentheses so that the macros can be
 * embedded in a larger expression (for example 2 * __min(a, b)) without the
 * conditional operator absorbing the neighbouring operands.
 *
 * NOTE: each argument is evaluated more than once; do not pass expressions
 * with side effects (e.g. __min(i++, n)).
 */
#define __max(x, y) ((x) > (y) ? (x) : (y))
#define __min(x, y) ((x) < (y) ? (x) : (y))
static inline gdev_mem_t **__malloc_dma
(gdev_handle_t *handle, gdev_vas_t *vas, uint64_t size)
@@ -239,7 +234,7 @@ static inline int __gmemcpy_from_device_pipeline
#ifdef GDEV_NO_STATIC_BOUNCE_BUFFER
gdev_vas_t *vas = GDEV_VAS_GET(handle);
- uint64_t dma_size = min_u64(size, chunk_size);
+ uint64_t dma_size = __min(size, chunk_size);
if (!(dma_mem = __malloc_dma(handle, vas, dma_size)))
return -ENOMEM;
#else
@@ -253,7 +248,7 @@ static inline int __gmemcpy_from_device_pipeline
}
offset = 0;
- dma_size = min_u64(rest_size, chunk_size);
+ dma_size = __min(rest_size, chunk_size);
rest_size -= dma_size;
/* DtoH */
fence[0] = gdev_memcpy(ctx, dma_addr[0], src_addr + offset, dma_size);
@@ -266,7 +261,7 @@ static inline int __gmemcpy_from_device_pipeline
goto end;
}
- dma_size = min_u64(rest_size, chunk_size);
+ dma_size = __min(rest_size, chunk_size);
rest_size -= dma_size;
offset += dma_size;
@@ -317,7 +312,7 @@ static inline int __gmemcpy_from_device
#ifdef GDEV_NO_STATIC_BOUNCE_BUFFER
gdev_vas_t *vas = GDEV_VAS_GET(handle);
- uint64_t dma_size = min_u64(size, chunk_size);
+ uint64_t dma_size = __min(size, chunk_size);
if (!(dma_mem = __malloc_dma(handle, vas, dma_size)))
return -ENOMEM;
#else
@@ -330,7 +325,7 @@ static inline int __gmemcpy_from_device
/* copy data by the bounce buffer size. */
offset = 0;
while (rest_size) {
- dma_size = min_u64(rest_size, chunk_size);
+ dma_size = __min(rest_size, chunk_size);
fence = gdev_memcpy(ctx, dma_addr[0], src_addr + offset, dma_size);
gdev_poll(ctx, GDEV_FENCE_DMA, fence);
ret = memcpy_host(dst_buf + offset, dma_buf[0], dma_size);
@@ -386,7 +381,7 @@ static inline int __gmemcpy_to_device_pipeline
offset = 0;
for (;;) {
for (i = 0; i < pipelines; i++) {
- dma_size = min_u64(rest_size, chunk_size);
+ dma_size = __min(rest_size, chunk_size);
rest_size -= dma_size;
/* HtoH */
if (fence[i])
@@ -447,7 +442,7 @@ static inline int __gmemcpy_to_device
/* copy data by the bounce buffer size. */
while (rest_size) {
- dma_size = min_u64(rest_size, chunk_size);
+ dma_size = __min(rest_size, chunk_size);
ret = memcpy_host(dma_buf[0], src_buf + offset, dma_size);
if (ret)
goto end;
View
@@ -1,26 +1,26 @@
/*
- * Copyright 2011 Shinpei Kato
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
+* Copyright 2011 Shinpei Kato
+* All Rights Reserved.
+*
+* Permission is hereby granted, free of charge, to any person obtaining a
+* copy of this software and associated documentation files (the "Software"),
+* to deal in the Software without restriction, including without limitation
+* the rights to use, copy, modify, merge, publish, distribute, sublicense,
+* and/or sell copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice (including the next
+* paragraph) shall be included in all copies or substantial portions of the
+* Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+* VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+* OTHER DEALINGS IN THE SOFTWARE.
+*/
#ifndef __GDEV_LIST_H__
#define __GDEV_LIST_H__
@@ -47,7 +47,7 @@ static inline void __gdev_list_add
(struct gdev_list *entry, struct gdev_list *head)
{
struct gdev_list *next = head->next;
-
+
entry->next = next;
if (next)
next->prev = entry;
@@ -59,7 +59,7 @@ static inline void __gdev_list_del(struct gdev_list *entry)
{
struct gdev_list *next = entry->next;
struct gdev_list *prev = entry->prev;
-
+
if (next) {
next->prev = entry->prev;
}
@@ -83,7 +83,7 @@ static inline void *__gdev_list_container(struct gdev_list *entry)
return entry->container;
}
-#define gdev_list_for_each(p, entry, list) \
+#define gdev_list_for_each(p, entry, list) \
for (entry = __gdev_list_head(list), p = __gdev_list_container(entry); \
entry != NULL; \
entry = (entry)->next, p = __gdev_list_container(entry))
View
@@ -31,10 +31,15 @@
#include <stdint.h>
#endif
+/**
+ * static numbers for nvidia GPUs.
+ */
+#define GDEV_NVIDIA_CONST_SEGMENT_MAX_COUNT 16 /* by definition? */
+
/**
* query values for the device-specific information
*/
-#define GDEV_QUERY_NVIDIA_MP_COUNT 100
+#define GDEV_NVIDIA_QUERY_MP_COUNT 100
/**
* GPGPU kernel object struct:
@@ -47,23 +52,26 @@
struct gdev_kernel {
uint64_t code_addr; /* code address in VAS */
uint32_t code_pc; /* initial program counter */
- uint64_t cmem_addr; /* constant memory address in VAS */
- uint32_t cmem_segment; /* constant segment index */
- uint32_t cmem_size; /* constant memory size */
+ struct gdev_cmem {
+ uint64_t addr; /* constant memory address in VAS */
+ uint32_t size; /* constant memory size */
+ uint32_t offset; /* offset in constant memory */
+ uint32_t *buf; /* data buffer */
+ } cmem[GDEV_NVIDIA_CONST_SEGMENT_MAX_COUNT];
+ uint32_t cmem_param_segment; /* constant memory segment for parameters */
+ uint32_t cmem_count; /* constant memory count */
uint64_t lmem_addr; /* local memory address in VAS */
uint64_t lmem_size_total; /* local memory size for all threads */
uint32_t lmem_size; /* local memory size per thread (l[positive]) */
uint32_t lmem_size_neg; /* local memory size per thread (l[negative]) */
uint32_t lmem_base; /* $lbase */
uint32_t smem_size; /* shared memory size */
uint32_t smem_base; /* $sbase */
- uint32_t param_start; /* parameter start position (compiler-dependent) */
- uint32_t param_count; /* parameter count */
- uint32_t *param_buf; /* parameter data */
uint32_t stack_level; /* stack level */
uint32_t warp_size; /* warp size */
uint32_t reg_count; /* register count */
uint32_t bar_count; /* barrier count */
+ uint32_t call_limit; /* call limit log */
uint32_t grid_id; /* grid ID */
uint32_t grid_x; /* grid dimension X */
uint32_t grid_y; /* grid dimension Y */
View
@@ -68,7 +68,6 @@ typedef struct gdev_ctx gdev_ctx_t;
typedef struct gdev_mem gdev_mem_t;
typedef struct gdev_handle gdev_handle_t;
typedef struct gdev_device gdev_device_t;
-typedef struct gdev_list_head gdev_list_t;
/**
* Gdev handle struct:
@@ -24,6 +24,7 @@
#include "gdev_conf.h"
#include "gdev_drv.h"
+#include "gdev_list.h"
#include "nouveau_drv.h"
#include "pscnv_chan.h"
#include "pscnv_fifo.h"
@@ -134,7 +135,7 @@ int gdev_info_query(gdev_device_t *gdev, uint32_t type, uint32_t *result)
uint32_t chipset = priv->chipset;
switch (type) {
- case GDEV_QUERY_NVIDIA_MP_COUNT:
+ case GDEV_NVIDIA_QUERY_MP_COUNT:
if ((chipset & 0xf0) != 0xc0)
return -EINVAL;
getparam.param = PSCNV_GETPARAM_MP_COUNT;
@@ -75,7 +75,6 @@ typedef struct gdev_ctx gdev_ctx_t;
typedef struct gdev_mem gdev_mem_t;
typedef struct gdev_handle gdev_handle_t;
typedef struct gdev_device gdev_device_t;
-typedef struct list_head gdev_list_t;
/**
* Gdev handle struct:
@@ -108,7 +108,7 @@ int gdev_info_query(struct gdev_device *gdev, uint32_t type, uint32_t *result)
int fd = gdev->fd;
switch (type) {
- case GDEV_QUERY_NVIDIA_MP_COUNT:
+ case GDEV_NVIDIA_QUERY_MP_COUNT:
if (pscnv_getparam(fd, PSCNV_GETPARAM_MP_COUNT, (uint64_t *)result))
return -EINVAL;
break;
View
@@ -152,10 +152,9 @@ int gdev_test_matrixadd(uint32_t *a, uint32_t *b, uint32_t *c, int n)
c_size = n * n * sizeof(uint32_t);
k.code_pc = 0;
- k.cmem_segment = 0;
- k.cmem_size = PARAM_SIZE;
- if (k.cmem_size == 0 || k.cmem_size & 0xff)
- k.cmem_size = (k.cmem_size + 0x100) & ~0xff;
+ k.cmem[0].size = PARAM_SIZE;
+ if (k.cmem[0].size == 0 || k.cmem[0].size & 0xff)
+ k.cmem[0].size = (k.cmem[0].size + 0x100) & ~0xff;
k.lmem_size = LOCAL_SIZE;
if (k.lmem_size & 0xf)
k.lmem_size = (k.lmem_size + 0x10) & ~0xf;
@@ -181,7 +180,7 @@ int gdev_test_matrixadd(uint32_t *a, uint32_t *b, uint32_t *c, int n)
k.warp_size = 32 * (stack_size + k.lmem_size + k.lmem_size_neg);
/* FIXME: the number of active warps may differ from 48. */
- gquery(handle, GDEV_QUERY_NVIDIA_MP_COUNT, &mp_count);
+ gquery(handle, GDEV_NVIDIA_QUERY_MP_COUNT, &mp_count);
k.lmem_size_total = 48 * mp_count * k.warp_size;
k.lmem_size_total = __round_up_pow2(k.lmem_size_total);
if (k.lmem_size_total > 128 * 1024)
@@ -195,24 +194,32 @@ int gdev_test_matrixadd(uint32_t *a, uint32_t *b, uint32_t *c, int n)
return -1;
if (!(k.code_addr = gmalloc(handle, code_size)))
return -1;
- if (!(k.cmem_addr = gmalloc(handle, k.cmem_size)))
+ if (!(k.cmem[0].addr = gmalloc(handle, k.cmem[0].size)))
return -1;
if (!(k.lmem_addr = gmalloc(handle, k.lmem_size_total)))
return -1;
- k.param_count = PARAM_SIZE / 4; /* note param is integer size. */
- k.param_start = 0;
- k.param_buf = c0;
- k.param_buf[NVCC_PARAM_OFFSET/4 + 0] = a_addr;
- k.param_buf[NVCC_PARAM_OFFSET/4 + 1] = a_addr >> 32;
- k.param_buf[NVCC_PARAM_OFFSET/4 + 2] = b_addr;
- k.param_buf[NVCC_PARAM_OFFSET/4 + 3] = b_addr >> 32;
- k.param_buf[NVCC_PARAM_OFFSET/4 + 4] = c_addr;
- k.param_buf[NVCC_PARAM_OFFSET/4 + 5] = c_addr >> 32;
- k.param_buf[NVCC_PARAM_OFFSET/4 + 6] = n;
+ k.cmem[0].offset = 0;
+ k.cmem[0].buf = c0;
+ k.cmem[0].buf[NVCC_PARAM_OFFSET/4 + 0] = a_addr;
+ k.cmem[0].buf[NVCC_PARAM_OFFSET/4 + 1] = a_addr >> 32;
+ k.cmem[0].buf[NVCC_PARAM_OFFSET/4 + 2] = b_addr;
+ k.cmem[0].buf[NVCC_PARAM_OFFSET/4 + 3] = b_addr >> 32;
+ k.cmem[0].buf[NVCC_PARAM_OFFSET/4 + 4] = c_addr;
+ k.cmem[0].buf[NVCC_PARAM_OFFSET/4 + 5] = c_addr >> 32;
+ k.cmem[0].buf[NVCC_PARAM_OFFSET/4 + 6] = n;
+ for (i = 1; i < GDEV_NVIDIA_CONST_SEGMENT_MAX_COUNT; i++) {
+ k.cmem[i].addr = 0;
+ k.cmem[i].size = 0;
+ k.cmem[i].offset = 0;
+ k.cmem[i].buf = NULL;
+ }
+ k.cmem_count = GDEV_NVIDIA_CONST_SEGMENT_MAX_COUNT;
+ k.cmem_param_segment = 0; /* c0[] is used for parameters in nvcc. */
k.reg_count = REG_COUNT;
k.bar_count = BARRIER_COUNT;
+ k.call_limit = 0xf;
k.grid_id = 1;
k.block_x = n < 32 ? n : 32;
@@ -237,7 +244,7 @@ int gdev_test_matrixadd(uint32_t *a, uint32_t *b, uint32_t *c, int n)
gfree(handle, b_addr);
gfree(handle, c_addr);
gfree(handle, k.code_addr);
- gfree(handle, k.cmem_addr);
+ gfree(handle, k.cmem[0].addr);
gfree(handle, k.lmem_addr);
gclose(handle);

0 comments on commit d2a083c

Please sign in to comment.