Permalink
Browse files

cuda: fixed cuDeviceTotalMem()

test: added memcpy_swap test
  • Loading branch information...
1 parent ab641f3 commit d799917da97b6fa0e19595058cffe29cb34d4603 Shinpei Kato committed Dec 13, 2011
View
@@ -140,7 +140,18 @@ int gdev_poll
/* query device-specific information. */
int gdev_query(struct gdev_device *gdev, uint32_t type, uint64_t *result)
{
- return gdev_raw_query(gdev, type, result);
+ int ret;
+
+ if ((ret = gdev_raw_query(gdev, type, result)))
+ return ret;
+
+ switch (type) {
+ case GDEV_NVIDIA_QUERY_DEVICE_MEM_SIZE:
+ *result -= 0xc010000; /* FIXME: this shouldn't be hardcoded. */
+ break;
+ }
+
+ return 0;
}
/* open a new Gdev object associated with the specified device. */
View
@@ -38,12 +38,12 @@
/**
* query values for the device-specific information.
- * they should be >= 100.
+ * they should be >= 0x100.
+ * each value is passed as the "type" argument of a query call.
*/
-#define GDEV_NVIDIA_QUERY_MP_COUNT 100
-#define GDEV_NVIDIA_QUERY_DEVICE_MEM_SIZE 101
-#define GDEV_NVIDIA_QUERY_DMA_MEM_SIZE 102
-#define GDEV_NVIDIA_QUERY_CHIPSET 103
+#define GDEV_NVIDIA_QUERY_MP_COUNT 0x100 /* presumably the multiprocessor count -- TODO confirm */
+#define GDEV_NVIDIA_QUERY_DEVICE_MEM_SIZE 0x101 /* device memory size; the query used by cuDeviceTotalMem() */
+#define GDEV_NVIDIA_QUERY_DMA_MEM_SIZE 0x102 /* presumably the DMA-able memory size -- TODO confirm */
+#define GDEV_NVIDIA_QUERY_CHIPSET 0x103 /* presumably the GPU chipset identifier -- TODO confirm */
/**
* GPGPU kernel object struct:
View
@@ -27,6 +27,12 @@
#ifndef __CUDA_H__
#define __CUDA_H__
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stddef.h>
+#endif
+
typedef unsigned long long CUdeviceptr;
typedef int CUdevice;
typedef struct CUctx_st* CUcontext;
@@ -707,7 +713,7 @@ CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
CUresult cuDeviceGetCount(int *count);
CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
CUresult cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
-CUresult cuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
+CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev);
/* Version Management */
CUresult cuDriverGetVersion (int *driverVersion);
View
@@ -262,7 +262,7 @@ CUresult cuDeviceGetProperties(CUdevprop *prop, CUdevice dev)
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*/
-CUresult cuDeviceTotalMem(unsigned int *bytes, CUdevice dev)
+CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev)
{
Ghandle handle;
@@ -277,7 +277,7 @@ CUresult cuDeviceTotalMem(unsigned int *bytes, CUdevice dev)
handle = gdev_ctx_current->gdev_handle;
- if (gquery(handle, GDEV_NVIDIA_QUERY_MP_COUNT, (uint64_t*) bytes)) {
+ if (gquery(handle, GDEV_NVIDIA_QUERY_DEVICE_MEM_SIZE, (uint64_t*) bytes)) {
return CUDA_ERROR_UNKNOWN;
}
@@ -0,0 +1,12 @@
+# Makefile
+# Builds the user_test program from main.c, using the headers and
+# libraries installed under /usr/local/gdev.
+TARGET = user_test
+CC = gcc
+NVCC = nvcc -arch sm_20 -cubin
+LIBS = -lcuda -lgdev
+CFLAGS = -L /usr/local/gdev/lib64 -I /usr/local/gdev/include
+
+# Neither target names a real file.
+.PHONY: all clean
+
+# Use $(CC) so the compiler can be overridden, and list the libraries
+# after main.c: the linker resolves symbols in command-line order, so
+# libraries named before the source that needs them may be dropped.
+all:
+	$(CC) -o $(TARGET) $(CFLAGS) main.c $(LIBS)
+
+clean:
+	rm -f $(TARGET) *.cubin ./*~
@@ -0,0 +1,12 @@
+# Makefile
+# Builds the user_test program from main.c, using the headers
+# installed under /usr/local/cuda.
+TARGET = user_test
+CC = gcc
+NVCC = nvcc -arch sm_20 -cubin
+LIBS = -lcuda
+CFLAGS = -I /usr/local/cuda/include
+
+# Neither target names a real file.
+.PHONY: all clean
+
+# Use $(CC) so the compiler can be overridden, and list the libraries
+# after main.c: the linker resolves symbols in command-line order, so
+# libraries named before the source that needs them may be dropped.
+all:
+	$(CC) -o $(TARGET) $(CFLAGS) main.c $(LIBS)
+
+clean:
+	rm -f $(TARGET) *.cubin ./*~
@@ -0,0 +1,177 @@
+#include <cuda.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+/* Print the failing driver API name and result code, then abort the test. */
+static void check(CUresult res, const char *api)
+{
+	if (res != CUDA_SUCCESS) {
+		printf("%s failed: res = %u\n", api, (unsigned int)res);
+		exit(-1);
+	}
+}
+
+/*
+ * Copy a known pattern of `size` bytes host -> device -> host through the
+ * device buffer `data_addr`, free that buffer, destroy `ctx`, and then
+ * compare the data that came back with the data that was sent.
+ *
+ * Returns 0 if every word matches, -1 after reporting the first mismatch.
+ * A host allocation failure or a failing driver call ends the process
+ * with exit(-1).
+ */
+static int memcpy_roundtrip(CUcontext ctx, CUdeviceptr data_addr,
+			    unsigned int size)
+{
+	/* size / 4 is at most 0x3fffffff, so it fits in an int. */
+	int words = (int)(size / 4);
+	int i;
+	int ret = 0;
+	unsigned int *in, *out;
+
+	in = (unsigned int *) malloc(size);
+	out = (unsigned int *) malloc(size);
+	if (!in || !out) {
+		/* two host buffers of `size` bytes each may not be available. */
+		printf("malloc failed: size = 0x%x\n", size);
+		exit(-1);
+	}
+	for (i = 0; i < words; i++) {
+		in[i] = i+1;
+		out[i] = 0;
+	}
+
+	check(cuMemcpyHtoD(data_addr, in, size), "cuMemcpyHtoD");
+	check(cuMemcpyDtoH(out, data_addr, size), "cuMemcpyDtoH");
+	check(cuMemFree(data_addr), "cuMemFree");
+	check(cuCtxDestroy(ctx), "cuCtxDestroy");
+
+	for (i = 0; i < words; i++) {
+		if (in[i] != out[i]) {
+			printf("in[%d] = %u, out[%d] = %u\n",
+			       i, in[i], i, out[i]);
+			ret = -1;
+			break;
+		}
+	}
+
+	free(in);
+	free(out);
+
+	return ret;
+}
+
+/*
+ * memcpy test run by two processes at once.
+ *
+ * The parent creates a context and allocates `size` bytes of device
+ * memory, then forks.  The child creates its own context, allocates
+ * another `size` bytes and does a host -> device -> host round trip.
+ * Once the child has exited, the parent does the same round trip through
+ * the buffer it allocated before the fork.
+ *
+ * argv[1] (optional): buffer size in bytes, in hexadecimal
+ * (default 0x10000000).
+ *
+ * Exit status: 0 if both round trips returned intact data, non-zero
+ * otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	pid_t pid;
+	int status;
+	int failed;
+	int child_failed = 0;
+	unsigned int size = 0x10000000; /* 256MB */
+	size_t total_mem;
+	CUdevice dev;
+	CUcontext ctx;
+	CUdeviceptr data_addr;
+
+	/* reject an unparsable size instead of silently using the default. */
+	if (argc > 1 && sscanf(argv[1], "%x", &size) != 1) {
+		fprintf(stderr, "usage: %s [size-in-hex]\n", argv[0]);
+		exit(-1);
+	}
+
+	check(cuInit(0), "cuInit");
+	check(cuDeviceGet(&dev, 0), "cuDeviceGet");
+	check(cuCtxCreate(&ctx, 0, dev), "cuCtxCreate");
+	check(cuDeviceTotalMem(&total_mem, dev), "cuDeviceTotalMem");
+	/* total_mem is a size_t, so the conversion needs the z modifier. */
+	printf("Total Memory = 0x%zx\n", total_mem);
+
+	check(cuMemAlloc(&data_addr, size), "cuMemAlloc");
+
+	/* flush first so the child does not re-emit inherited buffered output. */
+	fflush(stdout);
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork");
+		exit(-1);
+	}
+
+	if (pid == 0) { /* child */
+		/* the child builds its own context and device buffer. */
+		check(cuInit(0), "cuInit");
+		check(cuDeviceGet(&dev, 0), "cuDeviceGet");
+		check(cuCtxCreate(&ctx, 0, dev), "cuCtxCreate");
+		check(cuMemAlloc(&data_addr, size), "cuMemAlloc");
+
+		failed = memcpy_roundtrip(ctx, data_addr, size);
+
+		printf("Child finished\n");
+		/* report success as 0 so the parent can tell the outcomes apart. */
+		exit(failed ? 1 : 0);
+	}
+
+	/* parent */
+	if (waitpid(pid, &status, 0) < 0) {
+		perror("waitpid");
+		child_failed = 1;
+	}
+	else if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		fprintf(stderr, "Child failed\n");
+		child_failed = 1;
+	}
+
+	/* the parent still runs its own round trip, whatever the child did. */
+	failed = memcpy_roundtrip(ctx, data_addr, size);
+
+	printf("Parent finished\n");
+
+	return (failed || child_failed) ? 1 : 0;
+}

0 comments on commit d799917

Please sign in to comment.