Skip to content

Commit

Permalink
cuda: fixed cuDeviceTotalMem()
Browse files Browse the repository at this point in the history
test: added memcpy_swap test
  • Loading branch information
Shinpei Kato committed Dec 13, 2011
1 parent ab641f3 commit d799917
Show file tree
Hide file tree
Showing 7 changed files with 227 additions and 9 deletions.
13 changes: 12 additions & 1 deletion common/gdev_nvidia.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,18 @@ int gdev_poll
/**
 * query device-specific information.
 *
 * @gdev:   device to query; passed through to gdev_raw_query() unchanged.
 * @type:   one of the GDEV_NVIDIA_QUERY_* values.
 * @result: receives the queried value.
 *
 * Returns 0 on success; otherwise the non-zero value returned by
 * gdev_raw_query() is passed back as is and no adjustment is applied.
 */
int gdev_query(struct gdev_device *gdev, uint32_t type, uint64_t *result)
{
	int ret;

	ret = gdev_raw_query(gdev, type, result);
	if (ret)
		return ret;

	/* post-process the raw value for the query types that need it. */
	switch (type) {
	case GDEV_NVIDIA_QUERY_DEVICE_MEM_SIZE:
		/*
		 * NOTE(review): presumably this is the part of device memory
		 * that is not available to users - confirm. The subtraction
		 * wraps around if the raw size is smaller than the constant.
		 */
		*result -= 0xc010000; /* FIXME: this shouldn't be hardcoded. */
		break;
	default:
		/* every other query type is reported as returned by the raw query. */
		break;
	}

	return 0;
}

/* open a new Gdev object associated with the specified device. */
Expand Down
10 changes: 5 additions & 5 deletions common/gdev_nvidia_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@

/**
 * query values for the device-specific information.
 * they should be >= 0x100.
 *
 * each value is defined exactly once; redefining a macro with a different
 * replacement list is a constraint violation in C.
 */
#define GDEV_NVIDIA_QUERY_MP_COUNT 0x100
#define GDEV_NVIDIA_QUERY_DEVICE_MEM_SIZE 0x101
#define GDEV_NVIDIA_QUERY_DMA_MEM_SIZE 0x102
#define GDEV_NVIDIA_QUERY_CHIPSET 0x103

/**
* GPGPU kernel object struct:
Expand Down
8 changes: 7 additions & 1 deletion cuda/driver_api/cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@
#ifndef __CUDA_H__
#define __CUDA_H__

#ifdef __KERNEL__
#include <linux/types.h>
#else
#include <stddef.h>
#endif

typedef unsigned long long CUdeviceptr;
typedef int CUdevice;
typedef struct CUctx_st* CUcontext;
Expand Down Expand Up @@ -707,7 +713,7 @@ CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
CUresult cuDeviceGetCount(int *count);
CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
CUresult cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
/* the size is reported through a size_t; declared once, matching the definition. */
CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev);

/* Version Management */
CUresult cuDriverGetVersion (int *driverVersion);
Expand Down
4 changes: 2 additions & 2 deletions cuda/driver_api/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ CUresult cuDeviceGetProperties(CUdevprop *prop, CUdevice dev)
* CUDA_ERROR_INVALID_CONTEXT, CUDA_ERROR_INVALID_VALUE,
* CUDA_ERROR_INVALID_DEVICE
*/
CUresult cuDeviceTotalMem(unsigned int *bytes, CUdevice dev)
CUresult cuDeviceTotalMem(size_t *bytes, CUdevice dev)
{
Ghandle handle;

Expand All @@ -277,7 +277,7 @@ CUresult cuDeviceTotalMem(unsigned int *bytes, CUdevice dev)

handle = gdev_ctx_current->gdev_handle;

if (gquery(handle, GDEV_NVIDIA_QUERY_MP_COUNT, (uint64_t*) bytes)) {
if (gquery(handle, GDEV_NVIDIA_QUERY_DEVICE_MEM_SIZE, (uint64_t*) bytes)) {
return CUDA_ERROR_UNKNOWN;
}

Expand Down
12 changes: 12 additions & 0 deletions test/cuda/user/memcpy_swap/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Makefile
# Builds the memcpy_swap user-space test against the Gdev CUDA runtime.
TARGET = user_test
CC = gcc
NVCC = nvcc -arch sm_20 -cubin
LIBS = -lcuda -lgdev
CFLAGS = -L /usr/local/gdev/lib64 -I /usr/local/gdev/include

.PHONY: all clean

# Libraries go after the source file: the linker resolves symbols left to
# right, so -l options placed before main.c can be dropped or left unresolved.
all:
	$(CC) -o $(TARGET) $(CFLAGS) main.c $(LIBS)

clean:
	rm -f $(TARGET) *.cubin ./*~
12 changes: 12 additions & 0 deletions test/cuda/user/memcpy_swap/Makefile.nvidia
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Makefile
# Builds the memcpy_swap user-space test against the NVIDIA CUDA driver library.
TARGET = user_test
CC = gcc
NVCC = nvcc -arch sm_20 -cubin
LIBS = -lcuda
CFLAGS = -I /usr/local/cuda/include

.PHONY: all clean

# Libraries go after the source file: the linker resolves symbols left to
# right, so -l options placed before main.c can be dropped or left unresolved.
all:
	$(CC) -o $(TARGET) $(CFLAGS) main.c $(LIBS)

clean:
	rm -f $(TARGET) *.cubin ./*~
177 changes: 177 additions & 0 deletions test/cuda/user/memcpy_swap/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/time.h>
#include <sys/wait.h>

/*
 * Report a failed CUDA driver call as "<name> failed: res = <code>" and
 * terminate the process. Does nothing when res is CUDA_SUCCESS.
 */
static void check_cuda(CUresult res, const char *name)
{
	if (res != CUDA_SUCCESS) {
		printf("%s failed: res = %u\n", name, (unsigned int)res);
		exit(-1);
	}
}

/* Initialize the driver API, select device 0 and create a context on it. */
static void open_device(CUdevice *dev, CUcontext *ctx)
{
	check_cuda(cuInit(0), "cuInit");
	check_cuda(cuDeviceGet(dev, 0), "cuDeviceGet");
	check_cuda(cuCtxCreate(ctx, 0, *dev), "cuCtxCreate");
}

/*
 * Fill a host buffer with 1, 2, 3, ..., copy it to the device buffer at
 * data_addr, copy it back into a second host buffer, release the device
 * buffer and the context, and then compare the two host buffers.
 *
 * Returns 0 when every word made the round trip intact, 1 on the first
 * mismatch (which is printed). Exits the process if an allocation or a
 * CUDA call fails.
 */
static int copy_and_verify(CUcontext ctx, CUdeviceptr data_addr,
			   unsigned int size)
{
	unsigned int count = size / sizeof(unsigned int);
	unsigned int *in, *out;
	unsigned int i;
	int mismatch = 0;

	in = malloc(size);
	out = malloc(size);
	if (!in || !out) {
		printf("malloc failed: size = 0x%x\n", size);
		free(in);
		free(out);
		exit(-1);
	}

	for (i = 0; i < count; i++) {
		in[i] = i + 1;
		out[i] = 0;
	}

	check_cuda(cuMemcpyHtoD(data_addr, in, size), "cuMemcpyHtoD");
	check_cuda(cuMemcpyDtoH(out, data_addr, size), "cuMemcpyDtoH");
	check_cuda(cuMemFree(data_addr), "cuMemFree");
	check_cuda(cuCtxDestroy(ctx), "cuCtxDestroy");

	for (i = 0; i < count; i++) {
		if (in[i] != out[i]) {
			printf("in[%u] = %u, out[%u] = %u\n",
			       i, in[i], i, out[i]);
			mismatch = 1;
			break;
		}
	}

	free(in);
	free(out);

	return mismatch;
}

/*
 * memcpy_swap test.
 *
 * The parent allocates a device buffer, then forks. The child creates its
 * own context, allocates a buffer of the same size and runs a host->device
 * ->host round trip while the parent's allocation is still held. Once the
 * child is done, the parent runs the same round trip on its own buffer.
 *
 * argv[1] (optional): buffer size in bytes, given in hexadecimal.
 * The default is 0x10000000 (256MB).
 *
 * Exit status is 0 only if both round trips verified and the child exited
 * cleanly.
 */
int main(int argc, char *argv[])
{
	unsigned int size = 0x10000000; /* 256MB */
	size_t total_mem;
	pid_t pid;
	int status;
	int failed;
	CUdevice dev;
	CUcontext ctx;
	CUdeviceptr data_addr;

	if (argc > 1 && sscanf(argv[1], "%x", &size) != 1) {
		printf("usage: %s [size in hex]\n", argv[0]);
		exit(-1);
	}

	open_device(&dev, &ctx);

	check_cuda(cuDeviceTotalMem(&total_mem, dev), "cuDeviceTotalMem");
	/* total_mem is a size_t, so it needs the 'z' length modifier. */
	printf("Total Memory = 0x%zx\n", total_mem);

	check_cuda(cuMemAlloc(&data_addr, size), "cuMemAlloc");

	/*
	 * Flush before forking: otherwise output still sitting in the stdio
	 * buffer is inherited by the child and printed a second time.
	 */
	fflush(stdout);

	pid = fork();
	if (pid < 0) {
		perror("fork");
		exit(-1);
	}

	if (pid == 0) { /* child */
		/* the child sets up its own context and its own allocation. */
		open_device(&dev, &ctx);
		check_cuda(cuMemAlloc(&data_addr, size), "cuMemAlloc");

		failed = copy_and_verify(ctx, data_addr, size);

		printf("Child finished\n");
		exit(failed ? -1 : 0);
	}

	/* parent */
	if (waitpid(pid, &status, 0) < 0) {
		perror("waitpid");
		exit(-1);
	}
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		printf("Child failed: status = 0x%x\n", (unsigned int)status);

	failed = copy_and_verify(ctx, data_addr, size);
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		failed = 1;

	printf("Parent finished\n");
	exit(failed ? -1 : 0);

	return 0;
}

0 comments on commit d799917

Please sign in to comment.