Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

cuda: added kcuda

cuda: separated kcuda, libcuda, and cudump directories
gdev: fixed MALLOC/FREE macros in gdev_drv
  • Loading branch information...
commit 86763f0e6e9de55022315d1b7c8b89000eaef609 1 parent 5f31504
Shinpei Kato authored
View
29 cuda/configure
@@ -0,0 +1,29 @@
+#!/bin/sh
+#
+# Copies the sources of the selected CUDA build target into the current
+# directory.
+#
+# Usage: configure --target=libcuda|kcuda
+#
+# Exits with status 1 when the target is missing or unknown.
+
+# libcuda or kcuda
+target=''
+
+# parse the given options.
+for option
+do
+	# optarg is what remains after the sed expression strips the leading
+	# "name=" part of the option.
+	case "$option" in
+	-*=*) optarg=`echo "$option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
+	*) optarg= ;;
+	esac
+
+	case "$option" in
+	--target=*)
+		target="$optarg" ;;
+	esac
+done
+
+# "$target" is matched as one quoted word, so an empty value (no --target
+# given) or a value containing spaces cannot break the comparison.
+case "$target" in
+libcuda)
+	cp -f ../libcuda/* .
+	cp -f ../driver_api/* .
+	cp -f ../cudump/* .
+	;;
+kcuda)
+	cp -f ../kcuda/* .
+	cp -f ../driver_api/* .
+	;;
+*)
+	# report on stderr and fail so that callers can detect the error.
+	echo "Error: invalid target '$target'" >&2
+	exit 1
+	;;
+esac
View
0  cuda/driver_api/cudump.c → cuda/cudump/cudump.c
File renamed without changes
View
7 cuda/driver_api/context.c
@@ -27,7 +27,6 @@
#include "cuda.h"
#include "gdev_cuda.h"
#include "gdev_api.h"
-#include "gdev_cuda.h"
#include "gdev_list.h"
struct CUctx_st *gdev_ctx_current = NULL;
@@ -107,7 +106,7 @@ CUresult cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev)
if (!pctx)
return CUDA_ERROR_INVALID_VALUE;
- if (!(ctx = (CUcontext)malloc(sizeof(*ctx)))) {
+ if (!(ctx = (CUcontext)MALLOC(sizeof(*ctx)))) {
res = CUDA_ERROR_OUT_OF_MEMORY;
goto fail_malloc_ctx;
}
@@ -145,7 +144,7 @@ CUresult cuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev)
fail_query_mp_count:
gclose(handle);
fail_open_gdev:
- free(ctx);
+ FREE(ctx);
fail_malloc_ctx:
return res;
}
@@ -179,7 +178,7 @@ CUresult cuCtxDestroy(CUcontext ctx)
if (gdev_ctx_current)
__gdev_list_del(&gdev_ctx_current->list_entry);
- free(ctx);
+ FREE(ctx);
return CUDA_SUCCESS;
}
View
83 cuda/driver_api/gdev_cuda.c
@@ -26,24 +26,26 @@
#include "cuda.h"
#include "gdev_cuda.h"
+
+#ifdef __KERNEL__
+#include <linux/proc_fs.h>
+/* pick the program-header layout matching the kernel word size, in the same
+   way the user-space branch does: 64-bit -> Elf64, 32-bit -> Elf32. */
+#ifdef CONFIG_64BIT
+#define Elf_Phdr Elf64_Phdr
+#else
+#define Elf_Phdr Elf32_Phdr
+#endif
+/* in-kernel stand-ins for the stdio calls used in this file. they are
+   defined for every word size, not only when CONFIG_64BIT is set.
+   NOTE(review): filp_open() reports failure with an ERR_PTR() value rather
+   than NULL, and kernel_read() returns a negative errno on failure, so
+   callers that test these results with '!' should be confirmed. */
+#define FILE struct file
+#define FOPEN(fname) filp_open(fname, O_RDONLY | O_DIRECT, 0)
+#define FSEEK(fp, offset, whence) vfs_llseek(fp, offset, whence)
+#define FTELL(fp) ((fp)->f_pos)
+#define FREAD(ptr, size, fp) kernel_read(fp, 0, ptr, size)
+#define FCLOSE(fp) filp_close(fp, NULL)
+#else /* !__KERNEL__ */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <elf.h>
#include <limits.h>
-
-#define SH_TEXT ".text."
-#define SH_INFO ".nv.info"
-#define SH_INFO_FUNC ".nv.info."
-#define SH_LOCAL ".nv.local."
-#define SH_SHARED ".nv.shared."
-#define SH_CONST ".nv.constant"
-#define SH_REL ".rel.nv.constant"
-#define SH_RELSPACE ".nv.constant14"
-#define SH_GLOBAL ".nv.global"
-#define SH_GLOBAL_INIT ".nv.global.init"
-#define NV_GLOBAL 0x10
-
#if (ULONG_MAX == UINT_MAX)
#define Elf_Ehdr Elf32_Ehdr
#define Elf_Shdr Elf32_Shdr
@@ -55,7 +57,24 @@
#define Elf_Phdr Elf64_Phdr
#define Elf_Sym Elf64_Sym
#endif
+/* stdio-backed file helpers for the user-space build. FSEEK forwards its
+   offset argument to fseek() instead of always seeking to 0. */
+#define FOPEN(fname) fopen(fname, "rb")
+#define FSEEK(fp, offset, whence) fseek(fp, offset, whence)
+#define FTELL(fp) ftell(fp)
+#define FREAD(ptr, size, fp) fread(ptr, size, 1, fp)
+#define FCLOSE(fp) fclose(fp)
+#endif
+#define SH_TEXT ".text."
+#define SH_INFO ".nv.info"
+#define SH_INFO_FUNC ".nv.info."
+#define SH_LOCAL ".nv.local."
+#define SH_SHARED ".nv.shared."
+#define SH_CONST ".nv.constant"
+#define SH_REL ".rel.nv.constant"
+#define SH_RELSPACE ".nv.constant14"
+#define SH_GLOBAL ".nv.global"
+#define SH_GLOBAL_INIT ".nv.global.init"
+#define NV_GLOBAL 0x10
typedef struct section_entry_ {
uint16_t type;
@@ -98,19 +117,19 @@ static CUresult load_bin(char **pbin, FILE **pfp, const char *fname)
FILE *fp;
uint32_t len;
- if (!(fp = fopen(fname, "rb")))
+ if (!(fp = FOPEN(fname)))
return CUDA_ERROR_FILE_NOT_FOUND;
- fseek(fp, 0, SEEK_END);
- len = ftell(fp);
- fseek(fp, 0, SEEK_SET);
+ FSEEK(fp, 0, SEEK_END);
+ len = FTELL(fp);
+ FSEEK(fp, 0, SEEK_SET);
- if (!(bin = (char *) malloc(len + 1)))
+ if (!(bin = (char *) MALLOC(len + 1)))
return CUDA_ERROR_OUT_OF_MEMORY;
- if (!fread(bin, len, 1, fp)) {
- free(bin);
- fclose(fp);
+ if (!FREAD(bin, len, fp)) {
+ FREE(bin);
+ FCLOSE(fp);
return CUDA_ERROR_UNKNOWN;
}
@@ -122,8 +141,8 @@ static CUresult load_bin(char **pbin, FILE **pfp, const char *fname)
static void unload_bin(char *bin, FILE *fp)
{
- free(bin);
- fclose(fp);
+ FREE(bin);
+ FCLOSE(fp);
}
static CUresult cubin_func_skip(char **pos, section_entry_t *e)
@@ -288,7 +307,7 @@ static CUresult cubin_func
}
/* allocate memory for a new function. */
- if (!(func = malloc(sizeof(*func))))
+ if (!(func = MALLOC(sizeof(*func))))
goto fail_malloc_func;
init_kernel(&func->kernel);
@@ -354,7 +373,7 @@ static CUresult cubin_func
switch (sh_e->type) {
case 0x0c04: /* 4-byte aligned param_size */
cubin_func_skip(&sh_pos, sh_e);
- if (!(raw_func->param_info = malloc(sh_e->size / 4)))
+ if (!(raw_func->param_info = MALLOC(sh_e->size / 4)))
goto fail_malloc_param_info;
break;
case 0x0a04: /* kernel parameters base and size */
@@ -388,7 +407,7 @@ static CUresult cubin_func
return CUDA_SUCCESS;
fail_malloc_param_info:
- free(func);
+ FREE(func);
fail_malloc_func:
return CUDA_ERROR_OUT_OF_MEMORY;
}
@@ -553,10 +572,10 @@ CUresult gdev_cuda_unload_cubin(struct CUmod_st *mod)
free(func) will delete the entry itself in gdev_list_for_each(). */
while (entry) {
func = __gdev_list_container(entry);
- raw_func = &func->raw_func;
- free(raw_func->param_info);
entry = entry->next;
- free(func);
+ raw_func = &func->raw_func;
+ FREE(raw_func->param_info);
+ FREE(func);
}
unload_bin(mod->bin, mod->fp);
@@ -602,7 +621,7 @@ CUresult gdev_cuda_construct_kernels
k->code_pc = 0;
k->param_size = f->param_base + f->param_size;
- if (!(k->param_buf = malloc(k->param_size)))
+ if (!(k->param_buf = MALLOC(k->param_size)))
goto fail_malloc_param;
memcpy(k->param_buf, f->cmem[0].buf, f->param_base);
@@ -659,7 +678,7 @@ CUresult gdev_cuda_construct_kernels
gdev_list_for_each(func, &mod->func_list) {
k = &func->kernel;
if (k->param_buf)
- free(k->param_buf);
+ FREE(k->param_buf);
}
return CUDA_ERROR_OUT_OF_MEMORY;
}
@@ -673,7 +692,7 @@ CUresult gdev_cuda_destruct_kernels(struct CUmod_st *mod)
gdev_list_for_each(func, &mod->func_list) {
k = &func->kernel;
if (k->param_buf)
- free(k->param_buf);
+ FREE(k->param_buf);
else
res = CUDA_ERROR_DEINITIALIZED; /* appropriate? */
}
View
4 cuda/driver_api/gdev_cuda.h
@@ -74,7 +74,11 @@ struct CUctx_st {
};
struct CUmod_st {
+#ifdef __KERNEL__
+ struct file *fp;
+#else
FILE *fp;
+#endif
void *bin;
uint64_t code_addr;
uint32_t code_size;
View
57 cuda/driver_api/init.c
@@ -26,12 +26,45 @@
#include "cuda.h"
#include "gdev_cuda.h"
-#include <fcntl.h>
+
+#ifdef __KERNEL__
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+/*
+ * Returns 1 if @fname can be opened read-only from kernel context, and 0
+ * otherwise.
+ *
+ * filp_open() signals failure with an ERR_PTR()-encoded errno rather than
+ * NULL, so the result is tested with IS_ERR(). A file that was opened
+ * successfully is closed again before returning, so the probe does not keep
+ * the file open.
+ */
+static inline int FILE_EXIST(char *fname)
+{
+	struct file *fp = filp_open(fname, O_RDONLY, 0);
+
+	if (IS_ERR(fp))
+		return 0;
+	filp_close(fp, NULL);
+	return 1;
+}
+#else /* !__KERNEL__ */
#include <stdio.h>
#include <sys/stat.h>
#include <sys/unistd.h>
+/* user-space probe: yields 1 when stat() succeeds on @fname, 0 otherwise. */
+static inline int FILE_EXIST(char *fname)
+{
+	struct stat info;
+
+	return (stat(fname, &info) == 0) ? 1 : 0;
+}
+#endif
-int gdev_initialized = 0;
+/*
+ * Counts device nodes by probing "/dev/gdev0", "/dev/gdev1", ... with
+ * FILE_EXIST() and stopping at the first index that is not found.
+ * Returns the number of consecutive nodes found, starting from index 0.
+ */
+static int __gdev_get_device_count(void)
+{
+	char path[64];
+	int count;
+
+	for (count = 0; ; count++) {
+		sprintf(path, "/dev/gdev%d", count);
+		if (!FILE_EXIST(path))
+			break;
+	}
+
+	return count;
+}
/**
* Initializes the driver API and must be called before any other function
@@ -47,10 +80,6 @@ int gdev_initialized = 0;
*/
CUresult cuInit(unsigned int Flags)
{
- char fname[64];
- int minor = 0;
- struct stat st;
-
/* mark initialized. */
gdev_initialized = 1;
@@ -58,19 +87,15 @@ CUresult cuInit(unsigned int Flags)
if (Flags != 0)
return CUDA_ERROR_INVALID_VALUE;
- /* check the number of devices. */
- for (;;) {
- sprintf(fname, "/dev/gdev%d", minor);
- if ((stat(fname, &st)))
- break;
- minor++;
- }
-
- if (!minor)
+ if (!(gdev_device_count = __gdev_get_device_count()))
return CUDA_ERROR_INVALID_DEVICE;
- gdev_device_count = minor;
__gdev_list_init(&gdev_ctx_list, NULL);
return CUDA_SUCCESS;
}
+
+/**
+ * global variables.
+ */
+int gdev_initialized = 0;
View
12 cuda/driver_api/module.c
@@ -63,7 +63,7 @@ CUresult cuModuleLoad(CUmodule *module, const char *fname)
ctx = gdev_ctx_current;
handle = ctx->gdev_handle;
- if (!(mod = malloc(sizeof(*mod)))) {
+ if (!(mod = MALLOC(sizeof(*mod)))) {
GDEV_PRINT("Failed to allocate memory for module\n");
res = CUDA_ERROR_OUT_OF_MEMORY;
goto fail_malloc_mod;
@@ -113,7 +113,7 @@ CUresult cuModuleLoad(CUmodule *module, const char *fname)
/* the following malloc() and memcpy() for bounce buffer could be
removed if we use gmalloc_host() here, but they are just an easy
implementation, and don't really affect performance anyway. */
- if (!(bnc_buf = malloc(mod->code_size))) {
+ if (!(bnc_buf = MALLOC(mod->code_size))) {
GDEV_PRINT("Failed to allocate host memory for code\n");
res = CUDA_ERROR_OUT_OF_MEMORY;
goto fail_malloc_code;
@@ -134,7 +134,7 @@ CUresult cuModuleLoad(CUmodule *module, const char *fname)
}
/* free the bounce buffer now. */
- free(bnc_buf);
+ FREE(bnc_buf);
mod->ctx = ctx;
*module = mod;
@@ -143,7 +143,7 @@ CUresult cuModuleLoad(CUmodule *module, const char *fname)
fail_gmemcpy_code:
fail_memcpy_code:
- free(bnc_buf);
+ FREE(bnc_buf);
fail_malloc_code:
fail_locate_code:
gfree(handle, mod->code_addr);
@@ -156,7 +156,7 @@ CUresult cuModuleLoad(CUmodule *module, const char *fname)
fail_construct_kernels:
gdev_cuda_unload_cubin(mod);
fail_load_cubin:
- free(mod);
+ FREE(mod);
fail_malloc_mod:
*module = NULL;
return res;
@@ -203,7 +203,7 @@ CUresult cuModuleUnload(CUmodule hmod)
if ((res = gdev_cuda_unload_cubin(mod)) != CUDA_SUCCESS)
return res;
- free(mod);
+ FREE(mod);
return CUDA_SUCCESS;
}
View
31 cuda/kcuda/Makefile
@@ -0,0 +1,31 @@
+# Out-of-tree build of the kcuda kernel module.
+#
+# TARGET  - name of the module object handed to the kernel build system.
+# OBJS    - objects that make up the module.
+# GDEVDIR - Gdev install prefix; headers are taken from $(GDEVDIR)/include
+#           and Module.symvers is copied from $(GDEVDIR) before building.
+# DRMINC  - DRM header directory of the running kernel's source tree.
+#
+# NOTE(review): kcuda.o is not listed in OBJS -- confirm whether kcuda.c
+# (module init/exit and the EXPORT_SYMBOL list) is meant to be linked in.
+TARGET = kcuda
+OBJS = init.o device.o version.o context.o module.o memory.o execution.o gdev_cuda.o
+GDEVDIR = /usr/local/gdev
+GDEVINC = $(GDEVDIR)/include
+DRMINC = /lib/modules/$(shell uname -r)/source/include/drm
+CPPFLAGS += -Wno-strict-prototypes
+
+# If KERNELRELEASE is defined, we have been invoked from the
+# kernel build system and can use its language.
+ifneq ($(KERNELRELEASE),)
+ obj-m := $(TARGET).o
+ $(TARGET)-objs := $(OBJS)
+ EXTRA_CFLAGS := -I$(GDEVINC) -I$(DRMINC)
+
+# Otherwise we were called directly from the command line;
+# invoke the kernel build system.
+else
+ KERNELDIR ?= /lib/modules/$(shell uname -r)/build
+ EXTRA_CFLAGS := -I$(GDEVINC) -I$(DRMINC)
+
+# Copy Module.symvers from the Gdev install directory, then re-invoke make
+# inside the kernel build tree with M= pointing back at this directory.
+all:
+ @cp -f $(GDEVDIR)/Module.symvers .
+ $(MAKE) -C $(KERNELDIR) M=$(PWD) modules
+
+$(TARGET).o: $(OBJS)
+ $(LD) $(LD_RFLAG) -r -o $@ $(OBJS)
+
+# Remove build products and editor backup files.
+clean:
+ @rm -fr .tmp* .*.cmd *.mod.* *.ko *.o modules.order Module.* *~
+
+endif
View
105 cuda/kcuda/kcuda.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2011 Shinpei Kato
+ *
+ * University of California at Santa Cruz
+ * Systems Research Lab.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/version.h>
+#include <linux/stat.h>
+
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Gdev/KCUDA");
+MODULE_AUTHOR("Shinpei Kato");
+
+#define MODULE_NAME "kcuda"
+
+/* module load hook: logs a message and reports success (0). */
+static int __init kcuda_module_init(void)
+{
+ printk("[kcuda] Gdev/KCUDA loaded\n");
+ return 0;
+}
+
+/* module unload hook: logs a message; there is nothing else to release. */
+static void __exit kcuda_module_exit(void)
+{
+ printk("[kcuda] Gdev/KCUDA unloaded\n");
+}
+
+/* register the load/unload hooks defined above. */
+module_init(kcuda_module_init);
+module_exit(kcuda_module_exit);
+
+/* Initialization */
+EXPORT_SYMBOL(cuInit);
+/* Device Management */
+EXPORT_SYMBOL(cuDeviceComputeCapability);
+EXPORT_SYMBOL(cuDeviceGet);
+EXPORT_SYMBOL(cuDeviceGetAttribute);
+EXPORT_SYMBOL(cuDeviceGetCount);
+EXPORT_SYMBOL(cuDeviceGetName);
+EXPORT_SYMBOL(cuDeviceGetProperties);
+EXPORT_SYMBOL(cuDeviceTotalMem);
+/* Version Management */
+EXPORT_SYMBOL(cuDriverGetVersion);
+/* Context Management */
+EXPORT_SYMBOL(cuCtxAttach);
+EXPORT_SYMBOL(cuCtxCreate);
+EXPORT_SYMBOL(cuCtxDestroy);
+EXPORT_SYMBOL(cuCtxDetach);
+EXPORT_SYMBOL(cuCtxGetDevice);
+EXPORT_SYMBOL(cuCtxPopCurrent);
+EXPORT_SYMBOL(cuCtxPushCurrent);
+EXPORT_SYMBOL(cuCtxSynchronize);
+/* Module Management */
+EXPORT_SYMBOL(cuModuleGetFunction);
+EXPORT_SYMBOL(cuModuleGetGlobal);
+EXPORT_SYMBOL(cuModuleGetTexRef);
+EXPORT_SYMBOL(cuModuleLoad);
+EXPORT_SYMBOL(cuModuleLoadData);
+EXPORT_SYMBOL(cuModuleLoadDataEx);
+EXPORT_SYMBOL(cuModuleLoadFatBinary);
+EXPORT_SYMBOL(cuModuleUnload);
+/* Execution Control */
+EXPORT_SYMBOL(cuFuncGetAttribute);
+EXPORT_SYMBOL(cuFuncSetBlockShape);
+EXPORT_SYMBOL(cuFuncSetSharedSize);
+EXPORT_SYMBOL(cuLaunch);
+EXPORT_SYMBOL(cuLaunchGrid);
+EXPORT_SYMBOL(cuLaunchGridAsync);
+EXPORT_SYMBOL(cuParamSetf);
+EXPORT_SYMBOL(cuParamSeti);
+EXPORT_SYMBOL(cuParamSetSize);
+EXPORT_SYMBOL(cuParamSetTexRef);
+EXPORT_SYMBOL(cuParamSetv);
+/* Memory Management (Incomplete) */
+EXPORT_SYMBOL(cuMemAlloc);
+EXPORT_SYMBOL(cuMemFree);
+EXPORT_SYMBOL(cuMemAllocHost);
+EXPORT_SYMBOL(cuMemFreeHost);
+EXPORT_SYMBOL(cuMemcpyDtoH);
+EXPORT_SYMBOL(cuMemcpyHtoD);
+EXPORT_SYMBOL(cuMemcpyDtoD);
View
2  cuda/driver_api/Makefile → cuda/libcuda/Makefile
@@ -4,7 +4,7 @@ CC = gcc
TARGET = libcuda
CUDUMP = cudump
GDEVDIR = /usr/local/gdev
-CFLAGS = -O3 -Wall -I$(GDEVDIR)/include
+CFLAGS = -O3 -Wall -I $(GDEVDIR)/include
#OBJS = $(patsubst %.c,%.o,$(wildcard ./*.c))
OBJS = init.o device.o version.o context.o module.o execution.o memory.o \
View
4 driver/gdev/gdev_drv.h
@@ -51,8 +51,8 @@
if (DEBUG_PRINT) \
printk("[gdev:debug] " fmt, ##arg)
-#define MALLOC(x) kmalloc(x, GFP_KERNEL)
-#define FREE(x) kfree(x)
+#define MALLOC(x) vmalloc(x)
+#define FREE(x) vfree(x)
#define SCHED_YIELD() schedule_timeout(1)
#define MB() mb()
#define COPY_FROM_USER(dst, src, size) \
Please sign in to comment.
Something went wrong with that request. Please try again.