Add ffi utils for user C extensions #62

Closed · wants to merge 1 commit
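
For context, the user-facing workflow these utils enable, assembled from the tests added in this PR (a minimal sketch; my_package.my_lib and the src/ paths are hypothetical placeholders):

import torch
from torch.utils.ffi import compile_extension

# Builds a cffi extension from a C header and sources that use TH tensors.
# The header declarations are both cdef'd for cffi and compiled together
# with the TH includes, so functions can take THFloatTensor * arguments.
compile_extension(
    name='my_package.my_lib',   # created as a package directory with an __init__.py
    header='src/my_lib.h',      # hypothetical header declaring the exported functions
    sources=['src/my_lib.c'],   # hypothetical C sources
    with_cuda=False,            # True additionally pulls in the THC headers
)

from my_package import my_lib   # wrapped functions accept torch tensors directly
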
1 change: 1 addition & 0 deletions .gitignore
@@ -8,6 +8,7 @@ torch/lib/*.dylib*
torch/lib/*.h
torch/lib/build
torch/lib/tmp_install
torch/lib/include
torch/lib/torch_shm_manager
torch/csrc/nn/THNN.cwrap
torch/csrc/nn/THNN.cpp
7 changes: 6 additions & 1 deletion setup.py
@@ -244,6 +244,11 @@ def make_relative_rpath(path):
         'clean': clean,
     },
     packages=packages,
-    package_data={'torch': ['lib/*.so*', 'lib/*.dylib*', 'lib/*.h', 'lib/torch_shm_manager']},
+    package_data={'torch': [
+        'lib/*.so*', 'lib/*.dylib*',
+        'lib/torch_shm_manager',
+        'lib/*.h',
+        'lib/include/TH/*.h', 'lib/include/TH/generic/*.h',
+        'lib/include/THC/*.h', 'lib/include/THC/generic/*.h']},
     install_requires=['pyyaml'],
 )
6 changes: 6 additions & 0 deletions test/ffi/src/cpu/lib.h
@@ -0,0 +1,6 @@

void good_func(THFloatTensor *tensor, int a, float b);
void bad_func(THFloatTensor *tensor, int a, float b);
THFloatTensor * new_tensor(int a);
float int_to_float(int a);

19 changes: 19 additions & 0 deletions test/ffi/src/cpu/lib1.c
@@ -0,0 +1,19 @@
#include <TH/TH.h>

void good_func(THFloatTensor *tensor, int a, float b)
{
  THFloatTensor_mul(tensor, tensor, a);
  THFloatTensor_add(tensor, tensor, b);
}

THFloatTensor * new_tensor(int a)
{
  THFloatTensor *t = THFloatTensor_newWithSize2d(a, a);
  THFloatTensor_fill(t, a);
  return t;
}

float int_to_float(int a)
{
  return a;
}
8 changes: 8 additions & 0 deletions test/ffi/src/cpu/lib2.c
@@ -0,0 +1,8 @@
#include <TH/TH.h>

void bad_func(THFloatTensor *tensor, int a, float b)
{
  THFloatTensor_mul(tensor, tensor, a);
  THFloatTensor_add(tensor, tensor, b);
  /* addbmm expects 3D batch tensors, so calling it with 2D arguments
   * raises a TH error; the tests expect this to surface as torch.FatalError */
  THFloatTensor_addbmm(tensor, 1, tensor, 1, tensor, tensor);
}
12 changes: 12 additions & 0 deletions test/ffi/src/cuda/cudalib.c
@@ -0,0 +1,12 @@
#include <TH/TH.h>
#include <THC/THC.h>

extern THCState *state;

#include "../cpu/lib1.c"

void cuda_func(THCudaTensor *tensor, int a, float b)
{
  THCudaTensor_mul(state, tensor, tensor, a);
  THCudaTensor_add(state, tensor, tensor, b);
}
5 changes: 5 additions & 0 deletions test/ffi/src/cuda/cudalib.h
@@ -0,0 +1,5 @@

void good_func(THFloatTensor *tensor, int a, float b);
void cuda_func(THCudaTensor *tensor, int a, float b);
THFloatTensor * new_tensor(int a);
float int_to_float(int a);
5 changes: 5 additions & 0 deletions test/ffi/src/lib.h
@@ -0,0 +1,5 @@

void my_func(THFloatTensor *tensor, int a, float b);
void my_cuda_func(THCudaTensor *tensor, int a, float b);
THFloatTensor * new_t(int a);
float new_int(int a);
72 changes: 72 additions & 0 deletions test/test_utils.py
@@ -1,12 +1,18 @@
import sys
import os
import math
import shutil
import random
import tempfile
import unittest
import torch
import torch.cuda
from torch.autograd import Variable
from torch.utils.trainer import Trainer
from torch.utils.trainer.plugins import *
from torch.utils.trainer.plugins.plugin import Plugin
from torch.utils.data import *
from torch.utils.ffi import compile_extension

from common import TestCase

@@ -242,6 +248,72 @@ def test_types(self):
        self.assertIs(type(targets), torch.IntTensor)


test_dir = os.path.abspath(os.path.dirname(__file__))


class TestFFI(TestCase):

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        os.chdir(self.tmpdir)
        sys.path.append(self.tmpdir)

    def tearDown(self):
        shutil.rmtree(self.tmpdir)

    def test_cpu(self):
        compile_extension(
            name='test_extensions.cpulib',
            header=test_dir + '/ffi/src/cpu/lib.h',
            sources=[
                test_dir + '/ffi/src/cpu/lib1.c',
                test_dir + '/ffi/src/cpu/lib2.c',
            ],
            verbose=False,
        )
        from test_extensions import cpulib
        tensor = torch.ones(2, 2).float()

        cpulib.good_func(tensor, 2, 1.5)
        self.assertEqual(tensor, torch.ones(2, 2) * 2 + 1.5)

        new_tensor = cpulib.new_tensor(4)
        self.assertEqual(new_tensor, torch.ones(4, 4) * 4)

        f = cpulib.int_to_float(5)
        self.assertIs(type(f), float)

        self.assertRaises(TypeError,
                          lambda: cpulib.good_func(tensor.double(), 2, 1.5))
        self.assertRaises(torch.FatalError,
                          lambda: cpulib.bad_func(tensor, 2, 1.5))

    def test_gpu(self):
        compile_extension(
            name='gpulib',
            header=test_dir + '/ffi/src/cuda/cudalib.h',
            sources=[
                test_dir + '/ffi/src/cuda/cudalib.c',
            ],
            with_cuda=True,
            verbose=False,
        )
        import gpulib
        tensor = torch.ones(2, 2).float()

        gpulib.good_func(tensor, 2, 1.5)
        self.assertEqual(tensor, torch.ones(2, 2) * 2 + 1.5)

        ctensor = tensor.cuda().fill_(1)
        gpulib.cuda_func(ctensor, 2, 1.5)
        self.assertEqual(ctensor, torch.ones(2, 2) * 2 + 1.5)

        self.assertRaises(TypeError,
                          lambda: gpulib.cuda_func(tensor, 2, 1.5))
        self.assertRaises(TypeError,
                          lambda: gpulib.cuda_func(ctensor.storage(), 2, 1.5))


if __name__ == '__main__':
    unittest.main()

6 changes: 4 additions & 2 deletions torch/csrc/Exceptions.cpp
@@ -4,8 +4,10 @@

 PyObject *THPException_FatalError;

+#define ASSERT_TRUE(cond) if (!(cond)) return false
 bool THPException_init(PyObject *module)
 {
-  THPException_FatalError = PyErr_NewException("torch.FatalError", NULL, NULL);
-  return THPException_FatalError != NULL;
+  ASSERT_TRUE(THPException_FatalError = PyErr_NewException("torch.FatalError", NULL, NULL));
+  ASSERT_TRUE(PyModule_AddObject(module, "FatalError", THPException_FatalError) == 0);
+  return true;
 }
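
Since the exception object is now also registered on the module under the name FatalError, user code can catch it directly; a minimal sketch reusing the cpulib test extension built in the tests above (hypothetical session):

import torch
from test_extensions import cpulib  # built by TestFFI.test_cpu above

tensor = torch.ones(2, 2).float()
try:
    cpulib.bad_func(tensor, 2, 1.5)  # 2D arguments to addbmm trigger a TH error
except torch.FatalError as e:
    print('caught fatal TH error:', e)
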
5 changes: 4 additions & 1 deletion torch/csrc/Module.cpp
@@ -528,17 +528,20 @@ static PyObject * THPModule_cat(PyObject *_unused, PyObject *args)
 PyObject *THPModule_safeCall(PyObject *_unused, PyObject *args, PyObject *kwargs)
 {
   PyObject *result = NULL;
+  PyObject *args_slice = NULL;
   PyThreadState *thread_state = PyThreadState_Get();
   Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;
   THPUtils_assert(num_args > 0, "expected at least one argument");
   try {
-    THPObjectPtr args_slice = PyTuple_GetSlice(args, 1, num_args);
+    args_slice = PyTuple_GetSlice(args, 1, num_args);
     result = PyObject_Call(PyTuple_GET_ITEM(args, 0), args_slice, kwargs);
   } catch (std::exception &e) {
     PyEval_RestoreThread(thread_state);
+    Py_DECREF(args_slice);
     PyErr_SetString(THPException_FatalError, e.what());
     Py_LeaveRecursiveCall();
   }
+  Py_DECREF(args_slice);
   return result;
 }
1 change: 1 addition & 0 deletions torch/lib/build_all.sh
@@ -52,4 +52,5 @@ build libshm
cp $INSTALL_DIR/lib/* .
cp THNN/generic/THNN.h .
cp THCUNN/THCUNN.h .
cp -r tmp_install/include .
cp $INSTALL_DIR/bin/* .
139 changes: 136 additions & 3 deletions torch/utils/ffi/__init__.py
@@ -1,10 +1,143 @@
-from functools import wraps
+import os
+import glob
+import tempfile
+import shutil
+from functools import wraps, reduce
+from string import Template
 import torch
 import torch.cuda

-def catch_exceptions(function):
+try:
+    import cffi
+except ImportError:
+    raise ImportError("torch.utils.ffi requires the cffi package")
+
+
+def _generate_typedefs():
+    typedefs = []
+    for t in ['Double', 'Float', 'Long', 'Int', 'Short', 'Char', 'Byte']:
+        for lib in ['TH', 'THCuda']:
+            for kind in ['Tensor', 'Storage']:
+                python_name = t + kind
+                if t == 'Float' and lib == 'THCuda':
+                    th_name = 'THCuda' + kind
+                else:
+                    th_name = lib + t + kind
+                th_struct = 'struct ' + th_name
+
+                typedefs += ['typedef {} {};'.format(th_struct, th_name)]
+                module = torch if lib == 'TH' else torch.cuda
+                python_class = getattr(module, python_name)
+                _cffi_to_torch[th_struct] = python_class
+                _torch_to_cffi[python_class] = th_struct
+    return '\n'.join(typedefs) + '\n'
+_cffi_to_torch = {}
+_torch_to_cffi = {}
+_typedefs = _generate_typedefs()
+
+
+PY_MODULE_TEMPLATE = Template("""
+from torch.utils.ffi import _wrap_function
+from .$cffi_wrapper_name import lib as _lib, ffi as _ffi
+
+__all__ = []
+def _import_symbols(locals):
+    for symbol in dir(_lib):
+        fn = getattr(_lib, symbol)
+        locals[symbol] = _wrap_function(fn, _ffi)
+        __all__.append(symbol)
+
+_import_symbols(locals())
+""")
+
+
+def _setup_wrapper(with_cuda):
+    here = os.path.abspath(os.path.dirname(__file__))
+    lib_dir = os.path.join(here, '..', '..', 'lib')
+    include_dirs = [
+        os.path.join(lib_dir, 'include'),
+        os.path.join(lib_dir, 'include', 'TH'),
+    ]
+
+    wrapper_source = '#include <TH/TH.h>\n'
+    if with_cuda:
+        import torch.cuda
+        wrapper_source += '#include <THC/THC.h>\n'
+        cuda_include_dirs = glob.glob('/usr/local/cuda/include')
+        cuda_include_dirs += glob.glob('/Developer/NVIDIA/CUDA-*/include')
+        include_dirs.append(os.path.join(lib_dir, 'include', 'THC'))
+        include_dirs.extend(cuda_include_dirs)
+    return wrapper_source, include_dirs
+
+
+def _create_module_dir(fullname):
+    module, _, name = fullname.rpartition('.')
+    if not module:
+        target_dir = name
+    else:
+        target_dir = reduce(lambda path, segment: os.path.join(path, segment),
+                            fullname.split('.'))
+    try:
+        os.makedirs(target_dir)
+    except FileExistsError:
+        pass
+    return name, target_dir
+
+
+def _build_extension(ffi, cffi_wrapper_name, target_dir, verbose):
+    try:
+        tmpdir = tempfile.mkdtemp()
+        libname = cffi_wrapper_name + '.so'
+        ffi.compile(tmpdir=tmpdir, verbose=verbose, target=libname)
+        shutil.copy(os.path.join(tmpdir, libname),
+                    os.path.join(target_dir, libname))
+    finally:
+        shutil.rmtree(tmpdir)
+
+
+def _make_python_wrapper(name, cffi_wrapper_name, target_dir):
+    py_source = PY_MODULE_TEMPLATE.substitute(name=name,
+                                              cffi_wrapper_name=cffi_wrapper_name)
+    with open(os.path.join(target_dir, '__init__.py'), 'w') as f:
+        f.write(py_source)
+
+
+def compile_extension(name, header, sources=[], verbose=True, with_cuda=False,
+                      **kwargs):
+    name, target_dir = _create_module_dir(name)
+    cffi_wrapper_name = '_' + name
+
+    wrapper_source, include_dirs = _setup_wrapper(with_cuda)
+    include_dirs.extend(kwargs.pop('include_dirs', []))
+    with open(header, 'r') as f:
+        header_source = f.read()
+
+    ffi = cffi.FFI()
+    sources = [os.path.abspath(src) for src in sources]
+    ffi.set_source(cffi_wrapper_name, wrapper_source + header_source,
+                   sources=sources,
+                   include_dirs=include_dirs, **kwargs)
+    ffi.cdef(_typedefs + header_source);
+    _build_extension(ffi, cffi_wrapper_name, target_dir, verbose)
+    _make_python_wrapper(name, cffi_wrapper_name, target_dir)
+
+
+def _wrap_function(function, ffi):
     @wraps(function)
     def safe_call(*args, **kwargs):
-        torch._C._safe_call(*args, **kwargs)
+        args = tuple(ffi.cast(_torch_to_cffi.get(type(arg), 'void') + '*', arg._cdata)
+                     if torch.is_tensor(arg) or torch.is_storage(arg)
+                     else arg
+                     for arg in args)
+        args = (function,) + args
+        result = torch._C._safe_call(*args, **kwargs)
+        if isinstance(result, ffi.CData):
+            typeof = ffi.typeof(result)
+            if typeof.kind == 'pointer':
+                cdata = int(ffi.cast('uintptr_t', result))
+                cname = typeof.item.cname
+                if cname in _cffi_to_torch:
+                    return _cffi_to_torch[cname](cdata=cdata)
+        return result
     return safe_call
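
To make the generated wrapper concrete: for a hypothetical extension named my_lib (so cffi_wrapper_name is _my_lib), _make_python_wrapper renders PY_MODULE_TEMPLATE into my_lib/__init__.py as:

from torch.utils.ffi import _wrap_function
from ._my_lib import lib as _lib, ffi as _ffi

__all__ = []
def _import_symbols(locals):
    for symbol in dir(_lib):
        fn = getattr(_lib, symbol)
        locals[symbol] = _wrap_function(fn, _ffi)
        __all__.append(symbol)

_import_symbols(locals())

Each C symbol exported by the extension thus becomes a module-level Python function; _wrap_function casts incoming tensors and storages to the matching TH struct pointers, and converts returned TH pointers back into the corresponding torch classes via the _cffi_to_torch mapping.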