Merge commit '8262920b72374b1d9643f35057663ab02ab20330' into use_omp1
* commit '8262920b72374b1d9643f35057663ab02ab20330': (272 commits)
  Add ATen overload to AutoGPU. (pytorch#2234)
  Add comments for default value (pytorch#2242)
  Remove dead THPP code that has been replaced with ATen objects. (pytorch#2235)
  fix a bug where an uninitialized at::Tensor was passed to createPyObject (pytorch#2239)
  Replace thpp::Tensor with ATen Tensor in autograd csrc (pytorch#2170)
  Added aarch64 support (pytorch#2226)
  Increase tol. for float tensor qr big test.
  Improve Variable.retain_grad
  add `retain_grad` method to Variable, so the gradient gets stored during backprop on non-user variables
  Implement BatchNorm double backwards (pytorch#2207)
  [bugfix] in bce_with_logits logsumexp calculation (pytorch#2221)
  fix for ATen API Change
  Opt into Trusty builds. (pytorch#2214)
  allow retain to be specified for unsafeTensorFromTH
  Deduplicate THPUtils_checkLong/THPUtils_unpackLong (pytorch#2218)
  fix osx build errors related to long/int64_t
  Note [Undefined-dim versus 0-dim]
  Remove __func__ hack in auto nn.
  Enable Conv groups gradgradchecks. (pytorch#2216)
  fix a bug where some scalars were getting truncated to integers incorrectly.
  ...
ruotianluo committed Aug 1, 2017
2 parents a7c56a9 + 8262920 commit 8e92db3
Showing 276 changed files with 19,099 additions and 2,163 deletions.
18 changes: 9 additions & 9 deletions .travis.yml
@@ -1,7 +1,8 @@
# https://travis-ci.org/pytorch/pytorch
language: python
dist: trusty
python:
- 2.7.8
- 2.7.9
- 2.7
- 3.5
- 3.6
@@ -15,23 +16,22 @@ cache:
install:
- unset CCACHE_DISABLE
- export CCACHE_DIR=$HOME/.ccache
- export CC="ccache gcc-4.8"
- export CXX="ccache g++-4.8"
- export CC="ccache gcc-5"
- export CXX="ccache g++-5"
- ccache --show-stats
- travis_retry pip install --upgrade pip setuptools wheel
- travis_retry pip install -r requirements.txt --only-binary=scipy
- python setup.py install

script:
- OMP_NUM_THREADS=2 ./test/run_test.sh
- MAX_JOBS=8 python setup.py install

addons:
apt:
sources:
- ubuntu-toolchain-r-test
packages:
- gcc-4.8
- g++-4.8
- g++-5

script:
- OMP_NUM_THREADS=2 ./test/run_test.sh

# This reportedly works around an issue downloading packages from pypi on
# travis. Consider removing this after the underlying issue is fixed.
2 changes: 1 addition & 1 deletion README.md
@@ -167,7 +167,7 @@ If you want to disable CUDA support, export environment variable `NO_CUDA=1`.

On Linux
```bash
export CMAKE_PREFIX_PATH=[anaconda root directory]
export CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" # [anaconda root directory]

# Install basic dependencies
conda install numpy pyyaml mkl setuptools cmake gcc cffi
37 changes: 33 additions & 4 deletions docs/source/distributed.rst
@@ -7,6 +7,35 @@ Distributed communication package - torch.distributed
.. automodule:: torch.distributed
.. currentmodule:: torch.distributed

Currently torch.distributed supports three backends, each with
different capabilities. The table below shows which functions are available
for use with CPU / CUDA tensors.
MPI supports CUDA only if the implementation used to build PyTorch supports it.

+------------+-----------+-----------+-----------+
| Backend    | ``tcp``   | ``gloo``  | ``mpi``   |
+------------+-----+-----+-----+-----+-----+-----+
| Device     | CPU | GPU | CPU | GPU | CPU | GPU |
+============+=====+=====+=====+=====+=====+=====+
| send       | ✓   | ✘   | ✘   | ✘   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| recv       | ✓   | ✘   | ✘   | ✘   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| broadcast  | ✓   | ✘   | ✓   | ✓   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| all_reduce | ✓   | ✘   | ✓   | ✓   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| reduce     | ✓   | ✘   | ✘   | ✘   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| all_gather | ✓   | ✘   | ✓   | ✓   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| gather     | ✓   | ✘   | ✘   | ✘   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| scatter    | ✓   | ✘   | ✘   | ✘   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+
| barrier    | ✓   | ✘   | ✓   | ✓   | ✓   | ?   |
+------------+-----+-----+-----+-----+-----+-----+

Initialization
--------------

@@ -28,10 +57,10 @@ TCP initialization

Initialization will utilize a network address reachable from all processes.
If the address belongs to one of the machines, initialization requires that all processes
have manually specified ranks.

Alternatively, the address has to be a valid IP multicast address, in which case,
ranks can be assigned automatically. Multicast initialization also supports
a ``group_name`` argument, which allows you to use the same address for multiple jobs,
as long as they use different group names.
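
For illustration only (not part of this change), a minimal sketch of TCP
initialization; the address, port, and rank below are placeholders that each
process would set for itself::

    import torch.distributed as dist

    # The address must be reachable from every process; the rank must be
    # unique per process (0 .. world_size - 1).
    dist.init_process_group(backend='tcp',
                            init_method='tcp://10.1.1.20:23456',
                            rank=0, world_size=4)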

@@ -80,7 +109,7 @@ are:
* ``WORLD_SIZE`` - required; can be set either here, or in a call to init function
* ``RANK`` - required; can be set either here, or in a call to init function

The machine with rank 0 will be used to set up all connections.

This is the default method, meaning that ``init_method`` does not have to be specified (or
can be ``env://``).
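
As a rough sketch of the environment variable method (the address and port are
placeholder values; every process exports its own ``RANK``)::

    import os
    import torch.distributed as dist

    os.environ['MASTER_ADDR'] = '10.1.1.20'   # machine hosting rank 0
    os.environ['MASTER_PORT'] = '29500'       # a free port on that machine
    os.environ['WORLD_SIZE'] = '4'
    os.environ['RANK'] = '0'                  # different on every process

    # init_method defaults to 'env://', so it can be omitted here.
    dist.init_process_group(backend='gloo')
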
@@ -93,7 +122,7 @@ require all processes to enter the distributed function call. However, some workloads can benefit
from more fine-grained communication. This is where distributed groups come
into play. The :func:`~torch.distributed.new_group` function can be
used to create new groups, with arbitrary subsets of all processes. It returns
an opaque group handle that can be given as a ``group`` argument to all collectives
(collectives are distributed functions to exchange information in certain well-known programming patterns).
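
A brief hypothetical sketch of using a group handle with a collective,
assuming a job of four processes initialized as above::

    import torch
    import torch.distributed as dist

    # new_group is itself collective: every process in the job calls it,
    # including processes that are not members of the new group.
    group = dist.new_group(ranks=[0, 1])

    t = torch.ones(4)
    if dist.get_rank() in (0, 1):
        # Only members of the group participate in this all_reduce.
        dist.all_reduce(t, op=dist.reduce_op.SUM, group=group)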

.. autofunction:: new_group
71 changes: 63 additions & 8 deletions docs/source/nn.rst
@@ -913,6 +913,16 @@ Dropout functions

.. autofunction:: alpha_dropout

:hidden:`dropout2d`
~~~~~~~~~~~~~~~~~~~

.. autofunction:: dropout2d

:hidden:`dropout3d`
~~~~~~~~~~~~~~~~~~~

.. autofunction:: dropout3d

Distance functions
----------------------------------

@@ -930,30 +940,70 @@ Distance functions
Loss functions
--------------

:hidden:`nll_loss`
~~~~~~~~~~~~~~~~~~
:hidden:`binary_cross_entropy`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: nll_loss
.. autofunction:: binary_cross_entropy

:hidden:`poisson_nll_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: poisson_nll_loss

:hidden:`kl_div`
~~~~~~~~~~~~~~~~
:hidden:`cosine_embedding_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: kl_div
.. autofunction:: cosine_embedding_loss

:hidden:`cross_entropy`
~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: cross_entropy

:hidden:`binary_cross_entropy`
:hidden:`hinge_embedding_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: binary_cross_entropy
.. autofunction:: hinge_embedding_loss

:hidden:`kl_div`
~~~~~~~~~~~~~~~~

.. autofunction:: kl_div

:hidden:`l1_loss`
~~~~~~~~~~~~~~~~~

.. autofunction:: l1_loss

:hidden:`mse_loss`
~~~~~~~~~~~~~~~~~~

.. autofunction:: mse_loss

:hidden:`margin_ranking_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: margin_ranking_loss

:hidden:`multilabel_margin_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: multilabel_margin_loss

:hidden:`multilabel_soft_margin_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: multilabel_soft_margin_loss

:hidden:`multi_margin_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: multi_margin_loss

:hidden:`nll_loss`
~~~~~~~~~~~~~~~~~~

.. autofunction:: nll_loss

:hidden:`binary_cross_entropy_with_logits`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -965,6 +1015,11 @@ Loss functions

.. autofunction:: smooth_l1_loss

:hidden:`soft_margin_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: soft_margin_loss

:hidden:`triplet_margin_loss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

2 changes: 1 addition & 1 deletion docs/source/notes/autograd.rst
@@ -86,7 +86,7 @@ no intermediate states are saved.
False
>>> model(volatile_input).volatile
True
>>> model(volatile_input).creator is None
>>> model(volatile_input).grad_fn is None
True
How autograd encodes the history
3 changes: 2 additions & 1 deletion docs/source/torchvision/models.rst
@@ -7,5 +7,6 @@ torchvision.models
.. automodule:: torchvision.models
:members: alexnet, resnet18, resnet34, resnet50, resnet101, resnet152,
vgg11, vgg11_bn, vgg13, vgg13_bn, vgg16, vgg16_bn, vgg19,
vgg19_bn
vgg19_bn, inception_v3, squeezenet1_0, squeezenet1_1, densenet121,
densenet169, densenet201, densenet161
:undoc-members:
18 changes: 13 additions & 5 deletions setup.py
@@ -54,6 +54,9 @@ def _single_compile(obj):
src, ext = build[obj]
self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
num_jobs = multiprocessing.cpu_count()
max_jobs = os.getenv("MAX_JOBS")
if max_jobs is not None:
num_jobs = min(num_jobs, int(max_jobs))
multiprocessing.pool.ThreadPool(num_jobs).map(_single_compile, objects)

return objects
@@ -231,8 +234,14 @@ def run(self):
'-fno-strict-aliasing']
if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux':
print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux')
extra_compile_args += ['-static-libstdc++']
extra_link_args += ['-static-libstdc++']
# get path of libstdc++ and link manually.
# for reasons unknown, -static-libstdc++ doesn't fully link some symbols
CXXNAME = os.getenv('CXX', 'g++')
path = subprocess.check_output([CXXNAME, '-print-file-name=libstdc++.a'])
path = path[:-1]
if type(path) != str: # python 3
path = path.decode(sys.stdout.encoding)
extra_link_args += [path]

cwd = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.join(cwd, "torch", "lib")
@@ -311,7 +320,6 @@ def run(self):
"torch/csrc/autograd/functions/accumulate_grad.cpp",
"torch/csrc/autograd/functions/utils.cpp",
"torch/csrc/autograd/functions/init.cpp",
"torch/csrc/nn/THNN_generic.cpp",
]
main_sources += split_types("torch/csrc/Tensor.cpp")

@@ -449,7 +457,7 @@ def make_relative_rpath(path):
)
extensions.append(THCUNN)

version = '0.1.12'
version = '0.2.0'
if os.getenv('PYTORCH_BUILD_VERSION'):
assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
version = os.getenv('PYTORCH_BUILD_VERSION') \
@@ -482,5 +490,5 @@ def make_relative_rpath(path):
'lib/*.h',
'lib/include/TH/*.h', 'lib/include/TH/generic/*.h',
'lib/include/THC/*.h', 'lib/include/THC/generic/*.h']},
install_requires=['pyyaml'],
install_requires=['pyyaml', 'numpy'],
)