update Sphinx (#69)
* update Sphinx

* docstring updates
svenkreiss committed Jul 30, 2017
1 parent e8ed676 commit c9c524f
Showing 6 changed files with 36 additions and 32 deletions.
2 changes: 1 addition & 1 deletion docs/requirements.txt
@@ -1,2 +1,2 @@
-Sphinx>=1.4.5
+Sphinx==1.6.3
sphinx_rtd_theme
2 changes: 1 addition & 1 deletion docs/sphinx/api.rst
@@ -5,7 +5,7 @@ API

.. currentmodule:: pysparkling

-A usual ``pysparkling`` session starts with either parallelizing a ``list``
+A usual ``pysparkling`` session starts with either parallelizing a `list`
with :func:`Context.parallelize` or by reading data from a file using
:func:`Context.textFile`. These two methods return :class:`RDD` instances that
can then be processed.
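
For orientation, a minimal session along the lines this page describes might
look like the sketch below (the file path is hypothetical):

    from pysparkling import Context

    sc = Context()

    # from a list ...
    rdd = sc.parallelize([1, 2, 3, 4])
    print(rdd.map(lambda x: x * 2).collect())  # [2, 4, 6, 8]

    # ... or from a file, one element per line
    # lines = sc.textFile('data.txt')
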
12 changes: 6 additions & 6 deletions docs/sphinx/api_context.rst
@@ -5,14 +5,14 @@
Context
-------

-A :class:`Context` describes the setup. Instantiating a Context with the default
+A :class:`~pysparkling.Context` describes the setup. Instantiating a Context with the default
arguments using ``Context()`` is the most lightweight setup. All data is just
in the local thread and is never serialized or deserialized.

-If you want to process the data in parallel, you can use the ``multiprocessing``
-module. Given the limitations of the default ``pickle`` serializer, you can
-specify to serialize all methods with ``cloudpickle`` instead. For example,
-a common instantiation with ``multiprocessing`` looks like this:
+If you want to process the data in parallel, you can use the `multiprocessing`
+module. Given the limitations of the default `pickle` serializer, you can
+specify to serialize all methods with `cloudpickle` instead. For example,
+a common instantiation with `multiprocessing` looks like this:

.. code-block:: python
@@ -22,7 +22,7 @@ a common instantiation with ``multiprocessing`` looks like this:
deserializer=pickle.loads,
)
-This assumes that your data is serializable with ``pickle`` which is generally
+This assumes that your data is serializable with `pickle` which is generally
faster. You can also specify a custom serializer/deserializer for data.

.. autoclass:: pysparkling.Context
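
The middle of the code-block in the hunk above is collapsed. Judging from its
visible tail and the parameters documented in pysparkling/context.py below,
the full instantiation presumably resembles this sketch:

    import multiprocessing
    import pickle

    import cloudpickle

    from pysparkling import Context

    # functions are shipped to the worker processes with cloudpickle, which
    # serializes lambdas and closures that the default pickle rejects
    sc = Context(
        pool=multiprocessing.Pool(4),
        serializer=cloudpickle.dumps,
        deserializer=pickle.loads,
    )
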
4 changes: 3 additions & 1 deletion docs/sphinx/conf.py
@@ -226,4 +226,6 @@


# Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'https://docs.python.org/': None}
+intersphinx_mapping = {
+    'https://docs.python.org/': None,
+}
5 changes: 2 additions & 3 deletions pysparkling/context.py
@@ -102,10 +102,9 @@ class Context(object):
:param pool: An instance with a ``map(func, iterable)`` method.
:param serializer:
Serializer for functions. Examples are `pickle.dumps` and
-    ``dill.dumps``.
+    `cloudpickle.dumps`.
:param deserializer:
-Deserializer for functions. Examples are `pickle.loads` and
-``dill.loads``.
+Deserializer for functions. For example `pickle.loads`.
:param data_serializer: Serializer for the data.
:param data_deserializer: Deserializer for the data.
:param int max_retries: maximum number a partition is retried
43 changes: 23 additions & 20 deletions pysparkling/rdd.py
@@ -86,8 +86,8 @@ def aggregate(self, zeroValue, seqOp, combOp):
:param zeroValue:
The initial value to an aggregation, for example ``0`` or ``0.0``
-for aggregating ``int`` s and ``float`` s, but any Python object is
-possible. Can be ``None``.
+for aggregating `int` s and `float` s, but any Python object is
+possible.
:param seqOp:
A reference to a function that combines the current state with a
@@ -126,8 +126,8 @@ def aggregateByKey(self, zeroValue, seqFunc, combFunc, numPartitions=None):
:param zeroValue:
The initial value to an aggregation, for example ``0`` or ``0.0``
-for aggregating ``int`` s and ``float`` s, but any Python object is
-possible. Can be ``None``.
+for aggregating `int` s and `float` s, but any Python object is
+possible.
:param seqFunc:
A reference to a function that combines the current state with a
@@ -137,8 +137,7 @@ def aggregateByKey(self, zeroValue, seqFunc, combFunc, numPartitions=None):
A reference to a function that combines outputs of seqFunc.
In the first iteration, the current state is zeroValue.
-:param int numPartitions: (optional)
-    Not used.
+:param int numPartitions: Not used.
:returns: An RDD with the output of ``combOp`` operations.
:rtype: RDD
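
As a usage sketch of the aggregate API documented in this hunk, computing a
running (sum, count) pair, so the zeroValue is (0, 0):

    from pysparkling import Context

    rdd = Context().parallelize([1, 2, 3, 4], 2)

    # seqOp folds one new value into the running state;
    # combOp merges the states of two partitions
    seq_op = lambda acc, x: (acc[0] + x, acc[1] + 1)
    comb_op = lambda a, b: (a[0] + b[0], a[1] + b[1])

    print(rdd.aggregate((0, 0), seq_op, comb_op))  # (10, 4)
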
@@ -176,7 +175,7 @@ def combFuncByKey(l):
def cache(self):
"""Once a partition is computed, cache the result.
-Alias for :func:`RDD.persist`.
+Alias for :func:`~pysparkling.RDD.persist`.
Example:
@@ -334,14 +333,14 @@ def count(self):
resultHandler=sum)

def countApprox(self):
"""same as :func:`RDD.count()`
"""same as :func:`~pysparkling.RDD.count()`
:rtype: int
"""
return self.count()

def countByKey(self):
"""returns a ``dict`` containing the count for every key
"""returns a `dict` containing the count for every key
:rtype: dict
@@ -357,7 +356,7 @@ def countByKey(self):
return self.map(lambda r: r[0]).countByValue()

def countByValue(self):
"""returns a ``dict`` containing the count for every value
"""returns a `dict` containing the count for every value
:rtype: dict
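
A short sketch of the two counting helpers documented above (dict display
order may vary):

    from pysparkling import Context

    rdd = Context().parallelize([('a', 1), ('b', 1), ('a', 2)])

    print(rdd.countByKey())                          # {'a': 2, 'b': 1}
    print(rdd.map(lambda kv: kv[1]).countByValue())  # {1: 2, 2: 1}
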
@@ -521,7 +520,7 @@ def foldByKey(self, zeroValue, op):
def foreach(self, f):
"""applies ``f`` to every element
-It does not return a new RDD like :func:`RDD.map()`.
+It does not return a new RDD like :func:`~pysparkling.RDD.map`.
:param f: Apply a function to every element.
:rtype: None
@@ -542,7 +541,8 @@ def foreach(self, f):
def foreachPartition(self, f):
"""applies ``f`` to every partition
-It does not return a new RDD like :func:`RDD.mapPartitions()`.
+It does not return a new RDD like
+:func:`~pysparkling.RDD.mapPartitions`.
:param f: Apply a function to every partition.
:rtype: None
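
A sketch of the side-effect-only semantics described in the two hunks above;
with the default in-thread Context, results can be captured by the applied
function itself:

    from pysparkling import Context

    seen = []
    Context().parallelize([1, 2, 3]).foreach(seen.append)
    print(seen)  # [1, 2, 3] -- foreach created no new RDD
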
@@ -554,7 +554,7 @@ def fullOuterJoin(self, other, numPartitions=None):
"""returns the full outer join of two RDDs
The output contains all keys from both input RDDs, with missing
-keys replaced with None.
+keys replaced with `None`.
:param RDD other: The RDD to join to this one.
:param int numPartitions: Number of partitions in the resulting RDD.
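
A sketch of the fullOuterJoin semantics described above, with missing keys
filled in with None on the corresponding side:

    from pysparkling import Context

    sc = Context()
    left = sc.parallelize([('a', 1), ('b', 2)])
    right = sc.parallelize([('b', 3), ('c', 4)])

    print(sorted(left.fullOuterJoin(right).collect()))
    # [('a', (1, None)), ('b', (2, 3)), ('c', (None, 4))]
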
@@ -592,7 +592,10 @@ def getNumPartitions(self):
return len(self.partitions())

def getPartitions(self):
"""returns the partitions of this RDD"""
"""returns the partitions of this RDD
:rtype: list
"""
return self.partitions()

def groupBy(self, f, numPartitions=None):
@@ -1038,7 +1041,7 @@ def reduceByKey(self, f):
:rtype: RDD
.. note::
-This operation includes a :func:`pysparkling.RDD.groupByKey()`
+This operation includes a :func:`~pysparkling.RDD.groupByKey()`
which is a local operation.
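
A sketch of reduceByKey, which merges the values of each key with the given
function:

    from pysparkling import Context

    rdd = Context().parallelize([('a', 1), ('a', 2), ('b', 5)])
    print(sorted(rdd.reduceByKey(lambda a, b: a + b).collect()))
    # [('a', 3), ('b', 5)]
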
@@ -1070,7 +1073,7 @@ def repartitionAndSortWithinPartitions(
:param int numPartitions: Number of partitions in new RDD.
:param partitionFunc: function that partitions
-:param ascending: Default is True.
+:param ascending: Sort order.
:param keyfunc: Returns the value that will be sorted.
:rtype: RDD
@@ -1330,9 +1333,9 @@ def sortBy(self, keyfunc, ascending=True, numPartitions=None):
"""sort by keyfunc
:param keyfunc: Returns the value that will be sorted.
-:param ascending: Default is True.
+:param ascending: Specify sort order.
:param int numPartitions:
-    Default is None. None means the output will have the same number of
+    `None` means the output will have the same number of
partitions as the input.
:rtype: RDD
@@ -1365,8 +1368,8 @@ def sortByKey(self, ascending=True, numPartitions=None,
keyfunc=itemgetter(0)):
"""sort by key
-:param ascending: Default is True.
-:param int numPartitions: Default is None. None means the output will
+:param ascending: Sort order.
+:param int numPartitions: `None` means the output will
have the same number of partitions as the input.
:param keyfunc: Returns the value that will be sorted.
:rtype: RDD
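
A sketch of the two sort methods documented in the hunks above:

    from operator import itemgetter

    from pysparkling import Context

    rdd = Context().parallelize([('b', 2), ('c', 3), ('a', 1)])

    print(rdd.sortByKey(ascending=False).collect())
    # [('c', 3), ('b', 2), ('a', 1)]

    print(rdd.sortBy(itemgetter(1)).collect())
    # [('a', 1), ('b', 2), ('c', 3)]
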
