Integrate flake8_rst into ./ci/code_check.sh #23381

Merged (16 commits, Nov 9, 2018)
Changes from 11 commits
14 changes: 14 additions & 0 deletions ci/code_checks.sh
@@ -44,6 +44,17 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "flake8-rst --version"
flake8-rst --version

MSG='Linting code-blocks in .py docstrings' ; echo $MSG
flake8-rst pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Linting code-blocks in .rst documentation' ; echo $MSG
flake8-rst doc --filename=*.rst
RET=$(($RET + $?)) ; echo $MSG "DONE"

# readability/casting: Warnings about C casting instead of C++ casting
# runtime/int: Warnings about using C number types instead of C++ ones
# build/include_subdir: Warnings about prefacing included header files with directory
@@ -56,6 +67,9 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "isort --version-number"
isort --version-number

# Imports - Check formatting using isort see setup.cfg for settings
MSG='Check import format using isort ' ; echo $MSG
isort --recursive --check-only pandas
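For context on the two new lint steps: flake8-rst extracts the code from rst code blocks, both in ``.rst`` files and in ``.py`` docstrings, and runs flake8 over the extracted snippets. A minimal sketch of the kind of file the checks target; the module and its docstring are hypothetical, not part of this PR:

# demo_module.py -- hypothetical file; flake8-rst extracts the body of the
# ``.. code-block:: python`` directive below and feeds it to flake8.
"""Demo of a docstring that the new check would lint.

.. code-block:: python

    import pandas as pd

    s = pd.Series([1, 3, 5])   # clean: spaces after commas
    t = pd.Series([1,3,5])     # would be flagged (E231, missing whitespace)
"""

Because the new steps sit inside the ``if [[ -z "$CHECK" || "$CHECK" == "lint" ]]`` guard, they also run locally via ``./ci/code_checks.sh lint``.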
1 change: 1 addition & 0 deletions ci/environment-dev.yaml
@@ -7,6 +7,7 @@ dependencies:
- NumPy
- flake8
- flake8-comprehensions
- flake8-rst
- hypothesis>=3.58.0
- isort
- moto
1 change: 1 addition & 0 deletions ci/requirements_dev.txt
@@ -4,6 +4,7 @@ Cython>=0.28.2
NumPy
flake8
flake8-comprehensions
flake8-rst
hypothesis>=3.58.0
isort
moto
1 change: 1 addition & 0 deletions ci/travis-36.yaml
@@ -10,6 +10,7 @@ dependencies:
- feather-format
- flake8>=3.5
- flake8-comprehensions
- flake8-rst
- gcsfs
- geopandas
- html5lib
56 changes: 28 additions & 28 deletions doc/source/10min.rst
@@ -45,7 +45,7 @@ a default integer index:

.. ipython:: python
- s = pd.Series([1,3,5,np.nan,6,8])
+ s = pd.Series([1, 3, 5, np.nan, 6, 8])
s
Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index
@@ -62,12 +62,12 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s

.. ipython:: python
- df2 = pd.DataFrame({ 'A' : 1.,
-                      'B' : pd.Timestamp('20130102'),
-                      'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
-                      'D' : np.array([3] * 4,dtype='int32'),
-                      'E' : pd.Categorical(["test","train","test","train"]),
-                      'F' : 'foo' })
+ df2 = pd.DataFrame({'A': 1.,
+                     'B': pd.Timestamp('20130102'),
+                     'C': pd.Series(1, index=list(range(4)),dtype='float32'),
+                     'D': np.array([3] * 4, dtype='int32'),
+                     'E': pd.Categorical(["test", "train", "test", "train"]),
+                     'F': 'foo'})
df2
The columns of the resulting ``DataFrame`` have different
@@ -283,9 +283,9 @@ Using the :func:`~Series.isin` method for filtering:
.. ipython:: python
df2 = df.copy()
- df2['E'] = ['one', 'one','two','three','four','three']
+ df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2
- df2[df2['E'].isin(['two','four'])]
+ df2[df2['E'].isin(['two', 'four'])]
Setting
~~~~~~~
@@ -295,7 +295,7 @@ by the indexes.

.. ipython:: python
- s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
+ s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
s1
df['F'] = s1
@@ -394,7 +394,7 @@ In addition, pandas automatically broadcasts along the specified dimension.

.. ipython:: python
- s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
+ s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
s
df.sub(s, axis='index')
@@ -492,7 +492,7 @@ section.

.. ipython:: python
- df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
+ df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
df
s = df.iloc[3]
df.append(s, ignore_index=True)
@@ -512,12 +512,12 @@ See the :ref:`Grouping section <groupby>`.

.. ipython:: python
- df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
-                           'foo', 'bar', 'foo', 'foo'],
-                    'B' : ['one', 'one', 'two', 'three',
-                           'two', 'two', 'one', 'three'],
-                    'C' : np.random.randn(8),
-                    'D' : np.random.randn(8)})
+ df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+                          'foo', 'bar', 'foo', 'foo'],
+                    'B': ['one', 'one', 'two', 'three',
+                          'two', 'two', 'one', 'three'],
+                    'C': np.random.randn(8),
+                    'D': np.random.randn(8)})
df
Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting
@@ -532,7 +532,7 @@ apply the ``sum`` function.

.. ipython:: python
- df.groupby(['A','B']).sum()
+ df.groupby(['A', 'B']).sum()
Reshaping
---------
@@ -578,11 +578,11 @@ See the section on :ref:`Pivot Tables <reshaping.pivot>`.

.. ipython:: python
- df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
-                    'B' : ['A', 'B', 'C'] * 4,
-                    'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
-                    'D' : np.random.randn(12),
-                    'E' : np.random.randn(12)})
+ df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
+                    'B': ['A', 'B', 'C'] * 4,
+                    'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
+                    'D': np.random.randn(12),
+                    'E': np.random.randn(12)})
df
We can produce pivot tables from this data very easily:
@@ -653,7 +653,7 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the

.. ipython:: python
df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
df = pd.DataFrame({"id":[1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
Convert the raw grades to a categorical data type.

@@ -753,13 +753,13 @@ Writing to a HDF5 Store.

.. ipython:: python
- df.to_hdf('foo.h5','df')
+ df.to_hdf('foo.h5', 'df')
Reading from a HDF5 Store.

.. ipython:: python
- pd.read_hdf('foo.h5','df')
+ pd.read_hdf('foo.h5', 'df')
.. ipython:: python
:suppress:
@@ -796,7 +796,7 @@ If you are attempting to perform an operation you might see an exception like:
.. code-block:: python
>>> if pd.Series([False, True, False]):
print("I was true")
... print("I was true")
Traceback
...
ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
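As a side note on the gotcha documented above: the exception message names its own fix. A short runnable sketch of the alternatives it suggests:

import pandas as pd

s = pd.Series([False, True, False])

# `if s:` would raise the ValueError shown above, because a Series has
# no single truth value; ask an explicit question instead:
print(s.any())    # True  -> at least one element is truthy
print(s.all())    # False -> not every element is truthy
print(s.empty)    # False -> the Series has elements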
10 changes: 5 additions & 5 deletions doc/source/advanced.rst
@@ -318,13 +318,13 @@ As usual, **both sides** of the slicers are included as this is label indexing.

.. code-block:: python
- df.loc[(slice('A1','A3'),.....), :]
+ df.loc[(slice('A1', 'A3'), ...), :] # noqa: E999
  You should **not** do this:

.. code-block:: python
- df.loc[(slice('A1','A3'),.....)]
+ df.loc[(slice('A1', 'A3'), ...)] # noqa: E999
.. ipython:: python
@@ -532,7 +532,7 @@ used to move the values from the ``MultiIndex`` to a column.
.. ipython:: python
df.rename_axis(index=['abc', 'def'])
Note that the columns of a ``DataFrame`` are an index, so that using
``rename_axis`` with the ``columns`` argument will change the name of that
index.
@@ -779,7 +779,7 @@ values **not** in the categories, similarly to how you can reindex **any** panda
Reshaping and Comparison operations on a ``CategoricalIndex`` must have the same categories
or a ``TypeError`` will be raised.

- .. code-block:: python
+ .. code-block:: ipython
In [9]: df3 = pd.DataFrame({'A' : np.arange(6),
'B' : pd.Series(list('aabbca')).astype('category')})
@@ -1071,7 +1071,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
# OK because 2 and 4 are in the index
df.loc[2:4, :]
- .. code-block:: python
+ .. code-block:: ipython
# 0 is not in the index
In [9]: df.loc[0:4, :]
13 changes: 6 additions & 7 deletions doc/source/basics.rst
@@ -306,18 +306,18 @@ To evaluate single-element pandas objects in a boolean context, use the method

.. code-block:: python

- >>> if df:
- ...
+ >>> if df: # noqa: E999
+ ...

Or

.. code-block:: python

- >>> df and df2
+ >>> df and df2 # noqa: E999
Member (review comment): Generally, what kind of E999 errors are you getting? I'm not terribly familiar with this code, but E999 seems to come from AST issues; I'm wondering whether that's more of a Python versioning issue than an actual styling violation.

Contributor Author (reply): I guess the `and` tricked me into thinking it was an if-statement like a couple of lines above.

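On the reviewer's question: E999 is the flake8 code for code that fails to parse into an AST at all (a SyntaxError or IndentationError) rather than a style violation, which is why doc snippets with elided bodies trip it. A rough standard-library illustration; the snippets are illustrative, not from this PR:

import ast

snippets = {
    "if df:\n    ...": "parses: `...` is a valid statement body, no E999",
    "if df:": "no body, fails to parse -> flake8 reports E999",
    "x = ....": "four dots are not valid syntax -> E999",
}

for src, note in snippets.items():
    try:
        ast.parse(src)
        print("OK  ", note)
    except SyntaxError as exc:
        print("E999", note, "-", exc.msg)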
These will both raise errors, as you are trying to compare multiple values.

- .. code-block:: python
+ .. code-block:: console
Member (review comment): Does python-traceback render here? It might be a better choice than console.

Contributor Author (reply): 👍 Didn't know that directive.

ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().

@@ -732,9 +732,8 @@ with the equivalent
.. code-block:: python

>>> (df.pipe(h)
-    .pipe(g, arg1=1)
-    .pipe(f, arg2=2, arg3=3)
-    )
+ ... .pipe(g, arg1=1)
+ ... .pipe(f, arg2=2, arg3=3))

Pandas encourages the second style, which is known as method chaining.
``pipe`` makes it easy to use your own or another library's functions
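For reference, a self-contained sketch of the method chaining this passage recommends; ``f``, ``g``, and ``h`` are stand-in functions, not pandas API:

import pandas as pd

def h(df):
    return df.assign(total=df['A'] + df['B'])

def g(df, arg1):
    return df * arg1

def f(df, arg2, arg3):
    return df + arg2 - arg3

df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})

# Reads top-down; equivalent to f(g(h(df), arg1=1), arg2=2, arg3=3):
result = (df.pipe(h)
            .pipe(g, arg1=1)
            .pipe(f, arg2=2, arg3=3))
print(result)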
2 changes: 1 addition & 1 deletion doc/source/comparison_with_sas.rst
@@ -744,7 +744,7 @@ XPORT is a relatively limited format and the parsing of it is not as
optimized as some of the other pandas readers. An alternative way
to interop data between SAS and pandas is to serialize to csv.

- .. code-block:: python
+ .. code-block:: ipython

# version 0.17, 10M rows

18 changes: 12 additions & 6 deletions doc/source/contributing.rst
@@ -744,15 +744,15 @@ Transitioning to ``pytest``
.. code-block:: python

class TestReallyCoolFeature(object):
-    ....
+    .... # noqa: E999
Contributor Author (@FHaase, Oct 31, 2018): @WillAyd Would you prefer a pass instead of the noqa?

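On the question above: either choice keeps the placeholder body syntactically valid, so no noqa would be needed at all. A quick sketch; the test names are hypothetical:

# Both placeholders parse cleanly, so flake8 reports no E999 for either:

class TestReallyCoolFeature(object):
    def test_placeholder(self):
        pass    # explicit no-op statement


def test_really_cool_feature():
    ...    # Ellipsis literal; also a valid statement body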
Going forward, we are moving to a more *functional* style using the `pytest <http://docs.pytest.org/en/latest/>`__ framework, which offers a richer testing
framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this:

.. code-block:: python

def test_really_cool_feature():
-    ....
+    .... # noqa: E999

Using ``pytest``
~~~~~~~~~~~~~~~~
@@ -777,25 +777,30 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place
import pandas as pd
from pandas.util import testing as tm


@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64'])
def test_dtypes(dtype):
assert str(np.dtype(dtype)) == dtype

- @pytest.mark.parametrize('dtype', ['float32',
-                          pytest.param('int16', marks=pytest.mark.skip),
-                          pytest.param('int32',
-                          marks=pytest.mark.xfail(reason='to show how it works'))])
+
+ @pytest.mark.parametrize(
+     'dtype', ['float32', pytest.param('int16', marks=pytest.mark.skip),
+               pytest.param('int32', marks=pytest.mark.xfail(
+                   reason='to show how it works'))])
def test_mark(dtype):
assert str(np.dtype(dtype)) == 'float32'


@pytest.fixture
def series():
return pd.Series([1, 2, 3])


@pytest.fixture(params=['int8', 'int16', 'int32', 'int64'])
def dtype(request):
return request.param


def test_series(series, dtype):
result = series.astype(dtype)
assert result.dtype == dtype
@@ -864,6 +869,7 @@ for details <https://hypothesis.readthedocs.io/en/latest/index.html>`_.
st.lists(any_json_value), st.dictionaries(st.text(), any_json_value)
))


@given(value=any_json_value)
def test_json_roundtrip(value):
result = json.loads(json.dumps(value))