
Integrate flake8_rst into ./ci/code_check.sh #23381

Merged (16 commits, Nov 9, 2018)

Changes from 9 commits
14 changes: 14 additions & 0 deletions ci/code_checks.sh
@@ -44,6 +44,17 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
flake8 pandas/_libs --filename=*.pxi.in,*.pxd --select=E501,E302,E203,E111,E114,E221,E303,E231,E126,F403
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "flake8-rst --version"
flake8-rst --version

MSG='Linting code-blocks in .py docstrings' ; echo $MSG
flake8-rst pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Linting code-blocks in .rst documentation' ; echo $MSG
flake8-rst doc --filename=*.rst
RET=$(($RET + $?)) ; echo $MSG "DONE"

# readability/casting: Warnings about C casting instead of C++ casting
# runtime/int: Warnings about using C number types instead of C++ ones
# build/include_subdir: Warnings about prefacing included header files with directory
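
Stepping back to the flake8-rst steps added above: flake8-rst extracts the code inside rst code-block/ipython directives (from .py docstrings and .rst files) and runs flake8 over it. A hypothetical docstring of the kind the new checks scan, with the violation flake8 would report marked inline:

    def head_sketch(s, n=5):
        """Return the first ``n`` elements of ``s``.

        .. code-block:: python

            s = pd.Series([1,3,5])  # E231: missing whitespace after ','
            s.head(2)
        """
        return s.iloc[:n]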
@@ -56,6 +67,9 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "isort --version-number"
isort --version-number

# Imports - Check formatting using isort see setup.cfg for settings
MSG='Check import format using isort ' ; echo $MSG
isort --recursive --check-only pandas
1 change: 1 addition & 0 deletions ci/environment-dev.yaml
@@ -7,6 +7,7 @@ dependencies:
- NumPy
- flake8
- flake8-comprehensions
- flake8-rst
- hypothesis>=3.58.0
- isort
- moto
1 change: 1 addition & 0 deletions ci/requirements_dev.txt
@@ -4,6 +4,7 @@ Cython>=0.28.2
NumPy
flake8
flake8-comprehensions
flake8-rst
hypothesis>=3.58.0
isort
moto
1 change: 1 addition & 0 deletions ci/travis-36.yaml
@@ -10,6 +10,7 @@ dependencies:
- feather-format
- flake8>=3.5
- flake8-comprehensions
- flake8-rst
- gcsfs
- geopandas
- html5lib
56 changes: 28 additions & 28 deletions doc/source/10min.rst
@@ -45,7 +45,7 @@ a default integer index:

.. ipython:: python

s = pd.Series([1,3,5,np.nan,6,8])
s = pd.Series([1, 3, 5, np.nan, 6, 8])
s

Creating a :class:`DataFrame` by passing a NumPy array, with a datetime index
Expand All @@ -62,12 +62,12 @@ Creating a ``DataFrame`` by passing a dict of objects that can be converted to s

.. ipython:: python

df2 = pd.DataFrame({ 'A' : 1.,
'B' : pd.Timestamp('20130102'),
'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
'D' : np.array([3] * 4,dtype='int32'),
'E' : pd.Categorical(["test","train","test","train"]),
'F' : 'foo' })
df2 = pd.DataFrame({'A': 1.,
'B': pd.Timestamp('20130102'),
'C': pd.Series(1, index=list(range(4)), dtype='float32'),
'D': np.array([3] * 4, dtype='int32'),
'E': pd.Categorical(["test", "train", "test", "train"]),
'F': 'foo'})
df2

The columns of the resulting ``DataFrame`` have different
@@ -283,9 +283,9 @@ Using the :func:`~Series.isin` method for filtering:
.. ipython:: python

df2 = df.copy()
df2['E'] = ['one', 'one','two','three','four','three']
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2
df2[df2['E'].isin(['two','four'])]
df2[df2['E'].isin(['two', 'four'])]

Setting
~~~~~~~
@@ -295,7 +295,7 @@ by the indexes.

.. ipython:: python

s1 = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130102', periods=6))
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130102', periods=6))
s1
df['F'] = s1

@@ -394,7 +394,7 @@ In addition, pandas automatically broadcasts along the specified dimension.

.. ipython:: python

s = pd.Series([1,3,5,np.nan,6,8], index=dates).shift(2)
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
s
df.sub(s, axis='index')

@@ -492,7 +492,7 @@ section.

.. ipython:: python

df = pd.DataFrame(np.random.randn(8, 4), columns=['A','B','C','D'])
df = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])
df
s = df.iloc[3]
df.append(s, ignore_index=True)
@@ -512,12 +512,12 @@ See the :ref:`Grouping section <groupby>`.

.. ipython:: python

df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B' : ['one', 'one', 'two', 'three',
'two', 'two', 'one', 'three'],
'C' : np.random.randn(8),
'D' : np.random.randn(8)})
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
'foo', 'bar', 'foo', 'foo'],
'B': ['one', 'one', 'two', 'three',
'two', 'two', 'one', 'three'],
'C': np.random.randn(8),
'D': np.random.randn(8)})
df

Grouping and then applying the :meth:`~DataFrame.sum` function to the resulting
@@ -532,7 +532,7 @@ apply the ``sum`` function.

.. ipython:: python

df.groupby(['A','B']).sum()
df.groupby(['A', 'B']).sum()

Reshaping
---------
@@ -578,11 +578,11 @@ See the section on :ref:`Pivot Tables <reshaping.pivot>`.

.. ipython:: python

df = pd.DataFrame({'A' : ['one', 'one', 'two', 'three'] * 3,
'B' : ['A', 'B', 'C'] * 4,
'C' : ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
'D' : np.random.randn(12),
'E' : np.random.randn(12)})
df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
'B': ['A', 'B', 'C'] * 4,
'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
'D': np.random.randn(12),
'E': np.random.randn(12)})
df

We can produce pivot tables from this data very easily:
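
The pivot-table call itself is elided in this diff view; for illustration, a minimal sketch over this frame (not necessarily the exact code in the doc):

.. code-block:: python

    pd.pivot_table(df, values='D', index=['A', 'B'], columns=['C'])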
@@ -653,7 +653,7 @@ pandas can include categorical data in a ``DataFrame``. For full docs, see the

.. ipython:: python

df = pd.DataFrame({"id":[1,2,3,4,5,6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})
df = pd.DataFrame({"id":[1, 2, 3, 4, 5, 6], "raw_grade":['a', 'b', 'b', 'a', 'a', 'e']})

Convert the raw grades to a categorical data type.
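
The conversion itself is elided in this diff view; a minimal sketch of what it looks like:

.. code-block:: python

    df["grade"] = df["raw_grade"].astype("category")
    df["grade"]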

@@ -753,13 +753,13 @@ Writing to a HDF5 Store.

.. ipython:: python

df.to_hdf('foo.h5','df')
df.to_hdf('foo.h5', 'df')

Reading from a HDF5 Store.

.. ipython:: python

pd.read_hdf('foo.h5','df')
pd.read_hdf('foo.h5', 'df')

.. ipython:: python
:suppress:
@@ -796,7 +796,7 @@ If you are attempting to perform an operation you might see an exception like:
.. code-block:: python

>>> if pd.Series([False, True, False]):
print("I was true")
... print("I was true")
Traceback
...
ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().
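
To resolve the ambiguity, reduce the Series to a single boolean explicitly; a short sketch of the standard options:

.. code-block:: python

    s = pd.Series([False, True, False])
    s.any()    # True: at least one element is True
    s.all()    # False: not every element is True
    s.empty    # False: the Series has elements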
22 changes: 10 additions & 12 deletions doc/source/advanced.rst
@@ -318,13 +318,13 @@ As usual, **both sides** of the slicers are included as this is label indexing.

.. code-block:: python

df.loc[(slice('A1','A3'),.....), :]
df.loc[(slice('A1', 'A3'), ...), :] # noqa: E999

  You should **not** do this:

.. code-block:: python

df.loc[(slice('A1','A3'),.....)]
df.loc[(slice('A1', 'A3'), ...)] # noqa: E999
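
An equivalent, often more readable spelling uses ``pd.IndexSlice`` instead of raw ``slice`` objects (a sketch, assuming the same MultiIndexed ``df``):

.. code-block:: python

    idx = pd.IndexSlice
    df.loc[idx['A1':'A3', :], :]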

.. ipython:: python

@@ -740,15 +740,13 @@ values **not** in the categories, similarly to how you can reindex **any** panda

.. code-block:: python

In [9]: df3 = pd.DataFrame({'A' : np.arange(6),
'B' : pd.Series(list('aabbca')).astype('category')})
>>> df3 = pd.DataFrame({'A': np.arange(6),
... 'B': pd.Series(list('aabbca')).astype('category')})
>>> df3 = df3.set_index('B')
>>> df3.index
CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category')

In [11]: df3 = df3.set_index('B')

In [11]: df3.index
Out[11]: CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category')

In [12]: pd.concat([df2, df3]
>>> pd.concat([df2, df3])
TypeError: categories must match existing categories when appending
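
One way around the ``TypeError`` is to put both indexes on a shared set of categories before concatenating; a sketch, assuming ``df2`` also carries a ``CategoricalIndex``:

.. code-block:: python

    # Align both indexes on the union of their categories; concat then works
    # and the result keeps the combined categories.
    cats = df2.index.categories.union(df3.index.categories)
    df2.index = df2.index.set_categories(cats)
    df3.index = df3.index.set_categories(cats)
    pd.concat([df2, df3])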

.. _indexing.rangeindex:
@@ -1033,11 +1031,11 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
.. code-block:: python

# 0 is not in the index
In [9]: df.loc[0:4, :]
>>> df.loc[0:4, :]
KeyError: 0

# 3 is not a unique label
In [11]: df.loc[2:3, :]
>>> df.loc[2:3, :]
KeyError: 'Cannot get right slice bound for non-unique label: 3'
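
Sorting the index is usually the quickest fix: once the index is monotonic, both cases above become legal slices (a sketch):

.. code-block:: python

    df_sorted = df.sort_index()
    df_sorted.loc[0:4, :]  # bounds absent from the index are now allowed
    df_sorted.loc[2:3, :]  # so are duplicated labels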

``Index.is_monotonic_increasing`` and ``Index.is_monotonic_decreasing`` only check that
23 changes: 8 additions & 15 deletions doc/source/basics.rst
@@ -302,23 +302,17 @@ To evaluate single-element pandas objects in a boolean context, use the method

.. warning::

You might be tempted to do the following:
Using a DataFrame as a condition will raise errors,
as you are trying to compare multiple values:

Review comment (Member): What was the point of changing this? We always ask changes to have one focus to keep the diff minimal. Would prefer if you could back out anything unrelated to flake8 and do it in a separate PR.

.. code-block:: python

>>> if df:
...

Or

.. code-block:: python

>>> df and df2

These will both raise errors, as you are trying to compare multiple values.

.. code-block:: python
... do_something()
ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().

>>> if df and df2:
... do_something()
ValueError: The truth value of an array is ambiguous. Use a.empty, a.any() or a.all().

See :ref:`gotchas<gotchas.truth>` for a more detailed discussion.
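
When the goal is to compare two frames, be explicit about the reduction; a short sketch of standard alternatives (assuming identically labeled frames):

.. code-block:: python

    df.empty                 # True only for a length-0 frame
    (df == df2).all().all()  # collapse an element-wise comparison to one bool
    df.equals(df2)           # same, but also treats aligned NaNs as equal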
@@ -732,9 +726,8 @@ with the equivalent
.. code-block:: python

>>> (df.pipe(h)
.pipe(g, arg1=1)
.pipe(f, arg2=2, arg3=3)
)
... .pipe(g, arg1=1)
... .pipe(f, arg2=2, arg3=3))

Pandas encourages the second style, which is known as method chaining.
``pipe`` makes it easy to use your own or another library's functions
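
To make the equivalence concrete, a toy sketch (``f``, ``g``, and ``h`` are hypothetical stand-ins, as in the doc's example):

.. code-block:: python

    def h(df):
        return df.dropna()

    def g(df, arg1):
        return df * arg1

    def f(df, arg2, arg3):
        return df + arg2 - arg3

    # The nested spelling reads inside-out...
    f(g(h(df), arg1=1), arg2=2, arg3=3)

    # ...while the piped spelling reads top-down.
    (df.pipe(h)
       .pipe(g, arg1=1)
       .pipe(f, arg2=2, arg3=3))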
17 changes: 7 additions & 10 deletions doc/source/comparison_with_sas.rst
@@ -298,8 +298,8 @@ see the :ref:`timeseries documentation<timeseries>` for more details.
.. ipython:: python
:suppress:

tips = tips.drop(['date1','date2','date1_year',
'date2_month','date1_next','months_between'], axis=1)
tips = tips.drop(['date1', 'date2', 'date1_year', 'date2_month',
'date1_next', 'months_between'], axis=1)

Selection of Columns
~~~~~~~~~~~~~~~~~~~~
@@ -744,12 +744,9 @@ XPORT is a relatively limited format and the parsing of it is not as
optimized as some of the other pandas readers. An alternative way
to interop data between SAS and pandas is to serialize to csv.

.. code-block:: python

# version 0.17, 10M rows

In [8]: %time df = pd.read_sas('big.xpt')
Wall time: 14.6 s
>>> # version 0.17, 10M rows
>>> %time df = pd.read_sas('big.xpt')
Wall time: 14.6 s

In [9]: %time df = pd.read_csv('big.csv')
Wall time: 4.86 s
>>> %time df = pd.read_csv('big.csv')
Wall time: 4.86 s
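
A minimal round trip for the csv route (a sketch; file names are placeholders):

.. code-block:: python

    import pandas as pd

    # Read a csv exported from SAS (e.g. via PROC EXPORT)...
    df = pd.read_csv('big.csv')

    # ...and write one back for SAS to load with PROC IMPORT.
    df.to_csv('big_out.csv', index=False)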
20 changes: 14 additions & 6 deletions doc/source/contributing.rst
@@ -744,15 +744,15 @@ Transitioning to ``pytest``
.. code-block:: python

class TestReallyCoolFeature(object):
....
.... # noqa: E999
Review comment (FHaase, Contributor/Author, Oct 31, 2018): @WillAyd Would you prefer a pass instead of the noqa?


Going forward, we are moving to a more *functional* style using the `pytest <http://docs.pytest.org/en/latest/>`__ framework, which offers a richer testing
framework that will facilitate testing and developing. Thus, instead of writing test classes, we will write test functions like this:

.. code-block:: python

def test_really_cool_feature():
....
.... # noqa: E999

Using ``pytest``
~~~~~~~~~~~~~~~~
@@ -777,25 +777,32 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place
import pandas as pd
from pandas.util import testing as tm


@pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64'])
def test_dtypes(dtype):
assert str(np.dtype(dtype)) == dtype

@pytest.mark.parametrize('dtype', ['float32',
pytest.param('int16', marks=pytest.mark.skip),
pytest.param('int32',
marks=pytest.mark.xfail(reason='to show how it works'))])

@pytest.mark.parametrize('dtype',
['float32',
pytest.param('int16', marks=pytest.mark.skip),
pytest.param('int32', marks=pytest.mark.xfail(
reason='example'))
])
def test_mark(dtype):
assert str(np.dtype(dtype)) == 'float32'


@pytest.fixture
def series():
return pd.Series([1, 2, 3])


@pytest.fixture(params=['int8', 'int16', 'int32', 'int64'])
def dtype(request):
return request.param


def test_series(series, dtype):
result = series.astype(dtype)
assert result.dtype == dtype
@@ -864,6 +871,7 @@ for details <https://hypothesis.readthedocs.io/en/latest/index.html>`_.
st.lists(any_json_value), st.dictionaries(st.text(), any_json_value)
))


@given(value=any_json_value)
def test_json_roundtrip(value):
result = json.loads(json.dumps(value))
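
The strategy definition above is cut off by the diff view; a self-contained sketch of the full example, assuming a recursive strategy named ``any_json_value``:

.. code-block:: python

    import json

    from hypothesis import given, strategies as st

    # Arbitrary JSON-serializable values; NaN is excluded since NaN != NaN
    # would make the round-trip assertion fail.
    any_json_value = st.deferred(lambda: st.one_of(
        st.none(), st.booleans(), st.floats(allow_nan=False), st.text(),
        st.lists(any_json_value),
        st.dictionaries(st.text(), any_json_value),
    ))


    @given(value=any_json_value)
    def test_json_roundtrip(value):
        result = json.loads(json.dumps(value))
        assert value == result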