From 322ea45af919b6496ecd3dbcf25b27a70723c337 Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Sat, 6 Jun 2020 14:12:06 -0400
Subject: [PATCH 01/16] auto flake8 stuff

---
 sklearn/datasets/_covtype.py           | 36 +++++++++++++++++++++++++-
 sklearn/datasets/tests/test_covtype.py | 18 ++++++++++++-
 2 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index de93b22ac4f56..d5708ab046e96 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -23,6 +23,7 @@
 import joblib
 
 from . import get_data_home
+from ._base import _convert_data_dataframe
 from ._base import _fetch_remote
 from ._base import RemoteFileMetadata
 from ..utils import Bunch
@@ -44,7 +45,8 @@
 
 @_deprecate_positional_args
 def fetch_covtype(*, data_home=None, download_if_missing=True,
-                  random_state=None, shuffle=False, return_X_y=False):
+                  random_state=None, shuffle=False, return_X_y=False,
+                  as_frame=False):
     """Load the covertype dataset (classification).
 
     Download it if necessary.
@@ -80,6 +82,8 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
         If True, returns ``(data.data, data.target)`` instead of a Bunch
         object.
 
+    as_frame : boolean, default=False.
+        If True, returns ``pandas.DataFrame`` instead of a Bunch object
         .. versionadded:: 0.20
 
     Returns
@@ -98,6 +102,9 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
 
     (data, target) : tuple if ``return_X_y`` is True
 
+    dataframe: :class: `pandas.DataFrame`
+        Pandas dataframe
+
         .. versionadded:: 0.20
     """
 
@@ -145,4 +152,31 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
     if return_X_y:
         return X, y
 
+    if as_frame:
+        """
+        Column names reference: https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
+        """
+        feat_cols = ["Elevation",
+                    "Aspect",
+                    "Slope",
+                    "Horizontal_Distance_To_Hydrology",
+                    "Vertical_Distance_To_Hydrology",
+                    "Horizontal_Distance_To_Roadways",
+                    "Hillshade_9am",
+                    "Hillshade_Noon",
+                    "Hillshade_3pm",
+                    "Horizontal_Distance_To_Fire_Points"]
+        feat_cols += ['Wilderness_Area_'+str(i) for i in range(1,5)]
+        feat_cols += ['Soil_Type_'+str(i) for i in range(1,41)]
+        target_col = ["Cover_Type"]
+
+        frame, X, y = _convert_data_dataframe("fetch_covtype", X, y,
+                                              feat_cols, target_col)
+        return Bunch(data=X,
+                     target=y,
+                     frame=frame,
+                     target_names=target_col,
+                     feature_names=feat_cols,
+                     DESCR=fdescr)
+
     return Bunch(data=X, target=y, DESCR=fdescr)
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index d966e6c3890d0..d66fff0940942 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -1,7 +1,7 @@
 """Test the covtype loader, if the data is available,
 or if specifically requested via environment variable
 (e.g. for travis cron job)."""
-
+import pytest
 from sklearn.datasets.tests.test_common import check_return_X_y
 from functools import partial
 
@@ -23,3 +23,19 @@ def test_fetch(fetch_covtype_fxt):
     # test return_X_y option
     fetch_func = partial(fetch_covtype_fxt)
     check_return_X_y(data1, fetch_func)
+
+def test_fetch_asframe(fetch_covtype_fxt):
+    pd = pytest.importorskip('pandas')
+    bunch = fetch_covtype_fxt(as_frame=True)
+    frame = bunch.frame
+    assert hasattr(bunch, frame) is True
+    assert frame.shape == (581012, 55)
+    assert isinstance(bunch.data, pd.DataFrame)
+    assert isinstance(bunch.target, pd.Series)
+
+def test_pandas_dependency_message(fetch_covtype_fxt,
+                                   hide_available_pandas):
+    expected_msg = ('fetch_covtype_fxt with as_frame=True'
+                    ' requires pandas')
+    with pytest.raises(ImportError, match=expected_msg):
+        fetch_covtype_fxt(as_frame=True)

From c5f17eb7fd9e4b6dbbe30585f36ff68946fa6a4f Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Sat, 6 Jun 2020 14:26:15 -0400
Subject: [PATCH 02/16] tests passed

---
 sklearn/datasets/tests/test_covtype.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index d66fff0940942..246482507230c 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -35,7 +35,7 @@ def test_fetch_asframe(fetch_covtype_fxt):
 
 def test_pandas_dependency_message(fetch_covtype_fxt,
                                    hide_available_pandas):
-    expected_msg = ('fetch_covtype_fxt with as_frame=True'
+    expected_msg = ('fetch_covtype with as_frame=True'
                     ' requires pandas')
     with pytest.raises(ImportError, match=expected_msg):
         fetch_covtype_fxt(as_frame=True)

From 9ab6f5c1ab4291c2fc15aaa2a05041b7eff70528 Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Sat, 6 Jun 2020 14:48:06 -0400
Subject: [PATCH 03/16] adjust lineendings

---
 sklearn/datasets/_covtype.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index d5708ab046e96..58914bdbe66dd 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -157,15 +157,15 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
         Column names reference: https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
         """
         feat_cols = ["Elevation",
-                    "Aspect",
-                    "Slope",
-                    "Horizontal_Distance_To_Hydrology",
-                    "Vertical_Distance_To_Hydrology",
-                    "Horizontal_Distance_To_Roadways",
-                    "Hillshade_9am",
-                    "Hillshade_Noon",
-                    "Hillshade_3pm",
-                    "Horizontal_Distance_To_Fire_Points"]
+                     "Aspect",
+                     "Slope",
+                     "Horizontal_Distance_To_Hydrology",
+                     "Vertical_Distance_To_Hydrology",
+                     "Horizontal_Distance_To_Roadways",
+                     "Hillshade_9am",
+                     "Hillshade_Noon",
+                     "Hillshade_3pm",
+                     "Horizontal_Distance_To_Fire_Points"]
         feat_cols += ['Wilderness_Area_'+str(i) for i in range(1,5)]
         feat_cols += ['Soil_Type_'+str(i) for i in range(1,41)]
         target_col = ["Cover_Type"]

From 24df8fa8f314c2327b96255dea0850d6703a094d Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Sat, 6 Jun 2020 15:25:49 -0400
Subject: [PATCH 04/16] fixing flake8 linting issue

---
 sklearn/datasets/_covtype.py           | 7 ++++---
 sklearn/datasets/tests/test_covtype.py | 2 ++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 58914bdbe66dd..1bcf5308474c4 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -154,7 +154,8 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
 
     if as_frame:
         """
-        Column names reference: https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
+        Column names reference:
+        https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
         """
         feat_cols = ["Elevation",
                      "Aspect",
@@ -166,8 +167,8 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
                      "Hillshade_Noon",
                      "Hillshade_3pm",
                      "Horizontal_Distance_To_Fire_Points"]
-        feat_cols += ['Wilderness_Area_'+str(i) for i in range(1,5)]
-        feat_cols += ['Soil_Type_'+str(i) for i in range(1,41)]
+        feat_cols += ['Wilderness_Area_'+str(i) for i in range(1, 5)]
+        feat_cols += ['Soil_Type_'+str(i) for i in range(1, 41)]
         target_col = ["Cover_Type"]
 
         frame, X, y = _convert_data_dataframe("fetch_covtype", X, y,
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index 246482507230c..c0b83d2bb691e 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -24,6 +24,7 @@ def test_fetch(fetch_covtype_fxt):
     fetch_func = partial(fetch_covtype_fxt)
     check_return_X_y(data1, fetch_func)
 
+
 def test_fetch_asframe(fetch_covtype_fxt):
     pd = pytest.importorskip('pandas')
     bunch = fetch_covtype_fxt(as_frame=True)
@@ -33,6 +34,7 @@ def test_fetch_asframe(fetch_covtype_fxt):
     assert isinstance(bunch.data, pd.DataFrame)
     assert isinstance(bunch.target, pd.Series)
 
+
 def test_pandas_dependency_message(fetch_covtype_fxt,
                                    hide_available_pandas):
     expected_msg = ('fetch_covtype with as_frame=True'

From 5a34ebf48b0146072e647c0600ed99452789d866 Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Mon, 8 Jun 2020 20:17:10 -0400
Subject: [PATCH 05/16] adjusted API

---
 sklearn/datasets/_covtype.py           | 48 +++++++++++++++-----------
 sklearn/datasets/tests/test_covtype.py |  1 +
 2 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 1bcf5308474c4..00d4afbad6c76 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -82,8 +82,12 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
         If True, returns ``(data.data, data.target)`` instead of a Bunch
         object.
 
-    as_frame : boolean, default=False.
-        If True, returns ``pandas.DataFrame`` instead of a Bunch object
+    as_frame : bool, default=False
+        If True, the data is a pandas DataFrame including columns with
+        appropriate dtypes (numeric). The target is
+        a pandas DataFrame or Series depending on the number of target columns.
+        If `return_X_y` is True, then (`data`, `target`) will be pandas
+        DataFrames or Series as described below.
         .. versionadded:: 0.20
 
     Returns
@@ -149,35 +153,37 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
     with open(join(module_path, 'descr', 'covtype.rst')) as rst_file:
         fdescr = rst_file.read()
 
-    if return_X_y:
-        return X, y
-
-    if as_frame:
+    if as_frame or return_X_y:
         """
         Column names reference:
         https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
         """
         feat_cols = ["Elevation",
-                     "Aspect",
-                     "Slope",
-                     "Horizontal_Distance_To_Hydrology",
-                     "Vertical_Distance_To_Hydrology",
-                     "Horizontal_Distance_To_Roadways",
-                     "Hillshade_9am",
-                     "Hillshade_Noon",
-                     "Hillshade_3pm",
-                     "Horizontal_Distance_To_Fire_Points"]
+                        "Aspect",
+                        "Slope",
+                        "Horizontal_Distance_To_Hydrology",
+                        "Vertical_Distance_To_Hydrology",
+                        "Horizontal_Distance_To_Roadways",
+                        "Hillshade_9am",
+                        "Hillshade_Noon",
+                        "Hillshade_3pm",
+                        "Horizontal_Distance_To_Fire_Points"]
         feat_cols += ['Wilderness_Area_'+str(i) for i in range(1, 5)]
         feat_cols += ['Soil_Type_'+str(i) for i in range(1, 41)]
         target_col = ["Cover_Type"]
 
         frame, X, y = _convert_data_dataframe("fetch_covtype", X, y,
                                               feat_cols, target_col)
-        return Bunch(data=X,
-                     target=y,
-                     frame=frame,
-                     target_names=target_col,
-                     feature_names=feat_cols,
-                     DESCR=fdescr)
+
+        if as_frame:
+            return Bunch(data=X,
+                        target=y,
+                        frame=frame,
+                        target_names=target_col,
+                        feature_names=feat_cols,
+                        DESCR=fdescr)
+
+        if return_X_y:
+            return X, y
 
     return Bunch(data=X, target=y, DESCR=fdescr)
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index c0b83d2bb691e..ca26a4494b840 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -31,6 +31,7 @@ def test_fetch_asframe(fetch_covtype_fxt):
     frame = bunch.frame
     assert hasattr(bunch, frame) is True
     assert frame.shape == (581012, 55)
+    assert isinstance(bunch.frame, pd.DataFrame)
     assert isinstance(bunch.data, pd.DataFrame)
     assert isinstance(bunch.target, pd.Series)
 

From 04feec5acabcf85a83e264256d021389d3b4f797 Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Mon, 15 Jun 2020 22:26:24 -0400
Subject: [PATCH 06/16] flake8 error

---
 sklearn/datasets/_covtype.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 00d4afbad6c76..ef4a08fbcee60 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -159,15 +159,15 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
         https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
         """
         feat_cols = ["Elevation",
-                        "Aspect",
-                        "Slope",
-                        "Horizontal_Distance_To_Hydrology",
-                        "Vertical_Distance_To_Hydrology",
-                        "Horizontal_Distance_To_Roadways",
-                        "Hillshade_9am",
-                        "Hillshade_Noon",
-                        "Hillshade_3pm",
-                        "Horizontal_Distance_To_Fire_Points"]
+                     "Aspect",
+                     "Slope",
+                     "Horizontal_Distance_To_Hydrology",
+                     "Vertical_Distance_To_Hydrology",
+                     "Horizontal_Distance_To_Roadways",
+                     "Hillshade_9am",
+                     "Hillshade_Noon",
+                     "Hillshade_3pm",
+                     "Horizontal_Distance_To_Fire_Points"]
         feat_cols += ['Wilderness_Area_'+str(i) for i in range(1, 5)]
         feat_cols += ['Soil_Type_'+str(i) for i in range(1, 41)]
         target_col = ["Cover_Type"]
@@ -177,11 +177,11 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
 
         if as_frame:
             return Bunch(data=X,
-                        target=y,
-                        frame=frame,
-                        target_names=target_col,
-                        feature_names=feat_cols,
-                        DESCR=fdescr)
+                         target=y,
+                         frame=frame,
+                         target_names=target_col,
+                         feature_names=feat_cols,
+                         DESCR=fdescr)
 
         if return_X_y:
             return X, y

From f8ff4155ee7771bc26f81ccb96b67e537e394745 Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Fri, 19 Jun 2020 12:22:44 -0400
Subject: [PATCH 07/16] trailing white spaces

---
 sklearn/datasets/_covtype.py           | 77 +++++++++++++-------------
 sklearn/datasets/descr/covtype.rst     |  6 +-
 sklearn/datasets/tests/test_covtype.py | 10 +---
 3 files changed, 46 insertions(+), 47 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 5ceedf68334c8..88ef4d6651748 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -42,6 +42,24 @@
 
 logger = logging.getLogger(__name__)
 
+"""
+Column names reference:
+https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
+"""
+FEATURE_NAMES = ["Elevation",
+                 "Aspect",
+                 "Slope",
+                 "Horizontal_Distance_To_Hydrology",
+                 "Vertical_Distance_To_Hydrology",
+                 "Horizontal_Distance_To_Roadways",
+                 "Hillshade_9am",
+                 "Hillshade_Noon",
+                 "Hillshade_3pm",
+                 "Horizontal_Distance_To_Fire_Points"]
+FEATURE_NAMES += ['Wilderness_Area_'+str(i) for i in range(1, 5)]
+FEATURE_NAMES += ['Soil_Type_'+str(i) for i in range(1, 41)]
+TARGET_NAMES = ["Cover_Type"]
+
 
 @_deprecate_positional_args
 def fetch_covtype(*, data_home=None, download_if_missing=True,
@@ -88,7 +106,7 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
         a pandas DataFrame or Series depending on the number of target columns.
         If `return_X_y` is True, then (`data`, `target`) will be pandas
         DataFrames or Series as described below.
-        .. versionadded:: 0.20
+        .. versionadded:: 0.24
 
     Returns
     -------
@@ -101,15 +119,16 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
             Each value corresponds to one of
             the 7 forest covertypes with values
             ranging between 1 to 7.
+        frame : dataframe of shape (581012, 55)
+            Only present when `as_frame=True`. Contains `data` and `target`.
         DESCR : str
             Description of the forest covertype dataset.
 
     (data, target) : tuple if ``return_X_y`` is True
 
-    dataframe: :class: `pandas.DataFrame`
-        Pandas dataframe
 
-        .. versionadded:: 0.20
+
+        .. versionadded:: 0.24
     """
 
     data_home = get_data_home(data_home=data_home)
@@ -153,37 +172,19 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
     with open(join(module_path, 'descr', 'covtype.rst')) as rst_file:
         fdescr = rst_file.read()
 
-    if as_frame or return_X_y:
-        """
-        Column names reference:
-        https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
-        """
-        feat_cols = ["Elevation",
-                     "Aspect",
-                     "Slope",
-                     "Horizontal_Distance_To_Hydrology",
-                     "Vertical_Distance_To_Hydrology",
-                     "Horizontal_Distance_To_Roadways",
-                     "Hillshade_9am",
-                     "Hillshade_Noon",
-                     "Hillshade_3pm",
-                     "Horizontal_Distance_To_Fire_Points"]
-        feat_cols += ['Wilderness_Area_'+str(i) for i in range(1, 5)]
-        feat_cols += ['Soil_Type_'+str(i) for i in range(1, 41)]
-        target_col = ["Cover_Type"]
-
-        frame, X, y = _convert_data_dataframe("fetch_covtype", X, y,
-                                              feat_cols, target_col)
-
-        if as_frame:
-            return Bunch(data=X,
-                         target=y,
-                         frame=frame,
-                         target_names=target_col,
-                         feature_names=feat_cols,
-                         DESCR=fdescr)
-
-        if return_X_y:
-            return X, y
-
-    return Bunch(data=X, target=y, DESCR=fdescr)
+    frame = None
+    if as_frame:
+        frame, X, y = _convert_data_dataframe(caller_name="fetch_covtype",
+                                              data=X,
+                                              target=y,
+                                              feature_names=FEATURE_NAMES,
+                                              target_names=TARGET_NAMES)
+    if return_X_y:
+        return X, y
+
+    return Bunch(data=X,
+                 target=y,
+                 frame=frame,
+                 target_names=TARGET_NAMES,
+                 feature_names=FEATURE_NAMES,
+                 DESCR=fdescr)
diff --git a/sklearn/datasets/descr/covtype.rst b/sklearn/datasets/descr/covtype.rst
index 4e79b5b89b9a1..0090b8e4a6b7d 100644
--- a/sklearn/datasets/descr/covtype.rst
+++ b/sklearn/datasets/descr/covtype.rst
@@ -22,7 +22,9 @@ while others are discrete or continuous measurements.
     =================   ============
 
 :func:`sklearn.datasets.fetch_covtype` will load the covertype dataset;
-it returns a dictionary-like object
+it returns a dictionary-like 'Bunch' object
 with the feature matrix in the ``data`` member
-and the target values in ``target``.
+and the target values in ``target``. If the optional argument ``as_frame`` is
+set to ``True``, ``data`` is returned as a pandas DataFrame and ``target`` as
+a pandas Series, and an additional ``frame`` member contains both.
 The dataset will be downloaded from the web if necessary.
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index ca26a4494b840..cbac452104816 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -1,9 +1,9 @@
 """Test the covtype loader, if the data is available,
 or if specifically requested via environment variable
 (e.g. for travis cron job)."""
+from functools import partial
 import pytest
 from sklearn.datasets.tests.test_common import check_return_X_y
-from functools import partial
 
 
 def test_fetch(fetch_covtype_fxt):
@@ -25,15 +25,11 @@ def test_fetch(fetch_covtype_fxt):
     check_return_X_y(data1, fetch_func)
 
 
-def test_fetch_asframe(fetch_covtype_fxt):
-    pd = pytest.importorskip('pandas')
+def test_fetch_asframe_shape(fetch_covtype_fxt):
     bunch = fetch_covtype_fxt(as_frame=True)
+    assert hasattr(bunch, 'frame')
     frame = bunch.frame
-    assert hasattr(bunch, frame) is True
     assert frame.shape == (581012, 55)
-    assert isinstance(bunch.frame, pd.DataFrame)
-    assert isinstance(bunch.data, pd.DataFrame)
-    assert isinstance(bunch.target, pd.Series)
 
 
 def test_pandas_dependency_message(fetch_covtype_fxt,

From 0134c73a66f0f86d5bbee42aa67a9ec0b99c9f42 Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Fri, 19 Jun 2020 16:12:16 -0400
Subject: [PATCH 08/16] [scipy-dev]

---
 sklearn/datasets/_covtype.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 88ef4d6651748..2e1489ae20130 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -127,7 +127,6 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
     (data, target) : tuple if ``return_X_y`` is True
 
 
-
         .. versionadded:: 0.24
     """
 

From 7e06f8f6760d9019ca42f51df27042ad0d811e1f Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Sat, 20 Jun 2020 12:01:06 -0400
Subject: [PATCH 09/16] versionadded error fixes

---
 sklearn/datasets/_covtype.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 2e1489ae20130..5d5fa77248c09 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -99,7 +99,7 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
     return_X_y : bool, default=False
         If True, returns ``(data.data, data.target)`` instead of a Bunch
         object.
-
+    .. versionadded:: 0.20
     as_frame : bool, default=False
         If True, the data is a pandas DataFrame including columns with
         appropriate dtypes (numeric). The target is
@@ -126,8 +126,8 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
 
     (data, target) : tuple if ``return_X_y`` is True
 
+        .. versionadded:: 0.20
 
-        .. versionadded:: 0.24
     """
 
     data_home = get_data_home(data_home=data_home)

From 8fc854e5131e6f99a8c80c59cfa1992d57eade31 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Sat, 20 Jun 2020 12:32:01 -0400
Subject: [PATCH 10/16] ENH Update style and starting idx

---
 sklearn/datasets/_covtype.py           | 17 ++++++++++-------
 sklearn/datasets/tests/test_covtype.py |  8 +++++++-
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 5d5fa77248c09..422ba293dffaa 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -56,8 +56,8 @@
                  "Hillshade_Noon",
                  "Hillshade_3pm",
                  "Horizontal_Distance_To_Fire_Points"]
-FEATURE_NAMES += ['Wilderness_Area_'+str(i) for i in range(1, 5)]
-FEATURE_NAMES += ['Soil_Type_'+str(i) for i in range(1, 41)]
+FEATURE_NAMES += [f"Wilderness_Area_{i}" for i in range(4)]
+FEATURE_NAMES += [f"Soil_Type_{i}" for i in range(40)]
 TARGET_NAMES = ["Cover_Type"]
 
 
@@ -99,13 +99,16 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
     return_X_y : bool, default=False
         If True, returns ``(data.data, data.target)`` instead of a Bunch
         object.
-    .. versionadded:: 0.20
+
+        .. versionadded:: 0.20
+
     as_frame : bool, default=False
         If True, the data is a pandas DataFrame including columns with
-        appropriate dtypes (numeric). The target is
-        a pandas DataFrame or Series depending on the number of target columns.
-        If `return_X_y` is True, then (`data`, `target`) will be pandas
-        DataFrames or Series as described below.
+        appropriate dtypes (numeric). The target is a pandas DataFrame or
+        Series depending on the number of target columns. If `return_X_y` is
+        True, then (`data`, `target`) will be pandas DataFrames or Series as
+        described below.
+
         .. versionadded:: 0.24
 
     Returns
diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index cbac452104816..ae9433e0f6981 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -25,12 +25,18 @@ def test_fetch(fetch_covtype_fxt):
     check_return_X_y(data1, fetch_func)
 
 
-def test_fetch_asframe_shape(fetch_covtype_fxt):
+def test_fetch_asframe(fetch_covtype_fxt):
     bunch = fetch_covtype_fxt(as_frame=True)
     assert hasattr(bunch, 'frame')
     frame = bunch.frame
     assert frame.shape == (581012, 55)
 
+    column_names = set(frame.columns)
+
+    # enumerated names are added correctly
+    assert set(f"Wilderness_Area_{i}" for i in range(4)) < column_names
+    assert set(f"Soil_Type_{i}" for i in range(40)) < column_names
+
 
 def test_pandas_dependency_message(fetch_covtype_fxt,
                                    hide_available_pandas):

From 6729f9a6f101ea94e884ae411bda44a995a568b8 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Sat, 20 Jun 2020 12:33:56 -0400
Subject: [PATCH 11/16] BLD [scipy-dev]


From 7b77c03ec4671e2d1292b5997918143e2b436d18 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Sat, 20 Jun 2020 13:27:34 -0400
Subject: [PATCH 12/16] DOC Use comment

---
 sklearn/datasets/_covtype.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 422ba293dffaa..653fe5d286247 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -42,10 +42,8 @@
 
 logger = logging.getLogger(__name__)
 
-"""
-Column names reference:
-https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
-"""
+# Column names reference:
+# https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.info
 FEATURE_NAMES = ["Elevation",
                  "Aspect",
                  "Slope",

From f9ee16235f74eba2f8e2abdf61f941015a1af683 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Sat, 20 Jun 2020 13:54:44 -0400
Subject: [PATCH 13/16] DOC documents target and feature names

---
 sklearn/datasets/_covtype.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py
index 653fe5d286247..0f3093807a83a 100644
--- a/sklearn/datasets/_covtype.py
+++ b/sklearn/datasets/_covtype.py
@@ -124,6 +124,10 @@ def fetch_covtype(*, data_home=None, download_if_missing=True,
             Only present when `as_frame=True`. Contains `data` and `target`.
         DESCR : str
             Description of the forest covertype dataset.
+        feature_names : list
+            The names of the dataset columns.
+        target_names : list
+            The names of the target columns.
 
     (data, target) : tuple if ``return_X_y`` is True
 

From 5ecc8202817694ead947cdaccb001f3c6e19d3e2 Mon Sep 17 00:00:00 2001
From: aliang <aliang@air>
Date: Sat, 20 Jun 2020 22:09:09 -0400
Subject: [PATCH 14/16] added what's new

---
 doc/whats_new/v0.24.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 6982079525f72..31c548e7de0b5 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -53,6 +53,12 @@ Changelog
   unless data is sparse.
   :pr:`17396` by :user:`Jiaxiang <fujiaxiang>`.
 
+- |Enhancement| :func:`datasets.fetch_covtype` now now supports optional
+  argument `as_frame`; when it is set to true, the returned Bunch object's
+  `data` and `target` members are in pandas DataFrame format, and the Bunch
+  object will also have an additional `frame` member as a pandas DataFrame.
+  :pr:`17491` by :user:`Alex Liang <tianchuliang>`.
+
 :mod:`sklearn.decomposition`
 ............................
 

From 335b4bfe73e2422a027065bc91d571723fecf77f Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Sat, 20 Jun 2020 22:26:10 -0400
Subject: [PATCH 15/16] DOC Update whats new

---
 doc/whats_new/v0.24.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst
index 31c548e7de0b5..a479edd25342b 100644
--- a/doc/whats_new/v0.24.rst
+++ b/doc/whats_new/v0.24.rst
@@ -53,11 +53,10 @@ Changelog
   unless data is sparse.
   :pr:`17396` by :user:`Jiaxiang <fujiaxiang>`.
 
-- |Enhancement| :func:`datasets.fetch_covtype` now now supports optional
-  argument `as_frame`; when it is set to true, the returned Bunch object's
-  `data` and `target` members are in pandas DataFrame format, and the Bunch
-  object will also have an additional `frame` member as a pandas DataFrame.
-  :pr:`17491` by :user:`Alex Liang <tianchuliang>`.
+- |Enhancement| :func:`datasets.fetch_covtype` now supports the optional
+  argument `as_frame`; when it is set to True, the returned Bunch object's
+  `data` and `frame` members are pandas DataFrames, and the `target` member is
+  a pandas Series. :pr:`17491` by :user:`Alex Liang <tianchuliang>`.
 
 :mod:`sklearn.decomposition`
 ............................

From 85718f32260bbb16b9e4aacb74f8b185ec315a81 Mon Sep 17 00:00:00 2001
From: "Thomas J. Fan" <thomasjpfan@gmail.com>
Date: Sat, 20 Jun 2020 22:26:42 -0400
Subject: [PATCH 16/16] TST Adds one more test

---
 sklearn/datasets/tests/test_covtype.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py
index ae9433e0f6981..df0989d66bb3a 100644
--- a/sklearn/datasets/tests/test_covtype.py
+++ b/sklearn/datasets/tests/test_covtype.py
@@ -30,6 +30,8 @@ def test_fetch_asframe(fetch_covtype_fxt):
     assert hasattr(bunch, 'frame')
     frame = bunch.frame
     assert frame.shape == (581012, 55)
+    assert bunch.data.shape == (581012, 54)
+    assert bunch.target.shape == (581012,)
 
     column_names = set(frame.columns)