rasbt · rasbt · Jun 2, 2017 · Jun 2, 2017 · Jun 2, 2017 · Jun 2, 2017
diff --git a/docs/sources/CHANGELOG.md b/docs/sources/CHANGELOG.md
@@ -23,9 +23,8 @@ The CHANGELOG for the current development version is available at
 
 - The TensorFlow estimator have been removed from mlxtend, since TensorFlow has now very convenient ways to build on estimators, which render those implementations obsolete.
 - `plot_decision_regions` now supports plotting decision regions for more than 2 training features. (via [James Bourbeau](https://github.com/jrbourbeau)).
-
-
 - Parallel execution in `mlxtend.feature_selection.SequentialFeatureSelector` and `mlxtend.feature_selection.ExhaustiveFeatureSelector` is now performed over different feature subsets instead of the different cross-validation folds to better utilize machines with multiple processors if the number of features is large ([#193](https://github.com/rasbt/mlxtend/pull/193), via [@whalebot-helmsman](https://github.com/whalebot-helmsman)).
+- Raise meaningful error messages if pandas `DataFrame`s or Python lists of lists are fed into the `StackingCVClassifier` as `fit` arguments.
 
 ##### Bug Fixes
 

diff --git a/docs/sources/user_guide/classifier/StackingCVClassifier.ipynb b/docs/sources/user_guide/classifier/StackingCVClassifier.ipynb
diff --git a/.../user_guide/classifier/StackingCVClassifier_files/StackingCVClassifier_13_0.png b/.../user_guide/classifier/StackingCVClassifier_files/StackingCVClassifier_13_0.png
diff --git a/mlxtend/classifier/stacking_cv_classification.py b/mlxtend/classifier/stacking_cv_classification.py
@@ -105,10 +105,11 @@ def fit(self, X, y):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+        X : numpy array, shape = [n_samples, n_features]
             Training vectors, where n_samples is the number of samples and
             n_features is the number of features.
-        y : array-like, shape = [n_samples]
+
+        y : numpy array, shape = [n_samples]
             Target values.
 
         Returns
@@ -158,7 +159,20 @@ def fit(self, X, y):
                     print("Training and fitting fold %d of %d..." %
                           ((num + 1), self.n_folds))
 
-                model.fit(X[train_index], y[train_index])
+                try:
+                    model.fit(X[train_index], y[train_index])
+                except TypeError as e:
+                    raise TypeError(str(e) + '\nPlease check that X and y'
+                                    ' are NumPy arrays. If X and y are lists'
+                                    ' of lists,\ntry passing them as'
+                                    ' numpy.array(X)'
+                                    ' and numpy.array(y).')
+                except KeyError as e:
+                    raise KeyError(str(e) + '\nPlease check that X and y'
+                                   ' are NumPy arrays. If X and y are pandas'
+                                   ' DataFrames,\ntry passing them as'
+                                   ' X.values'
+                                   ' and y.values.')
 
                 if not self.use_probas:
                     prediction = model.predict(X[test_index])
@@ -223,7 +237,7 @@ def predict(self, X):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+        X : numpy array, shape = [n_samples, n_features]
             Training vectors, where n_samples is the number of samples and
             n_features is the number of features.
 
@@ -257,7 +271,7 @@ def predict_proba(self, X):
 
         Parameters
         ----------
-        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+        X : numpy array, shape = [n_samples, n_features]
             Training vectors, where n_samples is the number of samples and
             n_features is the number of features.
 

diff --git a/mlxtend/classifier/tests/test_stacking_cv_classifier.py b/mlxtend/classifier/tests/test_stacking_cv_classifier.py
@@ -7,6 +7,7 @@
 
 from mlxtend.classifier import StackingCVClassifier
 
+import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.naive_bayes import GaussianNB
 from sklearn.ensemble import RandomForestClassifier
@@ -188,3 +189,36 @@ def test_verbose():
                                 shuffle=False,
                                 verbose=3)
     sclf.fit(iris.data, iris.target)
+
+
+def test_list_of_lists():
+    # fit() must reject a plain list of rows with the TypeError hint.
+    X_list = [i for i in X]
+    sclf = StackingCVClassifier(classifiers=[RandomForestClassifier(),
+                                             GaussianNB()],
+                                use_probas=True,
+                                meta_classifier=LogisticRegression(),
+                                shuffle=False,
+                                verbose=0)
+    try:
+        sclf.fit(X_list, iris.target)
+    except TypeError as e:
+        assert 'are NumPy arrays. If X and y are lists' in str(e)
+    else:  # without this branch the test passes when nothing is raised
+        raise AssertionError('fit() accepted a list of rows without error')
+
+
+def test_pandas():
+    X_df = pd.DataFrame(X)  # fit() must reject this with the KeyError hint
+    sclf = StackingCVClassifier(classifiers=[RandomForestClassifier(),
+                                             GaussianNB()],
+                                use_probas=True,
+                                meta_classifier=LogisticRegression(),
+                                shuffle=False,
+                                verbose=0)
+    try:
+        sclf.fit(X_df, iris.target)
+    except KeyError as e:
+        assert 'are NumPy arrays. If X and y are pandas DataFrames' in str(e)
+    else:  # without this branch the test passes when nothing is raised
+        raise AssertionError('fit() accepted a DataFrame without error')