diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst
index e1a39d3c70868..dde014edc05d9 100644
--- a/doc/modules/preprocessing.rst
+++ b/doc/modules/preprocessing.rst
@@ -485,7 +485,7 @@ estimator that supports imputation. See :ref:`sphx_glr_auto_examples_missing_val
 Transformer indicating missing values
 =====================================
 
-MissingIndicator transformer is useful to transform a dataset into corresponding
+The :class:`MissingIndicator` transformer converts a dataset into the corresponding
 binary matrix indicating the presence of missing values in the dataset.
 The knowledge of which features were imputed can be exploited by a downstream
 estimator by adding features that indicate which elements have been imputed.
@@ -508,9 +508,9 @@ estimator by adding features that indicate which elements have been imputed.
     MissingIndicator(features='train', missing_values=-1, sparse='auto')
     >>> X2_tr = MI.transform(X2)
     >>> X2_tr
-    array([[False, False,  True],
-           [ True,  True, False],
-           [False, False, False]], dtype=bool)
+    array([[0, 0, 1],
+           [1, 1, 0],
+           [0, 0, 0]], dtype=int32)
 
 
 .. _polynomial_features:
diff --git a/sklearn/preprocessing/imputation.py b/sklearn/preprocessing/imputation.py
index f614fa7660f0f..775c154103337 100644
--- a/sklearn/preprocessing/imputation.py
+++ b/sklearn/preprocessing/imputation.py
@@ -425,9 +425,9 @@ class MissingIndicator(BaseEstimator, TransformerMixin):
     MissingIndicator(features='train', missing_values=-1, sparse='auto')
     >>> X2_tr = MI.transform(X2)
     >>> X2_tr
-    array([[False,  True],
-           [ True, False],
-           [False, False]], dtype=bool)
+    array([[0, 1],
+           [1, 0],
+           [0, 0]], dtype=int32)
 
     """
 
@@ -438,11 +438,13 @@ def __init__(self, missing_values="NaN", features="train", sparse="auto"):
 
     def fit(self, X):
         """Fit the transformer on X.
+
         Parameters
         ----------
         X : {array-like, sparse matrix}, shape (n_samples, n_features)
             Input data, where ``n_samples`` is the number of samples and
             ``n_features`` is the number of features.
+
         Returns
         -------
         self : object
@@ -470,15 +472,17 @@ def fit(self, X):
         return self
 
     def transform(self, X):
-        """Impute all missing values in X.
+        """Generate missing values indicator for X.
+
         Parameters
         ----------
         X : {array-like, sparse matrix}, shape = [n_samples, n_features]
             The input data to complete.
+
         Returns
         -------
-        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
-            The transformerwith missing indicator.
+        Xt : {array-like, sparse matrix}, shape = [n_samples, n_features]
+            The missing indicator for the input data.
 
         """
         if self.features == "train":
@@ -486,16 +490,15 @@ def transform(self, X):
 
         X = check_array(X, accept_sparse=('csc', 'csr'), dtype=np.float64,
                         force_all_finite=False)
-
         imputer_mask, feat_with_missing = self._get_missing_features_info(X)
 
         if self.features == "train":
             features = np.setdiff1d(feat_with_missing,
                                     self.feat_with_missing_)
             if features.size:
-                warnings.warn("The features %s have missing "
-                              "values in transform but have no missing values"
-                              " in fit " % features, RuntimeWarning,
+                warnings.warn("The features %s have missing values "
+                              "in transform but have no missing values "
+                              "in fit " % features, RuntimeWarning,
                               stacklevel=1)
             imputer_mask = imputer_mask[:, self.feat_with_missing_]
 
@@ -522,6 +525,7 @@ def _get_missing_features_info(self, X):
             if sparse.issparse(X):
                 X = X.toarray()
             imputer_mask = _get_mask(X, self.missing_values)
+            imputer_mask = imputer_mask.astype(np.int32, copy=False)
             feat_with_missing = np.where(np.any(imputer_mask, axis=0))[0]
 
         if self.sparse is True:
@@ -531,5 +535,8 @@ def _get_missing_features_info(self, X):
                 imputer_mask = sparse.csc_matrix(imputer_mask)
         elif self.sparse is False and sparse.issparse(imputer_mask):
             imputer_mask = imputer_mask.toarray()
+        elif self.sparse == 'auto' and self.missing_values != 0:
+            if sparse.issparse(imputer_mask):
+                imputer_mask = imputer_mask.tocsc()
 
         return imputer_mask, feat_with_missing
diff --git a/sklearn/preprocessing/tests/test_imputation.py b/sklearn/preprocessing/tests/test_imputation.py
index 95cc8388641bd..9954ddcdc2208 100644
--- a/sklearn/preprocessing/tests/test_imputation.py
+++ b/sklearn/preprocessing/tests/test_imputation.py
@@ -380,15 +380,17 @@ def test_missing_indicator():
              [11,  -1,   1,  1]
     ])
 
-    def assert_type(actual, expect, sp, missing_values):
-        if sp is True and missing_values != 0:
+    def assert_type(actual, is_sparse, sp, missing_values):
+        if sp is True:  # sparse=True: output is expected to be CSC
             assert_equal(actual, sparse.csc_matrix)
-        elif (sp is True and missing_values == 0) or \
-            sp is False:
+        elif (sp == "auto" and missing_values == 0) \
+            or sp is False:
             assert_equal(actual, np.ndarray)
         else:
-            print type(retype(X2)), sp, missing_values, type(X2_tr)
-            assert_equal(actual, expect)
+            if is_sparse:  # remaining case: output follows the input kind
+                assert_equal(actual, sparse.csc_matrix)
+            else:
+                assert_equal(actual, np.ndarray)
     
     def assert_mask(actual, expected, features):
         if hasattr(actual, 'toarray'):
@@ -396,39 +398,43 @@ def assert_mask(actual, expected, features):
         else:
             assert_array_equal(actual, expected[:, features])
 
-    for X1, X2, missing_values in [(X1_orig, X2_orig, -1),
-                                   (X1_orig + 1, X2_orig + 1, 0)]:
+    def _check_missing_indicator(X1, X2, retype, sp, missing_values):
         mask = X2 == missing_values
         expect_feat_missing = np.where(np.any(X1 == missing_values, axis=0))[0]
-        for retype in [np.array, sparse.csr_matrix,
+
+        X1_in = retype(X1)
+        X2_in = retype(X2)
+        # features = "train" (the default)
+        MI = MissingIndicator(missing_values=missing_values,
+                              sparse=sp)
+
+        MI.fit(X1_in)
+        X2_tr = MI.transform(X2_in)
+        features = MI.feat_with_missing_
+        assert_array_equal(expect_feat_missing, features)
+        assert_type(type(X2_tr), sparse.issparse(X2_in), sp, missing_values)
+        assert_mask(X2_tr, mask, features)
+
+        # features = "all"
+        MI = clone(MI).set_params(features="all")
+        MI.fit(X1_in)
+        X2_tr = MI.transform(X2_in)
+        features = np.arange(X2.shape[1])
+        assert_mask(X2_tr, mask, features)
+
+        # features = [1, 2]
+        features = [1, 2]
+        MI = clone(MI).set_params(features=features)
+        MI.fit(X1_in)
+        X2_tr = MI.transform(X2_in)
+        assert_mask(X2_tr, mask, features)
+
+    for X1, X2, missing_values in [(X1_orig, X2_orig, -1),
+                                   (X1_orig + 1, X2_orig + 1, 0)]:
+        for retype in [lambda x: x.tolist(), np.array, sparse.csr_matrix,
                        sparse.csc_matrix, sparse.lil_matrix]:
             for sp in [True, False, 'auto']:
-                X1_ft = retype(X1)
-                X2_t = retype(X2)
-                # features = "train":
-                MI = MissingIndicator(missing_values=missing_values,
-                                      sparse = sp)
-
-                MI.fit(X1_ft)
-                X2_tr = MI.transform(X2_t)
-                features = MI.feat_with_missing_
-                assert_array_equal(expect_feat_missing, features)
-                assert_type(type(X2_tr), type(X2_t), sp, missing_values)
-                assert_mask(X2_tr, mask, features)
-
-                # features = "all"
-                MI = clone(MI).set_params(features="all")
-                MI.fit(X1_ft)
-                X2_tr = MI.transform(retype(X2))
-                features = np.arange(X2.shape[1])
-                assert_mask(X2_tr, mask, features)
-
-                # features = [1, 2]
-                features = [1, 2]
-                MI = clone(MI).set_params(features=features)
-                MI.fit(X1_ft)
-                X2_tr = MI.transform(X2_t)
-                assert_mask(X2_tr, mask, features)
+                _check_missing_indicator(X1, X2, retype, sp, missing_values)
 
 
 def test_missing_indicator_warning():