[MRG+2] LogisticRegression convert to float64 (newton-cg) #8835

Merged
merged 26 commits on Jun 7, 2017
Changes from 19 commits
Commits (26)
520a2b3
Add a test to ensure not changing the input's data type
massich May 1, 2017
0d332bf
[WIP] Force X to remain float32. (self.coef_ remains float64 even if …
massich May 2, 2017
9ec920b
[WIP] ensure self.coef_ same type as X
massich May 5, 2017
d3cf91c
keep the np.float32 when multi_class='multinomial'
massich May 5, 2017
cb56481
Avoid hardcoded type for multinomial
massich May 5, 2017
14450a2
pass flake8
massich May 19, 2017
7adbb24
Ensure that the results in 32bits are the same as in 64
massich May 19, 2017
c0f6930
Address Gael's comments for multi_class=='ovr'
massich May 30, 2017
e6a3c23
Add multi_class=='multinominal' to test
massich May 30, 2017
4ac33e8
Add support for multi_class=='multinominal'
massich May 30, 2017
2f194b6
prefer float64 to float32
massich May 30, 2017
8ea7682
Force X and y to have the same type
massich May 30, 2017
75dc160
Revert "Add support for multi_class=='multinominal'"
massich May 30, 2017
03b6a57
remvert more stuff
massich May 30, 2017
18a5ac3
clean up some commmented code
massich May 30, 2017
05b750e
allow class_weight to take advantage of float32
massich May 30, 2017
4bef1da
Add a test where X.dtype is different of y.dtype
massich May 31, 2017
f63ef77
Address @raghavrv comments
massich Jun 2, 2017
1a29e0b
address the rest of @raghavrv's comments
massich Jun 2, 2017
366f751
Revert class_weight
massich Jun 2, 2017
093f25a
Avoid copying if dtype matches
massich Jun 2, 2017
fb545de
Address alex comment to the cast from inside _multinomial_loss_grad
massich Jun 6, 2017
15d5079
address alex comment
massich Jun 6, 2017
057e2e0
add sparsity test
Jun 6, 2017
0a07a2e
Merge pull request #3 from Henley13/is/8769
massich Jun 6, 2017
b80cd39
Addressed Tom comment of checking that we keep the 64 aswell
massich Jun 6, 2017
21 changes: 14 additions & 7 deletions sklearn/linear_model/logistic.py
@@ -337,10 +337,12 @@ def _multinomial_loss_grad(w, X, Y, alpha, sample_weight):
n_classes = Y.shape[1]
n_features = X.shape[1]
fit_intercept = (w.size == n_classes * (n_features + 1))
- grad = np.zeros((n_classes, n_features + bool(fit_intercept)))
+ grad = np.zeros((n_classes, n_features + bool(fit_intercept)),
+                 dtype=X.dtype)
loss, p, w = _multinomial_loss(w, X, Y, alpha, sample_weight)

Member:
Doesn't _multinomial_loss return float32 if the dtype of X and y is float32? That would avoid the diff = diff.astype(X.dtype, copy=False) below.

Contributor Author:
Actually, _multinomial_loss does keep all the types as float32. The problem is that Y in diff = sample_weight * (p - Y) (here) is of type int64, and therefore diff becomes float64.

Y is set (here) as the second element of args, based on target, which is in turn set from y_bin or Y_multi. The former is fine, while the latter is determined by the transform() of LabelBinarizer or LabelEncoder (here, here).

We could transform target to X.dtype as in the saga case (here), in the following manner:

target = target.astype(X.dtype)
args = (X, target, 1. / C, sample_weight)

or we could propagate the change inside fit_transform.

Any thoughts, @agramfort?

cc: @Henley13, @GaelVaroquaux, @raghavrv
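
For reference, a minimal NumPy sketch of the promotion described above (illustrative shapes and values, not part of the diff): an integer-typed Y pulls the float32 arithmetic up to float64, while casting it to X.dtype first keeps everything in float32.

import numpy as np

p = np.random.rand(5, 3).astype(np.float32)        # float32 probabilities
Y = np.eye(3, dtype=np.int64)[[0, 2, 1, 0, 2]]     # one-hot labels, int64
sample_weight = np.ones((5, 1), dtype=np.float32)

diff = sample_weight * (p - Y)
print(diff.dtype)                                   # float64: the int64 Y promotes the result

diff32 = sample_weight * (p - Y.astype(np.float32))
print(diff32.dtype)                                 # float32: promotion avoided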

Member:
I would convert Y to the dtype at the top.

Member:
> We could transform target to X.dtype as in the saga case (here), in the following manner:
>
> target = target.astype(X.dtype)
> args = (X, target, 1. / C, sample_weight)

+1

Contributor Author:
@agramfort, y is already in the correct form. Are you proposing:

Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)

Member:
yes.

sample_weight = sample_weight[:, np.newaxis]
diff = sample_weight * (p - Y)
+ diff = diff.astype(X.dtype)

Member:
This line introduces a memory copy. I think that we should do it only if X.dtype != diff.dtype.

Member:
Yeah, we should set copy=False to avoid copying if it is already of the same dtype...
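
A small NumPy illustration of the copy=False suggestion (not part of the diff): astype(dtype, copy=False) returns the array itself when the dtype already matches, so the extra allocation only happens when a conversion is actually needed.

import numpy as np

diff = np.random.rand(4, 2).astype(np.float32)

same = diff.astype(np.float32, copy=False)       # dtype already matches: no copy is made
assert same is diff

converted = diff.astype(np.float64, copy=False)  # dtype differs: a copy is unavoidable
assert converted is not diff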

grad[:, :n_features] = safe_sparse_dot(diff.T, X)
grad[:, :n_features] += alpha * w
if fit_intercept:
@@ -608,10 +610,10 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
# and check length
# Otherwise set them to 1 for all examples
if sample_weight is not None:
- sample_weight = np.array(sample_weight, dtype=np.float64, order='C')
+ sample_weight = np.array(sample_weight, dtype=X.dtype, order='C')

Member:
Should it be y.dtype?

cc: @agramfort

Contributor Author:
We discussed with @glemaitre (and @GaelVaroquaux) forcing X.dtype and y.dtype to be the same.

Member:
Yes, I think the idea should be that the dtype of X conditions the dtype of the computation.

There should be an RFC about this, and it should be included in the docs.

Contributor Author:
see #8976

check_consistent_length(y, sample_weight)
else:
- sample_weight = np.ones(X.shape[0])
+ sample_weight = np.ones(X.shape[0], dtype=X.dtype)

# If class_weights is a dict (provided by the user), the weights
# are assigned to the original labels. If it is "balanced", then
@@ -624,10 +626,10 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
# For doing a ovr, we need to mask the labels first. for the
# multinomial case this is not necessary.
if multi_class == 'ovr':
- w0 = np.zeros(n_features + int(fit_intercept))
+ w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)
mask_classes = np.array([-1, 1])
mask = (y == pos_class)
- y_bin = np.ones(y.shape, dtype=np.float64)
+ y_bin = np.ones(y.shape, dtype=X.dtype)
y_bin[~mask] = -1.
# for compute_class_weight

@@ -648,7 +650,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
Y_multi = le.fit_transform(y)

w0 = np.zeros((classes.size, n_features + int(fit_intercept)),
-                 order='F')
+                 order='F', dtype=X.dtype)

if coef is not None:
# it must work both giving the bias term and not
@@ -1203,7 +1205,12 @@ def fit(self, X, y, sample_weight=None):
raise ValueError("Tolerance for stopping criteria must be "
"positive; got (tol=%r)" % self.tol)

- X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,
+ if self.solver in ['newton-cg']:
+     _dtype = [np.float64, np.float32]

Member:
Sorry if I am missing something, but why?

Contributor Author:
The idea is that previously check_X_y converted X and y to np.float64. That is fine if the user passes a list as X, but if a user deliberately passes np.float32 data, converting it to np.float64 penalizes them in both memory and speed.

Therefore, we are trying to keep the data in np.float32 when the user provides it in that dtype.
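
A short sketch of that behaviour (hypothetical data, not part of the diff; it relies on check_X_y accepting a list of dtypes and converting only when the input dtype is not already in the list):

import numpy as np
from sklearn.utils import check_X_y

X32 = np.random.rand(20, 3).astype(np.float32)
y = np.array([0, 1] * 10)

# a float32 array is in the accepted list, so its dtype is preserved
X_checked, y_checked = check_X_y(X32, y, dtype=[np.float64, np.float32])
print(X_checked.dtype)   # float32

# a plain Python list has no dtype, so it is converted to the first entry, float64
X_checked, y_checked = check_X_y(X32.tolist(), y, dtype=[np.float64, np.float32])
print(X_checked.dtype)   # float64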

Member:
The fact that @raghavrv asked a question suggests that a short comment explaining the logic would be useful here.

Contributor Author:
I think @raghavrv was more concerned about the fact that we pass a list of dtypes rather than forcing one or the other. Once we checked that check_X_y takes care of it, he was OK with it.

@raghavrv, any comments?

+ else:
+     _dtype = np.float64
+
+ X, y = check_X_y(X, y, accept_sparse='csr', dtype=_dtype,
order="C")
check_classification_targets(y)
self.classes_ = np.unique(y)
22 changes: 22 additions & 0 deletions sklearn/linear_model/tests/test_logistic.py
@@ -1136,3 +1136,25 @@ def test_saga_vs_liblinear():
liblinear.fit(X, y)
# Convergence for alpha=1e-3 is very slow
assert_array_almost_equal(saga.coef_, liblinear.coef_, 3)


def test_dtype_match():
# Test that np.float32 input data is not cast to np.float64 when possible

X_32 = np.array(X).astype(np.float32)
y_32 = np.array(Y1).astype(np.float32)
X_64 = np.array(X).astype(np.float64)
y_64 = np.array(Y1).astype(np.float64)

for solver in ['newton-cg']:

Member:
Not all solvers are safe with the cast to float64? If so, is it documented somewhere?

Contributor Author:
Not all the solvers are safe with a cast to float32. The idea is to have X_64 and X_32 so that adding a solver that is not safe breaks the test and lets us track the failure.

Member:
Do one solver at a time, but I think it's doable for all solvers.

for multi_class in ['ovr', 'multinomial']:

Member:
Can you remove this new line?

# Check type consistency
lr_32 = LogisticRegression(solver=solver, multi_class=multi_class)
lr_32.fit(X_32, y_32)
assert_equal(lr_32.coef_.dtype, X_32.dtype)

# Check accuracy consistency
lr_64 = LogisticRegression(solver=solver, multi_class=multi_class)
lr_64.fit(X_64, y_64)

Member:
Please add:

assert_equal(lr_64.coef_.dtype, X_64.dtype)

Otherwise this test passes even when we transform everything to 32 bits.

assert_almost_equal(lr_32.coef_, lr_64.coef_.astype(np.float32))
8 changes: 5 additions & 3 deletions sklearn/utils/class_weight.py
@@ -41,12 +41,14 @@ def compute_class_weight(class_weight, classes, y):
# Import error caused by circular imports.
from ..preprocessing import LabelEncoder

+ _dtype = y.dtype

if set(y) - set(classes):
raise ValueError("classes should include all valid labels that can "
"be in y")
if class_weight is None or len(class_weight) == 0:
# uniform class weights
- weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
+ weight = np.ones(classes.shape[0], dtype=_dtype, order='C')

Member:
Hmm, isn't there a risk here of casting to integer dtypes, which could later create numerical errors?
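
A toy example of the concern (simplified, not the actual compute_class_weight code): if y holds integer labels, _dtype = y.dtype is an integer type, and fractional class weights written into an integer-typed array are silently truncated.

import numpy as np

y = np.array([0, 0, 1, 1, 1])                  # integer labels, so y.dtype is an int type
classes = np.array([0, 1])
class_weight = {0: 0.5, 1: 1.5}

weight = np.ones(classes.shape[0], dtype=y.dtype, order='C')
for i, c in enumerate(classes):
    weight[i] = class_weight[c]                # 0.5 and 1.5 are truncated to 0 and 1

print(weight)                                   # [0 1] -- the fractional weights are lost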

Contributor Author:
Actually, class_weight doesn't need to be modified. I thought I had fixed it.

Member:
I think I disagree. You will have an operation between sample_weight and class_weight, for instance there. Because the multiplication is done in place, I agree that sample_weight will end up with the expected type. However, class_weight should not always be cast to np.float64 when sample_weight is np.float32: that implies an up-conversion whenever the two arrays are combined.

What I mean is something like that:

x = np.random.random((1000000, 1)).astype(np.float32)
y = np.random.random((1000000, 1)).astype(np.float32)
%timeit x * y
# 807 µs ± 17.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

x = np.random.random((1000000, 1)).astype(np.float32)
y = np.random.random((1000000, 1)).astype(np.float64)
%timeit x * y
# 1.83 ms ± 8.57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

Does it make sense?

elif class_weight == 'balanced':
# Find the weight of each class as present in y.
le = LabelEncoder()
@@ -55,11 +57,11 @@ def compute_class_weight(class_weight, classes, y):
raise ValueError("classes should have valid labels that are in y")

recip_freq = len(y) / (len(le.classes_) *
-                       bincount(y_ind).astype(np.float64))
+                       bincount(y_ind).astype(_dtype))
weight = recip_freq[le.transform(classes)]
else:
# user-defined dictionary
- weight = np.ones(classes.shape[0], dtype=np.float64, order='C')
+ weight = np.ones(classes.shape[0], dtype=_dtype, order='C')
if not isinstance(class_weight, dict):
raise ValueError("class_weight must be dict, 'balanced', or None,"
" got: %r" % class_weight)