Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG+1] Use astype(.., copy=False) when possible #11973

Merged
merged 14 commits into from Mar 1, 2019

Address review comments

  • Loading branch information...
rth committed Sep 26, 2018
commit 5b7b7cc684287238c2da71210033bc6309503ce3
@@ -480,7 +480,7 @@ def linkage_tree(X, connectivity=None, n_clusters=None, linkage='complete',

if affinity == 'precomputed':
distances = X[connectivity.row, connectivity.col].astype(
'float64', **_astype_copy_false(X))
'float64', **_astype_copy_false(X))
else:
# FIXME: we compute all the distances, whereas we could have computed only
# the "interesting" distances
@@ -283,7 +283,7 @@ def test_scikit_vs_scipy():

out = hierarchy.linkage(X, method=linkage)

children_ = out[:, :2].astype(np.int)
children_ = out[:, :2].astype(np.int, copy=False)
children, _, n_leaves, _ = _TREE_BUILDERS[linkage](X, connectivity)

# Sort the order of child nodes per row for consistency
@@ -241,7 +241,7 @@ def test_dump():
# where X.astype(dtype) overflows. The result is
# then platform dependent and X_dense.astype(dtype) may be
# different from X_sparse.astype(dtype).toarray().
X_input = X.astype(dtype)
X_input = X.astype(dtype, copy=False)

dump_svmlight_file(X_input, y, f, comment="test",
zero_based=zero_based)
@@ -273,13 +273,13 @@ def test_dump():
assert_array_almost_equal(
X_input_dense, X2_dense, 4)
assert_array_almost_equal(
y_dense.astype(dtype), y2, 4)
y_dense.astype(dtype, copy=False), y2, 4)
else:
# allow a rounding error at the last decimal place
assert_array_almost_equal(
X_input_dense, X2_dense, 15)
assert_array_almost_equal(
y_dense.astype(dtype), y2, 15)
y_dense.astype(dtype, copy=False), y2, 15)


def test_dump_multilabel():
Copy path View file
@@ -352,7 +352,7 @@ def _dense_fit(self, X, strategy, missing_values, fill_value):
most_frequent = np.empty(X.shape[0])

for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):
row_mask = np.logical_not(row_mask).astype(np.bool, copy=False)
row_mask = np.logical_not(row_mask)
row = row[row_mask]
most_frequent[i] = _most_frequent(row, np.nan, 0)

@@ -852,7 +852,7 @@ def normalized_mutual_info_score(labels_true, labels_pred,
classes.shape[0] == clusters.shape[0] == 0):
return 1.0
contingency = contingency_matrix(labels_true, labels_pred, sparse=True)
contingency = contingency.astype(np.float64)
contingency = contingency.astype(np.float64, copy=False)
# Calculate the MI for the two clusterings
mi = mutual_info_score(labels_true, labels_pred,
contingency=contingency)
@@ -856,10 +856,12 @@ def test_confusion_matrix_dtype():
assert_equal(cm.dtype, np.int64)
# The dtype of confusion_matrix is always 64 bit
for dtype in [np.bool_, np.int32, np.uint64]:
cm = confusion_matrix(y, y, sample_weight=weight.astype(dtype))
cm = confusion_matrix(y, y,
sample_weight=weight.astype(dtype, copy=False))
assert_equal(cm.dtype, np.int64)
for dtype in [np.float32, np.float64, None, object]:
cm = confusion_matrix(y, y, sample_weight=weight.astype(dtype))
cm = confusion_matrix(y, y,
sample_weight=weight.astype(dtype, copy=False))
assert_equal(cm.dtype, np.float64)

# np.iinfo(np.uint32).max should be accumulated correctly
@@ -285,7 +285,7 @@ def _dense_fit(self, X, strategy, missing_values, axis):
most_frequent = np.empty(X.shape[0])

for i, (row, row_mask) in enumerate(zip(X[:], mask[:])):
row_mask = np.logical_not(row_mask).astype(np.bool, copy=False)
row_mask = np.logical_not(row_mask)
row = row[row_mask]
most_frequent[i] = _most_frequent(row, np.nan, 0)

ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.