Skip to content

Commit

Permalink
Remove use of AnnData constructor dtype kwarg (#2658) (#2659)
Browse files (browse the repository at this point in the history)
* Remove use of AnnData constructor dtype kwarg (#2658)

* Remove use of AnnData constructor dtype kwarg

* release note

* Fix release note

(cherry picked from commit 0b49eeb)

* Fix test

* Set release date

* Add release notes to index
  • Loading branch information
ivirshup committed Sep 8, 2023
1 parent efce8f8 commit 46969b4
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 26 deletions.
6 changes: 6 additions & 0 deletions docs/release-notes/1.9.5.md
@@ -0,0 +1,6 @@
### 1.9.5 {small}`2023-09-08`

```{rubric} Bug fixes
```

- Remove use of the deprecated `dtype` argument to the `AnnData` constructor {pr}`2658` {smaller}`Isaac Virshup`
3 changes: 3 additions & 0 deletions docs/release-notes/release-latest.md
@@ -1,5 +1,8 @@
## Version 1.9

```{include} /release-notes/1.9.5.md
```

```{include} /release-notes/1.9.4.md
```

Expand Down
6 changes: 3 additions & 3 deletions scanpy/datasets/_datasets.py
Expand Up @@ -49,7 +49,7 @@ def blobs(
cluster_std=cluster_std,
random_state=0,
)
return ad.AnnData(X, obs=dict(blobs=y.astype(str)), dtype=X.dtype)
return ad.AnnData(X, obs=dict(blobs=y.astype(str)))


@check_datasetdir_exists
Expand Down Expand Up @@ -172,13 +172,13 @@ def paul15() -> ad.AnnData:
backup_url = 'http://falexwolf.de/data/paul15.h5'
_utils.check_presence_download(filename, backup_url)
with h5py.File(filename, 'r') as f:
X = f['data.debatched'][()]
X = f['data.debatched'][()].astype(np.float32)
gene_names = f['data.debatched_rownames'][()].astype(str)
cell_names = f['data.debatched_colnames'][()].astype(str)
clusters = f['cluster.id'][()].flatten().astype(int)
infogenes_names = f['info.genes_strings'][()].astype(str)
# each row has to correspond to a observation, therefore transpose
adata = ad.AnnData(X.transpose(), dtype=np.float32)
adata = ad.AnnData(X.transpose())
adata.var_names = gene_names
adata.row_names = cell_names
# names reflecting the cell type identifications from the paper
Expand Down
2 changes: 1 addition & 1 deletion scanpy/testing/_helpers/__init__.py
Expand Up @@ -21,7 +21,7 @@

def check_rep_mutation(func, X, *, fields=("layer", "obsm"), **kwargs):
"""Check that only the array meant to be modified is modified."""
adata = sc.AnnData(X=X.copy(), dtype=X.dtype)
adata = sc.AnnData(X=X.copy())
for field in fields:
sc.get._set_obs_rep(adata, X, **{field: field})
X_array = asarray(X)
Expand Down
9 changes: 3 additions & 6 deletions scanpy/tests/test_get.py
Expand Up @@ -37,15 +37,14 @@ def adata():
adata.layers['double'] is sparse np.ones((2,2)) * 2 to also test sparse matrices
"""
return AnnData(
X=np.ones((2, 2)),
X=np.ones((2, 2), dtype=int),
obs=pd.DataFrame(
{"obs1": [0, 1], "obs2": ["a", "b"]}, index=["cell1", "cell2"]
),
var=pd.DataFrame(
{"gene_symbols": ["genesymbol1", "genesymbol2"]}, index=["gene1", "gene2"]
),
layers={"double": sparse.csr_matrix(np.ones((2, 2)), dtype=int) * 2},
dtype=int,
)


Expand All @@ -60,12 +59,11 @@ def test_obs_df(adata):

# make raw with different genes than adata
adata.raw = AnnData(
X=np.array([[1, 2, 3], [2, 4, 6]]),
X=np.array([[1, 2, 3], [2, 4, 6]], dtype=np.float64),
var=pd.DataFrame(
{"gene_symbols": ["raw1", "raw2", 'raw3']},
index=["gene2", "gene3", "gene4"],
),
dtype='float64',
)
pd.testing.assert_frame_equal(
sc.get.obs_df(
Expand Down Expand Up @@ -157,9 +155,8 @@ def test_repeated_gene_symbols():
gene_symbols = [f"symbol_{i}" for i in ["a", "b", "b", "c"]]
var_names = pd.Index([f"id_{i}" for i in ["a", "b.1", "b.2", "c"]])
adata = sc.AnnData(
np.arange(3 * 4).reshape((3, 4)),
np.arange(3 * 4, dtype=np.float32).reshape((3, 4)),
var=pd.DataFrame({"gene_symbols": gene_symbols}, index=var_names),
dtype=np.float32,
)

with pytest.raises(KeyError, match="symbol_b"):
Expand Down
2 changes: 1 addition & 1 deletion scanpy/tests/test_highly_variable_genes.py
Expand Up @@ -483,7 +483,7 @@ def test_seurat_v3_mean_var_output_with_batchkey():

def test_cellranger_n_top_genes_warning():
X = np.random.poisson(2, (100, 30))
adata = sc.AnnData(X, dtype=X.dtype)
adata = sc.AnnData(X)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

Expand Down
16 changes: 8 additions & 8 deletions scanpy/tests/test_normalization.py
Expand Up @@ -37,14 +37,14 @@ def typ(request):


@pytest.mark.parametrize('dtype', ['float32', 'int64'])
def test_normalize_total(typ, dtype):
adata = AnnData(typ(X_total), dtype=dtype)
def test_normalize_total(array_type, dtype):
adata = AnnData(array_type(X_total).astype(dtype))
sc.pp.normalize_total(adata, key_added='n_counts')
assert np.allclose(np.ravel(adata.X.sum(axis=1)), [3.0, 3.0, 3.0])
sc.pp.normalize_total(adata, target_sum=1, key_added='n_counts2')
assert np.allclose(np.ravel(adata.X.sum(axis=1)), [1.0, 1.0, 1.0])

adata = AnnData(typ(X_frac), dtype=dtype)
adata = AnnData(array_type(X_frac).astype(dtype))
sc.pp.normalize_total(adata, exclude_highly_expressed=True, max_fraction=0.7)
assert np.allclose(np.ravel(adata.X[:, 1:3].sum(axis=1)), [1.0, 1.0, 1.0])

Expand All @@ -59,17 +59,17 @@ def test_normalize_total_rep(typ, dtype):


@pytest.mark.parametrize('dtype', ['float32', 'int64'])
def test_normalize_total_layers(typ, dtype):
adata = AnnData(typ(X_total), dtype=dtype)
def test_normalize_total_layers(array_type, dtype):
adata = AnnData(array_type(X_total).astype(dtype))
adata.layers["layer"] = adata.X.copy()
with pytest.warns(FutureWarning, match=r".*layers.*deprecated"):
sc.pp.normalize_total(adata, layers=["layer"])
assert np.allclose(adata.layers["layer"].sum(axis=1), [3.0, 3.0, 3.0])


@pytest.mark.parametrize('dtype', ['float32', 'int64'])
def test_normalize_total_view(typ, dtype):
adata = AnnData(typ(X_total), dtype=dtype)
def test_normalize_total_view(array_type, dtype):
adata = AnnData(array_type(X_total).astype(dtype))
v = adata[:, :]

sc.pp.normalize_total(v)
Expand Down Expand Up @@ -127,7 +127,7 @@ def test_normalize_pearson_residuals_values(sparsity_func, dtype, theta, clip):
residuals_reference = (X - mu) / np.sqrt(mu + mu**2 / theta)

# compute output to test
adata = AnnData(sparsity_func(X), dtype=dtype)
adata = AnnData(sparsity_func(X).astype(dtype))
output = sc.experimental.pp.normalize_pearson_residuals(
adata, theta=theta, clip=clip, inplace=False
)
Expand Down
8 changes: 4 additions & 4 deletions scanpy/tests/test_preprocessing.py
Expand Up @@ -147,7 +147,7 @@ def test_scale_array(count_matrix_format, zero_center):
Test that running sc.pp.scale on an anndata object and an array returns the same results.
"""
X = count_matrix_format(sp.random(100, 200, density=0.3).toarray())
adata = sc.AnnData(X=X.copy(), dtype=np.float64)
adata = sc.AnnData(X=X.copy().astype(np.float64))

sc.pp.scale(adata, zero_center=zero_center)
scaled_X = sc.pp.scale(X, zero_center=zero_center, copy=True)
Expand Down Expand Up @@ -254,7 +254,7 @@ def test_downsample_counts_per_cell(count_matrix_format, replace, dtype):
TARGET = 1000
X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100))
X = X.astype(dtype)
adata = AnnData(X=count_matrix_format(X), dtype=dtype)
adata = AnnData(X=count_matrix_format(X).astype(dtype))
with pytest.raises(ValueError):
sc.pp.downsample_counts(
adata, counts_per_cell=TARGET, total_counts=TARGET, replace=replace
Expand Down Expand Up @@ -286,7 +286,7 @@ def test_downsample_counts_per_cell_multiple_targets(
TARGETS = np.random.randint(500, 1500, 1000)
X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100))
X = X.astype(dtype)
adata = AnnData(X=count_matrix_format(X), dtype=dtype)
adata = AnnData(X=count_matrix_format(X).astype(dtype))
initial_totals = np.ravel(adata.X.sum(axis=1))
with pytest.raises(ValueError):
sc.pp.downsample_counts(adata, counts_per_cell=[40, 10], replace=replace)
Expand All @@ -312,7 +312,7 @@ def test_downsample_counts_per_cell_multiple_targets(
def test_downsample_total_counts(count_matrix_format, replace, dtype):
X = np.random.randint(0, 100, (1000, 100)) * np.random.binomial(1, 0.3, (1000, 100))
X = X.astype(dtype)
adata_orig = AnnData(X=count_matrix_format(X), dtype=dtype)
adata_orig = AnnData(X=count_matrix_format(X))
total = X.sum()
target = np.floor_divide(total, 10)
initial_totals = np.ravel(adata_orig.X.sum(axis=1))
Expand Down
6 changes: 3 additions & 3 deletions scanpy/tests/test_scaling.py
Expand Up @@ -28,15 +28,15 @@
def test_scale(typ, dtype):
# test AnnData arguments
# test scaling with default zero_center == True
adata0 = AnnData(typ(X), dtype=dtype)
adata0 = AnnData(typ(X).astype(dtype))
sc.pp.scale(adata0)
assert np.allclose(csr_matrix(adata0.X).toarray(), X_centered)
# test scaling with explicit zero_center == True
adata1 = AnnData(typ(X), dtype=dtype)
adata1 = AnnData(typ(X).astype(dtype))
sc.pp.scale(adata1, zero_center=True)
assert np.allclose(csr_matrix(adata1.X).toarray(), X_centered)
# test scaling with explicit zero_center == False
adata2 = AnnData(typ(X), dtype=dtype)
adata2 = AnnData(typ(X).astype(dtype))
sc.pp.scale(adata2, zero_center=False)
assert np.allclose(csr_matrix(adata2.X).toarray(), X_scaled)
# test bare count arguments, for simplicity only with explicit copy=True
Expand Down

0 comments on commit 46969b4

Please sign in to comment.