Skip to content

Commit

Permalink
Fix #20.
Browse files Browse the repository at this point in the history
  • Loading branch information
grst committed Mar 30, 2020
1 parent 22babe4 commit 8ce1265
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 72 deletions.
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ Tools: `tl`
clonotype_network
chain_pairing
clip_and_count
clonal_expansion
alpha_diversity
group_abundance
spectratype
Expand Down
43 changes: 36 additions & 7 deletions docs/tutorials/tutorial_3k_tcr.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ sc.pl.umap(adata, color=["sample", "patient", "cluster", "CD8A", "CD4", "FOXP3"]
## TCR Quality Control

<!-- #raw raw_mimetype="text/restructuredtext" -->
WWhile most of T cell receptors have exactly one pair of α and β chains, up to one third of
While most of T cell receptors have exactly one pair of α and β chains, up to one third of
T cells can have *dual TCRs*, i.e. two pairs of receptors originating from different alleles (:cite:`Schuldt2019`).

Using the :func:`scirpy.tl.chain_pairing` function, we can add a summary
Expand Down Expand Up @@ -261,15 +261,19 @@ ir.tl.clonotype_network(adata, min_size=2)
ir.pl.clonotype_network(adata, color="clonotype", legend_loc="none")
```

Let's re-compute the network with a `cutoff` of `20`.
That's the equivalent of 4 `R`s mutating into `N` (using the BLOSUM62 distance matrix).
Let's re-compute the network with a `cutoff` of `15`.
That's the equivalent of 3 `R`s mutating into `N` (using the BLOSUM62 distance matrix).

Additionally, we set `chains` to `all`. This results in the distances not being only
computed between the most abundant pair of T-cell receptors, but instead, will
take the minimal distance between any pair of T-cell receptors.

```python
ir.pp.tcr_neighbors(adata, cutoff=20, chains="all")
sc.settings.verbosity = 4
```

```python
ir.pp.tcr_neighbors(adata, cutoff=15, chains="all")
ir.tl.define_clonotypes(adata)
```

Expand All @@ -294,18 +298,37 @@ it is shared across tissues and/or patients.
ir.pl.clonotype_network(adata, color="sample")
```

Next, visualize the clonal expansion by cell-type cluster
## Clonotype analysis

Let's visualize the number of expanded clonotypes (i.e. clonotypes consisting
of more than one cell) by cell-type:

```python
ir.tl.clip_and_count(adata, groupby="cluster", target_col="clonotype")
```

```python
sc.pl.umap(adata, color=["clonotype_clipped_count", "cluster"])
```

```python
ir.pl.clip_and_count(adata, groupby="cluster", target_col="clonotype", fraction=True)
```

```python
ir.pl.clonal_expansion(adata, groupby="cluster", clip_at=4, fraction=False)
```

Normalized to the cluster size
Normalized to the cluster size:

```python
ir.pl.clonal_expansion(adata, "cluster")
```

Expectedly, the CD8+ effector T cells have the largest fraction of expanded clonotypes.

Consistent with this observation, they have the lowest alpha diversity:

```python
ir.pl.alpha_diversity(adata, groupby="cluster")
```
Expand All @@ -314,7 +337,13 @@ ir.pl.alpha_diversity(adata, groupby="cluster")

```python
ir.pl.group_abundance(
adata, groupby="clonotype", target_col="cluster", max_cols=10, fraction=False
adata, groupby="clonotype", target_col="cluster", max_cols=10, fraction="patient"
)
```

```python
ir.pl.group_abundance(
adata, groupby="clonotype", target_col="patient", max_cols=10, fraction="patient"
)
```

Expand Down
41 changes: 35 additions & 6 deletions scirpy/_plotting/_clip_and_count.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,35 @@
from .. import tl
from anndata import AnnData
from . import base
from .._util import _normalize_counts, _is_na


def _prepare_df(adata, groupby, target_col, clip_at, fraction):
"""Turn the result of the `clip_and_count` tool into a plottable
dataframe"""
tmp_col = target_col + "clipped_count"
tmp_col_weight = target_col + "weight"

obs = adata.obs.loc[:, [groupby, target_col]]
obs[tmp_col] = tl.clip_and_count(
adata, target_col, groupby=groupby, clip_at=clip_at, inplace=False
)
# filter NA values
obs = obs.loc[~_is_na(obs[target_col]), :]

# add normalization vector
size_vector = _normalize_counts(obs, fraction, groupby)
obs[tmp_col_weight] = size_vector

obs = (
obs.groupby([groupby, tmp_col], observed=True)[tmp_col_weight]
.sum()
.reset_index()
.pivot(index=groupby, columns=tmp_col, values=tmp_col_weight)
.fillna(0)
)

return obs


def clip_and_count(
Expand All @@ -12,8 +41,10 @@ def clip_and_count(
fraction: bool = True,
**kwargs,
):
"""Plots the the number of identical entries in `target_col`
for each group in `group_by`.
"""Plots the the *number of cells* in `target_col` that fall into
a certain count bin for each group in `group_by`.
Removes all entries with `NaN` in `target_col` prior to plotting.
Parameters
----------
Expand All @@ -32,11 +63,9 @@ def clip_and_count(
**kwargs
Additional arguments passed to :meth:`base.bar`
"""
res = tl.clip_and_count(
adata, groupby, target_col, clip_at=clip_at, fraction=fraction
)
plot_df = _prepare_df(adata, groupby, target_col, clip_at, fraction)

return base.bar(res, **kwargs)
return base.bar(plot_df, **kwargs)


def clonal_expansion(
Expand Down
2 changes: 1 addition & 1 deletion scirpy/_tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from ._chain_pairing import chain_pairing
from ._clip_and_count import clip_and_count
from ._clip_and_count import clip_and_count, clonal_expansion
from ._diversity import alpha_diversity
from ._group_abundance import group_abundance
from ._spectratype import spectratype
Expand Down
106 changes: 78 additions & 28 deletions scirpy/_tools/_clip_and_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,109 @@
from .._util import _is_na
import numpy as np
import pandas as pd
from typing import Union


def clip_and_count(
adata: AnnData,
groupby: str,
target_col: str,
*,
groupby: Union[str, None] = None,
clip_at: int = 3,
inplace: bool = True,
key_added: Union[str, None] = None,
fraction: bool = True,
) -> Dict:
) -> Union[None, np.ndarray]:
"""Counts the number of identical entries in `target_col`
for each group in `group_by`.
Ignores NaN values.
Counts NaN values like any other value in `target_col`.
Parameters
----------
adata
AnnData object to work on
groupby
Group by this column from `obs`
target_col
Column to count on.
groupby
Calculate counts within groups (e.g. sample or patient)
clip_at
All entries in `target_col` with more copies than `clip_at`
will be summarized into a single group.
fraction
If True, compute fractions rather than reporting
abosolute numbers.
inplace
If True, adds a column to `obs`. Otherwise returns an array
with the clipped counts.
key_added
Key under which the results will be stored in `obs`. Defaults
to `{target_col}_clipped_count`
Returns
-------
Dictionary with counts/fractions per group
Depending on the value of inplace, adds a column to adata or returns
an array with the clipped count per cell.
"""
if target_col not in adata.obs.columns:
raise ValueError("`target_col` not found in obs.")
# count abundance of each clonotype
tcr_obs = adata.obs.loc[~_is_na(adata.obs[target_col]), :]

groupby = [groupby] if isinstance(groupby, str) else groupby
groupby_cols = [target_col] if groupby is None else groupby + [target_col]
clonotype_counts = (
tcr_obs.groupby([groupby, target_col]).size().reset_index(name="count")
adata.obs.groupby(groupby_cols)
.size()
.reset_index(name="tmp_count")
.assign(
tmp_count=lambda X: [
">= {}".format(min(n, clip_at)) if n >= clip_at else str(n)
for n in X["tmp_count"].values
]
)
)
clipped_count = adata.obs.merge(clonotype_counts, how="left", on=groupby_cols)[
"tmp_count"
].values

if inplace:
key_added = (
"{}_clipped_count".format(target_col) if key_added is None else key_added
)
adata.obs[key_added] = clipped_count
else:
return clipped_count


def clonal_expansion(
adata: AnnData,
target_col: str = "clonotype",
*,
groupby: Union[str, None] = None,
clip_at: int = 3,
key_added: str = "clonal_expansion",
**kwargs,
) -> Union[None, np.ndarray]:
"""Adds a column to obs which clonotypes are expanded.
Parameters
----------
adata
Annoated data matrix
target_col
Column containing the clontype annoataion
groupby
Calculate clonal expansion within groups. Usually makes sense to set
this to the column containing sample annotation.
clip_at:
All clonotypes with more than `clip_at` clones will be summarized into
a single category
key_added
Key under which the results will be added to `obs`.
kwargs
Additional arguments passed to :func:`scirpy.tl.clip_and_count`.
"""
return clip_and_count(
adata,
target_col,
groupby=groupby,
clip_at=clip_at,
key_added=key_added,
**kwargs,
)
clonotype_counts.loc[clonotype_counts["count"] >= clip_at, "count"] = clip_at

result_dict = dict()
for group in clonotype_counts[groupby].unique():
result_dict[group] = dict()
for n in range(1, clip_at + 1):
label = ">= {}".format(n) if n == clip_at else str(n)
mask_group = clonotype_counts[groupby] == group
mask_count = clonotype_counts["count"] == n
tmp_count = np.sum(mask_group & mask_count)
if fraction:
tmp_count /= np.sum(mask_group)
result_dict[group][label] = tmp_count

return pd.DataFrame.from_dict(result_dict, orient="index")
28 changes: 27 additions & 1 deletion tests/test_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,36 @@
adata_vdj,
adata_clonotype_network,
)
import scirpy._plotting as irplt
import matplotlib.pyplot as plt
import numpy.testing as npt
import pandas.testing as pdt
import numpy as np


def test_clip_and_count(adata_tra):
def test_clip_and_count(adata_tra, adata_clonotype):
res_fraction = irplt._clip_and_count._prepare_df(
adata_clonotype, "group", "clonotype", 2, True
)
res_counts = irplt._clip_and_count._prepare_df(
adata_clonotype, "group", "clonotype", 2, False
)
pdt.assert_frame_equal(
res_fraction,
pd.DataFrame.from_dict(
{"group": ["A", "B"], "1": [0, 3 / 5], ">= 2": [1.0, 2 / 5]}
).set_index("group"),
check_names=False,
)
pdt.assert_frame_equal(
res_counts,
pd.DataFrame.from_dict(
{"group": ["A", "B"], "1": [0, 3], ">= 2": [3, 2]}
).set_index("group"),
check_names=False,
check_dtype=False,
)

p = pl.clip_and_count(
adata_tra, target_col="TRA_1_cdr3", groupby="sample", fraction=False
)
Expand All @@ -41,10 +65,12 @@ def test_spectratype(adata_tra):
p = pl.spectratype(adata_tra, target_col="sample")
assert isinstance(p, plt.Axes)


def test_vdj_usage(adata_vdj):
p = pl.vdj_usage(adata_vdj, fraction="sample")
assert isinstance(p, plt.Axes)


def test_clonotype_network(adata_clonotype_network):
p = pl.clonotype_network(adata_clonotype_network)
assert isinstance(p[0], plt.Axes)
Expand Down
Loading

0 comments on commit 8ce1265

Please sign in to comment.