Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FlowSOM #59

Merged
merged 11 commits into from
Nov 7, 2023
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ repos:
language_version: python3
args:
- --max-line-length=88
- --ignore=E203
- --ignore=E203,TYP001
exclude: |
(?x)(
__init__.py
Expand Down
803 changes: 803 additions & 0 deletions docs/examples/03_flowsom.ipynb

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions docs/tutorials/preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 ('pyto_dev')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -234,7 +234,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.10.12"
},
"vscode": {
"interpreter": {
Expand All @@ -243,5 +243,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
4 changes: 2 additions & 2 deletions docs/tutorials/quickstart.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.9.7 ('pyto_dev')",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
Expand All @@ -193,7 +193,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.18"
"version": "3.10.12"
},
"vscode": {
"interpreter": {
Expand Down
14 changes: 10 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,28 @@ build-backend = "flit_core.buildapi"

[project]
name = "pytometry"
authors = [{name = "Maren Buettner", email = "maren.buettner@tum.de"}]
authors = [
{name = "Maren Buettner", email = "maren.buettner@tum.de"},
{name = "Ross Burton", email = "burtonrossj@gmail.com"}
]
readme = "README.md"
dynamic = ["version"]
description = "Pytometry is a Python package for flow and mass cytometry analysis."
requires-python = '>= 3.9'
dependencies = [
"nbproject",
"numpy",
"pandas",
"numpy>=1.20.0",
"numba>=0.57",
"pandas<2.0.0,>=1.5.3",
"anndata",
"scanpy",
"scipy",
"seaborn",
"matplotlib",
"readfcs >=1.1.0",
"datashader"
"datashader",
"consensusclustering",
"minisom"
]

[project.urls]
Expand Down
58 changes: 30 additions & 28 deletions pytometry/plotting/_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,39 @@ def plotdata(
adata: AnnData,
key: str = "signal_type",
option: str = "area",
n_bins: int = 400,
normalize: Optional[str] = None,
cofactor: Optional[float] = 10,
figsize: Tuple[float, float] = (15, 6),
bins: int = 400,
save: Optional[str] = None,
n_cols: int = 3,
save: str = "",
**kwargs,
):
"""Creating histogram plot from Anndata object.
"""Creating histogram plot of channels from Anndata object.

:param adata: AnnData object containing data.
:param key: string value to point to the column var metadata with the
signal type to plot (see `option` parameter). Defaults to "signal_type".
:param option: Switch to choose directly between area and height data.
:param n_bins: int value to control the number of bins per histogram plot
:param normalize: choose between "arcsinh", "biExp" and "logicle"
:param cofactor: float value to normalize with in arcsinh-transform
:param figsize: tuple to control the overall figure size.
:param n_cols: int value, number of columns of the plot.
:param save: str value, filename to save the shown figure
:param kwargs: Passed to :func:`matplotlib.pyplot.savefig`
Args:
adata (AnnData): Anndata object containing data.
key (str):
Key in adata.var to plot. Default is 'signal_type' which is generated
when calling the preprocessing function `split_signal`.
normalize (str):
Normalization type. Default is None but can be set to "arcsinh", "biExp"
or "logicle"
cofactor (float):
Cofactor for arcsinh normalization. Default is 10.
figsize (tuple):
Figure size (width, height). Default is (15, 6).
option (str):
Switch to choose directly between area and height data. Default is "area".
bins (int):
Number of bins for the histogram. Default is 400.
save (str, optional):
Path to save the figure.
**kwargs:
Additional arguments passed to `matplotlib.pyplot.savefig`

Returns:
matplotlib.pyplot.Figure
"""
option_key = option
key_in = key
Expand Down Expand Up @@ -88,18 +100,8 @@ def plotdata(

for idx in range(number):
ax = fig.add_subplot(rows, columns, idx + 1)
p0 = sns.histplot(
datax[:, names == names[idx]],
kde=False,
legend=False,
# stat="density",
bins=n_bins,
ax=ax,
)
p0.set_title(names[idx])
plt.subplots_adjust(bottom=0.1)
if save != "":
sns.histplot(datax[:, names == names[idx]], bins=bins, ax=ax, legend=False)
ax.set_xlabel(names[idx])
if save:
plt.savefig(save, bbox_inches="tight", **kwargs)
plt.show()

return
return fig
8 changes: 6 additions & 2 deletions pytometry/plotting/_scatter_density.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def scatter_density(
x_lim: Optional[Tuple[float, float]] = None,
y_lim: Optional[Tuple[float, float]] = None,
ax: Optional[Axes] = None,
figsize: Optional[tuple[int, int]] = None,
cmap: Union[str, List, Colormap] = "jet",
vmin: Optional[float] = None,
vmax: Optional[float] = None,
Expand Down Expand Up @@ -51,6 +52,8 @@ def scatter_density(
ax (`matplotlib.Axes`), optional:
Axes to draw into. If *None*, create a new figure or use ``fignum`` to
draw into an existing figure.
figsize (tuple), optional:
Figure size (width, height) if ``ax`` not provided. Defaults to (10, 10).
cmap (str or list or :class:`matplotlib.colors.Colormap`), optional:
For scalar aggregates, a matplotlib colormap name or instance.
Alternatively, an iterable of colors can be passed and will be converted
Expand All @@ -61,12 +64,13 @@ def scatter_density(
range of data in the area displayed, unless the corresponding value is
already set in the norm.
layer
layer in `adata` to use. If `None`, use `adata.X`.
The layer in `adata` to use. If `None`, use `adata.X`.

Returns:
Scatter plot that displays cell density
"""
fig, ax = plt.subplots()
figsize = figsize if figsize is not None else (10, 10)
ax = plt.subplots(figsize=figsize)[1] if ax is None else ax
if x_label is None:
x_label = x
if y_label is None:
Expand Down
53 changes: 53 additions & 0 deletions pytometry/read_write/_readfcs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from __future__ import annotations

from pathlib import Path
from typing import Any

import readfcs
from anndata import AnnData
from tqdm.auto import tqdm


def read_fcs(path: str, reindex: bool = True) -> AnnData:
Expand All @@ -16,3 +22,50 @@ def read_fcs(path: str, reindex: bool = True) -> AnnData:
an AnnData object of the fcs file
"""
return readfcs.read(path, reindex=reindex)


def read_and_merge(
    files: str | list[str],
    sample_ids: list[Any] | None = None,
    sample_id_from_filename: bool = False,
    sample_id_index: int = 0,
    sample_id_sep: str = "_",
) -> AnnData:
    """Read and merge multiple FCS files into a single AnnData object.

    Each file is loaded with :func:`read_fcs`; when a sample id is available for
    a file it is written to ``adata.obs["sample"]`` before the per-file objects
    are concatenated (outer join on variables, ``uns_merge="unique"``).

    Args:
        files (str | list[str]): either a list of file paths or a directory path.
            For a directory, every ``*.fcs`` file directly inside it is loaded in
            sorted order. For a list, entries whose suffix is not ``.fcs`` are
            dropped.
        sample_ids (list[Any] | None): sample ids to store in
            ``adata.obs["sample"]``, one per retained file and in the same order.
            Takes precedence over ``sample_id_from_filename``.
        sample_id_from_filename (bool): whether to derive the sample id from the
            file name when ``sample_ids`` is not given.
        sample_id_index (int): which piece of the file name (after splitting on
            ``sample_id_sep``) to use as the sample id, defaults to 0.
        sample_id_sep (str): separator to use when splitting the file name,
            defaults to "_".

    Returns:
        AnnData: merged AnnData object

    Raises:
        ValueError: if ``files`` is a string that is not a directory, if no FCS
            files remain to be loaded, or if ``sample_ids`` does not have one
            entry per file.
        IndexError: if ``sample_id_index`` is out of range for a file name.
    """
    if isinstance(files, str):
        directory = Path(files)
        if not directory.is_dir():
            raise ValueError("files must be a list of files or a directory path")
        # glob() order depends on the filesystem; sort so that the merge order
        # (and any positional pairing with `sample_ids`) is reproducible.
        files = sorted(str(f) for f in directory.glob("*.fcs"))
    elif isinstance(files, list):
        files = [str(Path(f)) for f in files if Path(f).suffix == ".fcs"]

    if not files:
        # Fail early with a clear message instead of an opaque error from
        # concatenating an empty stack.
        raise ValueError("No FCS files found to read")

    if sample_ids is not None:
        # Explicitly supplied ids always win; they must line up one-to-one with
        # the files that survived the ``.fcs`` filter, otherwise zip() below
        # would silently drop files or ids.
        if len(sample_ids) != len(files):
            raise ValueError(
                f"sample_ids has {len(sample_ids)} entries but {len(files)} "
                "FCS files were found"
            )
    elif sample_id_from_filename:
        # Split only the file name, not the whole path, so that separators in
        # parent directory names cannot leak into the sample id.
        sample_ids = [
            Path(f).name.split(sample_id_sep)[sample_id_index] for f in files
        ]
    else:
        sample_ids = [None] * len(files)

    adata_stack = []
    for file, file_id in tqdm(
        zip(files, sample_ids), total=len(files), desc="Loading FCS files"
    ):
        adata = read_fcs(file)
        if file_id is not None:
            adata.obs["sample"] = file_id
        adata_stack.append(adata)

    # Equivalent to AnnData.concatenate(*adata_stack, ...): the first object is
    # the receiver and the remaining ones are appended to it.
    first, *rest = adata_stack
    return first.concatenate(*rest, join="outer", uns_merge="unique")
1 change: 1 addition & 0 deletions pytometry/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from ._normalization import normalize_arcsinh, normalize_biExp, normalize_logicle
from .clustering._flowsom import flowsom_clustering, meta_clustering, som_clustering
1 change: 1 addition & 0 deletions pytometry/tools/clustering/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ._flowsom import flowsom_clustering
Loading
Loading