scverse · mbuttner · Nov 7, 2023 · Sep 17, 2023 · Sep 26, 2023 · Sep 26, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
         language_version: python3
         args:
           - --max-line-length=88
-          - --ignore=E203
+          - --ignore=E203,TYP001
         exclude: |
           (?x)(
               __init__.py

diff --git a/docs/examples/03_flowsom.ipynb b/docs/examples/03_flowsom.ipynb
diff --git a/docs/tutorials/preprocessing.ipynb b/docs/tutorials/preprocessing.ipynb
@@ -220,7 +220,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.9.7 ('pyto_dev')",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -234,7 +234,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.18"
+   "version": "3.10.12"
   },
   "vscode": {
    "interpreter": {
@@ -243,5 +243,5 @@
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }
diff --git a/docs/tutorials/quickstart.ipynb b/docs/tutorials/quickstart.ipynb
@@ -179,7 +179,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3.9.7 ('pyto_dev')",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -193,7 +193,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.18"
+   "version": "3.10.12"
   },
   "vscode": {
    "interpreter": {

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,22 +4,29 @@ build-backend = "flit_core.buildapi"
 
 [project]
 name = "pytometry"
-authors = [{name = "Maren Buettner", email = "maren.buettner@tum.de"}]
+authors = [
+    {name = "Maren Buettner", email = "maren.buettner@tum.de"},
+    {name = "Ross Burton", email = "burtonrossj@gmail.com"}
+]
 readme = "README.md"
 dynamic = ["version"]
 description = "Pytometry is a Python package for flow and mass cytometry analysis."
 requires-python = '>= 3.9'
 dependencies = [
     "nbproject",
-    "numpy",
-    "pandas",
+    "numpy>=1.20.0",
+    "numba>=0.57",
+    "pandas<2.0.0,>=1.5.3",
     "anndata",
     "scanpy",
     "scipy",
     "seaborn",
     "matplotlib",
     "readfcs >=1.1.0",
-    "datashader"
+    "datashader",
+    "consensusclustering",
+    "minisom",
+    "tqdm"
 ]
 
 [project.urls]

diff --git a/pytometry/plotting/_histogram.py b/pytometry/plotting/_histogram.py
@@ -16,27 +16,39 @@ def plotdata(
     adata: AnnData,
     key: str = "signal_type",
     option: str = "area",
-    n_bins: int = 400,
     normalize: Optional[str] = None,
     cofactor: Optional[float] = 10,
     figsize: Tuple[float, float] = (15, 6),
+    bins: int = 400,
+    save: Optional[str] = None,
     n_cols: int = 3,
-    save: str = "",
     **kwargs,
 ):
-    """Creating histogram plot from Anndata object.
+    """Creating histogram plot of channels from Anndata object.
 
-    :param adata: AnnData object containing data.
-    :param key: string value to point to the column var metadata with the
-        signal type to plot (see `option` parameter). Defaults to "signal_type".
-    :param option: Switch to choose directly between area and height data.
-    :param n_bins: int value to control the number of bins per histogram plot
-    :param normalize: choose between "arcsinh", "biExp" and "logicle"
-    :param cofactor: float value to normalize with in arcsinh-transform
-    :param figsize: tuple to control the overall figure size.
-    :param n_cols: int value, number of columns of the plot.
-    :param save: str value, filename to save the shown figure
-    :param kwargs: Passed to :func:`matplotlib.pyplot.savefig`
+    Args:
+        adata (AnnData): Anndata object containing data.
+        key (str):
+            Key in adata.var to plot. Default is 'signal_type' which is generated
+            when calling the preprocessing function `split_signal`.
+        normalize (str):
+            Normalization type. Default is None but can be set to "arcsinh", "biExp"
+            or "logicle"
+        cofactor (float):
+            Cofactor for arcsinh normalization. Default is 10.
+        figsize (tuple):
+            Figure size (width, height). Default is (15, 6).
+        option (str):
+            Switch to choose directly between area and height data. Default is "area".
+        bins (int):
+            Number of bins for the histogram. Default is 400.
+        save (str, optional):
+            Path to save the figure.
+        **kwargs:
+            Additional arguments passed to `matplotlib.pyplot.savefig`
+
+    Returns:
+        matplotlib.pyplot.Figure
     """
     option_key = option
     key_in = key
@@ -88,18 +100,8 @@ def plotdata(
 
     for idx in range(number):
         ax = fig.add_subplot(rows, columns, idx + 1)
-        p0 = sns.histplot(
-            datax[:, names == names[idx]],
-            kde=False,
-            legend=False,
-            # stat="density",
-            bins=n_bins,
-            ax=ax,
-        )
-        p0.set_title(names[idx])
-    plt.subplots_adjust(bottom=0.1)
-    if save != "":
+        sns.histplot(datax[:, names == names[idx]], bins=bins, ax=ax, legend=False)
+        ax.set_xlabel(names[idx])
+    if save:
         plt.savefig(save, bbox_inches="tight", **kwargs)
-    plt.show()
-
-    return
+    return fig
diff --git a/pytometry/plotting/_scatter_density.py b/pytometry/plotting/_scatter_density.py
@@ -24,6 +24,7 @@ def scatter_density(
     x_lim: Optional[Tuple[float, float]] = None,
     y_lim: Optional[Tuple[float, float]] = None,
     ax: Optional[Axes] = None,
+    figsize: Optional[tuple[int, int]] = None,
     cmap: Union[str, List, Colormap] = "jet",
     vmin: Optional[float] = None,
     vmax: Optional[float] = None,
@@ -51,6 +52,8 @@ def scatter_density(
         ax (`matplotlib.Axes`), optional:
             Axes to draw into. If *None*, create a new figure or use ``fignum`` to
             draw into an existing figure.
+        figsize (tuple), optional:
+            Figure size (width, height) if ``ax`` not provided. Defaults to (10, 10).
         cmap (str or list or :class:`matplotlib.colors.Colormap`), optional:
             For scalar aggregates, a matplotlib colormap name or instance.
             Alternatively, an iterable of colors can be passed and will be converted
@@ -61,12 +64,13 @@ def scatter_density(
             range of data in the area displayed, unless the corresponding value is
             already set in the norm.
         layer
-            layer in `adata` to use. If `None`, use `adata.X`.
+            The layer in `adata` to use. If `None`, use `adata.X`.
 
     Returns:
         Scatter plot that displays cell density
     """
-    fig, ax = plt.subplots()
+    figsize = figsize if figsize is not None else (10, 10)
+    ax = plt.subplots(figsize=figsize)[1] if ax is None else ax
     if x_label is None:
         x_label = x
     if y_label is None:

diff --git a/pytometry/read_write/_readfcs.py b/pytometry/read_write/_readfcs.py
@@ -1,5 +1,11 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
 import readfcs
 from anndata import AnnData
+from tqdm.auto import tqdm
 
 
 def read_fcs(path: str, reindex: bool = True) -> AnnData:
@@ -16,3 +22,71 @@ def read_fcs(path: str, reindex: bool = True) -> AnnData:
         an AnnData object of the fcs file
     """
     return readfcs.read(path, reindex=reindex)
+
+
+def read_and_merge(
+    files: str | list[str],
+    sample_ids: list[Any] | None = None,
+    sample_id_from_filename: bool = False,
+    sample_id_index: int = 0,
+    sample_id_sep: str = "_",
+) -> AnnData:
+    """Read and merge multiple FCS files into a single AnnData object.
+
+    Args:
+        files (str | list[str]): either a directory path (all ``*.fcs`` files
+            directly inside it are read, in sorted order) or a list of file
+            paths (entries without a ``.fcs`` suffix are skipped)
+        sample_ids (list[Any] | None): one sample id per FCS file, stored in
+            ``adata.obs["sample"]``; takes precedence over
+            ``sample_id_from_filename``
+        sample_id_from_filename (bool): whether to derive the sample id from
+            the file name (without directory and extension) when ``sample_ids``
+            is not given
+        sample_id_index (int): which field of the split file name to use as the
+            sample id, defaults to 0
+        sample_id_sep (str): separator to use when splitting the file name,
+            defaults to "_"
+
+    Returns:
+        AnnData: merged AnnData object
+
+    Raises:
+        ValueError: if ``files`` is a single path that is not a directory, if no
+            FCS file is left to read, or if ``sample_ids`` does not contain
+            exactly one entry per FCS file
+    """
+    if isinstance(files, (str, Path)):
+        if not Path(files).is_dir():
+            raise ValueError("files must be a list of files or a directory path")
+        # glob order is filesystem dependent; sort for a reproducible merge order
+        files = sorted(str(f) for f in Path(files).glob("*.fcs"))
+    else:
+        files = [str(Path(f)) for f in files if Path(f).suffix == ".fcs"]
+    if not files:
+        raise ValueError("no .fcs files found to read")
+
+    if sample_ids is not None:
+        # user-supplied ids win; they must line up one-to-one with the files
+        if len(sample_ids) != len(files):
+            raise ValueError(
+                f"got {len(sample_ids)} sample_ids for {len(files)} FCS files"
+            )
+    elif sample_id_from_filename:
+        # split the bare file name, not the full path, so that directory names
+        # and the ".fcs" extension never leak into the sample id
+        sample_ids = [
+            Path(f).stem.split(sample_id_sep)[sample_id_index] for f in files
+        ]
+    else:
+        sample_ids = [None] * len(files)
+
+    adata_stack = []
+    for file, file_id in tqdm(
+        zip(files, sample_ids), total=len(files), desc="Loading FCS files"
+    ):
+        adata = read_fcs(file)
+        if file_id is not None:
+            adata.obs["sample"] = file_id
+        adata_stack.append(adata)
+    return AnnData.concatenate(*adata_stack, join="outer", uns_merge="unique")
diff --git a/pytometry/tools/__init__.py b/pytometry/tools/__init__.py
@@ -1 +1,2 @@
 from ._normalization import normalize_arcsinh, normalize_biExp, normalize_logicle
+from .clustering._flowsom import flowsom_clustering, meta_clustering, som_clustering
diff --git a/pytometry/tools/clustering/__init__.py b/pytometry/tools/clustering/__init__.py
@@ -0,0 +1 @@
+from ._flowsom import flowsom_clustering