diff --git a/CHANGELOG.md b/CHANGELOG.md
index fab4c694497..c125724f07b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -38,7 +38,11 @@
 
 * `reference/cellranger_mkgtf`: Rename `reference/mkgtf` to `reference/cellranger_mkgtf` (PR #846).
 
-* `interactive/run_cellxgene` and `interactive/run_cirrocumulus` were removed (PR #796).
+* `labels_transfer/xgboost`: Align interface with new annotation workflow
+  - Store label probabilities instead of uncertainties
+  - Take `.h5mu` format as an input instead of `.h5ad`
+
+* `labels_transfer/knn`: delete outdated component due to its functionality now implemented in `labels_transfer/pynndescent_knn`
 
 * `reference/build_cellranger_arc_reference`: a default value of "output" is now specified for the argument `--genome`, inline with `reference/build_cellranger_reference` component. Additionally, providing a value for `--organism` is no longer required and its default value of `Homo Sapiens` has been removed (PR #864).
 
@@ -83,6 +87,8 @@
   
 * `dataflow/split_h5mu` component: Added a component to split a single h5mu file into multiple h5mu files based on the values of an .obs column (PR #824).
 
+* `labels_transfer/pynndescent_knn`: component: Added a component for KNN classification based on a PyNNDescent neighborhood graph (PR #830).
+
 * `workflows/test_workflows/ingestion` components & `workflows/ingestion`: Added standalone components for integration testing of ingestion workflows (PR #801). 
 
 * `workflows/ingestion/make_reference`: Add additional arguments passed through to the STAR and BD Rhapsody reference components (PR #846).
diff --git a/src/labels_transfer/api/common_arguments.yaml b/src/labels_transfer/api/common_arguments.yaml
index 567aa0ec3b4..5f9514ca099 100644
--- a/src/labels_transfer/api/common_arguments.yaml
+++ b/src/labels_transfer/api/common_arguments.yaml
@@ -1,4 +1,3 @@
-
   argument_groups:
     - name: Input dataset (query) arguments
       arguments:
@@ -25,7 +24,7 @@
                     obsm:
                       - type: "double"
                         name: "features"
-                        example: X_integrated_scanvi
+                        example: X_scvi
                         required: false
                         description: |
                           The embedding to use for the classifier's inference. Override using the `--input_obsm_features` argument. If not provided, the `.X` slot will be used instead.
@@ -41,52 +40,59 @@
           description: |
             The `.obsm` key of the embedding to use for the classifier's inference. If not provided, the `.X` slot will be used instead.
             Make sure that embedding was obtained in the same way as the reference embedding (e.g. by the same model or preprocessing).
-          example: X_integrated_scanvi
+          example: X_scvi
+
     - name: Reference dataset arguments
       arguments:
         - name: "--reference"
           type: "file"
           required: false
           description: "The reference data to train classifiers on."
-          example: https://zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad
+          example: reference.h5mu
           info:
             label: "Reference"
             file_format:
-              type: h5ad
-              X:
-                type: double
-                name: features
-                required: false
-                description: |
-                  The expression data to use for the classifier's training, if `--input_obsm_features` argument is not provided.
-              obsm:
-                - type: "double"
-                  name: "features"
-                  example: X_integrated_scanvi
-                  description: |
-                    The embedding to use for the classifier's training. Override using the `--reference_obsm_features` argument.
-                    Make sure that embedding was obtained in the same way as the query embedding (e.g. by the same model or preprocessing).
-                  required: true
-              obs:
-                - type: "string"
-                  name: "targets"
-                  multiple: true
-                  example: [ ann_level_1, ann_level_2, ann_level_3, ann_level_4, ann_level_5, ann_finest_level ]
-                  description: "The target labels to transfer. Override using the `--reference_obs_targets` argument."
+              type: h5mu
+              mod:
+                rna:
+                  description: "Modality in AnnData format containing RNA data."
                   required: true
+                  slots:
+                    X:
+                      type: double
+                      name: features
+                      required: false
+                      description: |
+                        The expression data to use for the classifier's training, if `--input_obsm_features` argument is not provided.
+                    obsm:
+                      - type: "double"
+                        name: "features"
+                        example: X_scvi
+                        description: |
+                          The embedding to use for the classifier's training. Override using the `--reference_obsm_features` argument.
+                          Make sure that embedding was obtained in the same way as the query embedding (e.g. by the same model or preprocessing).
+                        required: true
+                    obs:
+                      - type: "string"
+                        name: "targets"
+                        multiple: true
+                        example: [ ann_level_1, ann_level_2, ann_level_3, ann_level_4, ann_level_5, ann_finest_level ]
+                        description: "The target labels to transfer. Override using the `--reference_obs_targets` argument."
+                        required: true
         - name: "--reference_obsm_features"
           type: string
-          required: true
+          required: false
           description: |
-            The `.obsm` key of the embedding to use for the classifier's training.
+            The `.obsm` key of the embedding to use for the classifier's training. If not provided, the `.X` slot will be used instead.
             Make sure that embedding was obtained in the same way as the query embedding (e.g. by the same model or preprocessing).
-          default: X_integrated_scanvi
+          example: X_scvi
         - name: "--reference_obs_targets"
           type: string
           default: [ ann_level_1, ann_level_2, ann_level_3, ann_level_4, ann_level_5, ann_finest_level ]
           required: false
           multiple: true
-          description: The `.obs` key of the target labels to tranfer.
+          description: The `.obs` key(s) of the target labels to tranfer.
+
     - name: Outputs
       arguments:
         - name: "--output"
@@ -108,22 +114,16 @@
                       description: "The predicted labels. Override using the `--output_obs_predictions` argument."
                       required: true
                     - type: "double"
-                      name: "uncertainty"
-                      description: "The uncertainty of the predicted labels. Override using the `--output_obs_uncertainty` argument."
+                      name: "probability"
+                      description: "The probability of the predicted labels. Override using the `--output_obs_probability` argument."
                       required: false
                   obsm:
                     - type: "double"
-                      name: "X_integrated_scanvi"
+                      name: "X_scvi"
                       description: 
                         The embedding used for the classifier's inference. Could have any name,
                         specified by `input_obsm_features` argument."
                       required: false
-                  uns:
-                    - type: "string"
-                      name: "parameters"
-                      example: labels_tranfer
-                      description: Additional information about the parameters used for the label transfer.
-                      required: true
         - name: "--output_obs_predictions"
           type: string
           example: 
@@ -133,17 +133,19 @@
             In which `.obs` slots to store the predicted information.
             If provided, must have the same length as `--reference_obs_targets`.
             If empty, will default to the `reference_obs_targets` combined with the `"_pred"` suffix.
-        - name: "--output_obs_uncertainty"
+        - name: "--output_obs_probability"
           type: string
           example: 
           required: false
           multiple: true
           description: |
-            In which `.obs` slots to store the uncertainty of the predictions.
+            In which `.obs` slots to store the probability of the predictions.
             If provided, must have the same length as `--reference_obs_targets`.
-            If empty, will default to the `reference_obs_targets` combined with the `"_uncertainty"` suffix.
-        - name: "--output_uns_parameters"
+            If empty, will default to the `reference_obs_targets` combined with the `"_probability"` suffix.
+        - name: "--output_compression"
           type: string
-          default: "labels_transfer"
+          description: |
+            The compression format to be used on the output h5mu object.
+          choices: ["gzip", "lzf"]
           required: false
-          description: "The `.uns` key to store additional information about the parameters used for the label transfer."
\ No newline at end of file
+          example: "gzip"
diff --git a/src/labels_transfer/knn/README.md b/src/labels_transfer/knn/README.md
deleted file mode 100644
index 44d9209d2c0..00000000000
--- a/src/labels_transfer/knn/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# KNN Labels Transfer
-
-This component transfers labels from a reference dataset to an query using a weighted K-Nearest Neighbors classification approach based on aproximate neighbors search of [PyNNDescent](https://pynndescent.readthedocs.io/en/latest/how_to_use_pynndescent.html).
-
-# Input
-
-- `.h5mu` files for reference and query as specified in [config](./config.vsh.yaml)
-- List of targets (labels) to predict from `.obs` slot of the reference. Common choice for HLCA would be `["ann_level_1", "ann_level_2", "ann_level_3", "ann_level_4", "ann_level_5", "ann_finest_level"]`
-- Number of nearest neighbors to use for prediction. We recommend using 30-50 neighbors for HLCA size dataset
-
-For the description of other input arguments, refer to the [config](./config.vsh.yaml).
-
-# Output
-
-`.h5mu` file with the query dataset that contains the transferred annotations
-- The annotations for each of the `targets` will be stored as new columns in the `.obs` data frame with the suffix `_pred` by default
-- The uncertainty of the prediction for each target will be stored as another column in `.obs` with the suffix `_uncertainty`
-- `labels_transfer` entry is added to the `.uns` dictionary of the output file, which contains information about the method used and its parameters. Keys of this dictionary are labels from `targets` list. Values are dictionaries with the following keys:
-
-| Key         | Description                   |
-|-------------|------------------------|
-| method      | Always the string "KNN_pynndescent" |
-| n_neighbors | Integer indicating the number of neighbors used for the prediction |
-| reference   | File or a link used as `reference` input argument |
\ No newline at end of file
diff --git a/src/labels_transfer/knn/config.vsh.yaml b/src/labels_transfer/knn/config.vsh.yaml
deleted file mode 100644
index b6191067186..00000000000
--- a/src/labels_transfer/knn/config.vsh.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-name: knn
-namespace: "labels_transfer"
-description: "Performs label transfer from reference to query using KNN classifier"
-info:
-  method_id: KNN_pynndescent
-authors:
-  - __merge__: /src/authors/vladimir_shitov.yaml
-    roles: [ author ]
-__merge__: ../api/common_arguments.yaml
-argument_groups:
-  - name: "Learning parameters"
-    arguments:
-      - name: "--n_neighbors"
-        alternatives: ["-k"]
-        type: integer
-        description: "Number of nearest neighbors to use for classification"
-        required: true
-resources:
-  - type: python_script
-    path: script.py
-  - path: ../utils/helper.py
-  - path: ../../utils/setup_logger.py
-test_resources:
-  - type: python_script
-    path: test.py
-  - path: /resources_test/annotation_test_data/
-  - path: /resources_test/pbmc_1k_protein_v3/
-
-engines:
-- type: docker
-  image: python:3.10-slim
-  setup:
-    - type: apt
-      packages: 
-        - procps
-        - git
-    - type: python
-      __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
-    - type: apt
-      packages:
-        - libopenblas-dev
-        - liblapack-dev
-        - gfortran
-    - type: python
-      __merge__: [/src/base/requirements/scanpy.yaml, .]
-      packages:
-        - pynndescent~=0.5.8
-        - numba~=0.56.4
-        - numpy~=1.23.5
-  test_setup:
-    - type: python
-      __merge__: [ /src/base/requirements/viashpy.yaml, .]
-
-runners:
-- type: executable
-- type: nextflow
-  directives:
-    label: [highmem, highcpu]
diff --git a/src/labels_transfer/knn/script.py b/src/labels_transfer/knn/script.py
deleted file mode 100644
index ae4eceba726..00000000000
--- a/src/labels_transfer/knn/script.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import sys
-import warnings
-
-import mudata
-import numpy as np
-import scanpy as sc
-from scipy.sparse import issparse
-import pynndescent
-import numba
-
-
-## VIASH START
-par = {
-    "input": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
-    "modality": "rna",
-    "input_obsm_features": "X_integrated_scanvi",
-    "reference": "https://zenodo.org/record/6337966/files/HLCA_emb_and_metadata.h5ad",
-    "reference_obsm_features": "X_integrated_scanvi",
-    "reference_obs_targets": ["ann_level_1", "ann_level_2", "ann_level_3", "ann_level_4", "ann_level_5", "ann_finest_level"],
-    "output": "foo.h5mu",
-    "output_obs_predictions": None,
-    "output_obs_uncertainty": None,
-    "output_uns_parameters": "labels_transfer",
-    "n_neighbors": 1
-}
-meta = {
-    "resources_dir": "src/labels_transfer/utils"
-}
-## VIASH END
-
-sys.path.append(meta["resources_dir"])
-from helper import check_arguments, get_reference_features, get_query_features
-# START TEMPORARY WORKAROUND setup_logger
-# reason: resources aren't available when using Nextflow fusion
-# from setup_logger import setup_logger
-def setup_logger():
-    import logging
-    from sys import stdout
-
-    logger = logging.getLogger()
-    logger.setLevel(logging.INFO)
-    console_handler = logging.StreamHandler(stdout)
-    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
-    console_handler.setFormatter(logFormatter)
-    logger.addHandler(console_handler)
-
-    return logger
-# END TEMPORARY WORKAROUND setup_logger
-
-@numba.njit
-def weighted_prediction(weights, ref_cats):
-    """Get highest weight category."""
-    N = len(weights)
-    predictions = np.zeros((N,), dtype=ref_cats.dtype)
-    uncertainty = np.zeros((N,))
-    for i in range(N):
-        obs_weights = weights[i]
-        obs_cats = ref_cats[i]
-        best_prob = 0
-        for c in np.unique(obs_cats):
-            cand_prob = np.sum(obs_weights[obs_cats == c])
-            if cand_prob > best_prob:
-                best_prob = cand_prob
-                predictions[i] = c
-                uncertainty[i] = max(1 - best_prob, 0)
-
-    return predictions, uncertainty
-
-def distances_to_affinities(distances):
-    stds = np.std(distances, axis=1)
-    stds = (2.0 / stds) ** 2
-    stds = stds.reshape(-1, 1)
-    distances_tilda = np.exp(-np.true_divide(distances, stds))
-
-    return distances_tilda / np.sum(distances_tilda, axis=1, keepdims=True)
-
-def main(par):
-    logger = setup_logger()
-
-    logger.info("Checking arguments")
-    par = check_arguments(par)
-
-    logger.info("Reading input (query) data")
-    mdata = mudata.read(par["input"])
-    adata = mdata.mod[par["modality"]]
-
-    logger.info("Reading reference data")
-    adata_reference = sc.read(par["reference"], backup_url=par["reference"])
-
-    # fetch feature data
-    train_data = get_reference_features(adata_reference, par, logger)
-    query_data = get_query_features(adata, par, logger)
-
-    # pynndescent does not support sparse matrices
-    if issparse(train_data):
-        warnings.warn("Converting sparse matrix to dense. This may consume a lot of memory.")
-        train_data = train_data.toarray()
-
-    logger.debug(f"Shape of train data: {train_data.shape}")
-
-    logger.info("Building NN index")
-    ref_nn_index = pynndescent.NNDescent(train_data, n_neighbors=par["n_neighbors"])
-    ref_nn_index.prepare()
-
-    ref_neighbors, ref_distances = ref_nn_index.query(query_data, k=par["n_neighbors"])
-
-    weights = distances_to_affinities(ref_distances)
-
-    output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {})
-
-    # for each annotation level, get prediction and uncertainty
-    
-    for obs_tar, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]):
-        logger.info(f"Predicting labels for {obs_tar}")
-        ref_cats = adata_reference.obs[obs_tar].cat.codes.to_numpy()[ref_neighbors]
-        prediction, uncertainty = weighted_prediction(weights, ref_cats)
-        prediction = np.asarray(adata_reference.obs[obs_tar].cat.categories)[prediction]
-        
-        adata.obs[obs_pred], adata.obs[obs_unc] = prediction, uncertainty
-        
-        # Write information about labels transfer to uns
-        output_uns_parameters[obs_tar] = {
-            "method": "KNN_pynndescent",
-            "n_neighbors": par["n_neighbors"],
-            "reference": par["reference"]
-        }
-
-    adata.uns[par["output_uns_parameters"]] = output_uns_parameters
-
-    mdata.mod[par['modality']] = adata
-    mdata.update()
-    mdata.write_h5mu(par['output'].strip())
-
-if __name__ == "__main__":
-    main(par)
diff --git a/src/labels_transfer/knn/test.py b/src/labels_transfer/knn/test.py
deleted file mode 100644
index d986631dba3..00000000000
--- a/src/labels_transfer/knn/test.py
+++ /dev/null
@@ -1,80 +0,0 @@
-import pytest
-from pathlib import Path
-import anndata
-import mudata
-import numpy as np
-
-## VIASH START
-meta = {
-    'executable': './target/executable/labels_transfer/knn/knn',
-    'resources_dir': './resources_test/'
-}
-## VIASH END
-
-reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5ad"
-input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
-
-@pytest.fixture
-def test_args(tmp_path, request):
-    obsm_features, obs_targets, output_uns_parameters = request.param
-
-    # read reference
-    tempfile_reference_file = tmp_path / "reference.h5ad"
-    reference_adata = anndata.read_h5ad(reference_file)
-
-    # generate reference obs targets
-    for i, target in enumerate(obs_targets):
-        class_names = [str(idx) for idx in range(i + 2)]  # e.g. ["0", "1", "2"], the higher the level, the more the classes
-        reference_adata.obs[target] = np.random.choice(class_names, size=reference_adata.n_obs)
-
-    # read input query
-    tempfile_input_file = tmp_path / "input.h5mu"
-    input_data = mudata.read_h5mu(input_file)
-    adata = input_data.mod["rna"]
-    
-    # generate features
-    reference_adata.obsm[obsm_features] = np.random.normal(size=(reference_adata.n_obs, 30))
-    adata.obsm[obsm_features] = np.random.normal(size=(adata.n_obs, 30))
-
-    # write files
-    reference_adata.write(tempfile_reference_file.name)
-    input_data.write(tempfile_input_file.name)
-
-    return tempfile_reference_file, reference_adata, tempfile_input_file, adata, obsm_features, obs_targets, output_uns_parameters
-
-@pytest.mark.parametrize("test_args", [("X_integrated_scvi", ["celltype"], None), ("X_int", ["ann_level_1", "ann_level_2", "ann_level_3"], "lab_tran")], indirect=True)
-def test_label_transfer(run_component, test_args):
-    tempfile_reference_file, reference_adata, tempfile_input_file, query_adata, obsm_features, obs_targets, output_uns_parameters = test_args
-
-    args = [
-        "--input", tempfile_input_file.name,
-        "--modality", "rna",
-        "--input_obsm_features", obsm_features,
-        "--reference", tempfile_reference_file.name,
-        "--reference_obsm_features", obsm_features,
-        "--reference_obs_targets", ";".join(obs_targets),
-        "--output", "output.h5mu",
-        "--n_neighbors", "5"
-    ]
-
-    if output_uns_parameters is not None:
-        args.extend(["--output_uns_parameters", output_uns_parameters])
-
-    run_component(args)
-
-    assert Path("output.h5mu").is_file()
-
-    output_data = mudata.read_h5mu("output.h5mu")
-
-    exp_uns = "labels_transfer" if output_uns_parameters is None else output_uns_parameters
-
-    for target in obs_targets:
-        assert f"{target}_pred" in output_data.mod["rna"].obs, f"Predictions are missing from output\noutput: {output_data.mod['rna'].obs}"
-        assert f"{target}_uncertainty" in output_data.mod["rna"].obs, f"Uncertainties are missing from output\noutput: {output_data.mod['rna'].obs}"
-        assert exp_uns in output_data.mod["rna"].uns, f"Parameters are missing from output\noutput: {output_data.mod['rna'].uns}"
-        assert target in output_data.mod["rna"].uns[exp_uns], f"Parameters are missing from output\noutput: {output_data.mod['rna'].uns}"
-        assert output_data.mod["rna"].uns[exp_uns][target].get("method") == "KNN_pynndescent", f"Wrong method in parameters\noutput: {output_data.mod['rna'].uns}"
-        assert output_data.mod["rna"].uns[exp_uns][target].get("n_neighbors") == 5, f"Wrong number of neighbors in parameters\noutput: {output_data.mod['rna'].uns}"
-
-if __name__ == '__main__':
-    exit(pytest.main([__file__]))
\ No newline at end of file
diff --git a/src/labels_transfer/pynndescent_knn/config.vsh.yaml b/src/labels_transfer/pynndescent_knn/config.vsh.yaml
new file mode 100644
index 00000000000..7f1651f8a54
--- /dev/null
+++ b/src/labels_transfer/pynndescent_knn/config.vsh.yaml
@@ -0,0 +1,67 @@
+name: pynndescent_knn
+namespace: "labels_transfer"
+description: |
+  This component generates a neighborhood graph based using the PyNNDescentTransformer, followed by classification using a k-nearest neighborhood vote.
+authors:
+  - __merge__: /src/authors/dorien_roosen.yaml
+    roles: [ maintainer, author ]
+  - __merge__: /src/authors/vladimir_shitov.yaml
+    roles: [ author ]
+
+__merge__: ../api/common_arguments.yaml
+
+argument_groups:
+
+  - name: KNN label transfer arguments
+    arguments:
+      - name: "--weights"
+        type: string
+        choices: ["uniform", "distance", "gaussian"]
+        default: "uniform"
+        description: |
+          Weight function used in prediction. Possible values are:
+          - `uniform` - all points in each neighborhood are weighted equally 
+          - `distance` - weight points by the inverse of their distance
+          - `gaussian` - weight points by the sum of their Gaussian kernel similarities to each sample
+      - name: "--n_neighbors"
+        type: integer
+        min: 5
+        default: 15
+        description: |
+          The number of neighbors to use in k-neighbor graph structure used for fast approximate nearest neighbor search with PyNNDescent. 
+          Larger values will result in more accurate search results at the cost of computation time.
+resources:
+  - type: python_script
+    path: script.py
+  - path: ../utils/helper.py
+
+test_resources:
+  - type: python_script
+    path: test.py
+  - path: /resources_test/annotation_test_data/
+  - path: /resources_test/pbmc_1k_protein_v3/
+
+engines:
+  - type: docker
+    image: python:3.12
+    setup:
+      - type: apt
+        packages: 
+          - procps
+          - pkg-config
+          - libhdf5-dev
+      - type: python
+        __merge__: /src/base/requirements/anndata_mudata.yaml
+      - type: python
+        packages:
+          - pynndescent~=0.5.10
+          - numpy<2
+    test_setup:
+      - type: python
+        __merge__: [ /src/base/requirements/viashpy.yaml ]
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [highmem, highcpu]
diff --git a/src/labels_transfer/pynndescent_knn/script.py b/src/labels_transfer/pynndescent_knn/script.py
new file mode 100644
index 00000000000..18773fc582a
--- /dev/null
+++ b/src/labels_transfer/pynndescent_knn/script.py
@@ -0,0 +1,119 @@
+import mudata as mu
+import numpy as np
+import sys
+from pynndescent import PyNNDescentTransformer
+from sklearn.neighbors import KNeighborsClassifier
+
+## VIASH START
+par = {
+    "input": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
+    "modality": "rna",
+    "input_obsm_features": None,
+    "reference": "resources_test/annotation_test_data/TS_Blood_filtered.h5mu",
+    "reference_obsm_features": None,
+    "reference_obs_targets": ["cell_type"],
+    "output": "foo_distance.h5mu",
+    "output_obs_predictions": None,
+    "output_obs_probability": None,
+    "output_uns_parameters": "labels_transfer",
+    "output_compression": None,
+    "weights": "distance",
+    "n_neighbors": 15
+}
+meta = {
+    "resources_dir": "src/labels_transfer/utils"
+}
+## VIASH END
+
+sys.path.append(meta["resources_dir"])
+from helper import check_arguments, get_reference_features, get_query_features
+
+
+def setup_logger():
+    import logging
+    from sys import stdout
+
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+
+    return logger
+# END TEMPORARY WORKAROUND setup_logger
+
+
+def distances_to_affinities(distances):
+    # Apply Gaussian kernel to distances
+    stds = np.std(distances, axis=1)
+    stds = (2.0 / stds) ** 2
+    stds = stds.reshape(-1, 1)
+    distances_tilda = np.exp(-np.true_divide(distances, stds))
+
+    # normalize the distances_tilda
+    # if the sum of a row of the distances tilda equals 0,
+    # set normalized distances for that row to 1
+    # else divide the row values by the value of the sum of the row
+    distances_tilda_normalized = np.where(
+        np.sum(distances_tilda, axis=1, keepdims=True) == 0,
+        1,
+        distances_tilda / np.sum(distances_tilda, axis=1, keepdims=True)
+    )
+    return distances_tilda_normalized
+
+
+logger = setup_logger()
+
+# Reading in data
+logger.info(f"Reading in query dataset {par['input']} and reference datasets {par['reference']}")
+q_mdata = mu.read_h5mu(par["input"])
+q_adata = q_mdata.mod[par["modality"]]
+
+r_mdata = mu.read_h5mu(par["reference"])
+r_adata = r_mdata.mod[par["modality"]]
+
+# check arguments
+logger.info("Checking arguments")
+par = check_arguments(par)
+
+# Generating training and inference data
+logger.info("Generating training and inference data")
+train_X = get_reference_features(r_adata, par, logger)
+inference_X = get_query_features(q_adata, par, logger)
+
+neighbors_transformer = PyNNDescentTransformer(
+    n_neighbors=par["n_neighbors"],
+    parallel_batch_queries=True,
+)
+neighbors_transformer.fit(train_X)
+
+# Square sparse matrix with distances to n neighbors in reference data
+reference_neighbors = neighbors_transformer.transform(inference_X)
+query_neighbors = neighbors_transformer.transform(train_X)
+
+# For each target, train a classifier and predict labels
+for obs_tar, obs_pred, obs_proba in zip(par["reference_obs_targets"],  par["output_obs_predictions"], par["output_obs_probability"]):
+    logger.info(f"Predicting labels for {obs_tar}")
+
+    weights_dict = {
+        "uniform": "uniform",
+        "distance": "distance",
+        "gaussian": distances_to_affinities
+    }
+
+    logger.info(f"Using KNN classifier with {par['weights']} weights")
+    train_y = r_adata.obs[obs_tar].to_numpy()
+    classifier = KNeighborsClassifier(n_neighbors=par["n_neighbors"], metric="precomputed", weights=weights_dict[par["weights"]])
+    classifier.fit(X=query_neighbors, y=train_y)
+    predicted_labels = classifier.predict(reference_neighbors)
+    probabilities = classifier.predict_proba(reference_neighbors).max(axis=1)
+
+    # save_results
+    logger.info(f"Saving predictions to {obs_pred} and probabilities to {obs_proba} in obs")
+    q_adata.obs[obs_pred] = predicted_labels
+    q_adata.obs[obs_proba] = probabilities
+
+logger.info(f"Saving output data to {par['output']}")
+q_mdata.mod[par['modality']] = q_adata
+q_mdata.write_h5mu(par['output'], compression=par['output_compression'])
diff --git a/src/labels_transfer/pynndescent_knn/test.py b/src/labels_transfer/pynndescent_knn/test.py
new file mode 100644
index 00000000000..cabc57ad6e3
--- /dev/null
+++ b/src/labels_transfer/pynndescent_knn/test.py
@@ -0,0 +1,70 @@
+import pytest
+from pathlib import Path
+import anndata as ad
+import mudata as mu
+
+## VIASH START
+meta = {
+    'resources_dir': './resources_test/'
+}
+## VIASH END
+
+reference_h5ad_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5ad"
+# convert reference to h5mu
+reference_adata = ad.read_h5ad(reference_h5ad_file)
+reference_mdata = mu.MuData({"rna": reference_adata})
+reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5mu"
+reference_mdata.write_h5mu(reference_file)
+input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
+
+
+def test_label_transfer(run_component):
+
+    args = [
+        "--input", input_file,
+        "--modality", "rna",
+        "--reference", reference_file,
+        "--reference_obs_targets", "cell_type",
+        "--output", "output.h5mu",
+        "--n_neighbors", "5"
+    ]
+
+    run_component(args)
+
+    assert Path("output.h5mu").is_file()
+
+    output_data = mu.read_h5mu("output.h5mu")
+
+    assert "cell_type_pred" in output_data.mod["rna"].obs, f"Predictions cell_type_pred is missing from output\noutput: {output_data.mod['rna'].obs}"
+    assert "cell_type_probability" in output_data.mod["rna"].obs, f"Uncertainties cell_type_probability is missing from output\noutput: {output_data.mod['rna'].obs}"
+
+
+@pytest.mark.parametrize("weights", ["uniform", "distance", "gaussian"])
+def test_label_transfer_prediction_columns(run_component, weights):
+
+    output = f"output_{weights}.h5mu"
+
+    args = [
+        "--input", input_file,
+        "--modality", "rna",
+        "--reference", reference_file,
+        "--reference_obs_targets", "cell_type",
+        "--weights", weights,
+        "--output", output,
+        "--output_obs_probability", "test_probability",
+        "--output_obs_predictions", "test_prediction",
+        "--n_neighbors", "5"
+    ]
+
+    run_component(args)
+
+    assert Path(output).is_file()
+
+    output_data = mu.read_h5mu(output)
+
+    assert "test_prediction" in output_data.mod["rna"].obs, f"Predictions test_prediction is missing from output\noutput: {output_data.mod['rna'].obs}"
+    assert "test_probability" in output_data.mod["rna"].obs, f"Uncertainties test_probability is missing from output\noutput: {output_data.mod['rna'].obs}"
+
+
+if __name__ == '__main__':
+    exit(pytest.main([__file__]))
diff --git a/src/labels_transfer/utils/helper.py b/src/labels_transfer/utils/helper.py
index a90bf59efdb..be879425afe 100644
--- a/src/labels_transfer/utils/helper.py
+++ b/src/labels_transfer/utils/helper.py
@@ -1,3 +1,5 @@
+from scipy.sparse import issparse
+
 def check_arguments(par):
     # check output .obs predictions
     if not par["output_obs_predictions"]:
@@ -5,9 +7,9 @@ def check_arguments(par):
     assert len(par["output_obs_predictions"]) == len(par["reference_obs_targets"]), f"Number of output_obs_predictions must match number of reference_obs_targets\npar: {par}"
 
     # check output .obs uncertainty
-    if not par["output_obs_uncertainty"]:
-        par["output_obs_uncertainty"] = [ t + "_uncertainty" for t in par["reference_obs_targets"]]
-    assert len(par["output_obs_uncertainty"]) == len(par["reference_obs_targets"]), f"Number of output_obs_uncertainty must match number of reference_obs_targets\npar: {par}"
+    if not par["output_obs_probability"]:
+        par["output_obs_probability"] = [ t + "_probability" for t in par["reference_obs_targets"]]
+    assert len(par["output_obs_probability"]) == len(par["reference_obs_targets"]), f"Number of output_obs_probability must match number of reference_obs_targets\npar: {par}"
 
     return par
 
@@ -29,4 +31,4 @@ def get_query_features(adata, par, logger):
         logger.info(f"Using .obsm[{par['input_obsm_features']}] of query data")
         query_data = adata.obsm[par["input_obsm_features"]]
 
-    return query_data
\ No newline at end of file
+    return query_data
diff --git a/src/labels_transfer/xgboost/config.vsh.yaml b/src/labels_transfer/xgboost/config.vsh.yaml
index 34b4fabe3ee..4baf79f07d7 100644
--- a/src/labels_transfer/xgboost/config.vsh.yaml
+++ b/src/labels_transfer/xgboost/config.vsh.yaml
@@ -30,6 +30,11 @@ argument_groups:
         description: Output directory for model
         direction: output
         required: false
+      - name: "--output_uns_parameters"
+        type: string
+        default: "xgboost_parameters"
+        description: The key in `uns` slot of the output AnnData object to store the parameters of the XGBoost classifier.
+        required: false
   - name: "Learning parameters"
     arguments:
       - name: "--learning_rate"
diff --git a/src/labels_transfer/xgboost/script.py b/src/labels_transfer/xgboost/script.py
index 6cc11aa22d5..7306975c566 100644
--- a/src/labels_transfer/xgboost/script.py
+++ b/src/labels_transfer/xgboost/script.py
@@ -25,7 +25,7 @@
     "reference_obs_targets": ["ann_level_1", "ann_level_2", "ann_level_3", "ann_level_4", "ann_level_5", "ann_finest_level"],
     "output": "foo.h5mu",
     "output_obs_predictions": None,
-    "output_obs_uncertainty": None,
+    "output_obs_probability": None,
     "output_uns_parameters": "labels_transfer",
     "force_retrain": False,
     "use_gpu": True,
@@ -256,27 +256,27 @@ def project_labels(
     query_dataset,
     cell_type_classifier_model: xgb.XGBClassifier,
     annotation_column_name='label_pred',
-    uncertainty_column_name='label_uncertainty',
-    uncertainty_thresh=None  # Note: currently not passed to predict function
+    probability_column_name='label_probability',
+    probability_thresh=None  # Note: currently not passed to predict function
 ):
     """
-    A function that projects predicted labels onto the query dataset, along with uncertainty scores.
+    A function that projects predicted labels onto the query dataset, along with probability estimations.
     Performs in-place update of the adata object, adding columns to the `obs` DataFrame.
 
     Input:
         * `query_dataset`: The query `AnnData` object
         * `model_file`: Path to the classification model file
         * `prediction_key`: Column name in `adata.obs` where to store the predicted labels
-        * `uncertainty_key`: Column name in `adata.obs` where to store the uncertainty scores
-        * `uncertainty_thresh`: The uncertainty threshold above which we call a cell 'Unknown'
+        * `probability_key`: Column name in `adata.obs` where to store the labels probabilities
+        * `probability_thresh`: The probability threshold below which we call a cell 'Unknown'
 
     Output:
         Nothing is output, the passed anndata is modified inplace
 
     """
 
-    if (uncertainty_thresh is not None) and (uncertainty_thresh < 0 or uncertainty_thresh > 1):
-        raise ValueError(f'`uncertainty_thresh` must be `None` or between 0 and 1.')
+    if (probability_thresh is not None) and (probability_thresh < 0 or probability_thresh > 1):
+        raise ValueError(f'`probability_thresh` must be `None` or between 0 and 1.')
 
     query_data = get_query_features(query_dataset, par, logger)
 
@@ -288,14 +288,14 @@ def project_labels(
 
     # Format probabilities
     df_probs = pd.DataFrame(probs, columns=cell_type_classifier_model.classes_, index=query_dataset.obs_names)
-    query_dataset.obs[uncertainty_column_name] = 1 - df_probs.max(1)
+    query_dataset.obs[probability_column_name] = df_probs.max(1)
 
     # Note: this is here in case we want to propose a set of values for the user to accept to seed the
     #       manual curation of predicted labels
-    if uncertainty_thresh is not None:
+    if probability_thresh is not None:
         logger.info("Marking uncertain predictions")
         query_dataset.obs[annotation_column_name + "_filtered"] = [
-            val if query_dataset.obs[uncertainty_column_name][i] < uncertainty_thresh
+            val if query_dataset.obs[probability_column_name][i] >= probability_thresh
             else "Unknown" for i, val in enumerate(query_dataset.obs[annotation_column_name])]
 
     return query_dataset
@@ -306,7 +306,7 @@ def predict(
     cell_type_classifier_model_path,
     annotation_column_name: str,
     prediction_column_name: str,
-    uncertainty_column_name: str,
+    probability_column_name: str,
     models_info,
     use_gpu: bool = False
 ) -> pd.DataFrame:
@@ -328,7 +328,7 @@ def predict(
     project_labels(query_dataset, 
                    cell_type_classifier_model, 
                    annotation_column_name=prediction_column_name, 
-                   uncertainty_column_name=uncertainty_column_name)
+                   probability_column_name=probability_column_name)
 
     logger.info("Converting labels from numbers to classes")
     labels_encoder = LabelEncoder()
@@ -342,10 +342,11 @@ def main(par):
     logger.info("Checking arguments")
     par = check_arguments(par)
 
-    mdata = mudata.read(par["input"].strip())
-    adata = mdata.mod[par["modality"]]
+    mdata_query = mudata.read(par["input"].strip())
+    adata_query = mdata_query.mod[par["modality"]]
 
-    adata_reference = sc.read(par["reference"], backup_url=par["reference"])
+    mdata_reference = mudata.read(par["reference"])
+    adata_reference = mdata_reference.mod[par["modality"]]
 
     # If classifiers for targets are in the model_output directory, simply open them and run (unless `retrain` != True)
     # If some classifiers are missing, train and save them first
@@ -363,19 +364,19 @@ def main(par):
     build_ref_classifiers(adata_reference, targets_to_train, model_path=par["model_output"], 
                           gpu=par["use_gpu"], eval_verbosity=par["verbosity"])
 
-    output_uns_parameters = adata.uns.get(par["output_uns_parameters"], {})
+    output_uns_parameters = adata_query.uns.get(par["output_uns_parameters"], {})
 
     with open(par["model_output"] + "/model_info.json", "r") as f:
         models_info = json.loads(f.read())
 
-    for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_uncertainty"]):
+    for obs_target, obs_pred, obs_unc in zip(par["reference_obs_targets"], par["output_obs_predictions"], par["output_obs_probability"]):
         logger.info(f"Predicting {obs_target}")
 
-        adata = predict(query_dataset=adata,
+        adata_query = predict(query_dataset=adata_query,
                         cell_type_classifier_model_path=os.path.join(par["model_output"], "classifier_" + obs_target + ".xgb"),
                         annotation_column_name=obs_target, 
                         prediction_column_name=obs_pred,
-                        uncertainty_column_name=obs_unc,
+                        probability_column_name=obs_unc,
                         models_info=models_info,
                         use_gpu=par["use_gpu"])
         
@@ -386,14 +387,14 @@ def main(par):
                 **training_params
             }
 
-    adata.uns[par["output_uns_parameters"]] = output_uns_parameters
+    adata_query.uns[par["output_uns_parameters"]] = output_uns_parameters
 
     logger.info("Updating mdata")
-    mdata.mod[par['modality']] = adata
-    mdata.update()
+    mdata_query.mod[par['modality']] = adata_query
+    mdata_query.update()
 
     logger.info("Writing output")
-    mdata.write_h5mu(par['output'].strip())
+    mdata_query.write_h5mu(par['output'].strip())
 
 if __name__ == "__main__":
     main(par)
diff --git a/src/labels_transfer/xgboost/test.py b/src/labels_transfer/xgboost/test.py
index 3a81bbfe4e9..3dcfdfef778 100644
--- a/src/labels_transfer/xgboost/test.py
+++ b/src/labels_transfer/xgboost/test.py
@@ -12,7 +12,7 @@
 }
 ## VIASH END
 
-reference_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5ad"
+reference_h5ad_file = f"{meta['resources_dir']}/annotation_test_data/TS_Blood_filtered.h5ad"
 input_file = f"{meta['resources_dir']}/pbmc_1k_protein_v3/pbmc_1k_protein_v3_filtered_feature_bc_matrix.h5mu"
 
 
@@ -20,11 +20,11 @@
 def test_args(tmp_path, request):
     obsm_features, obs_targets, output_uns_parameters = request.param
 
-    tempfile_reference_file = tmp_path / "reference.h5ad"
+    tempfile_reference_file = tmp_path / "reference.h5mu"
     tempfile_input_file = tmp_path / "input.h5mu"
 
     # read reference
-    reference_adata = anndata.read_h5ad(reference_file)
+    reference_adata = anndata.read_h5ad(reference_h5ad_file)
 
     # generate reference obs targets
     for i, target in enumerate(obs_targets):
@@ -39,8 +39,9 @@ def test_args(tmp_path, request):
     reference_adata.obsm[obsm_features] = np.random.normal(size=(reference_adata.n_obs, 30))
     input_rna_adata.obsm[obsm_features] = np.random.normal(size=(input_rna_adata.n_obs, 30))
 
+    reference_mdata = mudata.MuData({"rna": reference_adata})
     # write files
-    reference_adata.write_h5ad(str(tempfile_reference_file))
+    reference_mdata.write_h5mu(str(tempfile_reference_file))
     input_mudata.write_h5mu(str(tempfile_input_file))
 
     return tempfile_reference_file, reference_adata, tempfile_input_file, input_rna_adata, obsm_features, obs_targets, output_uns_parameters
@@ -72,11 +73,11 @@ def test_label_transfer(run_component, test_args):
 
     output_data = mudata.read_h5mu("output.h5mu")
 
-    exp_uns = "labels_transfer" if output_uns_parameters is None else output_uns_parameters
+    exp_uns = "xgboost_parameters" if output_uns_parameters is None else output_uns_parameters
 
     for target in obs_targets:
         assert f"{target}_pred" in output_data.mod["rna"].obs, f"Predictions are missing from output\noutput: {output_data.mod['rna'].obs}"
-        assert f"{target}_uncertainty" in output_data.mod["rna"].obs, f"Uncertainties are missing from output\noutput: {output_data.mod['rna'].obs}"
+        assert f"{target}_probability" in output_data.mod["rna"].obs, f"Probabilities are missing from output\noutput: {output_data.mod['rna'].obs}"
         assert exp_uns in output_data.mod["rna"].uns, f"Parameters are missing from output\noutput: {output_data.mod['rna'].uns}"
         assert target in output_data.mod["rna"].uns[exp_uns], f"Parameters are missing from output\noutput: {output_data.mod['rna'].uns}"
         assert output_data.mod["rna"].uns[exp_uns][target].get("method") == "XGBClassifier", f"Wrong method in parameters\noutput: {output_data.mod['rna'].uns}"
@@ -126,7 +127,7 @@ def test_retraining(run_component, test_args, tmp_path):
 
     for target in obs_targets:
         assert f"{target}_pred" in output_data.mod["rna"].obs, f"Predictions are missing from output\noutput: {output_data.mod['rna'].obs}"
-        assert f"{target}_uncertainty" in output_data.mod["rna"].obs, f"Uncertainties are missing from output\noutput: {output_data.mod['rna'].obs}"
+        assert f"{target}_probability" in output_data.mod["rna"].obs, f"Probabilities are missing from output\noutput: {output_data.mod['rna'].obs}"
         assert output_uns_parameters in output_data.mod["rna"].uns, f"Parameters are missing from output\noutput: {output_data.mod['rna'].uns}"
         assert target in output_data.mod["rna"].uns[output_uns_parameters], f"Parameters are missing from output\noutput: {output_data.mod['rna'].uns}"
         assert output_data.mod["rna"].uns[output_uns_parameters][target].get("method") == "XGBClassifier", f"Wrong method in parameters\noutput: {output_data.mod['rna'].uns}"