diff --git a/.gitignore b/.gitignore index 250f8f9..ccf9ec1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,23 @@ local/ private/ +# Data +*.json +*.yml +*.yaml +*.csv +*.npy +*.npz + +# Plots +*.eps +*.gif +*.jpg +*.jpeg +*.pdf +*.png +*.svg + # Editors .idea/ .vscode/ diff --git a/labelshift/algorithms/api.py b/labelshift/algorithms/api.py index a6a18f5..db1281d 100644 --- a/labelshift/algorithms/api.py +++ b/labelshift/algorithms/api.py @@ -8,10 +8,12 @@ from labelshift.algorithms.bbse import BlackBoxShiftEstimator from labelshift.algorithms.classify_and_count import ClassifyAndCount from labelshift.algorithms.ratio_estimator import InvariantRatioEstimator +from labelshift.interfaces.point_estimators import SummaryStatistic __all__ = [ "BlackBoxShiftEstimator", "ClassifyAndCount", "DiscreteCategoricalMAPEstimator", "InvariantRatioEstimator", + "SummaryStatistic", ] diff --git a/labelshift/algorithms/bayesian_discrete.py b/labelshift/algorithms/bayesian_discrete.py index 418f89b..ef6ff25 100644 --- a/labelshift/algorithms/bayesian_discrete.py +++ b/labelshift/algorithms/bayesian_discrete.py @@ -3,7 +3,7 @@ Proposed in TODO(Pawel): Add citation to pre-print after AISTATS reviews. """ -from typing import cast, NewType, Optional +from typing import cast, NewType, Optional, Union import arviz as az import numpy as np @@ -35,11 +35,31 @@ class SamplingParams(pydantic.BaseModel): ) +def dirichlet_alphas(L: int, alpha: Union[float, ArrayLike]) -> np.ndarray: + """Convenient initialization of alpha (pseudocounts) + parameters of the Dirichlet prior. + + Args: + alpha: either an array of shape (L,) or a float. + If a float, vector (alpha, alpha, ..., alpha) + is created + + Returns: + alphas, shape (L,) + """ + if isinstance(alpha, float): + return np.ones(L) * alpha + else: + alpha = np.asarray(alpha) + assert alpha.shape == (L,) + return alpha + + def build_model( n_y_and_c_labeled: ArrayLike, n_c_unlabeled: ArrayLike, - alpha_p_y_labeled: Optional[ArrayLike] = None, - alpha_p_y_unlabeled: Optional[ArrayLike] = None, + alpha_p_y_labeled: Union[float, ArrayLike] = 1.0, + alpha_p_y_unlabeled: Union[float, ArrayLike] = 1.0, ) -> DiscreteBayesianQuantificationModel: """Builds the discrete Bayesian quantification model, basing on the sufficient statistic of the data. 
@@ -59,15 +79,8 @@ def build_model(
     assert n_y_labeled.shape == (L,)
     assert n_c_unlabeled.shape == (K,)

-    alpha_p_y_labeled = (
-        np.ones(L) if alpha_p_y_labeled is None else np.asarray(alpha_p_y_labeled)
-    )
-    alpha_p_y_unlabeled = (
-        np.ones(L) if alpha_p_y_unlabeled is None else np.asarray(alpha_p_y_unlabeled)
-    )
-
-    assert alpha_p_y_labeled.shape == (L,)
-    assert alpha_p_y_unlabeled.shape == (L,)
+    alpha_p_y_labeled = dirichlet_alphas(L, alpha_p_y_labeled)
+    alpha_p_y_unlabeled = dirichlet_alphas(L, alpha_p_y_unlabeled)

     model = pm.Model()
     with model:
@@ -140,13 +153,16 @@ class DiscreteCategoricalMAPEstimator(pe.SummaryStatisticPrevalenceEstimator):
     """A version of Bayesian quantification
     which finds the Maximum a Posteriori solution."""

-    def __init__(self, max_eval: int = 10_000) -> None:
+    def __init__(
+        self, max_eval: int = 10_000, alpha_unlabeled: Union[float, ArrayLike] = 1.0
+    ) -> None:
         """
         Args:
             max_eval: maximal number of evaluations of the posterior
               during the optimization to find the MAP
+            alpha_unlabeled: Dirichlet prior pseudocounts for P_unlabeled(Y).
+              Either a float (uniform vector) or an array of shape (L,).
         """
         self._max_eval = max_eval
+        self._alpha_unlabeled = alpha_unlabeled

     def estimate_from_summary_statistic(
         self, /, statistic: pe.SummaryStatistic
@@ -155,6 +171,7 @@ def estimate_from_summary_statistic(
         model = build_model(
             n_c_unlabeled=statistic.n_c_unlabeled,
             n_y_and_c_labeled=statistic.n_y_and_c_labeled,
+            alpha_p_y_unlabeled=self._alpha_unlabeled,
         )
         with model:
             optimal = pymc.find_MAP(maxeval=self._max_eval)
diff --git a/labelshift/algorithms/expectation_maximization.py b/labelshift/algorithms/expectation_maximization.py
index 2ff51c3..6286b05 100644
--- a/labelshift/algorithms/expectation_maximization.py
+++ b/labelshift/algorithms/expectation_maximization.py
@@ -14,7 +14,7 @@ def expectation_maximization(
     *,
     initial_prevalences: Optional[ArrayLike] = None,
     max_steps: int = 10000,
-    atol: float = 0.01,
+    tolerance: float = 0.01,
 ) -> np.ndarray:
     """Expectation maximization algorithm, as described in
@@ -24,13 +24,13 @@
     Args:
         predictions: test set probability predictions.
           Shape (n_samples, n_classes).
-        prevalences: prevalences in the training data set.
+        training_prevalences: prevalences in the training data set.
           Shape (n_classes,), (n_classes, 1) or (1, n_classes). Will be normalized.
         initial_prevalences: starting prevalences for optimization.
           If not provided, the training prevalences are used.
           Shape (n_classes,), (n_classes, 1) or (1, n_classes). Will be normalized.
         max_steps: maximal number of iteration steps
-        atol: desired accuracy (for early stopping)
+        tolerance: early-stopping threshold: the iteration stops when the largest
+          absolute change of the prevalence estimate between consecutive steps
+          drops below this value

     Returns:
         test set prevalences, shape (n_classes,).
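A minimal usage sketch of `expectation_maximization` with the renamed `tolerance` argument (the numbers below are illustrative only; in practice `predictions` would come from a classifier's `predict_proba`):

import numpy as np
from labelshift.algorithms.expectation_maximization import expectation_maximization

# Hypothetical soft predictions for three unlabeled examples and two classes.
predictions = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]])
# Class prevalences observed in the training (labeled) data.
training_prevalences = np.array([0.5, 0.5])

# Early stopping now triggers when the largest absolute change between
# consecutive prevalence estimates drops below `tolerance`.
estimate = expectation_maximization(predictions, training_prevalences, tolerance=1e-3)
print(estimate)  # Estimated test-set prevalences, shape (2,)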
@@ -48,6 +48,7 @@ def expectation_maximization(
     test_prevalences = training_prevalences.copy()

     # Iteratively improve the estimate of the test set prevalences
+    converged: bool = False
     for _ in range(max_steps):
         old_prevalences = test_prevalences.copy()

@@ -59,10 +60,12 @@
         ) / len(new_predictions)

         # Check if converged
-        if np.allclose(old_prevalences, test_prevalences, atol=atol, rtol=0):
+        if np.max(np.abs(old_prevalences - test_prevalences)) < tolerance:
+            converged = True
             break

-    warnings.warn(
-        RuntimeWarning(f"Required accuracy not reached in {max_steps} steps.")
-    )
+    if not converged:
+        warnings.warn(
+            RuntimeWarning(f"Required accuracy not reached in {max_steps} steps.")
+        )

     return test_prevalences.ravel()
diff --git a/labelshift/algorithms/gaussian_mixture_model.py b/labelshift/algorithms/gaussian_mixture_model.py
index 03a5d9c..b00797f 100644
--- a/labelshift/algorithms/gaussian_mixture_model.py
+++ b/labelshift/algorithms/gaussian_mixture_model.py
@@ -5,7 +5,7 @@ Note:
     This algorithm models the data in 1D.
 """
-from typing import Optional, Sequence, Tuple
+from typing import Sequence, Tuple, Union

 import numpy as np
 import pymc as pm
@@ -21,7 +21,7 @@ def build_model(
     unlabeled_data: ArrayLike,
     mean_params: Tuple[float, float] = (0.0, 1.0),
     sigma_param: float = 1.0,
-    alpha: Optional[ArrayLike] = None,
+    alpha: Union[float, ArrayLike] = 1.0,
 ) -> pm.Model:
     """Builds a PyMC model for Bayesian quantification for 1D data
     assumed to be sampled from a mixture of normals.
@@ -38,7 +38,8 @@
         mean_params: used to initialize the prior on the component means
         sigma_param: used to initialize the prior on the component sigmas
         alpha: used to initialize the Dirichlet prior on P_unlabeled(Y).
-          Shape (n_components,)
+          Can be an array of shape (n_components,)
+          or a float, so that a uniform vector (alpha, alpha, ...) is used.

     Returns:
         a PyMC model with the following variables:
@@ -51,8 +52,8 @@
     assert unlabeled_data.shape == (len(unlabeled_data),)

     n_y = len(labeled_data)
-    if alpha is None:
-        alpha = np.ones(n_y)
+    if isinstance(alpha, float):
+        alpha = alpha * np.ones(n_y)
     else:
         alpha = np.asarray(alpha)

diff --git a/labelshift/algorithms/ratio_estimator.py b/labelshift/algorithms/ratio_estimator.py
index 346eb53..07099a7 100644
--- a/labelshift/algorithms/ratio_estimator.py
+++ b/labelshift/algorithms/ratio_estimator.py
@@ -41,7 +41,7 @@
 ``H_hat[l, k] = G_hat[l, k] = E_labeled[ g(X)[k] | Y = l] \\in R^{L x (K-1)}.``
 """
-from typing import Tuple
+from typing import Optional, Tuple

 import numpy as np
 from numpy.typing import ArrayLike
@@ -103,9 +103,42 @@ def prevalence_from_vector_and_matrix(

 def calculate_vector_and_matrix_from_predictions(
     unlabeled_predictions: ArrayLike,
     labeled_predictions: ArrayLike,
-) -> None:
-    """This method has not been implemented yet."""
-    raise NotImplementedError
+    labeled_ground_truth: ArrayLike,
+    L: Optional[int] = None,
+    enforce_square: bool = True,
+    restricted: bool = True,
+    rcond: float = 1e-4,
+) -> np.ndarray:
+    """Estimates P_unlabeled(Y) directly from classifier predictions
+    (the "soft" version of the invariant ratio estimator).
+
+    Args:
+        unlabeled_predictions: shape (N', K)
+        labeled_predictions: shape (N, K)
+        labeled_ground_truth: shape (N,). Each entry is in {0, ..., L-1}.
+
+    Returns:
+        estimate of the prevalence vector P_unlabeled(Y), shape (L,)
+ """ + unlabeled_predictions = np.asarray(unlabeled_predictions) + labeled_predictions = np.asarray(labeled_predictions) + labeled_ground_truth = np.asarray(labeled_ground_truth, dtype=int) + + K = unlabeled_predictions.shape[1] + L: int = K if L is None else L + + assert labeled_predictions.shape == (len(labeled_ground_truth), K) + + unlabeled_vector = unlabeled_predictions.mean(axis=0)[: K - 1] # Shape (K - 1,) + labeled_matrix = np.zeros((L, K - 1)) + + for l in range(L): # noqa: E741 ambiguous name variable + index = labeled_ground_truth == l + labeled_matrix[l, :] = labeled_predictions[index, : K - 1].mean(axis=0) + + return prevalence_from_vector_and_matrix( + vector=unlabeled_vector, + matrix=labeled_matrix, + restricted=restricted, + enforce_square=enforce_square, + rcond=rcond, + ) def calculate_vector_and_matrix_from_summary_statistics( @@ -200,4 +233,6 @@ def estimate_from_summary_statistic( return prevalence_from_summary_statistics( n_c_unlabeled=statistic.n_c_unlabeled, n_y_and_c_labeled=statistic.n_y_and_c_labeled, + enforce_square=self._enforce_square, + rcond=self._rcond, ) diff --git a/labelshift/api.py b/labelshift/api.py new file mode 100644 index 0000000..51ff24c --- /dev/null +++ b/labelshift/api.py @@ -0,0 +1,7 @@ +import labelshift.experiments.api as experiments +import labelshift.datasets.api as datasets + +__all__ = [ + "experiments", + "datasets", +] diff --git a/labelshift/datasets/api.py b/labelshift/datasets/api.py new file mode 100644 index 0000000..59624a9 --- /dev/null +++ b/labelshift/datasets/api.py @@ -0,0 +1,20 @@ +from labelshift.datasets.split import ( + IDataset, + n_classes, + SplitDataset, + SplitSpecification, + split_dataset, +) +from labelshift.datasets.discrete_categorical import DiscreteSampler, almost_eye + +__all__ = [ + # `split` submodule + "IDataset", + "n_classes", + "SplitDataset", + "SplitSpecification", + "split_dataset", + # `discrete_categorical` submodule + "DiscreteSampler", + "almost_eye", +] diff --git a/labelshift/datasets/discrete_categorical.py b/labelshift/datasets/discrete_categorical.py index e2a3962..971e464 100644 --- a/labelshift/datasets/discrete_categorical.py +++ b/labelshift/datasets/discrete_categorical.py @@ -1,104 +1,156 @@ """Discrete categorical sampler.""" +import dataclasses import math +from typing import Tuple, Any, Union, Optional + import numpy as np from numpy.typing import ArrayLike from labelshift.interfaces.point_estimators import SummaryStatistic +RNG = Any -class DiscreteSampler: - """Samples from the discrete model P(C|Y).""" - def __init__( - self, p_y_labeled: ArrayLike, p_y_unlabeled: ArrayLike, p_c_cond_y: ArrayLike - ) -> None: +@dataclasses.dataclass +class SummaryMultinomialStatistic: + n_y: np.ndarray + n_c: np.ndarray + n_y_and_c: np.ndarray + + +class SimpleMultinomialSampler: + def __init__(self, p_y: ArrayLike, p_c_cond_y: ArrayLike) -> None: """ Args: - p_y_labeled: P_train(Y) vector, shape (L,) - p_y_unlabeled: P_test(Y) vector, shape (L,) - p_c_cond_y: P(C|Y), shape (L, K) + p_y: P(Y) vector, shape (L,) + p_c_cond_y: P(C|Y) matrix, shape (L, K) """ - self._p_y_labeled = np.asarray(p_y_labeled) - self._p_y_unlabeled = np.asarray(p_y_unlabeled) - self._c_cond_y = np.asarray(p_c_cond_y) + self._p_y = np.asarray(p_y) + self._p_c_cond_y = np.asarray(p_c_cond_y) - assert len(self._c_cond_y.shape) == 2 - self._L = self._c_cond_y.shape[0] - self._K = self._c_cond_y.shape[1] + assert len(self._p_c_cond_y.shape) == 2 + self._L = self._p_c_cond_y.shape[0] + self._K = self._p_c_cond_y.shape[1] - 
assert self._p_y_labeled.shape == (self._L,) - assert self._p_y_unlabeled.shape == (self._L,) + assert self._p_y.shape == (self._L,) - assert np.min(self._p_y_labeled) >= 0 - assert np.min(self._p_y_unlabeled) >= 0 - assert np.min(self._c_cond_y) >= 0 + assert np.min(self._p_y) >= 0 + assert np.min(self._p_c_cond_y) >= 0 - assert math.isclose(np.sum(self._p_y_labeled), 1.0) - assert math.isclose(np.sum(self._p_y_unlabeled), 1.0) + assert math.isclose(np.sum(self._p_y), 1.0) for label in range(self._L): - s = self._c_cond_y[label, :].sum() + s = self._p_c_cond_y[label, :].sum() assert math.isclose(s, 1.0) @property - def p_y_labeled(self) -> np.ndarray: - """P_labeled(Y) vector. Shape (size_Y,)""" - return self._p_y_labeled - - @property - def p_y_unlabeled(self) -> np.ndarray: - """P_unlabeled(Y) vector. Shape (size_Y,)""" - return self._p_y_unlabeled + def p_y(self) -> np.ndarray: + return self._p_y @property def p_c_cond_y(self) -> np.ndarray: - """P(C | Y) matrix, shape (size_Y, size_C)""" - return self._c_cond_y + return self._p_c_cond_y @property - def p_c_unlabeled(self) -> np.ndarray: - """P_unlabeled(C) vector, shape (size_C,)""" - return self._c_cond_y.T @ self._p_y_unlabeled + def p_c(self) -> np.ndarray: + return np.einsum("lk,l->k", self._p_c_cond_y, self._p_y) @property - def p_c_labeled(self) -> np.ndarray: - """P_labeled(C) vector, shape (size_C,)""" - return self._c_cond_y.T @ self._p_y_labeled - - @property - def size_Y(self) -> int: - """How many Y there are.""" + def n_y(self) -> int: return self._L - def size_C(self) -> int: - """How many C there are.""" + @property + def n_c(self) -> int: return self._K def sample_summary_statistic( - self, n_labeled: int = 1000, n_unlabeled: int = 1000, seed: int = 42 - ) -> SummaryStatistic: - """Samples the summary statistic from the model. 
- - Args: - n_labeled: number of examples in the labeled data set - n_unlabeled: number of examples in the unlabeled data set - seed: random seed - """ - rng = np.random.default_rng(seed) + self, n: int, rng: Union[int, RNG] + ) -> SummaryMultinomialStatistic: + rng = np.random.default_rng(rng) - n_y = rng.multinomial(n_labeled, self._p_y_labeled) + n_y = rng.multinomial(n, self._p_y) n_y_and_c = np.vstack( - [rng.multinomial(n, p) for n, p in zip(n_y, self._c_cond_y)] + [rng.multinomial(n, p) for n, p in zip(n_y, self._p_c_cond_y)] + ) + n_c = n_y_and_c.sum(axis=0) + + assert n_c.shape == (self._K,) + + return SummaryMultinomialStatistic( + n_c=n_c, + n_y_and_c=n_y_and_c, + n_y=n_y, ) - n_c = rng.multinomial(n_unlabeled, self._c_cond_y.T @ self._p_y_unlabeled) + +class DiscreteSampler: + def __init__( + self, + sampler_labeled: SimpleMultinomialSampler, + sampler_unlabeled: SimpleMultinomialSampler, + ) -> None: + assert sampler_labeled.n_c == sampler_unlabeled.n_c + assert sampler_labeled.n_y == sampler_unlabeled.n_y + + self.labeled = sampler_labeled + self.unlabeled = sampler_unlabeled + + @property + def n_y(self) -> int: + return self.labeled.n_y + + @property + def n_c(self) -> int: + return self.labeled.n_c + + def sample_from_both( + self, n_labeled: int, n_unlabeled: int, seed: int + ) -> Tuple[SummaryMultinomialStatistic, SummaryMultinomialStatistic]: + rng1, rng2 = [ + np.random.default_rng(s) for s in np.random.SeedSequence(seed).spawn(2) + ] + + return ( + self.labeled.sample_summary_statistic(n=n_labeled, rng=rng1), + self.unlabeled.sample_summary_statistic(n=n_unlabeled, rng=rng2), + ) + + def sample_summary_statistic( + self, n_labeled: int, n_unlabeled: int, seed: int + ) -> SummaryStatistic: + labeled, unlabeled = self.sample_from_both( + n_labeled=n_labeled, n_unlabeled=n_unlabeled, seed=seed + ) return SummaryStatistic( - n_y_labeled=n_y, - n_c_unlabeled=n_c, - n_y_and_c_labeled=n_y_and_c, + n_y_labeled=labeled.n_y, + n_y_and_c_labeled=labeled.n_y_and_c, + n_c_unlabeled=unlabeled.n_c, ) +def discrete_sampler_factory( + p_y_labeled: ArrayLike, + p_y_unlabeled: ArrayLike, + p_c_cond_y_labeled: ArrayLike, + p_c_cond_y_unlabeled: Optional[ArrayLike] = None, +) -> DiscreteSampler: + p_c_cond_y_labeled = np.asarray(p_c_cond_y_labeled) + + if p_c_cond_y_unlabeled is None: + p_c_cond_y_unlabeled = p_c_cond_y_labeled.copy() + + return DiscreteSampler( + sampler_labeled=SimpleMultinomialSampler( + p_y=p_y_labeled, + p_c_cond_y=p_c_cond_y_labeled, + ), + sampler_unlabeled=SimpleMultinomialSampler( + p_y=p_y_unlabeled, + p_c_cond_y=p_c_cond_y_unlabeled, + ), + ) + + def almost_eye(y: int, c: int, diagonal: float = 1.0) -> np.ndarray: """Matrix P(C | Y) with fixed "diagonal" terms. diff --git a/labelshift/datasets/split.py b/labelshift/datasets/split.py new file mode 100644 index 0000000..ed61519 --- /dev/null +++ b/labelshift/datasets/split.py @@ -0,0 +1,142 @@ +"""Utilities for working with NumPy datasets.""" +import dataclasses +from typing import List, Protocol + +import numpy as np +import pydantic + + +class IDataset(Protocol): + """Interface for the dataset used. Note that it's designed + to be compatible with the scikit-learn's datasets.""" + + @property + def data(self) -> np.ndarray: + """The covariates X. Shape (n_samples, ...).""" + raise NotImplementedError + + @property + def target(self) -> np.ndarray: + """The target labels Y. 
Shape (n_samples,)."""
+        raise NotImplementedError
+
+
+def n_classes(dataset: IDataset) -> int:
+    """Calculates the number of classes in the dataset."""
+    return len(np.unique(dataset.target))
+
+
+@dataclasses.dataclass
+class SplitDataset:
+    """This class represents training, validation, and test datasets."""
+
+    train_x: np.ndarray
+    train_y: np.ndarray
+
+    valid_x: np.ndarray
+    valid_y: np.ndarray
+
+    test_x: np.ndarray
+    test_y: np.ndarray
+
+
+class SplitSpecification(pydantic.BaseModel):
+    """Specifies the class counts in each of the training,
+    validation, and test datasets.
+
+    Each list should have length `n_classes`; the entry at position `y`
+    is the required number of instances of class `y` in the given dataset.
+    """
+
+    train: List[int]
+    valid: List[int]
+    test: List[int]
+
+
+def _calculate_number_required(specification: SplitSpecification, label: int) -> int:
+    """Calculates the required number of instances of label `label`
+    according to `specification`."""
+    return (
+        specification.train[label]
+        + specification.valid[label]
+        + specification.test[label]
+    )
+
+
+def split_dataset(
+    dataset: IDataset, specification: SplitSpecification, random_seed: int
+) -> SplitDataset:
+    """Splits `dataset` according to `specification`."""
+    n_labels = n_classes(dataset)
+
+    if set(np.unique(dataset.target)) != set(range(n_labels)):
+        raise ValueError(
+            f"Labels must be 0-indexed integers: {set(np.unique(dataset.target))} != "
+            f"{set(range(n_labels))}."
+        )
+    if {
+        len(specification.train),
+        len(specification.valid),
+        len(specification.test),
+    } != {n_labels}:
+        raise ValueError("Each specification list must have length `n_classes`.")
+
+    rng = np.random.default_rng(random_seed)
+
+    train_indices: List[np.ndarray] = []
+    valid_indices: List[np.ndarray] = []
+    test_indices: List[np.ndarray] = []
+
+    for label in range(n_labels):
+        # Take the index of the points in `dataset` corresponding to the `label`
+        # Shape (n_points_with_this_label,).
+        index: np.ndarray = np.asarray(dataset.target == label).nonzero()[0]
+
+        n_required = _calculate_number_required(
+            specification=specification, label=label
+        )
+
+        if n_required > index.shape[0]:
+            raise ValueError(
+                f"According to specification one needs {n_required} data points "
+                f"of label {label}, "
+                f"but only {index.shape[0]} are available."
+            )
+
+        # Shuffle index
+        shuffled_index = rng.permutation(index)
+
+        # Get the required data points from this data set
+        # according to the permuted index
+        n_train = specification.train[label]
+        n_valid = specification.valid[label]
+        n_test = specification.test[label]
+
+        # Silence errors related to spacings around :
+        index_train = shuffled_index[:n_train]
+        index_valid = shuffled_index[n_train : n_train + n_valid]  # noqa: E203
+        index_test = shuffled_index[
+            n_train + n_valid : n_train + n_valid + n_test  # noqa: E203
+        ]
+
+        assert len(index_train) == n_train
+        assert len(index_valid) == n_valid
+        assert len(index_test) == n_test
+
+        train_indices.append(index_train)
+        valid_indices.append(index_valid)
+        test_indices.append(index_test)
+
+    train_index = rng.permutation(np.hstack(train_indices))
+    valid_index = rng.permutation(np.hstack(valid_indices))
+    test_index = rng.permutation(np.hstack(test_indices))
+
+    return SplitDataset(
+        train_x=dataset.data[train_index, ...],
+        train_y=dataset.target[train_index],
+        valid_x=dataset.data[valid_index, ...],
+        valid_y=dataset.target[valid_index],
+        test_x=dataset.data[test_index, ...],
+        test_y=dataset.target[test_index],
+    )
diff --git a/labelshift/experiments/__init__.py b/labelshift/experiments/__init__.py
new file mode 100644
index 0000000..73a5018
--- /dev/null
+++ b/labelshift/experiments/__init__.py
@@ -0,0 +1,5 @@
+"""Subpackage with experimental utilities and designs.
+
+Note:
+    This file should be empty. For the API see the `api.py` submodule.
+"""
diff --git a/labelshift/experiments/api.py b/labelshift/experiments/api.py
new file mode 100644
index 0000000..4dab1ad
--- /dev/null
+++ b/labelshift/experiments/api.py
@@ -0,0 +1,22 @@
+"""The experimental utilities."""
+from typing import TypeVar, Optional
+
+from labelshift.experiments.timer import Timer
+from labelshift.experiments.names import generate_name
+
+_T = TypeVar("_T")
+
+
+def calculate_value(*, overwrite: Optional[_T], default: _T) -> _T:
+    """Returns `overwrite` if it is not None and `default` otherwise."""
+    if overwrite is None:
+        return default
+    else:
+        return overwrite
+
+
+__all__ = [
+    "Timer",
+    "calculate_value",
+    "generate_name",
+]
diff --git a/labelshift/experiments/names.py b/labelshift/experiments/names.py
new file mode 100644
index 0000000..2d24ae6
--- /dev/null
+++ b/labelshift/experiments/names.py
@@ -0,0 +1,12 @@
+"""Utilities for generating unique experiment run names."""
+import petname
+from datetime import datetime
+
+
+def generate_name() -> str:
+    """Generates a name with timestamp and a random part."""
+
+    now = datetime.now()  # current date and time
+    date_time = now.strftime("%Y%m%d-%H%M%S")
+    suffix = petname.generate(separator="-", words=3)
+    return f"{date_time}-{suffix}"
diff --git a/labelshift/timer.py b/labelshift/experiments/timer.py
similarity index 100%
rename from labelshift/timer.py
rename to labelshift/experiments/timer.py
diff --git a/labelshift/partition.py b/labelshift/partition.py
new file mode 100644
index 0000000..38cb61a
--- /dev/null
+++ b/labelshift/partition.py
@@ -0,0 +1,129 @@
+"""Partition of the real line into intervals."""
+from typing import List, Sequence, Tuple
+
+import numpy as np
+from numpy.typing import ArrayLike
+from scipy import stats
+
+_INFINITY = np.inf
+
+
+class RealLinePartition:
+    """Partitions the real line into K (disjoint) intervals."""
+
+    def __init__(self, breakpoints: Sequence[float]) -> None:
+        """
+
+        Args:
+            breakpoints: points a_1, ..., a_{K-1} determine
+              the partition into K intervals
+              (-oo, a_1), [a_1, a_2), [a_2, a_3), ..., [a_{K-1}, +oo)
""" + breakpoints = list(breakpoints) + # Numer of intervals we want + K = len(breakpoints) + 1 + + # Make sure that the points are in ascending order + # and that no two values are equal. + # Note that we want k = 0, ... + # and k + 1 = 1, ..., K-2, + # as we have K-1 breakpoints + # Hence, we want to have k = 0, ..., K-3 + for k in range(K - 2): + assert breakpoints[k] < breakpoints[k + 1] + + self._breakpoints = np.asarray(breakpoints) + + self._intervals: List[Tuple[float, float]] = ( + [(-_INFINITY, breakpoints[0])] + + [(breakpoints[k], breakpoints[k + 1]) for k in range(K - 2)] + + [(breakpoints[-1], _INFINITY)] + ) + + assert ( + len(self._intervals) == K + ), f"Expected {K} intervals, but got the following: {self._intervals}." + + def __len__(self) -> int: + """Number of the intervals, K.""" + return len(self._intervals) + + def predict(self, X: np.ndarray) -> np.ndarray: + """Predicts the labels of points in `X`. + + Args: + X: points on the real line, shape (n, 1) or (n,) + + Returns: + array of integers valued in {0, 1, ..., K-1} with the + interval number, for each point in `X` + """ + X = np.asarray(X).ravel() # Shape (n,) + return np.asarray([self._predict_point(x) for x in X], dtype=int) + + def _predict_point(self, x: float) -> int: + """Predict the label (interval) for point `x`.""" + for k, (a, b) in enumerate(self._intervals): + if a <= x < b: + return k + raise ValueError(f"No interval containing {x}.") + + def interval(self, k: int) -> Tuple[float, float]: + """Returns the ends of the `k`th interval. + + Args: + k: should be valued between 0 and K-1. + + Raises: + IndexError, if k >= K (is out of range) + """ + return self._intervals[k] + + @property + def breakpoints(self, include_inf: bool = False) -> np.ndarray: + """Returns the breakpoints. + + Args: + include_inf: whether the infinities should be returned or not + + Returns: + breakpoints. Shape (K-1,) if `include_inf` is False + and (K+1,) if `include_inf` is True + """ + if not include_inf: + return self._breakpoints.copy() + else: + return np.asarray([-_INFINITY] + self._breakpoints.tolist() + [_INFINITY]) + + +def gaussian_probability_masses( + means: ArrayLike, sigmas: ArrayLike, partition: RealLinePartition +) -> np.ndarray: + """ + Args: + means: shape (L,) + sigmas: shape (L,) + partition: partition into K intervals + + Returns: + matrix P(C|Y), shape (L, K). 
+          The (l,k)th entry is the probability mass
+          of the `l`th Gaussian contained in the `k`th interval
+    """
+    means = np.asarray(means)
+    sigmas = np.asarray(sigmas)
+    L = len(means)
+    assert means.shape == sigmas.shape == (L,)
+
+    K = len(partition)
+    p_c_cond_y = np.zeros((L, K))
+
+    for l in range(L):  # noqa: E741
+        for k in range(K):
+            mu, sigma = means[l], sigmas[l]
+
+            a, b = partition.interval(k)
+            p_c_cond_y[l, k] = stats.norm.cdf(b, loc=mu, scale=sigma) - stats.norm.cdf(
+                a, loc=mu, scale=sigma
+            )
+
+    return p_c_cond_y
diff --git a/labelshift/scoring.py b/labelshift/scoring.py
index c4fa163..6129120 100644
--- a/labelshift/scoring.py
+++ b/labelshift/scoring.py
@@ -206,3 +206,9 @@ def __init__(self) -> None:

     def _calculate_error(self, true: np.ndarray, estimated: np.ndarray) -> float:
         return self._kl.error(true, estimated) + self._kl.error(estimated, true)
+
+
+class FisherRaoDistance(MulticlassQuantificationError):
+    """Fisher-Rao (geodesic) distance between two prevalence vectors,
+    computed from the Bhattacharyya coefficient."""
+
+    def error(self, true: ArrayLike, estimated: ArrayLike) -> float:
+        bhattacharyya_coefficient = np.dot(np.sqrt(true), np.sqrt(estimated))
+        return 2 * np.arccos(bhattacharyya_coefficient)
diff --git a/requirements.txt b/requirements.txt
index df3a1bc..0aa9708 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 arviz
 numpy
+petname
 pydantic
 scikit-learn
 scipy
-
 # Code quality tools
 black
 flake8
diff --git a/scripts/design_categorical.py b/scripts/design_categorical.py
new file mode 100644
index 0000000..6e0ce90
--- /dev/null
+++ b/scripts/design_categorical.py
@@ -0,0 +1,156 @@
+"""Experimental design for the categorical experiment.
+
+Use it to generate a list of commands to be run."""
+from pathlib import Path
+from typing import Optional
+
+DIRECTORY = Path("data/generated/categorical_experiment")
+
+ESTIMATOR_CONFIGURATIONS = {
+    "MAP-1": "--algorithm MAP --bayesian-alpha 1",
+    "MAP-2": "--algorithm MAP --bayesian-alpha 2",
+    "CC": "--algorithm CC",
+    "IR": "--algorithm IR --restricted true",
+    "BBSE": "--algorithm BBSE",
+}
+
+N_SEEDS: int = 2
+
+N_LABELED: int = 1_000
+N_UNLABELED: int = 500
+QUALITY_LABELED: float = 0.85
+PI_UNLABELED: float = 0.7
+L: int = 5
+K: int = 5
+
+
+def command(
+    estimator_key: str,
+    seed: int,
+    output_dir: Path,
+    n_y: int = L,
+    n_c: int = K,
+    n_labeled: int = N_LABELED,
+    n_unlabeled: int = N_UNLABELED,
+    quality_labeled: float = QUALITY_LABELED,
+    quality_unlabeled: Optional[float] = None,
+    pi_unlabeled: float = PI_UNLABELED,
+) -> None:
+    """Prints a single `run_categorical.py` invocation for the given settings."""
+    estimator_args = ESTIMATOR_CONFIGURATIONS[estimator_key]
+
+    quality_unlabeled_str = (
+        "" if quality_unlabeled is None else f"--quality-unlabeled {quality_unlabeled}"
+    )
+
+    print(
+        f"python scripts/run_categorical.py "
+        f"--n-labeled {n_labeled} --n-unlabeled {n_unlabeled} "
+        f"--quality {quality_labeled} {quality_unlabeled_str} "
+        f"--prevalence-unlabeled {pi_unlabeled} "
+        f"--seed {seed} "
+        f"--output-dir {output_dir} "
+        f"--L {n_y} --K {n_c} "
+        f"--tag {estimator_key} {estimator_args}"
+    )
+
+
+def experiment_change_prevalence() -> None:
+    """Fix L = K = 5 and change pi'_1."""
+    for seed in range(N_SEEDS):
+        for pi_unlabeled in [0.5, 0.6, 0.7, 0.8, 0.9]:
+            for algorithm in ESTIMATOR_CONFIGURATIONS.keys():
+                output_dir = (
+                    DIRECTORY / "change_prevalence" / f"{algorithm}-{pi_unlabeled}"
+                )
+                command(
+                    output_dir=output_dir,
+                    pi_unlabeled=pi_unlabeled,
+                    seed=seed,
+                    estimator_key=algorithm,
+                )
+
+
+def experiment_change_n_unlabeled() -> None:
+    """Change N'."""
+    for seed in range(N_SEEDS):
+        for n_unlabeled in [10, 50, 100, 500, 1000, 10000]:
+            for algorithm
in ESTIMATOR_CONFIGURATIONS.keys(): + output_dir = ( + DIRECTORY / "change_n_unlabeled" / f"{algorithm}-{n_unlabeled}" + ) + command( + n_unlabeled=n_unlabeled, + seed=seed, + estimator_key=algorithm, + output_dir=output_dir, + ) + + +def experiment_change_k() -> None: + """Change K, keeping L fixed.""" + for seed in range(N_SEEDS): + for n_c in [2, 3, 5, 7, 9]: + for algorithm in ESTIMATOR_CONFIGURATIONS.keys(): + output_dir = DIRECTORY / "change_k" / f"{algorithm}-{n_c}" + command( + seed=seed, + output_dir=output_dir, + estimator_key=algorithm, + n_c=n_c, + ) + + +def experiment_change_jointly_l_and_k() -> None: + """Jointly change L = K.""" + for seed in range(N_SEEDS): + for lk in [2, 3, 5, 7, 9, 10]: + for algorithm in ESTIMATOR_CONFIGURATIONS.keys(): + output_dir = DIRECTORY / "change_jointly_lk" / f"{algorithm}-{lk}" + command( + seed=seed, + estimator_key=algorithm, + output_dir=output_dir, + n_c=lk, + n_y=lk, + ) + + +def experiment_change_quality() -> None: + """Change quality.""" + for seed in range(N_SEEDS): + for quality in [0.55, 0.65, 0.75, 0.85, 0.95]: + for algorithm in ESTIMATOR_CONFIGURATIONS.keys(): + output_dir = DIRECTORY / "change_quality" / f"{algorithm}-{quality}" + command( + quality_labeled=quality, + seed=seed, + estimator_key=algorithm, + output_dir=output_dir, + ) + + +def experiment_misspecified() -> None: + """Change quality in the unlabeled population, so that the model is misspecified.""" + for seed in range(N_SEEDS): + for quality_prime in [0.45, 0.55, 0.65, 0.75, 0.80, 0.85, 0.90, 0.95]: + for algorithm in ESTIMATOR_CONFIGURATIONS.keys(): + output_dir = DIRECTORY / "misspecified" / f"{algorithm}-{quality_prime}" + command( + quality_unlabeled=quality_prime, + seed=seed, + output_dir=output_dir, + estimator_key=algorithm, + ) + + +def main() -> None: + experiment_change_prevalence() + experiment_change_n_unlabeled() + experiment_change_quality() + experiment_change_jointly_l_and_k() + experiment_change_k() + experiment_misspecified() + + +if __name__ == "__main__": + main() diff --git a/scripts/experiment1/1-1.py b/scripts/experiment1/1-1.py new file mode 100644 index 0000000..faf0d71 --- /dev/null +++ b/scripts/experiment1/1-1.py @@ -0,0 +1,32 @@ +""" +Fixed q = 0.85, N' = 500 and changed the prevalence π'1 in range {0.5, 0.6, 0.7, 0.8, 0.9}. 
+""" +algorithms = [ + "ClassifyAndCount", + "RatioEstimator", + "BlackBoxShiftEstimator", + "BayesianMAP", +] + + +def main() -> None: + n_labeled = 1000 + n_unlabeled = 500 + quality = 0.85 + n_seeds = 30 + + pi_labeled = 0.5 + + for pi_unlabeled in [0.5, 0.6, 0.7, 0.8, 0.9]: + for seed in range(n_seeds): + for algorithm in algorithms: + try: + output_dir = f"experiment1-1/{algorithm}" + command = f"python scripts/experiment_categorical.py --n-labeled {n_labeled} --n-unlabeled {n_unlabeled} --quality {quality} --pi-labeled {pi_labeled} --pi-unlabeled {pi_unlabeled} --seed {seed} --algorithm {algorithm} --output-dir {output_dir}" + print(command) + except Exception as e: + print(e) + + +if __name__ == "__main__": + main() diff --git a/scripts/experiment1/1-2.py b/scripts/experiment1/1-2.py new file mode 100644 index 0000000..9a924d0 --- /dev/null +++ b/scripts/experiment1/1-2.py @@ -0,0 +1,33 @@ +""" +Fixed q = 0.85, π′1 = 0.7 and changed N' in range +{10, 50, 100, 500, 1000, 10000} +""" +algorithms = [ + "ClassifyAndCount", + "RatioEstimator", + "BlackBoxShiftEstimator", + "BayesianMAP", +] + + +def main() -> None: + n_labeled = 1000 + quality = 0.85 + n_seeds = 30 + + pi_unlabeled = 0.7 + pi_labeled = 0.5 + + for n_unlabeled in [10, 50, 100, 500, 1000, 10000]: + for seed in range(n_seeds): + for algorithm in algorithms: + try: + output_dir = f"experiment1-2/{algorithm}" + command = f"python scripts/experiment_categorical.py --n-labeled {n_labeled} --n-unlabeled {n_unlabeled} --quality {quality} --pi-labeled {pi_labeled} --pi-unlabeled {pi_unlabeled} --seed {seed} --algorithm {algorithm} --output-dir {output_dir}" + print(command) + except Exception as e: + print(e) + + +if __name__ == "__main__": + main() diff --git a/scripts/experiment1/1-3.py b/scripts/experiment1/1-3.py new file mode 100644 index 0000000..3389f8b --- /dev/null +++ b/scripts/experiment1/1-3.py @@ -0,0 +1,33 @@ +""" +Fixed π′1 = 0.7, N'= 500 and changed q in range +{0.55, 0.65, 0.75, 0.85, 0.95} +""" +algorithms = [ + "ClassifyAndCount", + "RatioEstimator", + "BlackBoxShiftEstimator", + "BayesianMAP", +] + + +def main() -> None: + n_labeled = 1000 + n_unlabeled = 500 + n_seeds = 30 + + pi_unlabeled = 0.7 + pi_labeled = 0.5 + + for quality in [0.55, 0.65, 0.75, 0.85, 0.95]: + for seed in range(n_seeds): + for algorithm in algorithms: + try: + output_dir = f"experiment1-3/{algorithm}" + command = f"python scripts/experiment_categorical.py --n-labeled {n_labeled} --n-unlabeled {n_unlabeled} --quality {quality} --pi-labeled {pi_labeled} --pi-unlabeled {pi_unlabeled} --seed {seed} --algorithm {algorithm} --output-dir {output_dir}" + print(command) + except Exception as e: + print(e) + + +if __name__ == "__main__": + main() diff --git a/scripts/experiment1/plot_figure.py b/scripts/experiment1/plot_figure.py new file mode 100644 index 0000000..d0aa63a --- /dev/null +++ b/scripts/experiment1/plot_figure.py @@ -0,0 +1,97 @@ +import json +import string +from pathlib import Path + +import seaborn as sns +import matplotlib.pyplot as plt +import pandas as pd + + +rename_dict = { + "ClassifyAndCount": "CC", + "RatioEstimator": "IR", + "BlackBoxShiftEstimator": "BBSE", + "BayesianMAP": "MAP", +} + +hue_order = [ + "CC", + "IR", + "BBSE", + "MAP", +] + + +def file_to_row(file): + with open(file) as f: + x = json.load(f) + return { + "Algorithm": rename_dict[x["algorithm"]], + "true": x["true"][0], + "estimated": x["estimated"][0], + "quality": x["sampler"]["p_c_cond_y"][0][0], + "n_labeled": x["sampler"]["n_labeled"], + "n_unlabeled": 
x["sampler"]["n_unlabeled"], + } + + +def experiment_directory_to_dataframe(experiment_directory) -> pd.DataFrame: + files = list( + Path(experiment_directory).rglob( + "*.json", + ) + ) + df = pd.DataFrame([file_to_row(f) for f in files]) + df["error"] = df["estimated"] - df["true"] + return df + + +def main() -> None: + fig, axs = plt.subplots(3, 1, figsize=(4, 12), sharey=False) + + experiment1 = "experiment1-1" + df1 = experiment_directory_to_dataframe(experiment1) + sns.boxplot( + df1, x="true", y="error", hue="Algorithm", ax=axs[0], hue_order=hue_order + ) + axs[0].set_xlabel(r"Prevalence $\pi'_1$") + axs[0].set_ylabel(r"Signed difference $\hat \pi'_1 - \pi'_1$") + + experiment2 = "experiment1-2" + df2 = experiment_directory_to_dataframe(experiment2) + sns.boxplot( + df2, x="n_unlabeled", y="error", hue="Algorithm", ax=axs[1], hue_order=hue_order + ) + + axs[1].set_xlabel(r"Unlabeled data set size $N'$") + axs[1].set_ylabel(r"Signed difference $\hat \pi'_1 - \pi'_1$") + axs[1].legend([], [], frameon=False) + + experiment3 = "experiment1-3" + df3 = experiment_directory_to_dataframe(experiment3) + sns.boxplot( + df3, x="quality", y="error", hue="Algorithm", ax=axs[2], hue_order=hue_order + ) + + axs[2].set_xlabel(r"Classifier quality $q$") + axs[2].set_ylabel(r"Signed difference $\hat \pi'_1 - \pi'_1$") + axs[2].legend([], [], frameon=False) + + for n, ax in enumerate(axs): + ax.text( + -0.1, + 1.1, + string.ascii_uppercase[n], + transform=ax.transAxes, + size=20, + weight="bold", + ) + + sns.move_legend(axs[0], "lower left") # , bbox_to_anchor=(1, 1)) + + fig.tight_layout() + fig.savefig("experiment1.pdf") + + +if __name__ == "__main__": + main() diff --git a/scripts/experiment_external_dataset.py b/scripts/experiment_external_dataset.py new file mode 100644 index 0000000..a5a892c --- /dev/null +++ b/scripts/experiment_external_dataset.py @@ -0,0 +1,134 @@ +import enum + +import numpy as np +import sklearn.datasets +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression + +import labelshift.datasets.split as split +import labelshift.summary_statistic as summ + +import labelshift.algorithms.api as algos +import labelshift.algorithms.ratio_estimator as re +from labelshift.algorithms.expectation_maximization import expectation_maximization + + +class Algorithm(enum.Enum): + EM = "ExpectationMaximization" + CC = "ClassifyAndCount" + BBSE_HARD = "BBSE-Hard" + RATIO_HARD = "InvariantRatio-Hard" + BAYESIAN = "Bayesian-MAP" + RATIO_SOFT = "InvariantRatio-Soft" + + +def get_estimate( + algorithm: Algorithm, + n_y_c_labeled: np.ndarray, + n_c_unlabeled: np.ndarray, + y_labeled: np.ndarray, + prob_c_labeled: np.ndarray, + prob_c_unlabeled: np.ndarray, + labeled_prevalence: np.ndarray, +) -> np.ndarray: + """Function running the (point) prevalence estimator. 
+ + Args: + algorithm: estimator + n_y_c_labeled: matrix with counts of predictions and true values, shape (L, K) + n_c_unlabeled: vector with prediction counts on unlabeled data set, shape (K,) + y_labeled: true labels in the labeled data set, shape (N,) + prob_c_labeled: predictions of the classifier on the labeled data set, shape (N, K) + prob_c_unlabeled: predictions of the classifier on the unlabeled data set, shape (N', K) + labeled_prevalence: prevalence vector on the labeled distribution, shape (L,) + """ + summary_statistic = algos.SummaryStatistic( + n_y_labeled=None, n_y_and_c_labeled=n_y_c_labeled, n_c_unlabeled=n_c_unlabeled + ) + + if algorithm == Algorithm.EM: + return expectation_maximization( + predictions=prob_c_unlabeled, training_prevalences=labeled_prevalence + ) + elif algorithm == Algorithm.CC: + return algos.ClassifyAndCount().estimate_from_summary_statistic( + summary_statistic + ) + elif algorithm == Algorithm.BBSE_HARD: + return algos.BlackBoxShiftEstimator( + p_y_labeled=labeled_prevalence + ).estimate_from_summary_statistic(summary_statistic) + elif algorithm == Algorithm.RATIO_HARD: + return algos.InvariantRatioEstimator( + restricted=True + ).estimate_from_summary_statistic(summary_statistic) + elif algorithm == Algorithm.BAYESIAN: + return algos.DiscreteCategoricalMAPEstimator().estimate_from_summary_statistic( + summary_statistic + ) + elif algorithm == Algorithm.RATIO_SOFT: + return re.calculate_vector_and_matrix_from_predictions( + unlabeled_predictions=prob_c_unlabeled, + labeled_predictions=prob_c_labeled, + labeled_ground_truth=y_labeled, + ) + else: + raise ValueError(f"Algorithm {algorithm} not recognized.") + + +def main() -> None: + L = 2 + K = L + dataset = sklearn.datasets.load_breast_cancer() + print(len(dataset.target)) + + random_seed: int = 22 + n_training_examples: int = 200 + n_labeled_examples: int = 100 + n_unlabeled_examples: int = 150 + prevalence_labeled: np.ndarray = np.ones(2) / 2 + prevalence_unlabeled: np.ndarray = np.asarray([0.3, 0.7]) + + specification = split.SplitSpecification( + train=np.asarray(prevalence_labeled * n_training_examples, dtype=int).tolist(), + valid=np.asarray(prevalence_labeled * n_labeled_examples, dtype=int).tolist(), + test=np.asarray( + prevalence_unlabeled * n_unlabeled_examples, dtype=int + ).tolist(), + ) + + datasets = split.split_dataset( + dataset=dataset, specification=specification, random_seed=random_seed + ) + + classifier = DecisionTreeClassifier(random_state=random_seed + 1) + classifier = RandomForestClassifier(random_state=random_seed + 1) + classifier = LogisticRegression(random_state=random_seed + 1) + classifier.fit(datasets.train_x, datasets.train_y) + + # The count values + n_y_c_labeled = summ.count_values_joint( + L, K, datasets.valid_y, classifier.predict(datasets.valid_x) + ) + n_c_unlabeled = summ.count_values(K, classifier.predict(datasets.test_x)) + + labeled_probabilities = classifier.predict_proba(datasets.valid_x) + unlabeled_probabilities = classifier.predict_proba(datasets.test_x) + + for alg in Algorithm: + print(alg) + estimate = get_estimate( + algorithm=alg, + n_y_c_labeled=n_y_c_labeled, + n_c_unlabeled=n_c_unlabeled, + y_labeled=datasets.valid_y, + prob_c_labeled=labeled_probabilities, + prob_c_unlabeled=unlabeled_probabilities, + labeled_prevalence=prevalence_labeled, + ) + print(estimate) + + +if __name__ == "__main__": + main() diff --git a/scripts/experiment_external_dataset2.py b/scripts/experiment_external_dataset2.py new file mode 100644 index 0000000..e143fef 
--- /dev/null +++ b/scripts/experiment_external_dataset2.py @@ -0,0 +1,179 @@ +import enum + +import arviz as az +import matplotlib.pyplot as plt +import numpy as np +import pymc as pm +import sklearn.datasets +from sklearn.tree import DecisionTreeClassifier +from sklearn.ensemble import RandomForestClassifier +from sklearn.linear_model import LogisticRegression + +import labelshift.datasets.split as split +import labelshift.summary_statistic as summ + +import labelshift.algorithms.api as algos +import labelshift.algorithms.ratio_estimator as re +import labelshift.algorithms.bayesian_discrete as bay +from labelshift.algorithms.expectation_maximization import expectation_maximization + +plt.rcParams.update({"font.size": 14}) + + +class Algorithm(enum.Enum): + EM = "EM" + CC = "CC" + BBSE_HARD = "BBSE" + RATIO_HARD = "IR: hard" + RATIO_SOFT = "IR: soft" + + +def get_estimate( + algorithm: Algorithm, + n_y_c_labeled: np.ndarray, + n_c_unlabeled: np.ndarray, + y_labeled: np.ndarray, + prob_c_labeled: np.ndarray, + prob_c_unlabeled: np.ndarray, + labeled_prevalence: np.ndarray, +) -> np.ndarray: + """Function running the (point) prevalence estimator. + + Args: + algorithm: estimator + n_y_c_labeled: matrix with counts of predictions and true values, shape (L, K) + n_c_unlabeled: vector with prediction counts on unlabeled data set, shape (K,) + y_labeled: true labels in the labeled data set, shape (N,) + prob_c_labeled: predictions of the classifier on the labeled data set, shape (N, K) + prob_c_unlabeled: predictions of the classifier on the unlabeled data set, shape (N', K) + labeled_prevalence: prevalence vector on the labeled distribution, shape (L,) + """ + summary_statistic = algos.SummaryStatistic( + n_y_labeled=None, n_y_and_c_labeled=n_y_c_labeled, n_c_unlabeled=n_c_unlabeled + ) + + if algorithm == Algorithm.EM: + return expectation_maximization( + predictions=prob_c_unlabeled, training_prevalences=labeled_prevalence + ) + elif algorithm == Algorithm.CC: + return algos.ClassifyAndCount().estimate_from_summary_statistic( + summary_statistic + ) + elif algorithm == Algorithm.BBSE_HARD: + return algos.BlackBoxShiftEstimator( + p_y_labeled=labeled_prevalence + ).estimate_from_summary_statistic(summary_statistic) + elif algorithm == Algorithm.RATIO_HARD: + return algos.InvariantRatioEstimator( + restricted=True + ).estimate_from_summary_statistic(summary_statistic) + elif algorithm == Algorithm.RATIO_SOFT: + return re.calculate_vector_and_matrix_from_predictions( + unlabeled_predictions=prob_c_unlabeled, + labeled_predictions=prob_c_labeled, + labeled_ground_truth=y_labeled, + ) + else: + raise ValueError(f"Algorithm {algorithm} not recognized.") + + +def main() -> None: + L = 2 + K = L + dataset = sklearn.datasets.load_breast_cancer() + print(len(dataset.target)) + + ymax: float = 7.0 + random_seed: int = 20 + n_training_examples: int = 200 + n_labeled_examples: int = 100 + n_unlabeled_examples: int = 150 + prevalence_labeled: np.ndarray = np.ones(2) / 2 + prevalence_unlabeled: np.ndarray = np.asarray([0.3, 0.7]) + + specification = split.SplitSpecification( + train=np.asarray(prevalence_labeled * n_training_examples, dtype=int).tolist(), + valid=np.asarray(prevalence_labeled * n_labeled_examples, dtype=int).tolist(), + test=np.asarray( + prevalence_unlabeled * n_unlabeled_examples, dtype=int + ).tolist(), + ) + + datasets = split.split_dataset( + dataset=dataset, specification=specification, random_seed=random_seed + ) + + # classifier = DecisionTreeClassifier(random_state=random_seed + 
1) + classifier = RandomForestClassifier(random_state=random_seed + 1) + # classifier = LogisticRegression(random_state=random_seed + 1) + classifier.fit(datasets.train_x, datasets.train_y) + + # The count values + n_y_c_labeled = summ.count_values_joint( + L, K, datasets.valid_y, classifier.predict(datasets.valid_x) + ) + n_c_unlabeled = summ.count_values(K, classifier.predict(datasets.test_x)) + + labeled_probabilities = classifier.predict_proba(datasets.valid_x) + unlabeled_probabilities = classifier.predict_proba(datasets.test_x) + + with bay.build_model( + n_y_and_c_labeled=n_y_c_labeled, + n_c_unlabeled=n_c_unlabeled, + ): + idata = pm.sample() + + fig, ax = plt.subplots(figsize=(6, 4)) + _, ax_trash = plt.subplots() + + az.plot_posterior(idata, ax=[ax, ax_trash], var_names=bay.P_TEST_Y) + ax.set_title(r"$\pi'_1$ posterior") + + ax.vlines( + x=prevalence_unlabeled[0], + ymin=0, + ymax=ymax, + label="Ground truth", + colors=["k"], + linestyles=["--"], + ) + + linestyles = [ + "dashdot", + (0, (1, 1)), + "solid", + "dashed", + (0, (3, 10, 1, 10)), + ] + + for i, alg in enumerate(Algorithm): + print(alg) + estimate = get_estimate( + algorithm=alg, + n_y_c_labeled=n_y_c_labeled, + n_c_unlabeled=n_c_unlabeled, + y_labeled=datasets.valid_y, + prob_c_labeled=labeled_probabilities, + prob_c_unlabeled=unlabeled_probabilities, + labeled_prevalence=prevalence_labeled, + ) + + ax.vlines( + estimate[0], + ymin=0, + ymax=ymax, + label=alg.value, + colors=[f"C{i+2}"], + linestyles=[linestyles[i]], + ) + + print(estimate) + + fig.legend() + fig.tight_layout() + fig.savefig("plot_cancer.pdf") + + +if __name__ == "__main__": + main() diff --git a/scripts/experiment_gaussian.py b/scripts/experiment_gaussian.py new file mode 100644 index 0000000..285980d --- /dev/null +++ b/scripts/experiment_gaussian.py @@ -0,0 +1,144 @@ +"""This experiment plots the posterior in the Gaussian mixture model as well +as a discretized version of that. 
+""" +import string +from typing import List + +import arviz as az +import matplotlib.pyplot as plt +import numpy as np +import pymc as pm +import seaborn as sns + +import labelshift.partition as part +import labelshift.summary_statistic as summ +import labelshift.algorithms.bayesian_discrete as discrete + + +plt.rcParams.update({"font.size": 22}) + + +def plot_distributions( + ax: plt.Axes, + X: np.ndarray, + X1: np.ndarray, + breakpoints: np.ndarray, + height: float = 1.0, +) -> None: + """ + + Args: + ax: axes where to draw the plot + X: points from the labeled distribution, shape (n_labeled,) + X1: points from the unlabeled distribution, shape (n_unlabeled,) + breakpoints: breakpoints to be plotted, shape (n_breakpoints,) + """ + sns.kdeplot(data=np.hstack(X), ax=ax) + sns.kdeplot(data=np.hstack(X1), ax=ax) + + for bp in breakpoints: + ax.axvline(bp, ymax=height, linestyle="--", c="k", alpha=0.5) + + +def gaussian_model( + labeled_data: List[np.ndarray], unlabeled_data: np.ndarray +) -> pm.Model: + """ + Args: + labeled_data: list of samples attributed to each Y: + [ + [a1, ..., a_n0], + [b1, ..., b_n1] + ] + unlabeled_data: array of shape (n_unlabeled,) + """ + with pm.Model() as model: + mu = pm.Normal("mu", mu=0, sigma=1, shape=2) + sigma = pm.HalfNormal("sigma", sigma=1, shape=2) + + for i in range(2): + pm.Normal( + f"X_labeled{i}", mu=mu[i], sigma=sigma[i], observed=labeled_data[i] + ) + + weights = pm.Dirichlet("P_unlabeled(Y)", np.ones(2)) + + pm.NormalMixture( + "X_unlabeled", w=weights, mu=mu, sigma=sigma, observed=unlabeled_data + ) + + return model + + +def main() -> None: + """The main method.""" + mus = [0.0, 1.0] + sigmas = [0.3, 0.4] + ns = [500, 500] + ns_ = [200, 800] + K = 7 + L = 2 + + partition = part.RealLinePartition(np.linspace(-0.5, 1.5, K - 1)) + print(partition.breakpoints) + + assert len(partition) == K + + rng = np.random.default_rng(42) + + X_stratified = [ + rng.normal(loc=mu, scale=sigma, size=n) for mu, sigma, n in zip(mus, sigmas, ns) + ] + X = np.hstack(X_stratified) + Y = np.hstack([[i] * n for i, n in enumerate(ns)]) + + C = partition.predict(X) + + X1_stratified = [ + rng.normal(loc=mu, scale=sigma, size=n_) + for mu, sigma, n_ in zip(mus, sigmas, ns_) + ] + X1 = np.hstack(X1_stratified) + C1 = partition.predict(X1) + + n_c_unlabeled = summ.count_values(K, C1) + n_y_c_labeled = summ.count_values_joint(L, K, Y, C) + + print(n_c_unlabeled) + print(n_y_c_labeled) + + fig, axs = plt.subplots(3, figsize=(6, 9)) + plot_distributions(ax=axs[0], X=X, X1=X1, breakpoints=partition.breakpoints) + + with gaussian_model(labeled_data=X_stratified, unlabeled_data=X1): + gaussian_data = pm.sample() + + _, ax_trash = plt.subplots() + + az.plot_posterior(gaussian_data, ax=[axs[1], ax_trash], var_names="P_unlabeled(Y)") + axs[1].set_title(r"$\pi'_1$ (Gaussian)") + + with discrete.build_model( + n_y_and_c_labeled=n_y_c_labeled, n_c_unlabeled=n_c_unlabeled + ): + discrete_data = pm.sample() + + az.plot_posterior(discrete_data, ax=[axs[2], ax_trash], var_names=discrete.P_TEST_Y) + axs[2].set_title(r"$\pi'_1$ (Discrete)") + + for n, ax in enumerate(axs): + ax.text( + -0.1, + 1.1, + string.ascii_uppercase[n], + transform=ax.transAxes, + size=20, + weight="bold", + ) + + fig.tight_layout() + fig.savefig("plot.pdf") + + +if __name__ == "__main__": + main() diff --git a/scripts/run_categorical.py b/scripts/run_categorical.py new file mode 100644 index 0000000..496f58c --- /dev/null +++ b/scripts/run_categorical.py @@ -0,0 +1,256 @@ +"""Sample data directly from P(C|Y) 
distribution and run specified quantification estimator.""" +import argparse +import enum +from pathlib import Path +from typing import List + +import pydantic + +import labelshift.interfaces.point_estimators as pe +import labelshift.datasets.discrete_categorical as dc +import labelshift.algorithms.api as algo +import labelshift.experiments.api as exp + + +class Algorithm(enum.Enum): + CLASSIFY_AND_COUNT = "CC" + RATIO_ESTIMATOR = "IR" + BBSE = "BBSE" + BAYESIAN = "MAP" + + +def create_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser() + parser.add_argument( + "--n-labeled", type=int, default=1_000, help="Number of labeled examples." + ) + parser.add_argument( + "--n-unlabeled", type=int, default=1_000, help="Number of unlabeled examples." + ) + parser.add_argument( + "--quality", + type=float, + default=0.85, + help="Quality of the classifier on the labeled data.", + ) + parser.add_argument( + "--quality-unlabeled", + type=float, + default=None, + help="Quality of the classifier on the unlabeled data." + "Can be used to assess model misspecification. " + "If None, the quality will be the same for both labeled" + "and unlabeled data set (no misspecification).", + ) + parser.add_argument("--L", type=int, default=2, help="Number of classes L.") + parser.add_argument( + "--K", + type=int, + default=None, + help="Number of available predictions. Default: the same as L.", + ) + parser.add_argument( + "--prevalence-labeled", + type=float, + default=None, + help="Prevalence of the first class in the labeled data set. Default: 1/L (uniform).", + ) + parser.add_argument( + "--prevalence-unlabeled", + type=float, + default=None, + help="Prevalence of the first class in the unlabeled data set. Default: 1/L (uniform).", + ) + parser.add_argument( + "--seed", type=int, default=1, help="Random seed to sample the data." + ) + parser.add_argument("--algorithm", type=Algorithm, default=Algorithm.BAYESIAN) + parser.add_argument( + "--output", type=Path, default=Path(f"{exp.generate_name()}.json") + ) + parser.add_argument("--output-dir", type=Path, default=None) + + parser.add_argument( + "--bayesian-alpha", + type=float, + default=1.0, + help="Dirichlet prior specification for the Bayesian quantification.", + ) + parser.add_argument( + "--restricted", + type=bool, + default=True, + help="Whether to use restricted invariant ratio estimator.", + ) + + parser.add_argument( + "--tag", type=str, default="", help="Can be used to tag the run." 
+ ) + + parser.add_argument("--dry-run", action="store_true") + + return parser + + +class EstimatorArguments(pydantic.BaseModel): + bayesian_alpha: float + restricted: bool + + +class Arguments(pydantic.BaseModel): + p_y_labeled: pydantic.confloat(gt=0, lt=1) + p_y_unlabeled: pydantic.confloat(gt=0, lt=1) + + quality_labeled: pydantic.confloat(ge=0, le=1) + quality_unlabeled: pydantic.confloat(ge=0, le=1) + + n_y: pydantic.PositiveInt = pydantic.Field(description="Number of labels, L.") + n_c: pydantic.PositiveInt = pydantic.Field(description="Number of predictions, K.") + + n_labeled: pydantic.PositiveInt + n_unlabeled: pydantic.PositiveInt + + seed: int + + algorithm: Algorithm + tag: str + estimator_arguments: EstimatorArguments + + +def parse_args(args) -> Arguments: + n_y = args.L + n_c = exp.calculate_value(overwrite=args.K, default=n_y) + + quality_unlabeled = exp.calculate_value( + overwrite=args.quality_unlabeled, default=args.quality + ) + + p_y_labeled = exp.calculate_value( + overwrite=args.prevalence_labeled, default=1 / n_y + ) + p_y_unlabeled = exp.calculate_value( + overwrite=args.prevalence_unlabeled, default=1 / n_y + ) + + return Arguments( + p_y_labeled=p_y_labeled, + p_y_unlabeled=p_y_unlabeled, + quality_labeled=args.quality, + quality_unlabeled=quality_unlabeled, + n_y=n_y, + n_c=n_c, + seed=args.seed, + n_labeled=args.n_labeled, + n_unlabeled=args.n_unlabeled, + algorithm=args.algorithm, + tag=args.tag, + estimator_arguments=EstimatorArguments( + bayesian_alpha=args.bayesian_alpha, + restricted=args.restricted, + ), + ) + + +def create_sampler(args: Arguments) -> dc.DiscreteSampler: + L = args.n_y + p_y_labeled = dc.almost_eye(L, L, diagonal=args.p_y_labeled)[0, :] + p_y_unlabeled = dc.almost_eye(L, L, diagonal=args.p_y_unlabeled)[0, :] + + p_c_cond_y_labeled = dc.almost_eye( + y=L, + c=args.n_c, + diagonal=args.quality_labeled, + ) + p_c_cond_y_unlabeled = dc.almost_eye( + y=L, + c=args.n_c, + diagonal=args.quality_unlabeled, + ) + + return dc.discrete_sampler_factory( + p_y_labeled=p_y_labeled, + p_y_unlabeled=p_y_unlabeled, + p_c_cond_y_labeled=p_c_cond_y_labeled, + p_c_cond_y_unlabeled=p_c_cond_y_unlabeled, + ) + + +def get_estimator(args: Arguments) -> pe.SummaryStatisticPrevalenceEstimator: + if args.algorithm == Algorithm.CLASSIFY_AND_COUNT: + if args.n_c != args.n_y: + raise ValueError("For classify and count you need K = L.") + return algo.ClassifyAndCount() + elif args.algorithm == Algorithm.RATIO_ESTIMATOR: + return algo.InvariantRatioEstimator( + restricted=args.estimator_arguments.restricted, enforce_square=False + ) + elif args.algorithm == Algorithm.BBSE: + return algo.BlackBoxShiftEstimator(enforce_square=False) + elif args.algorithm == Algorithm.BAYESIAN: + return algo.DiscreteCategoricalMAPEstimator( + alpha_unlabeled=args.estimator_arguments.bayesian_alpha + ) + else: + raise ValueError(f"Algorithm {args.algorithm} not recognized.") + + +class Result(pydantic.BaseModel): + p_y_unlabeled_true: List[float] + p_y_unlabeled_estimate: List[float] + time: float + algorithm: Algorithm + + input_arguments: Arguments + + +def dry_run(args: Arguments) -> None: + print("-- Dry run --\nUsed settings:") + print(args) + print("Exiting...") + + +def main() -> None: + """The main function of the experiment.""" + raw_args = create_parser().parse_args() + args: Arguments = parse_args(raw_args) + + if raw_args.dry_run: + dry_run(args) + return + + sampler = create_sampler(args) + + summary_statistic = sampler.sample_summary_statistic( + n_labeled=args.n_labeled, + 
n_unlabeled=args.n_unlabeled, + seed=args.seed, + ) + + estimator = get_estimator(args) + timer = exp.Timer() + estimate = estimator.estimate_from_summary_statistic(summary_statistic) + elapsed_time = timer.check() + + result = Result( + algorithm=args.algorithm, + time=elapsed_time, + p_y_unlabeled_true=sampler.unlabeled.p_y.tolist(), + p_y_unlabeled_estimate=estimate.tolist(), + input_arguments=args, + ) + + if raw_args.output_dir is not None: + raw_args.output_dir.mkdir(exist_ok=True, parents=True) + output_path = raw_args.output_dir / raw_args.output + else: + output_path = raw_args.output + + with open(output_path, "w") as f: + f.write(result.json()) + + print(result) + print("Finished.") + + +if __name__ == "__main__": + main() diff --git a/setup.cfg b/setup.cfg index 4aa5df7..e2b17dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -23,6 +23,7 @@ setup_requires = install_requires = arviz numpy >= 1.12,<2 + petname pydantic pymc scikit-learn diff --git a/tests/algorithms/test_bayesian_discrete.py b/tests/algorithms/test_bayesian_discrete.py index 81d462a..e9e23ac 100644 --- a/tests/algorithms/test_bayesian_discrete.py +++ b/tests/algorithms/test_bayesian_discrete.py @@ -19,8 +19,10 @@ def test_right_values(n_labeled: int = 10_000, n_unlabeled: int = 10_000) -> Non ] ) - sampler = dc.DiscreteSampler( - p_y_labeled=[0.4, 0.6], p_y_unlabeled=p_y_unlabeled, p_c_cond_y=p_c_cond_y + sampler = dc.discrete_sampler_factory( + p_y_labeled=[0.4, 0.6], + p_y_unlabeled=p_y_unlabeled, + p_c_cond_y_labeled=p_c_cond_y, ) statistic = sampler.sample_summary_statistic( n_labeled=n_labeled, n_unlabeled=n_unlabeled, seed=111 diff --git a/tests/algorithms/test_bbse.py b/tests/algorithms/test_bbse.py index 75e0878..0c9e507 100644 --- a/tests/algorithms/test_bbse.py +++ b/tests/algorithms/test_bbse.py @@ -65,8 +65,10 @@ def test_bbse_from_sufficient_statistic( ] ) - sampler = dc.DiscreteSampler( - p_y_labeled=p_y_labeled, p_y_unlabeled=p_y_unlabeled, p_c_cond_y=p_c_cond_y + sampler = dc.discrete_sampler_factory( + p_y_labeled=p_y_labeled, + p_y_unlabeled=p_y_unlabeled, + p_c_cond_y_labeled=p_c_cond_y, ) statistic = sampler.sample_summary_statistic( n_labeled=n_labeled, n_unlabeled=n_unlabeled, seed=111 diff --git a/tests/algorithms/test_ratio_estimator.py b/tests/algorithms/test_ratio_estimator.py index c88c58b..a13a67a 100644 --- a/tests/algorithms/test_ratio_estimator.py +++ b/tests/algorithms/test_ratio_estimator.py @@ -27,8 +27,10 @@ def test_ratio_estimator_from_sufficient_statistic( ] ) - sampler = dc.DiscreteSampler( - p_y_labeled=p_y_labeled, p_y_unlabeled=p_y_unlabeled, p_c_cond_y=p_c_cond_y + sampler = dc.discrete_sampler_factory( + p_y_labeled=p_y_labeled, + p_y_unlabeled=p_y_unlabeled, + p_c_cond_y_labeled=p_c_cond_y, ) statistic = sampler.sample_summary_statistic( n_labeled=n_labeled, n_unlabeled=n_unlabeled, seed=111 diff --git a/tests/datasets/test_discrete_categorical.py b/tests/datasets/test_discrete_categorical.py index 1f30ec7..eb1f5d9 100644 --- a/tests/datasets/test_discrete_categorical.py +++ b/tests/datasets/test_discrete_categorical.py @@ -70,14 +70,14 @@ def test_p_y_labeled(self, n_c: int, n_labeled: int, p_y: Sequence[float]) -> No p_c_cond_y = rng.dirichlet(alpha=np.ones(n_c), size=len(p_y)) p_y_unlabeled = np.ones_like(p_y) / len(p_y) - sampler = dc.DiscreteSampler( + sampler = dc.discrete_sampler_factory( p_y_labeled=p_y, p_y_unlabeled=p_y_unlabeled, - p_c_cond_y=p_c_cond_y, + p_c_cond_y_labeled=p_c_cond_y, ) summary_stats = sampler.sample_summary_statistic( - n_labeled=n_labeled, 
n_unlabeled=1 + n_labeled=n_labeled, n_unlabeled=1, seed=12 ) assert summary_stats.n_y_labeled.sum() == n_labeled @@ -97,12 +97,14 @@ def test_p_c(self) -> None: [0.0, 1, 0], ] - sampler = dc.DiscreteSampler( - p_y_labeled=p_y_labeled, p_y_unlabeled=p_y_unlabeled, p_c_cond_y=p_c_cond_y + sampler = dc.discrete_sampler_factory( + p_y_labeled=p_y_labeled, + p_y_unlabeled=p_y_unlabeled, + p_c_cond_y_labeled=p_c_cond_y, ) - assert sampler.p_c_labeled == pytest.approx(np.array([0.1, 0.9, 0.0])) - assert sampler.p_c_unlabeled == pytest.approx(np.array([0.2, 0.8, 0.0])) + assert sampler.labeled.p_c == pytest.approx(np.array([0.1, 0.9, 0.0])) + assert sampler.unlabeled.p_c == pytest.approx(np.array([0.2, 0.8, 0.0])) @pytest.mark.parametrize("n_y", (2, 5)) @pytest.mark.parametrize("n_c", (2, 3)) @@ -114,15 +116,17 @@ def test_create_right_fields(self, n_c: int, n_y: int) -> None: p_y_unlabeled = rng.dirichlet(alpha=np.ones(n_y)) p_y_labeled = rng.dirichlet(alpha=np.ones(n_y)) - sampler = dc.DiscreteSampler( + sampler = dc.discrete_sampler_factory( p_y_labeled=p_y_labeled, p_y_unlabeled=p_y_unlabeled, - p_c_cond_y=p_c_cond_y, + p_c_cond_y_labeled=p_c_cond_y, ) - assert sampler.p_c_cond_y == pytest.approx(p_c_cond_y) - assert sampler.p_y_labeled == pytest.approx(p_y_labeled) - assert sampler.p_y_unlabeled == pytest.approx(p_y_unlabeled) + assert sampler.labeled.p_c_cond_y == pytest.approx(p_c_cond_y) + assert sampler.unlabeled.p_c_cond_y == pytest.approx(p_c_cond_y) + + assert sampler.labeled.p_y == pytest.approx(p_y_labeled) + assert sampler.unlabeled.p_y == pytest.approx(p_y_unlabeled) - assert sampler.p_c_labeled.shape == (n_c,) - assert sampler.p_c_unlabeled.shape == (n_c,) + assert sampler.labeled.p_c.shape == (n_c,) + assert sampler.unlabeled.p_c.shape == (n_c,) diff --git a/tests/datasets/test_split.py b/tests/datasets/test_split.py new file mode 100644 index 0000000..18ac1fb --- /dev/null +++ b/tests/datasets/test_split.py @@ -0,0 +1,37 @@ +"""Tests of `labelshift.datasets.split`.""" +import numpy as np +import pytest +from sklearn import datasets + +import labelshift.datasets.split as split + + +@pytest.mark.parametrize("seed_prevalence", range(5)) +@pytest.mark.parametrize("n_classes", [2, 10]) +def test_split_dataset( + seed_prevalence: int, n_classes: int, seed_split: int = 0 +) -> None: + """ + Args: + seed_prevalence: seed used to draw the prevalence vectors + """ + rng = np.random.default_rng(seed_prevalence) + dataset = datasets.load_digits(n_class=n_classes) + + spec = split.SplitSpecification( + train=rng.choice(range(4, 20), size=n_classes).tolist(), + valid=rng.choice(range(4, 20), size=n_classes).tolist(), + test=rng.choice(range(4, 20), size=n_classes).tolist(), + ) + + split_dataset = split.split_dataset( + dataset=dataset, specification=spec, random_seed=seed_split + ) + + assert len(split_dataset.train_x) == sum(spec.train) + assert len(split_dataset.valid_x) == sum(spec.valid) + assert len(split_dataset.test_x) == sum(spec.test) + + assert len(split_dataset.train_x) == len(split_dataset.train_y) + assert len(split_dataset.valid_x) == len(split_dataset.valid_y) + assert len(split_dataset.test_x) == len(split_dataset.test_y) diff --git a/tests/test_partition.py b/tests/test_partition.py new file mode 100644 index 0000000..6d44f44 --- /dev/null +++ b/tests/test_partition.py @@ -0,0 +1,81 @@ +"""Tests of the `partition` submodule.""" +import numpy as np +import pytest +from scipy import stats + +import labelshift.partition as part + + +class TestRealLinePartition: + """Tests of 
the partition class."""
+
+    @pytest.mark.parametrize("k", (2, 3, 10))
+    def test_length(self, k: int) -> None:
+        """Tests whether the length of the generated partition
+        is right.
+
+        Args:
+            k: the length of the partition
+        """
+        breakpoints = np.arange(k - 1)
+
+        partition = part.RealLinePartition(breakpoints)
+
+        assert len(partition) == k
+        for i in range(k):
+            a, b = partition.interval(i)
+            assert a < b
+
+        for i in range(k, k + 10):
+            with pytest.raises(IndexError):
+                partition.interval(i)  # every index >= k should be out of range
+
+    @pytest.mark.parametrize("a", [0.1, -0.5, 0.8])
+    def test_two_intervals(self, a: float) -> None:
+        """We use one breakpoint to split the real line
+        into two halves.
+
+        Args:
+            a: breakpoint
+        """
+        partition = part.RealLinePartition([a])
+
+        assert len(partition) == 2
+        assert partition.interval(0) == (-np.inf, a)
+        assert partition.interval(1) == (a, np.inf)
+
+        with pytest.raises(IndexError):
+            partition.interval(2)
+
+
+class TestGaussianProbabilityMasses:
+    """Tests of `gaussian_probability_masses` function."""
+
+    @pytest.mark.parametrize("mean", [0.1, 0.6, 10.0])
+    @pytest.mark.parametrize("sigma", [1.0, 2.0])
+    @pytest.mark.parametrize("a", [-0.3, 0.1, 1.0])
+    def test_one_gaussian_halves(self, mean: float, sigma: float, a: float) -> None:
+        """We have L = 1 and K = 2."""
+
+        result = part.gaussian_probability_masses(
+            means=[mean], sigmas=[sigma], partition=part.RealLinePartition([a])
+        )
+        assert result.shape == (1, 2)
+        assert result[0][0] == pytest.approx(stats.norm.cdf(a, loc=mean, scale=sigma))
+        assert result.sum() == pytest.approx(1.0)
+
+    @pytest.mark.parametrize("K", (2, 3, 10))
+    @pytest.mark.parametrize("L", (1, 2, 5))
+    def test_shape(self, K: int, L: int) -> None:
+        """Tests if the generated shape is right."""
+        partition = part.RealLinePartition(np.linspace(0, 1, K - 1))
+        assert len(partition) == K
+
+        means = np.linspace(-5, 5, L)
+        sigmas = np.linspace(0.1, 1.0, L)
+
+        output = part.gaussian_probability_masses(
+            means=means, sigmas=sigmas, partition=partition
+        )
+
+        assert output.shape == (L, K)
diff --git a/tests/test_timer.py b/tests/test_timer.py
index 2dbdbf5..b572143 100644
--- a/tests/test_timer.py
+++ b/tests/test_timer.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-from labelshift.timer import Timer
+from labelshift.experiments.timer import Timer
 
 
 @pytest.mark.parametrize("t1", [0.1, 0.2])
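
Note (not part of the patch): a minimal sketch of how the refactored pieces above fit together, i.e. the `discrete_sampler_factory` constructor, the `labeled`/`unlabeled` accessors, and the new `alpha_unlabeled` argument of the MAP estimator. The calls mirror the ones used in the tests; the import path for the discrete-categorical dataset module is an assumption and may need adjusting to the actual package layout.

import labelshift.algorithms.api as algo
import labelshift.datasets.discrete_categorical as dc  # assumed module path

# Labeled and unlabeled populations may now differ in P(Y); P(C|Y) defaults to
# the labeled one when only p_c_cond_y_labeled is given.
sampler = dc.discrete_sampler_factory(
    p_y_labeled=[0.4, 0.6],
    p_y_unlabeled=[0.7, 0.3],
    p_c_cond_y_labeled=[[0.9, 0.1], [0.2, 0.8]],
)

# Per-population quantities are exposed via the labeled/unlabeled accessors.
print(sampler.labeled.p_y, sampler.unlabeled.p_c)

statistic = sampler.sample_summary_statistic(n_labeled=1_000, n_unlabeled=500, seed=42)

# The MAP estimator now accepts the Dirichlet pseudocounts of the unlabeled prior.
estimator = algo.DiscreteCategoricalMAPEstimator(alpha_unlabeled=2.0)
estimate = estimator.estimate_from_summary_statistic(statistic)
print(estimate)  # estimated unlabeled prevalences, shape (2,)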