# Dawid Skene with Shared Confusion Matrix (Pooled: Multinomial Model)
Uses a single confusion matrix instead of one per worker. No need to track individual worker behavior.

However, treats all workers equally reliable/unreliable.

Instead of indexing by worker (pi[k, j, l]), we can use the shared confusion matrix pi[j, l].
The probability of a task belonging to class j is computed using all worker responses at once. (So basically we have a single worker, right?)



## What might be worth investigating:

- `_m_step`:
    ```python
    np.where(denom <= 0, -1e9, denom)
    ```
    `-1e9` could be changed to epsilon (`np.finfo(float).eps`) or `np.maximum(denom, eps)` or something similar?

- `_e_step`:
    Maybe the e-step should rely more on log `log_T = np.log(self.rho) + (self.crowd_matrix * np.log(self.pi)).sum(axis=(1, 2))` or something similar to this.
    Can double loop (over `i` and `j`) be solved by np.einsum?


```python
# Summing over l in y -> shape (N, J)
sum_y_nj = np.sum(self.crowd_matrix, axis=1)  # Summing over L

# Summing over n with weighting by t -> shape (K, J)
numerator = np.dot(self.T.T, sum_y_nj)  # (K, N) @ (N, J) -> (K, J)

# Summing over i (K) in the denominator
denom = np.sum(numerator, axis=1, keepdims=True)  # (J, 1)

```
should be equivalent to:
```python
aggregated_votes = np.einsum(
    "tq, tij -> qj", self.T, self.crowd_matrix
)  # shape (n_classes, n_classes)
denom = aggregated_votes.sum(
    axis=1, keepdims=True
)
```

In [34]:
import warnings
from os import PathLike
from sys import getsizeof
from typing import Annotated, Generator

import numpy as np
import sparse as sp
from annotated_types import Ge
from loguru import logger
from memory_profiler import profile
from pydantic import validate_call
from tqdm.auto import tqdm

from peerannot.models.template import CrowdModel,AnswersDict

# load data
from toy_data import votes, N_CLASSES, N_WORKERS



In [35]:

FilePathInput = PathLike | str | list[str] | Generator[str, None, None] | None


class DawidSkeneShared(CrowdModel):
    """
    =============================
    Dawid and Skene model (1979)
    =============================

    Assumptions:
    - independent workers

    Using:
    - EM algorithm

    Estimating:
    - One confusion matrix for each workers
    """

    @validate_call
    def __init__(
        self,
        answers: AnswersDict,
        # TODO@jzftran probably annotation with 0 or 1 worker doesn't make sense: for 0 it should be an error
        n_workers: Annotated[int, Ge(1)],
        n_classes: Annotated[int, Ge(1)],
        *,
        sparse: bool = False,
        path_remove: FilePathInput = None,
    ) -> None:
        r"""Dawid and Skene strategy: estimate confusion matrix for each worker.

        Assuming that workers are independent, the model assumes that

        .. math::

            (y_i^{(j)}\ | y_i^\\star = k) \\sim \\mathcal{M}\\left(\\pi^{(j)}_{k,\\cdot}\\right)

        and maximizes the log likelihood of the model using an EM algorithm.

        .. math::

            \\underset{\\rho,\\\pi,T}{\mathrm{argmax}}\\prod_{i\\in [n_{\\texttt{task}}]}\prod_{k \\in [K]}\\bigg[\\rho_k\prod_{j\\in [n_{\\texttt{worker}}]}\prod_{\\ell\in [K]}\\big(\\pi^{(j)}_{k, \\ell}\\big)^{\mathbf{1}_{\\{y_i^{(j)}=\\ell\\}}}\\bigg]^{T_{i,k}},

        where :math:`\\rho` is the class marginals, :math:`\\pi` is the confusion matrix and :math:`T` is the indicator variables of belonging to each class.

        :param answers: Dictionary of workers answers with format

         .. code-block:: javascript

            {
                task0: {worker0: label, worker1: label},
                task1: {worker1: label}
            }

        :type answers: dict
        :param sparse: If the number of workers/tasks/label is large (:math:`>10^{6}` for at least one), use sparse=True to run per task
        :type sparse: bool, optional
        :param n_classes: Number of possible classes, defaults to 2
        :type n_classes: int, optional"""

        super().__init__(answers)
        self.n_workers: int = n_workers
        self.n_classes: int = n_classes
        self.sparse: bool = sparse
        self.path_remove: FilePathInput = path_remove
        self.n_task: int = len(self.answers)

        self.exclude_answers()
        if self.sparse:
            raise NotImplementedError('The sparse algorithm for DawidSkeneShared in the current context is not implemented.')
        else:
            self.init_crowd_matrix()

    def exclude_answers(self) -> None:
        answers_modif = {}
        if self.path_remove is not None:
            to_remove = np.loadtxt(self.path_remove, dtype=int)
            i = 0
            for key, val in self.answers.items():
                if int(key) not in to_remove[:, 1]:
                    answers_modif[i] = val
                    i += 1
            self.answers = answers_modif


    def init_crowd_matrix(self)->None:
        """Transform dictionnary of labels to a tensor of size (n_task, n_workers, n_classes)."""

        matrix = np.zeros((self.n_task, self.n_workers, self.n_classes))
        for task, ans in self.answers.items():
            for worker, label in ans.items():
                matrix[task, worker, label] += 1

        logger.info(f"Dense crowd matrix  {getsizeof(matrix)}")
        # shape
        self.crowd_matrix = matrix

    def init_T(self):
        """NS initialization"""
        # shape (n_task, n_workers)
        T = self.crowd_matrix.sum(axis=1)
        logger.info(f"Size of T before calc: {getsizeof(T)}")

        tdim = T.sum(1, keepdims=True)
        self.T = np.where(tdim > 0, T / tdim, 0)
        logger.info(f"Size of T: {getsizeof(self.T)}")

    def _m_step(
        self,
    ) -> None:
        """Maximizing log likelihood (see eq. 2.3 and 2.4 Dawid and Skene 1979)

        Returns:
            :math:`\\rho`: :math:`(\\rho_j)_j` probabilities that instance has true response j if drawn at random (class marginals)
            pi: number of times worker k records l when j is correct
        """
        self.rho = self.T.sum(0) / self.n_task

        aggregated_votes = np.einsum(
            "tq, tij -> qj", self.T, self.crowd_matrix
        )  # shape (n_classes, n_classes)
        denom = aggregated_votes.sum(
            axis=1, keepdims=True
        )
        # self.shared_pi = aggregated_votes/ np.where(denom <=0, -1e9, denom).reshape(-1,1)
        self.shared_pi = np.where(denom > 0, aggregated_votes / denom, 0)
        
    def _e_step(self) -> None:
        """Estimate indicator variables using a shared confusion matrix"""

        T = np.zeros((self.n_task, self.n_classes))

        for i in range(self.n_task):
            for j in range(self.n_classes):
                num = (
                    np.prod(
                        np.power(
                            self.shared_pi[j, :], self.crowd_matrix[i, :, :]
                        )
                    )
                    * self.rho[j]
                )
                T[i, j] = num


        self.denom_e_step = T.sum(axis=1, keepdims=True)
        self.T = np.where(self.denom_e_step > 0, T / self.denom_e_step, T)

    def log_likelihood(self):
        """Compute log likelihood of the model"""
        return np.log(np.sum(self.denom_e_step))

    def run_dense(
        self,
        epsilon: Annotated[float, Ge(0)] = 1e-6,
        maxiter: Annotated[int, Ge(0)] = 50,
        *,
        verbose: bool = False,
    ) -> tuple[list[np.float64], int]:
        i = 0
        eps = np.inf

        self.init_T()
        ll = []
        pbar = tqdm(total=maxiter, desc="Dawid and Skene")
        while i < maxiter and eps > epsilon:
            self._m_step()
            self._e_step()
            likeli = self.log_likelihood()
            ll.append(likeli)
            if len(ll) >= 2:
                eps = np.abs(ll[-1] - ll[-2])
            i += 1
            pbar.update(1)

        pbar.set_description("Finished")
        pbar.close()
        self.c = i
        if eps > epsilon and verbose:
            print(f"DS did not converge: err={eps}")
        return ll, i


    @validate_call
    def run(
        self,
        epsilon: Annotated[float, Ge(0)] = 1e-6,
        maxiter: Annotated[int, Ge(0)] = 50,
        *,
        verbose: bool = False,
    ) -> tuple[list[np.float64], int]:
        """Run the EM optimization

        :param epsilon: stopping criterion (:math:`\\ell_1` norm between two iterates of log likelihood), defaults to 1e-6
        :type epsilon: float, optional
        :param maxiter: Maximum number of steps, defaults to 50
        :type maxiter: int, optional
        :param verbose: Verbosity level, defaults to False
        :type verbose: bool, optional
        :return: Log likelihood values and number of steps taken
        :rtype: (list,int)
        """

        if self.sparse:
            raise NotImplementedError('The sparse algorithm for DawidSkeneShared in the current context is not implemented.')
        return self.run_dense(
            epsilon=epsilon,
            maxiter=maxiter,
            verbose=verbose,
        )

    def get_answers(self):
        """Get most probable labels"""
        if self.sparse:
            return np.vectorize(self.inv_labels.get)(
                sp.argmax(self.sparse_T, axis=1).todense()
            )
        return np.vectorize(self.inv_labels.get)(
            np.argmax(self.get_probas(), axis=1)
        )

    def get_probas(self):
        """Get soft labels distribution for each task"""
        if self.sparse:
            warnings.warn("Sparse implementation only returns hard labels")
            return self.get_answers()
        return self.T


In [36]:
dss = DawidSkeneShared(answers=votes, n_workers=N_WORKERS, n_classes=N_CLASSES, sparse=False)

[32m2025-03-19 14:08:45.340[0m | [1mINFO    [0m | [36m__main__[0m:[36minit_crowd_matrix[0m:[36m95[0m - [1mDense crowd matrix  5904[0m


In [37]:
dss.run(maxiter=100)

[32m2025-03-19 14:08:46.598[0m | [1mINFO    [0m | [36m__main__[0m:[36minit_T[0m:[36m103[0m - [1mSize of T before calc: 1568[0m
[32m2025-03-19 14:08:46.602[0m | [1mINFO    [0m | [36m__main__[0m:[36minit_T[0m:[36m107[0m - [1mSize of T: 1568[0m
Finished:  61%|██████    | 61/100 [00:00<00:00, 322.73it/s]


([np.float64(-0.027586772757028165),
  np.float64(-0.04577618296861183),
  np.float64(-0.027089862546890126),
  np.float64(-0.007047782094224704),
  np.float64(0.002385997332072406),
  np.float64(0.005258957474095518),
  np.float64(0.00689830431052444),
  np.float64(0.008151101246933223),
  np.float64(0.00919250268013707),
  np.float64(0.010164030187279821),
  np.float64(0.011195333568343138),
  np.float64(0.012400123406574684),
  np.float64(0.013807222652733637),
  np.float64(0.015235613815202017),
  np.float64(0.016361922718117344),
  np.float64(0.017075935584952434),
  np.float64(0.017530719713185882),
  np.float64(0.017887639872181036),
  np.float64(0.018231767162021358),
  np.float64(0.018602405750528638),
  np.float64(0.019020924380967762),
  np.float64(0.019503528820936436),
  np.float64(0.020066211474956956),
  np.float64(0.020726305053762717),
  np.float64(0.021502294725816083),
  np.float64(0.02241229060096038),
  np.float64(0.02347118201173101),
  np.float64(0.02468661935593

In [30]:
dss.get_answers()

array([2, 2, 8, 1, 4, 0, 5, 3, 7, 8, 0, 5, 8, 5, 4, 7, 4, 7, 5, 7])