In [1]:

import numpy as np

# load data
from toy_data import N_CLASSES, N_WORKERS, votes

from peerannot.models import DawidSkene



In [2]:
# Build the model from the toy votes, run it with maxiter=70, and display
# whatever get_answers() returns as the cell output.
ds = DawidSkene(votes, N_WORKERS, N_CLASSES)
ds.run(maxiter=70)
ds.get_answers()

[32m2025-04-10 23:49:24.912[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m_init_crowd_matrix[0m:[36m107[0m - [34m[1mSize of dense crowd matrix: 864[0m


DawidSkene:   0%|          | 0/70 [00:00<?, ?it/s]

array([2, 2, 8, 1, 4, 0, 5, 3, 7, 1, 0, 3, 8, 3, 4, 7, 4, 7, 3, 7])

In [5]:
# Inspect the shape of the model's T array after the run.
ds.T.shape

(20, 9)

In [4]:
# Grab the model's crowd matrix; the next cell uses it to build a new model.
a = ds.crowd_matrix

In [5]:
# Build a fresh model directly from the crowd matrix. This rebinds `ds`, so the
# previously fitted model is no longer reachable under that name.
# NOTE(review): the DawidSkene class defined further down in this notebook has
# no `from_crowd_matrix`; this cell only works while the name still refers to
# the class imported from peerannot.models.
ds = DawidSkene.from_crowd_matrix(a)

[32m2025-04-10 17:51:53.550[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m_init_crowd_matrix[0m:[36m129[0m - [34m[1mSize of dense crowd matrix: 145[0m


In [6]:
# Run with the default arguments; the value returned by run() is echoed in full
# as the cell output.
ds.run()

DawidSkene:   0%|          | 0/50 [00:00<?, ?it/s]

([np.float64(0.3633869682954748),
  np.float64(0.4865896758715302),
  np.float64(0.5645373082298869),
  np.float64(0.5894401764017763),
  np.float64(0.5972999482341729),
  np.float64(0.6011377186417711),
  np.float64(0.6030563113012529),
  np.float64(0.6041311628565327),
  np.float64(0.6050284872743155),
  np.float64(0.6061538384372023),
  np.float64(0.6076956199088558),
  np.float64(0.6099042035713347),
  np.float64(0.6131757023590818),
  np.float64(0.6180981622692652),
  np.float64(0.6252309197106991),
  np.float64(0.6337922044398349),
  np.float64(0.6398838187014028),
  np.float64(0.641430847906076),
  np.float64(0.641575748502922),
  np.float64(0.6416452221691288),
  np.float64(0.6416973262270965),
  np.float64(0.6417364310053917),
  np.float64(0.6417657748302665),
  np.float64(0.6417877913620794),
  np.float64(0.6418043086740544),
  np.float64(0.6418166994395368),
  np.float64(0.6418259940862142),
  np.float64(0.6418329659593454),
  np.float64(0.6418381953653485),
  np.float64(0.6

In [None]:
import warnings
from collections.abc import Generator
from os import PathLike
from sys import getsizeof
from typing import Annotated

import numpy as np
from annotated_types import Ge
from loguru import logger
from numpy.typing import NDArray
from pydantic import validate_call
from tqdm.auto import tqdm

from peerannot.models.aggregation.warnings import DidNotConverge
from peerannot.models.template import AnswersDict, CrowdModel

FilePathInput = PathLike | str | list[str] | Generator[str, None, None] | None


class DawidSkene(CrowdModel):
    """
    =============================
    Dawid and Skene model (1979)
    =============================

    Assumptions:
    - independent workers

    Using:
    - EM algorithm

    Estimating:
    - One confusion matrix for each worker
    """

    @validate_call
    def __init__(
        self,
        answers: AnswersDict,
        n_workers: Annotated[int, Ge(1)],
        n_classes: Annotated[int, Ge(1)],
        *,
        path_remove: FilePathInput = None,
    ) -> None:
        r"""Dawid and Skene strategy: estimate confusion matrix for each worker.

        Assuming that workers are independent, the model assumes that

        .. math::

            (y_i^{(j)} \mid y_i^\star = k) \sim \mathcal{M}\left(\pi^{(j)}_{k,\cdot}\right)

        and maximizes the log likelihood of the model using an EM algorithm.

        .. math::

            \underset{\rho,\pi,T}{\mathrm{argmax}}\prod_{i\in [n_{\texttt{task}}]}\prod_{k \in [K]}\bigg[\rho_k\prod_{j\in [n_{\texttt{worker}}]}\prod_{\ell\in [K]}\big(\pi^{(j)}_{k, \ell}\big)^{\mathbf{1}_{\{y_i^{(j)}=\ell\}}}\bigg]^{T_{i,k}},

        where :math:`\rho` is the class marginals, :math:`\pi` is the confusion matrix and :math:`T` is the indicator variables of belonging to each class.

        :param answers: Dictionary of workers answers with format

         .. code-block:: javascript

            {
                task0: {worker0: label, worker1: label},
                task1: {worker1: label}
            }

        :type answers: dict
        :param n_workers: Number of workers (size of the worker axis of the
            crowd matrix), must be at least 1
        :type n_workers: int
        :param n_classes: Number of possible classes (size of the label axis
            of the crowd matrix), must be at least 1
        :type n_classes: int
        :param path_remove: Keyword-only, defaults to None. Not read by any
            code visible in this class; presumably consumed by the base
            class when excluding answers (TODO confirm)
        """

        super().__init__(answers)
        self.n_workers: int = n_workers
        self.n_classes: int = n_classes
        # NOTE(review): n_task is measured before _exclude_answers() runs; if
        # that helper can drop whole tasks the count would be stale - confirm.
        self.n_task: int = len(self.answers)

        self._exclude_answers()

        self._init_crowd_matrix()
        logger.debug(f"Dense Crowd matrix{getsizeof(self.crowd_matrix)}")

    def _init_crowd_matrix(self) -> None:
        """Transform the dictionary of labels into a dense array of shape
        (n_task, n_workers, n_classes).

        Every (task, worker, label) answer increments the matching cell, so
        the task, worker and label values are used directly as integer
        indices. The result is stored in ``self.crowd_matrix``.
        """

        matrix = np.zeros((self.n_task, self.n_workers, self.n_classes))
        for task, ans in self.answers.items():
            for worker, label in ans.items():
                matrix[task, worker, label] += 1

        logger.debug(f"Dense crowd matrix  {getsizeof(matrix)}")
        self.crowd_matrix = matrix

    def _init_T(self) -> None:  # noqa: N802
        """Initialize ``self.T`` with the per-task vote frequencies.

        Votes are summed over the worker axis and each task row is divided
        by its total number of votes. Tasks without any vote get an all-zero
        row.
        """
        # T shape is n_task, n_classes
        T = self.crowd_matrix.sum(axis=1)  # noqa: N806
        logger.debug(f"Size of T before calc: {getsizeof(T)}")

        tdim = T.sum(1, keepdims=True)
        # Masked division: rows with no votes are never divided, so no 0/0
        # RuntimeWarning is raised and those rows simply stay at zero.
        self.T = np.divide(
            T,
            tdim,
            out=np.zeros_like(T, dtype=float),
            where=tdim > 0,
        )
        logger.debug(f"Size of T: {getsizeof(self.T)}")

    def _m_step(self) -> None:
        r"""Maximizing log likelihood (see eq. 2.3 and 2.4 Dawid and Skene 1979)

        Updates, in place:

        - ``self.rho``: :math:`(\rho_j)_j`, the column means of ``self.T``
          (class marginals), shape (n_classes,)
        - ``self.pi``: for each worker, the ``T``-weighted count of label
          ``l`` recorded when class ``j`` is the soft truth, normalized over
          ``l``; shape (n_workers, n_classes, n_classes)
        """

        self.rho = self.T.sum(0) / self.n_task
        # pi[w, q, c] = sum_t T[t, q] * crowd_matrix[t, w, c]
        pi = np.einsum("tq,twc->wqc", self.T, self.crowd_matrix)
        denom = pi.sum(axis=2, keepdims=True)
        # Rows whose total weight is zero are left at zero instead of being
        # divided by a sentinel value.
        self.pi = np.divide(
            pi,
            denom,
            out=np.zeros_like(pi, dtype=float),
            where=denom > 0,
        )

    def _e_step(self) -> None:
        """Estimate indicator variables (see eq. 2.5 Dawid and Skene 1979)

        Recomputes ``self.T`` from ``self.pi`` and ``self.rho`` and stores
        the per-task normalizers in ``self.denom_e_step``, shape (n_task, 1).
        """

        # Broadcasts to (n_task, n_workers, n_classes, n_classes): every
        # confusion entry is raised to the number of matching votes, so
        # entries with zero votes contribute a factor of 1. This materializes
        # n_task * n_workers * n_classes**2 elements.
        exp_pi = np.power(
            self.pi[np.newaxis, :, :, :],
            self.crowd_matrix[:, :, np.newaxis, :],
        )

        # numerator: product over the observed-label axis, then over the
        # worker axis, weighted by the class marginals
        num = np.prod(exp_pi, axis=3).prod(axis=1) * self.rho[np.newaxis, :]
        self.denom_e_step = num.sum(axis=1, keepdims=True)
        # Masked division: rows whose normalizer is not positive keep their
        # raw numerator, without evaluating a 0/0 first.
        self.T = np.divide(
            num,
            self.denom_e_step,
            out=num.copy(),
            where=self.denom_e_step > 0,
        )

    def _log_likelihood(self) -> float:
        """Compute the quantity monitored for convergence.

        Returns the log of the sum of the per-task E-step normalizers.
        """
        # NOTE(review): this is log(sum_i d_i), not sum_i log(d_i); the
        # Dawid-Skene log likelihood is usually the latter. Left unchanged
        # because the stopping rule in run() depends on it - confirm intent.
        return np.log(np.sum(self.denom_e_step))

    @validate_call
    def run(
        self,
        epsilon: Annotated[float, Ge(0)] = 1e-6,
        maxiter: Annotated[int, Ge(0)] = 50,
    ) -> tuple[list[float], int]:
        r"""Run the EM optimization

        Alternates M and E steps until the absolute change of the monitored
        log likelihood drops to ``epsilon`` or ``maxiter`` steps were taken.
        The number of steps is also stored in ``self.c``. A ``DidNotConverge``
        warning is emitted when the last change is still above ``epsilon``.

        :param epsilon: stopping criterion (:math:`\ell_1` norm between two iterates of log likelihood), defaults to 1e-6
        :type epsilon: float, optional
        :param maxiter: Maximum number of steps, defaults to 50
        :type maxiter: int, optional
        :return: Log likelihood values and number of steps taken
        :rtype: (list,int)
        """

        i = 0
        eps = np.inf

        self._init_T()
        ll = []
        # The context manager closes the progress bar even if a step raises.
        with tqdm(total=maxiter, desc="Dawid and Skene") as pbar:
            while i < maxiter and eps > epsilon:
                self._m_step()
                self._e_step()
                likeli = self._log_likelihood()
                ll.append(likeli)
                # A difference needs two values, so eps stays infinite after
                # the first iteration.
                if i > 0:
                    eps = np.abs(ll[-1] - ll[-2])
                i += 1
                pbar.update(1)

            pbar.set_description("Finished")
        self.c = i
        if eps > epsilon:
            warnings.warn(
                DidNotConverge(self.__class__.__name__, eps, epsilon),
                stacklevel=2,
            )

        return ll, i

    def get_answers(self) -> NDArray:
        """Get most probable labels

        Takes the argmax of the soft labels for every task and maps each
        index through ``self.inv_labels``.
        """
        # NOTE(review): inv_labels is not defined in this class; presumably
        # it is provided by the base class - confirm.
        return np.vectorize(self.inv_labels.get)(
            np.argmax(self.get_probas(), axis=1),
        )

    def get_probas(self) -> NDArray:
        """Get soft labels distribution for each task (the current ``self.T``)"""
        return self.T


In [6]:
# Repeat the toy run, this time with the DawidSkene class defined in the cell
# above (it shadows the one imported from peerannot.models), and display the
# labels returned by get_answers().
ds = DawidSkene(votes, N_WORKERS, N_CLASSES)
ds.run(maxiter=70)
ds.get_answers()

[32m2025-04-03 16:16:16.119[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m_init_crowd_matrix[0m:[36m106[0m - [34m[1mDense crowd matrix  5904[0m
[32m2025-04-03 16:16:16.120[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m__init__[0m:[36m84[0m - [34m[1mDense Crowd matrix5904[0m
[32m2025-04-03 16:16:16.121[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m_init_T[0m:[36m113[0m - [34m[1mSize of T before calc: 1568[0m
[32m2025-04-03 16:16:16.121[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36m_init_T[0m:[36m117[0m - [34m[1mSize of T: 1568[0m


Dawid and Skene:   0%|          | 0/70 [00:00<?, ?it/s]

array([2, 2, 8, 1, 4, 0, 5, 3, 7, 1, 0, 3, 8, 3, 4, 7, 4, 7, 3, 7])