In [None]:

import numpy as np

# load data
from toy_data import N_CLASSES, N_WORKERS, votes

from peerannot.models import DawidSkene


  from .autonotebook import tqdm as notebook_tqdm


In [198]:
class MultinomialBinary(DawidSkene):
    def __init__(self, answers, n_workers, n_classes, *, path_remove=None):
        super().__init__(
            answers, n_workers, n_classes, path_remove=path_remove,
        )

    def _m_step(self) -> None:
        """Maximizing log likelihood with only diagonal elements of pi."""
        rho = self.T.sum(0) / self.n_task

        pi = np.zeros(self.n_workers)
        for j in range(self.n_workers):
            alpha = (self.T * self.crowd_matrix[:, j, :]).sum() / self.n_task
            pi[j] = alpha

        off_diag_alpha = (np.ones_like(pi) - pi) / (self.n_classes - 1)
        self.rho, self.pi, self.off_diag_alpha = rho, pi, off_diag_alpha

    def _e_step(self) -> None:
        """Estimate indicator variables (see eq. 2.5 Dawid and Skene 1979)

        Returns:
            T: New estimate for indicator variables (n_task, n_worker)
            denom: value used to compute likelihood easily
        """
        T = np.zeros((self.n_task, self.n_classes))
        for i in range(self.n_task):
            for j in range(self.n_classes):

                diag_contrib = np.prod(np.power(
                    self.pi, self.crowd_matrix[i, :, j],
                ) ) # shape (n_workers,)

                mask = np.ones(self.n_classes, dtype=bool)
                mask[j] = False
                off_diag_labels = self.crowd_matrix[i, :, mask]

                off_diag_contrib = np.prod(
                    np.power(self.off_diag_alpha,
                    off_diag_labels),
                )

                T[i,j] = np.prod(diag_contrib * off_diag_contrib)*self.rho[j]

        self.denom_e_step = T.sum(1, keepdims=True)
        self.T = np.where(self.denom_e_step > 0, T / self.denom_e_step, T)



In [199]:
mb = MultinomialBinary(votes, N_WORKERS, N_CLASSES)
mb.run(200)
mb.get_answers()


[32m2025-03-27 16:28:14.069[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36minit_crowd_matrix[0m:[36m105[0m - [34m[1mDense crowd matrix  5904[0m
[32m2025-03-27 16:28:14.070[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m__init__[0m:[36m83[0m - [34m[1mDense Crowd matrix5904[0m
[32m2025-03-27 16:28:14.070[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36minit_T[0m:[36m112[0m - [34m[1mSize of T before calc: 1568[0m
[32m2025-03-27 16:28:14.071[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36minit_T[0m:[36m116[0m - [34m[1mSize of T: 1568[0m
Finished:   4%|▍         | 2/50 [00:00<00:00, 252.84it/s]


array([2, 2, 8, 1, 4, 0, 3, 4, 7, 1, 0, 3, 1, 3, 4, 7, 4, 7, 3, 7])

In [200]:
# dirty implementation just for comparison
class MultinomialBinary(DawidSkene):
    def __init__(self, answers, n_workers, n_classes, *, path_remove=None):
        super().__init__(
            answers, n_workers, n_classes, path_remove=path_remove,
            )
    def _m_step(self) -> None:
        """Maximizing log likelihood with full confusion matrices (diagonal + off-diagonal)."""

        # Update rho (class prior probabilities)
        self.rho = self.T.sum(0) / self.n_task

        # Initialize pi as a (n_workers, n_classes, n_classes) tensor
        pi = np.zeros((self.n_workers, self.n_classes, self.n_classes))

        for j in range(self.n_workers):
            # Compute alpha for worker j (per-class reliability)
            alpha = ((self.T * self.crowd_matrix[:, j, :]).sum() )/ self.n_task
            # Initialize the full confusion matrix for worker j
            pij = np.full((self.n_classes, self.n_classes), (1 - alpha) / (self.n_classes - 1))

            # Set the diagonal elements (worker accuracy per class)
            np.fill_diagonal(pij, alpha)

            pi[j] = pij

        # Store off-diagonal alpha for reference
        self.off_diag_alpha = (1 - pi) / (self.n_classes - 1)
        self.pi = pi

In [5]:
mb = MultinomialBinary(votes, N_WORKERS, N_CLASSES)
mb.run(100)
mb.get_answers()

[32m2025-03-27 15:05:00.754[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36minit_crowd_matrix[0m:[36m105[0m - [34m[1mDense crowd matrix  5904[0m
[32m2025-03-27 15:05:00.755[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m__init__[0m:[36m83[0m - [34m[1mDense Crowd matrix5904[0m
[32m2025-03-27 15:05:00.756[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36minit_T[0m:[36m112[0m - [34m[1mSize of T before calc: 1568[0m
[32m2025-03-27 15:05:00.756[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36minit_T[0m:[36m116[0m - [34m[1mSize of T: 1568[0m
Finished:   4%|▍         | 2/50 [00:00<00:00, 219.74it/s]


array([2, 2, 8, 1, 4, 0, 3, 4, 7, 1, 0, 3, 1, 3, 4, 7, 4, 7, 3, 7])