In [2]:

import numpy as np

# load data
from toy_data import N_CLASSES, N_WORKERS, votes

from peerannot.models import DawidSkene



In [None]:

class PoooledMultinomialBinary(DawidSkene):
    """Dawid-Skene variant with a single accuracy parameter pooled over workers.

    All workers share one confusion matrix: probability ``alpha`` on the
    diagonal and ``(1 - alpha) / (n_classes - 1)`` on every off-diagonal
    entry. ``rho`` holds the class prevalences and ``T`` the per-task
    posterior over classes.
    """

    def _init_T(self) -> None:
        """Initialise ``T`` with the per-task vote proportions.

        Sets ``self.n_il`` (vote counts per task and class),
        ``self.total_votes`` (sum of all counts) and ``self.T``.
        """
        # (n_task, n_classes): votes received by each class on each task,
        # obtained by summing the crowd matrix over its middle axis
        # (presumably the worker axis -- confirm against the base class).
        self.n_il = np.sum(self.crowd_matrix, axis=1)

        # Votes per task, kept as a column so it broadcasts over classes.
        votes_per_task = self.n_il.sum(axis=1, keepdims=True)

        self.total_votes = np.sum(self.n_il)

        # Normalise each ROW so that T[i] is a distribution over classes.
        # Dividing by the per-class totals (axis=0) would normalise columns
        # instead, and would produce NaN for any class that got no vote.
        # A task without votes keeps an all-zero row (0 / 1).
        self.T = self.n_il / np.where(votes_per_task > 0, votes_per_task, 1)

    def _m_step(self) -> None:
        """Maximizing log likelihood with a single confusion matrix shared across all workers.

        Updates ``self.rho`` (column means of ``T``) and ``self.alpha``
        (posterior-weighted share of votes that agree with the label).
        """
        self.rho = self.T.sum(0) / self.n_task

        # Equivalent to sum_t sum_q T[t, q] * n_il[t, q]: every vote for
        # class q on task t is weighted by the posterior of q on that task.
        sum_diag_votes = np.einsum("tq, tiq ->", self.T, self.crowd_matrix)
        self.alpha = sum_diag_votes / self.total_votes

    def _e_step(self):
        """Recompute the posterior ``T`` from ``alpha`` and ``rho``.

        Also stores the per-task normalising constants in
        ``self.denom_e_step``.
        """
        T = np.zeros((self.n_task, self.n_classes))

        # Probability of any single wrong answer; it does not depend on the
        # task or the label, so compute it once.
        off_diag = (1 - self.alpha) / (self.n_classes - 1)

        for i in range(self.n_task):
            n_i = self.n_il[i].sum()  # total number of votes on task i
            for l in range(self.n_classes):
                n_il = self.n_il[i, l]  # number of votes for label l on task i
                diag_contrib = np.power(self.alpha, n_il)
                off_diag_contrib = np.power(off_diag, n_i - n_il)

                T[i, l] = diag_contrib * off_diag_contrib * self.rho[l]

        self.denom_e_step = T.sum(axis=1, keepdims=True)
        # Rows whose total is zero are left unnormalised.
        self.T = np.where(self.denom_e_step > 0, T / self.denom_e_step, T)


# Build the loop-based pooled model on the toy votes.
pmb = PoooledMultinomialBinary(votes, N_WORKERS, N_CLASSES)

# Alternate _m_step / _e_step; maxiter presumably caps the number of iterations.
pmb.run(maxiter=400)
# Bare last expression so the notebook displays the aggregated answers.
pmb.get_answers()



In [None]:
class VectorizedPoooledMultinomialBinary(PoooledMultinomialBinary):
    """Pooled model whose E-step works on whole arrays instead of Python loops."""

    def _e_step(self):
        """Recompute the posterior ``T`` for every task and class at once.

        Reads ``self.n_il``, ``self.alpha``, ``self.rho`` and
        ``self.n_classes``; writes ``self.denom_e_step`` and ``self.T``.
        """
        # Column of per-task vote totals, broadcast against n_il below.
        votes_per_task = self.n_il.sum(axis=1, keepdims=True)
        disagreeing = votes_per_task - self.n_il

        # Probability assigned to each individual wrong answer.
        miss_prob = (1 - self.alpha) / (self.n_classes - 1)

        agree_term = self.alpha ** self.n_il
        miss_term = miss_prob ** disagreeing
        unnormalised = agree_term * miss_term * self.rho[None, :]

        row_totals = unnormalised.sum(axis=1, keepdims=True)
        self.denom_e_step = row_totals
        # Rows whose total is zero are left unnormalised.
        self.T = np.where(row_totals > 0, unnormalised / row_totals, unnormalised)


# Same experiment as the loop-based model, using the vectorised E-step.
pmb = VectorizedPoooledMultinomialBinary(votes, N_WORKERS, N_CLASSES)

# Same iteration budget as the loop-based run, to keep the two comparable.
pmb.run(maxiter=400)
# Bare last expression so the notebook displays the aggregated answers.
pmb.get_answers()

[32m2025-04-03 11:57:20.660[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m_init_crowd_matrix[0m:[36m106[0m - [34m[1mDense crowd matrix  5904[0m
[32m2025-04-03 11:57:20.661[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m__init__[0m:[36m84[0m - [34m[1mDense Crowd matrix5904[0m


Dawid and Skene:   0%|          | 0/400 [00:00<?, ?it/s]

array([2, 2, 8, 1, 4, 0, 3, 3, 7, 1, 0, 3, 7, 3, 4, 7, 4, 7, 3, 7])

In [None]:
class PoooledMultinomialBinary(DawidSkene):
    """Pooled-accuracy model that works directly from ``crowd_matrix``.

    Only ``_m_step`` and ``_e_step`` are defined here; every other method
    comes from the parent class. Note that this reuses the name of a class
    defined in an earlier cell, so running this cell rebinds that name.
    """

    def _m_step(self) -> None:
        """Maximizing log likelihood with a single confusion matrix shared across all workers.

        Updates ``self.rho``, ``self.alpha`` and ``self.off_diag_alpha``.
        """
        posterior = self.T
        answers = self.crowd_matrix

        self.rho = posterior.sum(0) / self.n_task

        # Votes that agree with a class, weighted by that class's posterior.
        agreeing_mass = np.einsum("tq, tiq ->", posterior, answers)

        # Every vote of a task weighted by the task's total posterior mass.
        # It does not depend on alpha and could be computed once up front.
        total_mass = np.einsum("tq, tij ->", posterior, answers)
        safe_total = np.where(total_mass > 0, total_mass, 1e-9)

        self.alpha = agreeing_mass / safe_total
        self.off_diag_alpha = (1 - self.alpha) / (self.n_classes - 1)

    def _e_step(self):
        """Expectation step: estimate probability of true labels given current parameters."""
        unnormalised = np.zeros((self.n_task, self.n_classes))

        for task in range(self.n_task):
            # Votes per class on this task (sum over the first axis of the slice).
            class_counts = np.sum(self.crowd_matrix[task], axis=0)
            # Total number of annotations for this task.
            n_votes = class_counts.sum()

            for label in range(self.n_classes):
                # Number of annotators labeling this task as `label`.
                agree = class_counts[label]
                unnormalised[task, label] = (
                    self.alpha ** agree
                    * self.off_diag_alpha ** (n_votes - agree)
                    * self.rho[label]
                )

        row_totals = unnormalised.sum(1, keepdims=True)
        self.denom_e_step = row_totals

        # Rows whose total is zero are left unnormalised.
        self.T = np.where(row_totals > 0, unnormalised / row_totals, unnormalised)

# Instantiate the class defined just above on the toy votes.
pmb = PoooledMultinomialBinary(votes, N_WORKERS, N_CLASSES)

# Inspect the initial posterior only. The class above defines no _init_T of
# its own, so this call resolves to the one provided by its parent class.
pmb._init_T()
pmb.T
# Full fit left disabled while the initialisation is being inspected:
# pmb.run(maxiter=200)
# pmb.get_answers()
