In [None]:

import numpy as np

# load data
from toy_data import N_CLASSES, N_WORKERS, votes

from peerannot.models import DawidSkene



In [None]:
ds = DawidSkene(votes, N_WORKERS, N_CLASSES)
ds.run(maxiter=70)
ds.get_answers()

In [154]:

class PoooledMultinomialBinary(DawidSkene):


    def _init_T(self)->None:
        # T shape n_tasks, n classes
        # n_i shape
        self.sum_n_il = np.sum(self.crowd_matrix, axis=1)  # n_tasks, n_classes: sum of votes given by each worker
        self.sum_n_i = np.sum(self.sum_n_il, axis=0) # how many votes for each class
        self.T = self.sum_n_il/self.sum_n_i

    def _m_step(self) -> None:
        """Maximizing log likelihood with a single confusion matrix shared across all workers."""

        self.rho = self.T.sum(0) / self.n_task

        sum_diag_votes = np.einsum("tq, tiq ->", self.T, self.crowd_matrix) # equivalent to:
        # numerator = np.trace(np.dot(self.T.T, self.sum_n_il))

        self.alpha = sum_diag_votes/np.sum(self.sum_n_i) # denom could be moved to _init_T

    def _e_step(self):
        T = np.zeros((self.n_task, self.n_classes))

        for i in range(self.n_task):
            worker_labels = self.sum_n_il[i]
            for l in range(self.n_classes):
                n_i = worker_labels.sum() # total numer of annotators of task i
                n_il = worker_labels[l] # numer of annotators of task i voting for label l
                diag_contrib = np.power(self.alpha, n_il)
                off_diag_contrib = np.power((1 - self.alpha), n_i - n_il ) # (self.n_classes - 1 )

                T[i, l] = diag_contrib * off_diag_contrib * self.rho[l]

        self.denom_e_step = T.sum(axis=1, keepdims=True)
        self.T = np.where(self.denom_e_step > 0, T / self.denom_e_step, T)


pmb = PoooledMultinomialBinary(votes, N_WORKERS, N_CLASSES)

pmb.run(maxiter=400)
pmb.get_answers()



[32m2025-04-02 11:25:31.354[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m_init_crowd_matrix[0m:[36m106[0m - [34m[1mDense crowd matrix  5904[0m
[32m2025-04-02 11:25:31.355[0m | [34m[1mDEBUG   [0m | [36mpeerannot.models.aggregation.DS[0m:[36m__init__[0m:[36m84[0m - [34m[1mDense Crowd matrix5904[0m
Finished:  67%|██████▋   | 268/400 [00:00<00:00, 1206.22it/s]       


array([8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [None]:
class PoooledMultinomialBinary(DawidSkene):
    def _m_step(self) -> None:
        """Maximizing log likelihood with a single confusion matrix shared across all workers."""

        self.rho = self.T.sum(0) / self.n_task

        diag_votes = np.einsum("tq, tiq ->", self.T, self.crowd_matrix)

        denom = np.einsum("tq, tij ->", self.T, self.crowd_matrix) # constant should be in init
        self.alpha = (diag_votes/np.where(denom > 0, denom, 1e-9))
        self.off_diag_alpha = (1 - self.alpha) /  (self.n_classes - 1)

    def _e_step(self):
        """Expectation step: estimate probability of true labels given current parameters."""

        T = np.zeros((self.n_task, self.n_classes))

        for i in range(self.n_task):
            worker_labels = np.sum(self.crowd_matrix[i], axis = 0 )
            for l in range(self.n_classes):
                n_il = worker_labels[l]  # Number of annotators labeling task i as class l
                n_i = worker_labels.sum()  # Total number of annotations for task i

                diag_contrib = self.alpha ** n_il
                off_diag_contrib = self.off_diag_alpha ** (n_i - n_il)

                T[i, l] = diag_contrib * off_diag_contrib * self.rho[l]
        self.denom_e_step = T.sum(1, keepdims=True)
        
        self.T = np.where(self.denom_e_step > 0, T / self.denom_e_step, T)

pmb = PoooledMultinomialBinary(votes, N_WORKERS, N_CLASSES)

pmb._init_T()
pmb.run(maxiter=200)
pmb.get_answers()
