In [2]:
import numpy as np

In [3]:
# Load the comma-separated annotation table as an integer array; each row is
# later unpacked by format_data as (worker, influencer, label).
fashion = np.genfromtxt('data/aij_fashion.csv', delimiter=",",dtype=int)

In [173]:
def format_data(aij_fashion):
    """Group (worker, influencer, label) rows into a nested dictionary.

    Args:
        aij_fashion: iterable of 3-item rows, each unpacked as
            (worker, influencer, label).
    Returns:
        dict of the form {influencer: {worker: [label]}}. The stored label is
        [1] when the row's label equals 1 and [0] for any other value. A later
        row for the same (influencer, worker) pair replaces the earlier one.
    """
    grouped = {}
    for row in aij_fashion:
        worker, influencer, label = row
        # create the influencer's inner dict on first sight, then reuse it
        per_worker = grouped.setdefault(influencer, {})
        per_worker[worker] = [1] if label == 1 else [0]
    return grouped

In [174]:
# Rebinds `fashion` from the loaded array to the nested
# {influencer: {worker: [label]}} dict returned by format_data.
# NOTE(review): because the name is reused, re-running this cell without
# re-running the load cell feeds the dict back into format_data; consider
# binding the result to a distinct name.
fashion = format_data(fashion)

In [175]:
def fashion_to_counts(fashions):
    """
    Convert a nested dictionary of labels into a dense array of counts.

    Args:
        fashions: dictionary of labels {influencer: {worker: [labels]}}
    Returns:
        influencers: list of influencers, in the dictionary's key order
        workers: list of the distinct workers seen across all influencers
        choices: sorted list of the distinct labels (1 or 0 in our case)
        counts: 3d float array of counts: [influencers x workers x choices];
            counts[i, k, j] is the number of times workers[k] gave
            influencers[i] the label choices[j]
    """
    influencers = list(fashions)

    # collect every distinct worker and label that appears in the data
    workers = set()
    choices = set()
    for worker_labels in fashions.values():
        for worker, labels in worker_labels.items():
            workers.add(worker)
            choices.update(labels)

    choices = sorted(choices)
    workers = list(workers)

    # position lookups: a dict gives O(1) access where list.index() would
    # rescan the whole list for every single label
    worker_index = {worker: k for k, worker in enumerate(workers)}
    choice_index = {label: j for j, label in enumerate(choices)}

    # create a 3d array to hold counts
    counts = np.zeros([len(influencers), len(workers), len(choices)])

    # convert responses to counts
    for i, influencer in enumerate(influencers):
        for worker, labels in fashions[influencer].items():
            k = worker_index[worker]
            for label in labels:
                counts[i, k, choice_index[label]] += 1

    return (influencers, workers, choices, counts)

In [176]:
# Build the influencer/worker/choice index lists and the count array that the
# voting and EM steps below operate on.
(influencers, workers, choices, counts) = fashion_to_counts(fashion)

In [177]:
def majority_voting(counts):
    """
    Estimate initial label weights from the share of responses per choice.

    Args:
        counts: 3d array of counts: [influencers x workers x choices]
    Returns:
        result: 2d float array [influencers x choices]; each row holds the
            fraction of that influencer's responses that picked each choice.
            Rows for influencers with no responses at all are left as zeros.
    """
    [nInfluencers, nWorkers, nChoices] = np.shape(counts)
    # pool the responses of all workers for each influencer
    responses_sums = np.sum(counts, 1)
    totals = np.sum(responses_sums, 1, dtype=float, keepdims=True)
    result = np.zeros([nInfluencers, nChoices])
    # divide only rows that have at least one response, so an unlabeled
    # influencer yields zeros instead of NaN from 0/0 (m_step and e_step
    # apply the same "sum > 0" guard before normalizing)
    np.divide(responses_sums, totals, out=result, where=totals > 0)
    return result

In [178]:
# Initial label estimates: per-influencer share of responses for each choice.
influencers_label = majority_voting(counts)

In [1]:
# Show the majority-vote estimates computed in the previous cell.
# NOTE(review): the saved output is a NameError recorded under execution
# count In [1], so this cell was presumably last run on a fresh kernel before
# `influencers_label` existed; re-run the notebook top to bottom to refresh it.
influencers_label

NameError: name 'influencers_label' is not defined

In [180]:
def m_step(counts, influencers_label):
    """
    Re-estimate class marginals and per-worker error rates from label weights.

    Args:
        counts: 3d array of counts, indexed [influencer, worker, choice]
        influencers_label: 2d array of label weights, indexed
            [influencer, choice]
    Returns:
        class_marginals: column sums of influencers_label divided by the
            number of influencers
        error_rates: array [workers x choices x choices]; entry [k, j, l] is
            the dot product over influencers of influencers_label[:, j] and
            counts[:, k, l], with each [k, j, :] row divided by its sum when
            that sum is positive (rows summing to zero are left untouched)
    """
    n_items, n_workers, n_choices = np.shape(counts)

    # average weight of each choice across all influencers
    class_marginals = np.sum(influencers_label, 0) / float(n_items)

    error_rates = np.zeros([n_workers, n_choices, n_choices])
    # visit every (worker, choice-weight column) pair, worker-major
    for k, j in np.ndindex(n_workers, n_choices):
        weights = influencers_label[:, j]
        for l in range(n_choices):
            error_rates[k, j, l] = np.dot(weights, counts[:, k, l])
        row_total = np.sum(error_rates[k, j, :])
        # normalize only when this worker contributed something for column j
        if row_total > 0:
            error_rates[k, j, :] = error_rates[k, j, :] / float(row_total)

    return (class_marginals, error_rates)

In [181]:
# First M-step: derive class marginals and per-worker error rates from the
# current label estimates.
(class_marginals, error_rates) = m_step(counts,influencers_label)

In [185]:
def e_step(counts, class_marginals, error_rates):
    """
    Recompute per-influencer label weights from marginals and error rates.

    Args:
        counts: 3d array of counts, indexed [influencer, worker, choice]
        class_marginals: sequence indexed by choice
        error_rates: 3d array indexed [worker, choice, choice]
    Returns:
        2d array [influencers x choices]; entry [i, j] is class_marginals[j]
        times the product over all workers and choices of
        error_rates[:, j, :] raised to counts[i, :, :], with each row divided
        by its sum when that sum is positive.
    """
    n_items, _, n_choices = np.shape(counts)

    posteriors = np.zeros([n_items, n_choices])

    for i in range(n_items):
        item_counts = counts[i, :, :]
        for j in range(n_choices):
            # entries with a zero count contribute a factor of 1 to the product
            likelihood = np.prod(np.power(error_rates[:, j, :], item_counts))
            posteriors[i, j] = class_marginals[j] * likelihood
        row_total = np.sum(posteriors[i, :])
        # skip normalization when every choice got zero weight
        if row_total > 0:
            posteriors[i, :] = posteriors[i, :] / float(row_total)
    return posteriors

In [183]:
# E-step: replace the label estimates with ones recomputed from the current
# class marginals and error rates.
influencers_label = e_step(counts, class_marginals, error_rates)

In [184]:
# Display the updated estimates: one row per influencer, one column per choice.
influencers_label

array([[9.96818451e-01, 3.18154879e-03],
       [3.67969483e-07, 9.99999632e-01],
       [9.99680657e-01, 3.19343028e-04],
       ...,
       [9.99272876e-01, 7.27124197e-04],
       [9.99556229e-01, 4.43770599e-04],
       [9.99904300e-01, 9.57003684e-05]])

In [4]:
import main

1 	 -8076.233137050091
2 	 -7836.089021402673 	0.045172	14.878785
3 	 -7820.205137613682 	0.009740	2.941154
4 	 -7814.963102801673 	0.006388	1.642807
5 	 -7813.45373362262 	0.003417	0.779985
6 	 -7811.568698459982 	0.002312	0.572327
7 	 -7808.159271230666 	0.002686	0.691574
8 	 -7806.035938694625 	0.002892	0.732018
9 	 -7805.482047327452 	0.001549	0.336119
10 	 -7805.319430473255 	0.000805	0.165676
11 	 -7805.219237337406 	0.000564	0.123869
12 	 -7805.1467182919305 	0.000429	0.095837
13 	 -7805.090285898312 	0.000343	0.076959
14 	 -7805.036993576848 	0.000301	0.067951
15 	 -7804.977476897134 	0.000289	0.067074
16 	 -7804.911759426238 	0.000279	0.065799
17 	 -7804.851529640802 	0.000243	0.057210
18 	 -7804.809665505912 	0.000177	0.041241
19 	 -7804.78724372634 	0.000107	0.024653
20 	 -7804.777179377221 	0.000058	0.013212
21 	 -7804.773057458034 	0.000030	0.007018
22 	 -7804.7714326358855 	0.000017	0.003977
23 	 -7804.770799308372 	0.000010	0.002473
24 	 -7804.7705518151215 	0.000007	0.0