# Supplemental 1: Labeling Function vs. Majority Vote
This is the illustrative example used in Figure 2 of the Trove paper.


In [6]:
import numpy as np
from scipy.stats import mode
from snorkel.labeling.model.label_model import LabelModel

# Toy example: the words of one phrase (X) and one 0/1 label per word (Y).
# NOTE(review): Y is presumably the gold label for each word -- confirm against the figure.
X = ['risk','factor','for','diabetes','type','2']
Y = [0,0,0,1,1,1]

# label matrix \Lambda (words x labeling functions)
# Row i holds the votes cast on word X[i]; column j is the labeling function lf_names[j].
L = [
    [-1, 0, 0, 0], # risk
    [ 0, 0, 0, 0], # factor
    [-1,-1,-1, 0], # for
    [ 1, 1, 1,-1], # diabetes
    [ 0, 1, 1, 0], # type
    [ 0, 1, 1, 0], # 2
]
L = np.array(L)

# labeling function names (terminologies)
lf_names = ['MTH', 'CHV', 'LNC', 'SNOMEDCT']

# majority vote
# scipy's mode() returns a ModeResult(mode, count) pair rather than an array, so the
# per-row modes are taken from `.mode`. np.array(...) makes a writable copy and ravel()
# flattens it to one vote per word, whether or not the installed scipy keeps the
# reduced axis in the result.
Y_mv = np.array(mode(L, axis=1).mode).ravel()
# Words whose most common vote is -1 are assigned class 0.
Y_mv[Y_mv == -1] = 0

# data programming
# Fit a two-class Snorkel label model on the label matrix; the fixed seed keeps the
# learned parameters (and the probabilities printed below) reproducible across runs.
model = LabelModel(cardinality=2, verbose=True)
model.fit(L, seed=100, n_epochs=100)
# One row of class probabilities per word (row of L).
Y_hat = model.predict_proba(L)

# Print the second column of each row, i.e. the predicted probability of class 1.
# (Iterates the model output Y_hat, not the integer labels Y, which cannot be indexed.)
for y in Y_hat:
    print(y[1])
    

0.00010941123729693339
0.0001338811595505871
0.38824098106324595
0.9998885791761051
0.9956095834084472
0.9956095834084472


## Learned Labeling Function Accuracies

In [21]:
# Read both sets of learned parameters from the fitted label model, then report them.
# `w` holds one weight per labeling function; `cprobs` is the model's estimated
# conditional probability table (one sub-table per labeling function).
w = model.get_weights()
cprobs = model.get_conditional_probs()

# sep="\n" puts each heading on its own line, directly above the values it describes.
print("Loaded LF weights (accuracies)", w, sep="\n")
print("Conditional probs table", cprobs, sep="\n")

Loaded LF weights (accuracies)
[0.61711445 1.         1.         0.60000001]
Conditional probs table
[[[0.47450089 0.06187344]
  [0.51549911 0.63080639]
  [0.01       0.30732015]]

 [[0.18073374 0.0039503 ]
  [0.76156282 0.01      ]
  [0.05770345 0.98604971]]

 [[0.18073374 0.0039503 ]
  [0.76156282 0.01      ]
  [0.05770345 0.98604971]]

 [[0.         0.36171573]
  [0.99000001 0.62828428]
  [0.01       0.01      ]]]
