In [10]:
import time
from examples.factors import accuracies, propensities
from examples.prediction_and_evaluation import pred_and_eval_gen_model, eval_majority_vote
from examples.utils import change_labels
from factor_graph import FactorGraph
import numpy as np

# Comparing the implemented factor graph against Snorkel (a latent MRF model)

## The data used consists of:
 - ground-truth labels Y for the constructed task of distinguishing professors from teachers in the Bias in Bios dataset
 - 99 selected labeling functions (LFs), usable in a standard data programming pipeline

In [11]:
def train_supervised(label_matrix, Y_true, lf_prop=True, n_epoch=25, lr=0.1, gibbs_samples=10, batch_size=250):
    """Fit a fully observed FactorGraph on the true labels plus the LF votes, then evaluate it.

    Args:
        label_matrix: 2-D array of labeling-function (LF) outputs, one column per LF.
            ASSUMPTION: each LF only votes for ONE label and abstains otherwise.
        Y_true: ground-truth labels; reshaped into a single column and placed in
            front of the LF columns to form the observations.
            (presumably encoded as -1/+1, matching neg_label/pos_label below -- TODO confirm)
        lf_prop: if True, propensity potentials are modelled in addition to accuracies.
        n_epoch: forwarded to ``FactorGraph.fit`` as ``n_epochs``.
        lr: forwarded to ``FactorGraph.fit``.
        gibbs_samples: forwarded to ``FactorGraph.fit``.
        batch_size: forwarded to ``FactorGraph.fit``.

    Returns:
        Tuple ``(lm, stat, probs)``: the fitted FactorGraph and the two values
        returned by ``pred_and_eval_gen_model``.
    """
    t_begin = time.time()
    num_lfs = label_matrix.shape[1]

    # Polarity of each LF is the sign of its column sum (relies on the
    # one-label-per-LF assumption stated in the docstring).
    column_signs = np.sign(np.sum(label_matrix, axis=0))
    lf_polarities = [(0, sign) for sign in column_signs]

    # Supervised case: the data fed into the PGM learning is simply the label
    # column concatenated with all LF columns.
    label_column = Y_true.reshape((-1, 1))
    observations = np.concatenate((label_column, label_matrix), axis=1)

    # MRF with fully observed variables. Each potential is a pair
    # (function, #outputs), e.g. there are num_lfs accuracies to model.
    potentials = [(accuracies, num_lfs)]
    if lf_prop:
        potentials.append((propensities, num_lfs))

    # Variable 0 is the label column (polarity (1, -1)), followed by one variable per LF.
    lm = FactorGraph(n_vars=num_lfs + 1, polarities=[(1, -1), *lf_polarities], potentials=potentials)
    lm.fit(observations, lr=lr, n_epochs=n_epoch, batch_size=batch_size, gibbs_samples=gibbs_samples, verbose=False)

    # Evaluate the learned generative model.
    stat, probs = pred_and_eval_gen_model(lm, observations, Y_true, version=99, abst=0, verbose=True, print_MV=False,
                                          eps=0.0, return_preds=True, coverage_stats=False, neg_label=-1, pos_label=1)

    # Note: the reported time covers fitting AND the evaluation call above.
    elapsed = time.time() - t_begin
    print(f"Time needed by generative model: {elapsed}")
    # Will train the downstream classifier:
    # stat_cl = train_and_eval_classifier(Xtrain, Xtest, probs, Ytest, label_matrix, library='torch',
    #                                    optim='Adam', devicestring=device, epochs=250, print_step=505)
    return lm, stat, probs

In [12]:
def train_snorkel(label_matrix, Y_true, n_epoch=1000, lr=0.1, seed=77):
    """Fit Snorkel's latent LabelModel on the LF votes and evaluate it against the true labels.

    Args:
        label_matrix: array of labeling-function outputs in this notebook's
            encoding (abstain == 0).
        Y_true: ground-truth labels, used only for evaluation.
        n_epoch: forwarded to ``LabelModel.fit`` as ``n_epochs``.
        lr: forwarded to ``LabelModel.fit``.
        seed: random seed forwarded to ``LabelModel.fit``. Defaults to 77, the
            value that was previously hard-coded, so existing calls are unchanged.

    Returns:
        Tuple ``(lm, stat, probs)``: the fitted LabelModel and the two values
        returned by ``pred_and_eval_gen_model``.
    """
    # Imported lazily inside the function, as in the original code
    # (presumably so snorkel is only needed when this comparison is run).
    from snorkel.labeling.model import LabelModel

    start_t = time.time()

    # Snorkel requires the abstention label to be -1, so relabel both arrays.
    # The inputs are not rebound; the relabelled copies get their own names.
    snorkel_L, snorkel_Y = change_labels(label_matrix, Y_true, new_label=-1, old_label=0)

    # Train the latent label model from Snorkel.
    lm = LabelModel(cardinality=2)
    lm.fit(snorkel_L, n_epochs=n_epoch, seed=seed, lr=lr)

    # Evaluate the learned generative model using Snorkel's label convention
    # (abstain == -1, negative == 0, positive == +1).
    stat, probs = pred_and_eval_gen_model(lm, snorkel_L, snorkel_Y, abst=-1, verbose=True,
                                          print_MV=False, eps=0.0, neg_label=0, pos_label=+1,
                                          return_preds=True, version=10, coverage_stats=False)

    # Note: the reported time covers relabelling, fitting AND evaluation.
    duration = time.time() - start_t
    print(f"Time needed by Snorkel's generative model: {duration}")
    # Will train the downstream classifier:
    # stat_cl = train_and_eval_classifier(Xtrain, Xtest, probs, Ytest, label_matrix, library='torch',
    #                                    optim='Adam', devicestring=device, epochs=250, print_step=505)
    return lm, stat, probs

In [13]:
# Experiment configuration.
# NOTE(review): `seed` and `n_runs` are not referenced by the cells visible here.
seed = 77
n_runs = 5

# Load the label matrix ("L") and the ground-truth labels ("Y") from the .npz archive.
data = np.load("../data/professor_vs_teacher_99LFs.npz")
L_arr = data["L"]
Ytrain = data["Y"]

In [14]:
# Shared settings for the experiments below.
n_samples, nlf = L_arr.shape
lfprop = False  # passed as `lf_prop` to train_supervised in the following cells

# Majority-vote baseline over all labeling functions (abstain encoded as 0).
print("---------------------------------- MAJORITY VOTE STATS --------------------------------------------------")
print("MV on all samples with ", nlf, "LFs")
eval_majority_vote(L_arr, Ytrain, abst=0, MV_policy='random')
print("---------------------------------------------------------------------------------------------------------")
# To print the LF descriptions: [print(d) for d in descr]
# (NOTE: `descr` is not defined in the cells visible here.)

---------------------------------- MAJORITY VOTE STATS --------------------------------------------------
MV on all samples with  99 LFs
Majority vote stats:
Accuracy:0.754 | Precision:0.771 | Recall:0.717 | F1 score:0.743 | AUC:0.796 | Log loss:5.506 | Brier:0.917 | Coverage:1.000 | MSE, MAE:0.917, 0.751
---------------------------------------------------------------------------------------------------------


# Supervised (ours)

In [15]:
# Supervised factor graph at lr=0.1 with the default number of Gibbs samples;
# the 25-epoch setting is run twice.
for epochs in (10, 25, 25):
    _, _, _ = train_supervised(L_arr, Ytrain, lf_prop=lfprop, lr=0.1, n_epoch=epochs)

Accuracy:0.790 | Precision:0.758 | Recall:0.848 | F1 score:0.800 | AUC:0.888 | Log loss:1.065 | Brier:0.944 | Coverage:1.000 | MSE, MAE:0.944, 0.735
Time needed by generative model: 4.383999824523926
Accuracy:0.781 | Precision:0.764 | Recall:0.806 | F1 score:0.784 | AUC:0.875 | Log loss:1.803 | Brier:0.931 | Coverage:1.000 | MSE, MAE:0.931, 0.730
Time needed by generative model: 11.266000509262085
Accuracy:0.791 | Precision:0.757 | Recall:0.850 | F1 score:0.801 | AUC:0.883 | Log loss:1.795 | Brier:0.939 | Coverage:1.000 | MSE, MAE:0.939, 0.729
Time needed by generative model: 11.008025407791138


In [16]:
# Same model with 5 Gibbs samples, for several (learning rate, epochs) settings.
for step_size, epochs in ((0.1, 10), (0.1, 25), (0.01, 50)):
    _, _, _ = train_supervised(L_arr, Ytrain, lf_prop=lfprop, lr=step_size, n_epoch=epochs, gibbs_samples=5)

Accuracy:0.778 | Precision:0.738 | Recall:0.855 | F1 score:0.792 | AUC:0.887 | Log loss:1.069 | Brier:0.947 | Coverage:1.000 | MSE, MAE:0.947, 0.735
Time needed by generative model: 2.7640011310577393
Accuracy:0.786 | Precision:0.752 | Recall:0.847 | F1 score:0.797 | AUC:0.883 | Log loss:1.803 | Brier:0.936 | Coverage:1.000 | MSE, MAE:0.936, 0.729
Time needed by generative model: 7.083952188491821
Accuracy:0.779 | Precision:0.739 | Recall:0.856 | F1 score:0.793 | AUC:0.887 | Log loss:0.664 | Brier:0.958 | Coverage:1.000 | MSE, MAE:0.958, 0.748
Time needed by generative model: 14.245006799697876


In [17]:
# Smaller learning rates with more epochs: (learning rate, epochs, Gibbs samples).
for step_size, epochs, n_gibbs in ((0.01, 50, 1), (0.003, 100, 5)):
    _, _, _ = train_supervised(L_arr, Ytrain, lf_prop=lfprop, lr=step_size, n_epoch=epochs, gibbs_samples=n_gibbs)

Accuracy:0.786 | Precision:0.755 | Recall:0.843 | F1 score:0.796 | AUC:0.889 | Log loss:0.658 | Brier:0.941 | Coverage:1.000 | MSE, MAE:0.941, 0.742
Time needed by generative model: 7.099032163619995
Accuracy:0.776 | Precision:0.738 | Recall:0.850 | F1 score:0.790 | AUC:0.887 | Log loss:0.525 | Brier:0.985 | Coverage:1.000 | MSE, MAE:0.985, 0.766
Time needed by generative model: 27.555073499679565


# Snorkel
### Note: this is the newer, faster matrix-completion version of Snorkel (the old Snorkel, which uses SGD + MLE, is slower than our model above).

In [18]:
# Snorkel label model for 1000 epochs; lr=0.01 is run twice, then lr=0.1.
for step_size in (0.01, 0.01, 0.1):
    _, _, _ = train_snorkel(L_arr, Ytrain, lr=step_size, n_epoch=1000)


Accuracy:0.769 | Precision:0.718 | Recall:0.878 | F1 score:0.790 | AUC:0.880 | Log loss:0.678 | Brier:0.159 | Coverage:1.000 | MSE, MAE:0.159, 0.275
Time needed by Snorkel's generative model: 1.7589950561523438
Accuracy:0.769 | Precision:0.718 | Recall:0.878 | F1 score:0.790 | AUC:0.880 | Log loss:0.678 | Brier:0.159 | Coverage:1.000 | MSE, MAE:0.159, 0.275
Time needed by Snorkel's generative model: 1.7600352764129639
Accuracy:0.769 | Precision:0.718 | Recall:0.878 | F1 score:0.790 | AUC:0.880 | Log loss:0.678 | Brier:0.159 | Coverage:1.000 | MSE, MAE:0.159, 0.275
Time needed by Snorkel's generative model: 1.8869962692260742
