In [8]:
import time
from examples.factors import accuracies, propensities
from examples.prediction_and_evaluation import pred_and_eval_gen_model, eval_majority_vote
from examples.utils import change_labels
from factor_graph import FactorGraph
import numpy as np

# Comparing the implemented factor graph against Snorkel (a latent MRF model)

## The data used consists of:
 - ground-truth labels Y for a constructed task: distinguishing professors from teachers in the Bias in Bios dataset
 - 99 selected labeling functions, usable for a standard data programming pipeline

In [17]:
def train_supervised(label_matrix, Y_true, lf_prop=True, n_epoch=25, lr=0.1, gibbs_samples=10, batch_size=250):
    """Fit the factor graph on fully observed data (true labels + LF votes) and evaluate it.

    ASSUMPTION: each LF only votes for ONE label and abstains otherwise, so the
    sign of a column's sum is taken as that LF's polarity.

    Args:
        label_matrix: 2-D array of LF votes, one column per LF (abstain is 0,
            matching the ``abst=0`` passed to the evaluation helper).
        Y_true: array of true labels; it is reshaped into a column and prepended
            to ``label_matrix`` to form the training observations.
        lf_prop: if True, the ``propensities`` potentials are added on top of
            the ``accuracies`` potentials.
        n_epoch: number of epochs forwarded to ``FactorGraph.fit``.
        lr: learning rate forwarded to ``FactorGraph.fit``.
        gibbs_samples: forwarded to ``FactorGraph.fit``.
        batch_size: mini-batch size forwarded to ``FactorGraph.fit``
            (defaults to 250, the value that used to be hard-coded).

    Returns:
        Tuple ``(lm, stat, probs)``: the fitted FactorGraph and the two values
        returned by ``pred_and_eval_gen_model``.
    """
    start_t = time.time()
    n_LFs = label_matrix.shape[1]

    # Polarity of each LF = sign of its summed votes (see ASSUMPTION above).
    polarities = np.sign(np.sum(label_matrix, axis=0))

    # Supervised case: the data fed into the PGM learning is simply [Y | L].
    observations = np.concatenate((Y_true.reshape((-1, 1)), label_matrix), axis=1)

    # Build an MRF in which every variable is observed.
    potentials = [(accuracies, n_LFs)]
    if lf_prop:
        potentials.append((propensities, n_LFs))
    # Variable 0 is the label column; it is given polarity -1.
    # NOTE(review): how FactorGraph interprets that entry is not visible here - confirm.
    lm = FactorGraph(n_vars=n_LFs + 1, polarities=[-1] + list(polarities), potentials=potentials)
    lm.fit(observations, lr=lr, n_epochs=n_epoch, batch_size=batch_size, gibbs_samples=gibbs_samples,
           verbose=False)

    # Evaluate the learned generative model.
    # NOTE(review): the evaluation input still contains the true-label column;
    # presumably version=99 accounts for that - confirm against the helper.
    stat, probs = pred_and_eval_gen_model(lm, observations, Y_true, version=99, abst=0, verbose=True, print_MV=False,
                                          eps=0.0, return_preds=True, coverage_stats=False, add_prefix="")

    # The reported time covers setup, fitting AND the evaluation call above.
    duration = time.time() - start_t
    print(f"Time needed by generative model: {duration}")
    # Will train the downstream classifier:
    # stat_cl = train_and_eval_classifier(Xtrain, Xtest, probs, Ytest, label_matrix, library='torch',
    #                                    optim='Adam', devicestring=device, epochs=250, print_step=505)
    return lm, stat, probs

In [10]:
def train_snorkel(label_matrix, Y_true, n_epoch=1000, lr=0.1, seed=77):
    """Fit Snorkel's latent LabelModel on the LF votes and evaluate it.

    Args:
        label_matrix: array of LF votes in which 0 marks an abstention.
        Y_true: true labels, used for evaluation only (the LabelModel is fit
            on the label matrix alone).
        n_epoch: number of epochs forwarded to ``LabelModel.fit``.
        lr: learning rate forwarded to ``LabelModel.fit``.
        seed: random seed forwarded to ``LabelModel.fit`` (defaults to 77,
            the value that used to be hard-coded).

    Returns:
        Tuple ``(lm, stat, probs)``: the fitted LabelModel and the two values
        returned by ``pred_and_eval_gen_model``.
    """
    # Imported lazily, so snorkel is only required when this function is called.
    from snorkel.labeling.model import LabelModel

    # LABEL MODEL
    start_t = time.time()

    # Snorkel requires the abstention label to be -1, so relabel 0 -> -1.
    # NOTE(review): how change_labels transforms Y_true is not visible here - confirm.
    label_matrix, Y_true = change_labels(label_matrix, Y_true, new_label=-1, old_label=0)

    # Train the latent label model from Snorkel.
    lm = LabelModel(cardinality=2)
    lm.fit(label_matrix, n_epochs=n_epoch, seed=seed, lr=lr)

    # Evaluate the learned generative model.
    stat, probs = pred_and_eval_gen_model(lm, label_matrix, Y_true, abst=-1, verbose=True,
                                          print_MV=False, eps=0.0, MV_policy="random",
                                          return_preds=True, version=10, coverage_stats=False)

    # The reported time covers relabeling, fitting AND the evaluation call above.
    duration = time.time() - start_t
    print(f"Time needed by Snorkel's generative model: {duration}")
    # Will train the downstream classifier:
    # stat_cl = train_and_eval_classifier(Xtrain, Xtest, probs, Ytest, label_matrix, library='torch',
    #                                    optim='Adam', devicestring=device, epochs=250, print_step=505)
    return lm, stat, probs

In [11]:
# NOTE(review): `seed` and `n_runs` are not referenced by any other visible cell;
# train_snorkel's default seed happens to have the same value (77).
seed = 77
n_runs = 5
# np.load on an .npz returns an NpzFile that keeps the archive open; the
# context manager closes it once both arrays have been read into memory.
with np.load("../data/professor_vs_teacher_99LFs.npz") as data:
    L_arr, Ytrain = data["L"], data["Y"]

In [12]:
# Settings and dimensions reused by the cells below.
n_samples, nlf = L_arr.shape
lfprop = False  # forwarded as `lf_prop` to train_supervised in later cells

# Majority-vote baseline over all samples (0 = abstain, ties broken randomly).
print("---------------------------------- MAJORITY VOTE STATS --------------------------------------------------")
print("MV on all samples with ", nlf, "LFs")
eval_majority_vote(L_arr, Ytrain, abst=0, MV_policy="random")
print("---------------------------------------------------------------------------------------------------------")
# PRINT LF descriptions: [print(d) for d in descr]

---------------------------------- MAJORITY VOTE STATS --------------------------------------------------
MV on all samples with  99 LFs
Majority vote stats:
Accuracy:0.753 | Precision:0.770 | Recall:0.717 | F1 score:0.742 | AUC:0.796 | Log loss:5.506 | Brier:0.917 | Coverage:1.000 | MSE, MAE:0.917, 0.751
---------------------------------------------------------------------------------------------------------


# Supervised (ours)

In [14]:
# Same learning rate, varying epoch budget; the 25-epoch run is repeated once
# to show run-to-run variation.
for run_epochs in (10, 25, 25):
    _, _, _ = train_supervised(L_arr, Ytrain, lf_prop=lfprop, lr=0.1, n_epoch=run_epochs)

Accuracy:0.753 | Precision:0.694 | Recall:0.896 | F1 score:0.782 | AUC:0.888 | Log loss:1.586 | Brier:1.074 | Coverage:1.000 | MSE, MAE:1.074, 0.755
Time needed by generative model: 5.789618015289307
Accuracy:0.750 | Precision:0.693 | Recall:0.892 | F1 score:0.780 | AUC:0.881 | Log loss:2.865 | Brier:1.069 | Coverage:1.000 | MSE, MAE:1.069, 0.750
Time needed by generative model: 13.907033681869507
Accuracy:0.753 | Precision:0.696 | Recall:0.893 | F1 score:0.782 | AUC:0.881 | Log loss:2.861 | Brier:1.069 | Coverage:1.000 | MSE, MAE:1.069, 0.750
Time needed by generative model: 13.33698320388794


In [18]:
# Fewer Gibbs samples (5) with three (learning rate, epochs) settings.
for run_lr, run_epochs in ((0.1, 10), (0.1, 25), (0.01, 50)):
    _, _, _ = train_supervised(L_arr, Ytrain, lf_prop=lfprop, lr=run_lr, n_epoch=run_epochs, gibbs_samples=5)

Accuracy:0.759 | Precision:0.701 | Recall:0.896 | F1 score:0.787 | AUC:0.891 | Log loss:1.451 | Brier:1.057 | Coverage:1.000 | MSE, MAE:1.057, 0.750
Time needed by generative model: 3.4220330715179443
Accuracy:0.758 | Precision:0.700 | Recall:0.895 | F1 score:0.785 | AUC:0.886 | Log loss:2.568 | Brier:1.051 | Coverage:1.000 | MSE, MAE:1.051, 0.744
Time needed by generative model: 8.094967126846313
Accuracy:0.762 | Precision:0.704 | Recall:0.897 | F1 score:0.789 | AUC:0.891 | Log loss:0.843 | Brier:1.062 | Coverage:1.000 | MSE, MAE:1.062, 0.758
Time needed by generative model: 16.368001461029053


In [20]:
# Smaller learning rates with longer training: (lr, epochs, gibbs_samples).
for run_lr, run_epochs, run_gibbs in ((0.01, 50, 1), (0.003, 100, 5)):
    _, _, _ = train_supervised(L_arr, Ytrain, lf_prop=lfprop, lr=run_lr, n_epoch=run_epochs,
                               gibbs_samples=run_gibbs)

Accuracy:0.788 | Precision:0.758 | Recall:0.841 | F1 score:0.798 | AUC:0.889 | Log loss:0.657 | Brier:0.941 | Coverage:1.000 | MSE, MAE:0.941, 0.742
Time needed by generative model: 8.22499942779541
Accuracy:0.759 | Precision:0.701 | Recall:0.894 | F1 score:0.786 | AUC:0.892 | Log loss:0.617 | Brier:1.075 | Coverage:1.000 | MSE, MAE:1.075, 0.771
Time needed by generative model: 31.87503147125244


# Snorkel
### Note that this is the newer, faster Snorkel (the old Snorkel, which uses SGD + MLE, is similarly slow or slower).

In [15]:
# Snorkel label model at two learning rates; the lr=0.01 run is repeated once.
for run_lr in (0.01, 0.01, 0.1):
    _, _, _ = train_snorkel(L_arr, Ytrain, lr=run_lr, n_epoch=1000)


Accuracy:0.769 | Precision:0.718 | Recall:0.878 | F1 score:0.790 | AUC:0.880 | Log loss:0.678 | Brier:0.159 | Coverage:1.000 | MSE, MAE:0.159, 0.275
Time needed by Snorkel's generative model: 2.810915946960449
Accuracy:0.769 | Precision:0.718 | Recall:0.878 | F1 score:0.790 | AUC:0.880 | Log loss:0.678 | Brier:0.159 | Coverage:1.000 | MSE, MAE:0.159, 0.275
Time needed by Snorkel's generative model: 1.949998140335083
Accuracy:0.769 | Precision:0.718 | Recall:0.878 | F1 score:0.790 | AUC:0.880 | Log loss:0.678 | Brier:0.159 | Coverage:1.000 | MSE, MAE:0.159, 0.275
Time needed by Snorkel's generative model: 1.9000012874603271
