# E1.1: Label Model

In [None]:
from snorkel.labeling.model import LabelModel
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from my_utils import threshold_predict, threshold_suppressible_predict
import matplotlib.pyplot as plt
import numpy as np
import logging

In [None]:
# Select the label-function columns to model and pull out the gold labels.
# Only the first `n_label_functions` columns of the label matrix are kept.
n_label_functions = 57
lf_subset = list(range(n_label_functions))
# Independent copy of the selected columns (same semantics as np.copy).
L_data = np.array(L_alarms[:, lf_subset], copy=True)
Y_data = alarms_df.true_label.values

In [None]:
# Metric names requested from the label model's score() call in each fold.
metrics = [
    "accuracy",
    "coverage",
    "precision",
    "recall",
    "f1",
]

In [None]:
# K-fold cross-validation of the Snorkel label model.
#
# For every fold this cell:
#   1. fits a LabelModel on the training rows of the label matrix,
#   2. accumulates the test-set confusion matrix into `all_cmtx`,
#   3. logs the per-fold scores, and
#   4. sweeps a confidence threshold over the predicted probabilities,
#      recording abstain rate, accuracy and F1 in `m`.
#
# Outputs read by later cells: `all_cmtx` and `m`.

# Single source of truth for the fold count (used by both `m` and KFold).
n_splits = 5

# Fixed label order for the confusion matrix: Snorkel's abstain value (-1)
# followed by the two classes of a cardinality-2 model. Passing it explicitly
# keeps every per-fold matrix 3x3 even when a fold contains no abstains (or
# lacks a class); otherwise the accumulation below fails on a shape mismatch.
# NOTE(review): assumes ABSTAIN == -1 and SUPPRESSIBLE == 1 -- confirm.
cm_labels = [-1, 0, 1]

all_cmtx = np.zeros((len(cm_labels), len(cm_labels)))

thresholds = np.arange(0.5, 1.0, 0.01)
# Axes: fold x threshold x (abstain rate, accuracy, f1)
m = np.zeros((n_splits, len(thresholds), 3))

# NOTE: no random_state is set, so the fold assignment differs between runs.
kf = KFold(n_splits=n_splits, shuffle=True)

for i, (train_idx, test_idx) in enumerate(kf.split(L_data)):
    # Define training dataset
    L_train = L_data[train_idx]
    Y_train = Y_data[train_idx]  # gold labels; not used for fitting below
    # Define test dataset
    L_test = L_data[test_idx]
    Y_test = Y_data[test_idx]

    # Fit a Label Model
    l_model = LabelModel(cardinality=2, verbose=True)
    l_model.fit(L_train, n_epochs=100, lr=0.01)
    Y_pred, Y_prob = l_model.predict(L_test, return_probs=True, tie_break_policy="abstain")

    # Evaluate model performance
    all_cmtx = all_cmtx + confusion_matrix(Y_test, Y_pred, labels=cm_labels)
    scores = l_model.score(L_test, Y_test, metrics=metrics)
    scores["abstain"] = np.sum(Y_pred == ABSTAIN) / len(Y_pred)
    scores["suppress"] = np.sum(Y_pred == SUPPRESSIBLE) / len(Y_pred)
    # Lazy %-style arguments: the previous call had no placeholder for
    # `scores`, so the dict never reached the log message.
    # NOTE: INFO records are only emitted if logging is configured at INFO
    # level or lower (the root logger defaults to WARNING).
    logging.info("Iteration %d: %s", i + 1, scores)

    # Evaluate model with thresholded predictions
    abstain, accuracy, f1 = [], [], []

    for thresh in thresholds:
        # Alternative (presumably thresholds only the suppressible class):
        # Y_pred = np.apply_along_axis(threshold_suppressible_predict, 1, Y_prob, thresh)
        # Y_pred is deliberately overwritten with the thresholded predictions.
        Y_pred = np.apply_along_axis(threshold_predict, 1, Y_prob, thresh)

        # Score only the rows the thresholded model did not abstain on.
        kept = Y_pred != ABSTAIN
        Y_pred_noa = Y_pred[kept]
        Y_test_noa = Y_test[kept]

        abstain.append(np.sum(~kept) / len(Y_pred))
        accuracy.append(accuracy_score(Y_test_noa, Y_pred_noa))
        f1.append(f1_score(Y_test_noa, Y_pred_noa))

    m[i, :, 0] = abstain
    m[i, :, 1] = accuracy
    m[i, :, 2] = f1

In [None]:
# Report pooled metrics from the confusion matrix summed over all folds.
# Rows are true labels and columns are predictions; index 0 is the abstain
# slot and index 2 is treated as the positive (suppress) class.
print("-- Summary (LM, 5-Fold CV) --")
print(all_cmtx)

true_neg = all_cmtx[1, 1]
true_pos = all_cmtx[2, 2]
false_pos = all_cmtx[1, 2]
false_neg = all_cmtx[2, 1]

# Accuracy over the non-abstained cells only.
print("accuracy: ", (true_neg + true_pos) / np.sum(all_cmtx[1:, 1:]))
# Share of all samples that landed in a non-abstain cell.
print("coverage: ", np.sum(all_cmtx[1:, 1:]) / np.sum(all_cmtx))
# Precision / recall of the positive class, ignoring the abstain row/column.
print("precision: ", true_pos / np.sum(all_cmtx[1:, 2]))
print("recall: ", true_pos / np.sum(all_cmtx[2, 1:]))
print("f1: ", true_pos / (true_pos + 0.5 * (false_pos + false_neg)))
# Fractions of all samples predicted as abstain / as the positive class.
print("abstain: ", np.sum(all_cmtx[:, 0]) / np.sum(all_cmtx))
print("suppress: ", np.sum(all_cmtx[:, 2]) / np.sum(all_cmtx))

In [None]:
# Plot fold-averaged accuracy and F1 against the fold-averaged abstain rate,
# one point per confidence threshold.
x = np.arange(0.5, 1.0, 0.05)  # NOTE(review): not used by the plot below
avg_thresh_scores = np.mean(m, axis=0)  # average over the fold axis

abstain_rate = avg_thresh_scores[:, 0]
for metric_col in (1, 2):  # column 1 = accuracy, column 2 = f1
    plt.plot(abstain_rate, avg_thresh_scores[:, metric_col])

plt.xlabel("Abstain")
plt.ylabel("%")
plt.legend(("Accuracy", "F1"), bbox_to_anchor=(1, 1))
# Alternative title when only suppressible predictions are thresholded:
# "Effect of thresholding suppressible predictions"
plt.title("Effect of thresholding predictions")
plt.show()

In [None]:
# Dump the fold-averaged threshold sweep: one row per threshold,
# columns are (abstain rate, accuracy, f1).
print(avg_thresh_scores)