# E5: Iteratively find dependencies between labeling functions (LFs)

In [None]:
from sklearn.model_selection import train_test_split
from Our_Monitors.CDGA_Monitor import CDGAM
from snorkel.labeling import labeling_function, LFApplier
from snorkel.labeling.model.label_model import LabelModel
import igraph as ig

In [None]:
def make_lf(id, mis, l_model):
    """Wrap a label model as a labeling function over selected entries.

    Args:
        id: Identifier appended to ``"lf"`` to form the labeling
            function's name.
        mis: Index (or list of indices) used to select entries of a data
            point via ``x[mis]``.
        l_model: Object whose ``predict`` method is called on a one-row
            array built from the selected entries.

    Returns:
        The result of applying ``labeling_function(name=...)`` to the inner
        predictor, which returns the first element of ``l_model.predict``.
    """
    lf_name = f"lf{id!s}"

    def lf(x):
        # predict() is given a batch, so wrap the single selection in an
        # outer list and unwrap the single prediction afterwards.
        single_row = np.asarray([x[mis]])
        predictions = l_model.predict(single_row)
        return predictions[0]

    # Explicit decorator application; equivalent to the @-syntax form.
    return labeling_function(name=lf_name)(lf)

In [None]:
# Iteratively collapse dependent labeling functions (LFs):
#   1. find dependencies between the current LFs on the development split,
#   2. build a graph with one vertex per LF and one edge per dependency,
#   3. fit one LabelModel per maximum independent set of that graph,
#   4. wrap each fitted model as a new LF and repeat.
# Stops early as soon as no dependencies are reported.
lf_subset = list(range(57))     # only using original LFs
lfs_iter = np.asarray(lfs)[lf_subset]
L_data = np.copy(L_alarms[:, lf_subset])
num_iters = 5

# The loop variable is intentionally not named `iter`: at notebook scope that
# would rebind the builtin `iter` for every cell executed afterwards.
for iteration in range(num_iters):
    print("-- Iteration ", iteration + 1, "--")

    # Apply lfs_iter to the alarms data.
    # In the first pass L_data is already the label matrix of the original
    # LFs. In later passes `alarms_data` holds the previous pass's label
    # matrix (assigned at the bottom of the loop), which is what the LFs
    # built by make_lf index into.
    if iteration != 0:
        applier = LFApplier(lfs_iter)
        L_data = applier.apply(
            alarms_data, progress_bar=True, fault_tolerant=True
        )

    # Split into train and development set. The fixed random_state keeps the
    # split parameters identical in every pass.
    L_train, L_dev, Y_train, Y_dev = train_test_split(
        L_data,
        alarms_df.true_label.values,
        test_size=0.2,
        random_state=SEED,
    )

    # Get the edges of the dependency graph from the monitor (CDGAM),
    # evaluated on the development split only.
    deps = CDGAM(
        L_dev,
        k=2,
        sig=0.05,
        policy='new',
        verbose=False,
        return_more_info=False,
    )
    print("Num deps: ", len(deps))

    # Generate graph from dependencies and find the maximum independent sets
    # (one vertex per current LF, one edge per reported dependency).
    G = ig.Graph()
    G.add_vertices(len(lfs_iter))
    G.add_edges(deps)
    max_indep_sets = G.largest_independent_vertex_sets()
    print("Num max independent sets: ", len(max_indep_sets))
    print("Size max independent sets: ", len(max_indep_sets[0]))

    # Stop iterating if no dependencies to consider. The check comes after
    # the graph statistics so they are still printed for the final pass.
    if len(deps) == 0:
        break

    # Define a labeling function for each maximum independent set: a
    # LabelModel fitted on the training columns of that set, wrapped by
    # make_lf.
    print("creating new LFs...")
    new_lfs_iter = []
    for i, mis in enumerate(max_indep_sets):
        mis = list(mis)     # mis is a tuple, converting to list
        l_model = LabelModel(cardinality=2, verbose=True)
        l_model.fit(L_train[:, mis], seed=SEED)
        new_lfs_iter.append(make_lf(i, mis, l_model))

    # Update parameters for the next pass.
    # NOTE(review): this overwrites any `alarms_data` defined earlier in the
    # session with the current label matrix -- confirm no later cell needs
    # the previous value.
    # NOTE(review): if the loop ends by exhausting num_iters (no early
    # break), lfs_iter holds LFs that were never applied, so L_data still
    # corresponds to the previous LF set.
    lfs_iter = new_lfs_iter
    alarms_data = np.copy(L_data)