# E5: Iteratively find Dependencies between LFs

In [None]:
from sklearn.model_selection import train_test_split
from Our_Monitors.CD_Monitor import CDM
from snorkel.labeling.model.label_model import LabelModel
from random import sample
import igraph as ig

In [None]:
# Settings for the iterative dependency analysis
num_iters: int = 5      # upper bound on the number of refinement passes
sample_size: int = 200  # maximum independent sets kept per pass; sampling is skipped unless this is > 0

In [None]:
# Restrict the analysis to the original LFs: columns 0..56 of L_alarms
lf_subset = [*range(57)]
# Independent copy of the selected columns, so later reassignment of L_data
# never touches L_alarms
L_data = np.array(L_alarms[:, lf_subset], copy=True)
# Labels read from the `true_label` field of alarms_df
Y_data = alarms_df.true_label.values

In [None]:
# Iteratively collapse the label matrix: in each pass, find dependencies
# between the current LFs, enumerate the maximum independent sets (MISs) of the
# dependency graph, fit one LabelModel per MIS, and use the predictions of
# those models as the columns of the next pass' label matrix.

# Seeded generator so the MIS sub-sampling is repeatable, in line with the
# SEED-driven split and label-model fits below.
from random import Random
mis_rng = Random(SEED)

# The loop variable is deliberately not called `iter`: at notebook scope that
# would leave the builtin `iter` shadowed for every later cell.
for iteration in range(num_iters):
    print("-- Iteration ", iteration + 1, "--")

    # Split into train and development set
    L_train, L_dev, Y_train, Y_dev = train_test_split(L_data, Y_data, test_size=0.2, random_state=SEED)

    # Get edges of dependency graph from Conditional Dependency Monitor (CDM)
    start = time()
    deps = CDM(L_dev, Y_dev, k=2, sig=0.05, policy="old", verbose=False)
    print("CDM runtime: ", time() - start)
    print("Num deps: ", len(deps))

    # Generate graph from dependencies and find the maximum independent sets
    # (one vertex per column of the current label matrix)
    G = ig.Graph()
    G.add_vertices(L_data.shape[1])
    G.add_edges(deps)
    max_indep_sets = G.largest_independent_vertex_sets()
    print("Num max independent sets: ", len(max_indep_sets))
    print("Size max independent sets: ", len(max_indep_sets[0]))

    # Don't iterate if there are no dependencies to consider in the next iteration
    if len(deps) == 0:
        break

    # Update L_data (equivalent to defining a new LF per MIS, then applying them to the previous L_data)
    print("updating L_data...")

    # Sub-sample only when there are more MISs than requested; asking
    # random.sample for more items than the population holds raises ValueError.
    if 0 < sample_size < len(max_indep_sets):
        print("sampling ", sample_size, "of the MISs...")
        max_indep_sets = mis_rng.sample(max_indep_sets, sample_size)

    print("LFs covered: ", set().union(*max_indep_sets))

    # Keep the dtype of the current label matrix; the float64 default of
    # np.zeros would silently change the matrix type after the first pass.
    L_data_new = np.zeros((L_data.shape[0], len(max_indep_sets)), dtype=L_data.dtype)

    for i, mis in enumerate(max_indep_sets):
        mis = list(mis)     # mis is a tuple, converting to list for column indexing
        l_model = LabelModel(cardinality=2, verbose=True)
        # Fit on the training rows only, then label every row of the current matrix
        l_model.fit(L_train[:, mis], seed=SEED)
        L_data_new[:, i] = l_model.predict(L_data[:, mis])

    L_data = L_data_new