### Imports

In [87]:
import pandas as pd
import itertools
from snorkel.labeling import labeling_function
from snorkel.labeling import LabelingFunction
from snorkel.labeling.model import LabelModel
from snorkel.labeling import PandasLFApplier
import snorkel_lfsmall_2

### Data and Labeling Function Loading

In [93]:
# Read in data
# df_train = pd.read_json(r"C:\Users\ARalevski\Documents\Petal\data-collection-and-prep\new_golden.json")
df_train = pd.read_csv(r"C:\Users\ARalevski\Documents\Petal\Snorkel-PeTaL\labeled_data.csv")
df_train = df_train.fillna("")


# List labeling functions
lfs = []
new_labels = []
for method in dir(snorkel_lfsmall_2):
    if "lf_" in method:
        lfs.append(getattr(snorkel_lfsmall_2, method))
        new_labels.append(method[3:])
    elif "keyword_" in method:
        lfs.append(getattr(snorkel_lfsmall_2, method))
        new_labels.append(method[8:])

len(lfs)

15

In [94]:
print(lfs)

[<function keyword_lookup at 0x00000153C5813820>, LabelingFunction lf_protect_from_animals, Preprocessors: [], LabelingFunction lf_protect_from_chemicals, Preprocessors: [], LabelingFunction lf_protect_from_excess_liquids, Preprocessors: [], LabelingFunction lf_protect_from_fire, Preprocessors: [], LabelingFunction lf_protect_from_fungi, Preprocessors: [], LabelingFunction lf_protect_from_ice, Preprocessors: [], LabelingFunction lf_protect_from_light, Preprocessors: [], LabelingFunction lf_protect_from_microbes, Preprocessors: [], LabelingFunction lf_protect_from_plants, Preprocessors: [], LabelingFunction lf_protect_from_radiation, Preprocessors: [], LabelingFunction lf_protect_from_solids, Preprocessors: [], LabelingFunction lf_protect_from_temperature, Preprocessors: [], LabelingFunction lf_protect_from_wind, Preprocessors: [], <function make_keyword_lf at 0x00000153C5813D30>]


Updated Function Enumerated CSV

In [69]:
form = pd.DataFrame()
new_rows = []
count = 0
for row in lfs:
    new_rows.append({"function": row.name[3:], "function_enumerated": count})
    count += 1
form = pd.concat([form, pd.DataFrame(new_rows)])

In [70]:
# The notebook should be restarted if you change this file
form.to_csv(r"C:\Users\ARalevski\Documents\Petal\Snorkel-PeTaL\formatted_enums.csv", index=False)

### Apply Labeling Functions

In [71]:
# Apply the LFs to the unlabled training data
applier = PandasLFApplier(lfs)
l_train = applier.apply(df_train)

100%|██████████| 1430/1430 [00:04<00:00, 339.72it/s]


In [72]:
l_train

array([[10, 23, 15, ..., 14, 19, 18],
       [10, 23, 15, ..., 14, 19, 18],
       [10, 23, 15, ..., 14, 19, 18],
       ...,
       [10, 23, 15, ..., 14, 19, 18],
       [10, 23, 15, ..., 14, 19, 18],
       [10, 23, 15, ..., 14, 19, 18]])

In [73]:
len(lfs)

13

### Train Model

In [76]:
# Train the label model and compute the training labels
# Cardinality = number of classes
label_model = LabelModel(cardinality=25, verbose=True)

In [77]:
label_model.fit(l_train, n_epochs=500, log_freq=50, seed=123)

### Predict and Display Results

In [78]:
df_train["label"] = label_model.predict(L=l_train, tie_break_policy="abstain")

In [79]:
# Number of unique labels, if this is only 1, every prediction was most likely abstained.
df_train["label"].nunique()

1

In [81]:
# Append Probabilities of labels to each record
probs = label_model.predict_proba(l_train)
for prob in probs:
    count = 0
    for name in lfs:
        df_train[name.name] = prob[count]
        count += 1
        # history.append(name)

Unnamed: 0,doi,url,title,abstract,full_doc_link,is_open_access,label_level_1,label_level_2,label_level_3,journal,...,lf_protect_from_harm3,lf_protect_from_ice,lf_protect_from_light,lf_protect_from_loss_liquids,lf_protect_from_microbes,lf_protect_from_plants,lf_protect_from_radiation,lf_protect_from_solids,lf_protect_from_temperature,lf_protect_from_wind
0,10.1098/rsos.140322,https://royalsocietypublishing.org/doi/10.1098...,Impact behaviour of freeze-dried and fresh pom...,Pomelos (Citrus maxima) are known for their th...,https://royalsocietypublishing.org/doi/10.1098...,True,['Maintain structural integrity'],"['Manage structural forces', 'Prevent structur...","['Manage impact', 'Prevent fracture/rupture']","['Advanced Engineering Materials', 'Royal Soci...",...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
1,10.1098/rsbl.2004.0269,https://royalsocietypublishing.org/doi/10.1098...,Distribution of unique red feather pigments in...,"In many birds, red, orange and yellow feathers...",https://royalsocietypublishing.org/doi/10.1098...,True,"['Sense send or process information', 'Change ...","['Send signals', 'Modify color']","['Send light signals in the visible spectrum',...",['Biology Letters'],...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
2,10.1644/08-MAMM-A-108.1,https://academic.oup.com/jmammal/article/90/2/...,"Home Ranges, Movement, and Den Use in Long-Bea...","Abstract Long-beaked echidnas (Zaglossus), whi...",https://academic.oup.com//jmammal/article-pdf/...,False,"['Process resources', 'Protect from harm']","['Distribute resources', 'Protect from non-liv...","['Distribute gases', 'Protect from solids']","['Journal of Experimental Biology (JEB)', 'Jou...",...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
3,10.1038/35099670,https://www.nature.com/articles/35099670,Energy saving in flight formation,Many species of large bird fly together in for...,https://www.nature.com/articles/35099670.pdf,True,"['Move', 'Maintain ecological community']",['Passive movement'],"['Coordinate by self-organization', 'Passively...",['Nature'],...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
4,10.1242/jeb.036871,https://jeb.biologists.org/content/213/2/288,Correlation between aquaporin and water permea...,SUMMARY The ventral pelvic skin of the tree fr...,https://journals.biologists.com/jeb/article-pd...,True,['Process resources'],"['Capture resources', 'Absorb and/or filter re...",['Absorb and/or filter liquids'],"['Journal of Experimental Biology (JEB)', 'End...",...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1425,,,,,,,,,,,...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
1426,,,,,,,,,,,...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
1427,,,,,,,,,,,...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567
1428,,,,,,,,,,,...,0.038205,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567,0.066567


### Save Model and Results

In [82]:
label_model.save("snorkel_2.pkl")

In [83]:
df_train.to_csv("results2.csv")