In [None]:
# Your name: it prefixes the file name used by the final upload cell, which
# asserts that this string is non-empty.
YOUR_NAME = ""

In [None]:
import sys
sys.path.append('..')
from data.preparer import load_amazon_dataset

from babble import Explanation
from babble import BabbleStream
from babble.Candidate import Candidate 

from analyzer import upload_data

from metal.analysis import lf_summary
from metal.analysis import label_coverage
from metal import LabelModel
from metal.tuners import RandomSearchTuner
from babble.utils import ExplanationIO
from snorkel.labeling import filter_unlabeled_dataframe

import nltk
# Download the NLTK "punkt" resource when the notebook starts.
nltk.download("punkt")

import pandas as pd
from datetime import datetime
# Running log of statistics: later cells add rows to it and it is written to
# CSV in the save section.
stat_history = pd.DataFrame()

In [None]:
# Delimiter passed to the dataset loader.
DELIMITER = "#"

# The loader returns five values; the fifth is not used in this notebook.
df_train, df_dev, df_valid, df_test, _ = load_amazon_dataset(delimiter=DELIMITER)

# Report the number of rows in every split.
for template, split_df in (
    ("{} training examples", df_train),
    ("{} development examples", df_dev),
    ("{} validation examples", df_valid),
    ("{} test examples", df_test),
):
    print(template.format(len(split_df)))

Transform the data into a format compatible with Babble Labble:

In [None]:
# Only the training and development splits are converted for Babble.
dfs = [df_train, df_dev]
# Overwrite every training label with -1; the loop below shifts it to 0.
dfs[0]['label'] = -1

# NOTE(review): this loop mutates df_train/df_dev in place, so re-running the
# cell adds 1 to the development labels again. Reload the data before re-running.
for df in dfs:
    df["id"] = range(len(df))
    # babble labble uses 1 and 2 for labels, while our data uses 0 and 1
    # add 1 to convert
    df["label"] += 1

# One Candidate per row. These are built after the +1 shift but before the
# `Ys[0] -= 1` below, so training rows carry label 0 at this point.
Cs = [df.apply(lambda x: Candidate(x), axis=1) for df in dfs]

# Label arrays, one per split, in the same order as `dfs`.
# NOTE(review): `.values` may share memory with the DataFrame column, in which
# case the in-place `-= 1` below also changes df_train["label"] — confirm.
Ys = [df.label.values for df in dfs]
Ys[0] -= 1 # no label (training set) should be set to -1

# Amazon Customer Reviews Classification with Babble

### For this task, you will work with Amazon Customer Reviews, writing explanations about how to classify them as positive or negative sentiment.

Only 1 star and 5 star reviews are included.

In [None]:
# Start the timer!
# Record an all-zero baseline row whose timestamp marks the start of the session.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
stat_history = pd.concat(
    [
        stat_history,
        pd.DataFrame([{
            "time": datetime.now(),
            "num_lfs": 0,
            "f1": 0.0,
            "precision": 0.0,
            "recall": 0.0,
            "training_label_coverage": 0.0,
            "training_label_size": 0.0,
        }]),
    ],
    ignore_index=True,
    sort=False,
)

In [None]:
# Label values used in explanations: ABSTAIN is 0, the two sentiment classes are 1 and 2.
ABSTAIN, NEGATIVE, POSITIVE = 0, 1, 2

Recall that aliases are a way to refer to a set of words in a rule. 

For example, with
`aliases = {"couples": ["girlfriend", "boyfriend", "wife", "husband"]}` 

With this alias defined, you can refer to "couples" in a rule, and the parser will know you mean any of these terms.


In [None]:
# Stream over the candidate lists and label arrays built from the training
# and development splits.
# NOTE(review): the meaning of balanced/shuffled/seed is defined by BabbleStream;
# presumably the fixed seed makes the candidate order repeatable — confirm.
babbler = BabbleStream(
    Cs,
    Ys,
    balanced=True,
    shuffled=True,
    seed=456,
)

In [None]:
# Maps an alias name to the list of words it stands for, e.g.
# {"couples": ["girlfriend", "boyfriend", "wife", "husband"]}.
# Left empty here; fill it in before registering it with the babbler.
aliases = {} 
babbler.add_aliases(aliases)

In [None]:
def prettyprint(candidate):
    """Print a candidate's mention id, a blank line, and then its text."""
    mention_header = "MENTION ID {}".format(candidate.mention_id)
    # Ending with two newlines emits the header line plus the blank separator line.
    print(mention_header, end="\n\n")
    print(candidate.text)

Let's see an example candidate!

In [None]:
# Take the next candidate from the stream and print it.
candidate = babbler.next()
prettyprint(candidate)

## Labeling Instructions

All reviews were submitted with either 1 star (negative) or 5 star (positive) ratings. 

Your task is to __create labeling functions__ by writing natural language explanations of labeling rules. Try to write them as quickly and accurately as possible.

You may consult the internet at any time.

## Create Explanations

Creating explanations generally happens in five steps:
1. View candidates
2. Write explanations
3. Get feedback
4. Update explanations 
5. Apply label aggregator

Steps 3-5 are optional; explanations may be submitted without any feedback on their quality. However, in our experience, observing how well explanations are being parsed and what their accuracy/coverage on a dev set are (if available) can quickly lead to simple improvements that yield significantly more useful labeling functions. Once a few labeling functions have been collected, you can use the label aggregator to identify candidates that are being mislabeled and write additional explanations targeting those failure modes.

### Collection

Use `babbler` to show candidates

In [None]:
# Take the next candidate from the stream and print it; re-run this cell to
# move on to another candidate.
candidate = babbler.next()
prettyprint(candidate)

If you don't know whether it's positive or negative, it's okay to make your best guess or skip an example.
For a candidate you decide to label, write an explanation of why you chose that label.

You can consult the internet or refer to the babble tutorial notebook.

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e0 = Explanation(
    # Feel free to change the name to something that describes your rule better.
    name="e0",
    label=ABSTAIN,
    condition="",
    # Remember that the `candidate` argument is optional.
    # You can use it to make sure the explanation applies to the candidate you pass as an argument.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e1 = Explanation(
    name="e1",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e2 = Explanation(
    name="e2",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e3 = Explanation(
    name="e3",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e4 = Explanation(
    name="e4",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e5 = Explanation(
    name="e5",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e6 = Explanation(
    name="e6",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e7 = Explanation(
    name="e7",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e8 = Explanation(
    name="e8",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e9 = Explanation(
    name="e9",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

In [None]:
# Template explanation: replace the placeholder label and condition with your rule.
e10 = Explanation(
    name="e10",
    label=ABSTAIN,
    condition="",
    # Optional: tie the explanation to the candidate it was written for.
    # candidate=candidate.mention_id,
)

Babble will parse your explanations into functions, then filter out functions that are duplicates, incorrectly label their given candidate, or assign the same label to all examples.

In [None]:
# Add any explanations that you haven't committed yet
# e10 is included: it has its own template cell above but was missing from this list.
explanations = [e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10]

# Parse the explanations; `parses` and `filtered` are inspected in the analysis cells below.
parses, filtered = babbler.apply(explanations)

# Log how many explanations were submitted, parsed and filtered.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
stat_history = pd.concat(
    [
        stat_history,
        pd.DataFrame([{
            "time": datetime.now(),
            "num_lfs": len(parses),
            "num_explanations": len(explanations),
            "num_filtered": len(filtered),
        }]),
    ],
    ignore_index=True,
    sort=False,
)

### Analysis
See how your explanations were parsed and filtered

In [None]:
try:
    # Analyze the parses and show the resulting table.
    dev_analysis = babbler.analyze(parses)
    display(dev_analysis)
    # Tag the rows (timestamp, evaluation split, LF id) before logging them.
    dev_analysis['time'] = datetime.now()
    dev_analysis['eval'] = "dev"
    dev_analysis["lf_id"] = dev_analysis.index
    # pd.concat is used because DataFrame.append was removed in pandas 2.0.
    # NOTE(review): assumes dev_analysis is a DataFrame (it is indexed and given new columns above) — confirm.
    stat_history = pd.concat([stat_history, dev_analysis], sort=False, ignore_index=True)
except ValueError as e:
    print("It seems as though none of your labeling functions were parsed. See the cells above and below for more information.")
    print("ERROR:")
    print(e)

In [None]:
# Inspect the items that the apply step returned as `filtered`.
babbler.filtered_analysis(filtered)

In [None]:
# Commit the explanations applied above.
# NOTE(review): exact commit semantics are defined by BabbleStream — confirm.
babbler.commit()

### Evaluation
Get feedback on the performance of your explanations

In [None]:
# One label matrix per split index 0, 1 and 2; index 1 is used as the
# development split by the cells below.
Ls = [babbler.get_label_matrix(split_idx) for split_idx in range(3)]

# Names of the labeling functions, in the order the babbler returns them.
lf_names = [labeling_fn.__name__ for labeling_fn in babbler.get_lfs()]

# Summarize the labeling functions against the development labels.
lf_summary(Ls[1], Ys[1], lf_names=lf_names)

In [None]:
# Hyper-parameter search space for the label model.
# NOTE(review): presumably lists are sampled as discrete choices and the `lr`
# dict as a log-scaled range — confirm against metal's tuner documentation.
search_space = {
    'n_epochs': [50, 100, 500],
    'lr': {'range': [0.01, 0.001], 'scale': 'log'},
    'show_plots': False,
}

tuner = RandomSearchTuner(LabelModel, seed=123)

# Train on the training label matrix and select by F1 on the development split.
label_aggregator = tuner.search(
    search_space,
    train_args=[Ls[0]],
    X_dev=Ls[1], Y_dev=Ys[1],
    max_search=20, verbose=False, metric='f1')

# record statistics over time
pr, re, f1, acc = label_aggregator.score(Ls[1], Ys[1], metric=['precision', 'recall', 'f1', 'accuracy'])

# Computed once and reused for both coverage-derived statistics.
training_label_coverage = label_coverage(Ls[0])
stats = {
    "precision": pr,
    "recall": re,
    "f1": f1,
    "accuracy": acc,
    "eval": "dev",
    "model": "label_aggregator",
    "time": datetime.now(),
    "training_label_coverage": training_label_coverage,
    "training_label_size": training_label_coverage * len(dfs[0]),
}
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
stat_history = pd.concat(
    [stat_history, pd.DataFrame([stats])],
    ignore_index=True,
    sort=False,
)

In [None]:
# view some incorrectly labeled examples for a given LF
# set j to match the value of the LF you're interested in
j = 0
print(lf_names[j])

L_dev = Ls[1].todense()
# Column j of the development label matrix, flattened to a 1-D array.
lf_votes = L_dev[:, j].A1
# abs(label - 3) maps 1 -> 2 and 2 -> 1, so a match means the LF voted for
# the opposite of the recorded label.
flipped_gold = abs(df_dev["label"] - 3)
display(df_dev[lf_votes == flipped_gold])

## Train Model
We can train a simple bag-of-words model on these labels and see its test accuracy.

(This step may take a while).

In [None]:
# Dense form of the training label matrix, and the aggregated label
# probabilities computed from it.
L_train = Ls[0].todense()
probs_train = label_aggregator.predict_proba(L=L_train)

# Keep only the rows with at least one non-zero entry in the label matrix
# (0 is the ABSTAIN value defined earlier).
mask = (L_train != 0).any(axis=1).A1
df_train_filtered = df_train.iloc[mask]
probs_train_filtered = probs_train[mask]

print(
    "{} out of {} examples used for training data".format(
        len(df_train_filtered),
        len(df_train),
    )
)

In [None]:
# Imported here, at the point of use, rather than in the imports cell.
from analyzer import train_model_from_probs
# Train from the filtered training rows and their aggregated probabilities.
# The validation and test splits are passed along, presumably for evaluation —
# confirm in analyzer.train_model_from_probs.
stats = train_model_from_probs(df_train_filtered, probs_train_filtered, df_valid, df_test)
# Timestamp the returned statistics and add them to the running history.
stats["time"] = datetime.now()
# NOTE(review): DataFrame.append was removed in pandas 2.0; switching to
# pd.concat requires knowing the type of `stats`, which is not visible here.
stat_history = stat_history.append(stats, ignore_index=True)

## Save
When your time is up, please save your explanations and model!

In [None]:
!mkdir babble_amazon

In [None]:
# Write the statistics history to CSV inside the output folder.
stat_history.to_csv("babble_amazon/statistics_history.csv")
# Dump the IPython history to a log file (-p: with prompts, -o: with outputs, -f: to file).
%history -p -o -f babble_amazon/history.log
# Copy the notebook next to the other artifacts; this assumes the notebook
# file is named babble_amazon_task.ipynb and sits in the working directory.
!cp babble_amazon_task.ipynb babble_amazon/notebook.ipynb

In [None]:
from types import SimpleNamespace

# save explanations
FILE = "babble_amazon/explanations.tsv"
exp_io = ExplanationIO()

# Explanations without a candidate get a stand-in object whose mention_id is
# None (presumably so the writer can read `candidate.mention_id` — confirm in
# ExplanationIO).
for exp in explanations:
    if exp.candidate is not None:
        continue
    exp.candidate = SimpleNamespace(mention_id=None)

exp_io.write(explanations, FILE)
# Read the file straight back, replacing the in-memory list with its contents.
explanations = exp_io.read(FILE)

# save label model
label_aggregator.save("babble_amazon/lfmodel.pkl")

In [None]:
# Bundle the output folder recursively into babble_amazon.zip, the file the
# upload cell sends.
!zip -r babble_amazon.zip babble_amazon

In [None]:
# The name prefixes the uploaded file name, so refuse to upload without one.
# Whitespace-only names are rejected too, and the assertion now explains itself.
assert YOUR_NAME.strip(), "Set YOUR_NAME to a non-empty value before uploading."
upload_data("babble_amazon.zip", YOUR_NAME + "_babble_amazon.zip")