## Goals
 - The other techniques for finding anaphoric concept codes did not work well. Why not instead use the anaphora tagging model and just use the most recently predicted concept code as the antecedent to grab the code from? 

In [88]:
import dill
import pandas as pd

from Settings import Settings
from collections import defaultdict
from BrattEssay import ANAPHORA

from results_common import get_essays, validate_essays, tally_essay_attributes
from process_essays_coref import get_coref_processed_essays
from metrics import get_metrics_raw

# progress bar widget
from ipywidgets import IntProgress
from IPython.display import display

# Dataset to analyse: "CoralBleaching" or "SkinCancer"
DATASET = "SkinCancer"

settings = Settings()
# <data_directory><DATASET>/Thesis_Dataset/
root_folder = "".join([settings.data_directory, DATASET, "/Thesis_Dataset/"])
# The Berkeley predictions live in a sub-folder of the Stanford predictions folder
stanford_coref_predictions_folder = root_folder + "CoReference/"
berkeley_coref_predictions_folder = stanford_coref_predictions_folder + "Berkeley/"
# Select which co-reference system's predictions the rest of the notebook reads
coref_predictions_folder = stanford_coref_predictions_folder
print("CoRef Data: ", coref_predictions_folder)

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/
CoRef Data:  /Users/simon.hughes/Google Drive/Phd/Data/SkinCancer/Thesis_Dataset/CoReference/


In [89]:
# Load the "Training" partition of essays from the selected co-reference predictions folder
training_essays = get_essays(coref_predictions_folder, "Training")

Found file /Users/simon.hughes/Google Drive/Phd/Data/SkinCancer/Thesis_Dataset/CoReference/training_processed.dill


In [90]:
# Load the "Test" partition of essays from the selected co-reference predictions folder
test_essays = get_essays(coref_predictions_folder, "Test")

Found file /Users/simon.hughes/Google Drive/Phd/Data/SkinCancer/Thesis_Dataset/CoReference/test_processed.dill


In [71]:
# Combined training + test essays, used for the corpus-wide tallies below.
# Re-run this cell whenever the splits above are reloaded; otherwise all_essays
# keeps referring to the previously loaded essays.
all_essays = training_essays + test_essays

### Validate the Lengths

In [91]:
# Validate each split in turn (training first, then test)
for essay_split in (training_essays, test_essays):
    validate_essays(essay_split)

Essays validated
Essays validated


In [92]:
# The NER tally is disabled (left commented out); only the POS tag tally is computed.
# ner_tally = tally_essay_attributes(all_essays, attribute_name="pred_ner_tags_sentences")
pos_tally = tally_essay_attributes(all_essays, attribute_name="pred_pos_tags_sentences")

## Look at the Anaphor Tags

In [93]:
from results_procesor import is_a_regular_code

# Frequency tables over every tag on every word in all_essays:
#   reg_tally - tags accepted by is_a_regular_code
#   cr_tally  - anaphora tags containing "->"
#   cc_tally  - anaphora tags containing "Anaphor:["
reg_tally = defaultdict(int)
cr_tally = defaultdict(int)
cc_tally = defaultdict(int)

for essay in all_essays:
    for sentence in essay.sentences:
        for word, word_tags in sentence:
            for tag in word_tags:
                if is_a_regular_code(tag):
                    reg_tally[tag] += 1
                # only anaphora tags that are not "other" are tallied below
                if ANAPHORA not in tag or "other" in tag:
                    continue
                if "->" in tag:
                    cr_tally[tag] += 1
                elif "Anaphor:[" in tag:
                    cc_tally[tag] += 1

reg_tags = sorted(reg_tally)
all_ana_tags = sorted(cc_tally)
# sanity check: as many distinct anaphora concept-code tags as regular codes
assert len(reg_tags) == len(all_ana_tags)
all_ana_tags

['Anaphor:[11]',
 'Anaphor:[12]',
 'Anaphor:[13]',
 'Anaphor:[14]',
 'Anaphor:[1]',
 'Anaphor:[2]',
 'Anaphor:[3]',
 'Anaphor:[4]',
 'Anaphor:[50]',
 'Anaphor:[5]',
 'Anaphor:[5b]',
 'Anaphor:[6]',
 'Anaphor:[7]']

In [94]:
def blank_if_none(val):
    """Return "-" for missing values, otherwise return ``val`` unchanged.

    A value counts as missing when it is falsy (``None``, ``""``, ``0``,
    empty containers, ...) or when its string form equals "none",
    ignoring case.
    """
    if not val:
        return "-"
    if str(val).lower() == "none":
        return "-"
    return val

def replace_if_blank(val, replace):
    """Return ``replace`` when ``val`` is "" or "-", otherwise return ``val``."""
    is_blank = (val == "" or val == "-")
    return replace if is_blank else val

def process_sort_results(df_results):
    """Select the f1_score, precision and recall columns, best F1 first.

    Returns a new DataFrame restricted to those three columns, with rows
    sorted by ``f1_score`` in descending order.
    """
    metric_columns = ["f1_score", "precision", "recall"]
    return df_results[metric_columns].sort_values(by="f1_score", ascending=False)

# Grid Search With Anaphora Prediction Filters

In [102]:
from process_essays_coref import fix_coref_ids, build_segmented_chain
from processessays import Essay
from CoRefHelper import EMPTY
from BrattEssay import ANAPHORA
from collections import defaultdict

def processed_essays_predict_most_recent_tag(essays, format_ana_tags=True):
    """Assign each predicted anaphor the most recently predicted concept code.

    For every essay a new ``Essay`` is built whose ``pred_tagged_sentences``
    holds, per sentence and per word, a *set* of predicted tags:

    * if the word has a predicted concept code (its entry in
      ``e.pred_tagged_sentences`` differs from ``EMPTY``), that code is added
      and remembered as the most recent prediction;
    * otherwise, if the word is tagged ``ANAPHORA`` in
      ``e.ana_tagged_sentences`` and a concept code has already been
      predicted earlier in the same essay, the most recent code is added,
      formatted as ``"<ANAPHORA>:[<code>]"`` when ``format_ana_tags`` is
      true, or as the bare code when it is false;
    * otherwise the set is left empty.

    The history of predicted codes is reset at the start of each essay.

    Args:
        essays: iterable of essay objects exposing ``name``, ``sentences``,
            ``pred_tagged_sentences``, ``pred_pos_tags_sentences``,
            ``ana_tagged_sentences`` and ``pred_corefids`` (and optionally
            ``pred_ner_tags_sentences``).
        format_ana_tags: whether to wrap the inherited code in the
            ``"<ANAPHORA>:[...]"`` format (default ``True``).

    Returns:
        A list with one new ``Essay`` per input essay, in input order.

    Raises:
        AssertionError: if an output essay's ``pred_tagged_sentences`` does
            not align with its ``sentences`` in sentence or word count.

    Note:
        ``fix_coref_ids`` is called on every input essay — presumably it
        normalises the essay in place; verify against its definition.
    """
    ana_tagged_essays = []
    for e in essays:

        fix_coref_ids(e)
        seq_pred_tags = []  # concept codes predicted so far in this essay, in order

        ana_tagged_e = Essay(e.name, e.sentences)
        ana_tagged_e.pred_tagged_sentences = []
        ana_tagged_e.pred_pos_tags_sentences = list(e.pred_pos_tags_sentences)
        # Copy the NER tags from the NER attribute rather than the POS tags.
        # Fall back to the POS tags only when the essay carries no NER
        # predictions, so such essays are handled exactly as before.
        ana_tagged_e.pred_ner_tags_sentences = list(
            getattr(e, "pred_ner_tags_sentences", e.pred_pos_tags_sentences))
        ana_tagged_e.ana_tagged_sentences    = list(e.ana_tagged_sentences)
        ana_tagged_e.pred_corefids           = list(e.pred_corefids)
        ana_tagged_essays.append(ana_tagged_e)

        # NOTE(review): the returned coref-id -> chain mapping is not used by
        # this function; the call is retained in case it has side effects on
        # the essay — confirm and drop if it is pure.
        build_segmented_chain(e)

        for sent_ix in range(len(e.sentences)):
            ana_tagged_sent = []
            ana_tagged_e.pred_tagged_sentences.append(ana_tagged_sent)

            sent = e.sentences[sent_ix]

            # SENTENCE LEVEL TAGS / PREDICTIONS
            ana_tags = e.ana_tagged_sentences[sent_ix]
            ptags = e.pred_tagged_sentences[sent_ix]

            for wd_ix in range(len(sent)):
                pred_cc_tag = ptags[wd_ix]  # predicted concept code tag
                is_ana_tag = ana_tags[wd_ix] == ANAPHORA

                # note we are changing this to a set rather than a single string;
                # every word gets an entry (possibly empty) so lengths stay aligned
                wd_ptags = set()
                ana_tagged_sent.append(wd_ptags)

                # add predicted concept code tag (filtered out by evaluation code,
                # which filters to specific tags)
                if pred_cc_tag != EMPTY:
                    seq_pred_tags.append(pred_cc_tag)
                    wd_ptags.add(pred_cc_tag)
                # elif: don't assign an inherited code to a word that already has one
                elif is_ana_tag and len(seq_pred_tags) > 0:
                    code = seq_pred_tags[-1]
                    if format_ana_tags:
                        code = "{anaphora}:[{code}]".format(anaphora=ANAPHORA, code=code)
                    wd_ptags.add(code)

    # validation check
    #   check essay and sent lengths align
    for e in ana_tagged_essays:
        assert len(e.sentences) == len(e.pred_tagged_sentences)
        for ix in range(len(e.sentences)):
            assert len(e.sentences[ix]) == len(e.pred_tagged_sentences[ix])

    return ana_tagged_essays

In [111]:
def grid_search(essays, expected_tags, format_ana_tags=True):
    """Score the most-recent-tag anaphora predictions for ``essays``.

    Despite the name, a single configuration is evaluated: the essays are
    re-tagged with ``processed_essays_predict_most_recent_tag`` and the
    "MICRO_F1" entry of ``get_metrics_raw`` is returned as a one-row
    DataFrame.

    Args:
        essays: essays to re-tag and evaluate.
        expected_tags: tags passed to ``get_metrics_raw`` as ``expected_tags``.
        format_ana_tags: forwarded to ``processed_essays_predict_most_recent_tag``.
    """
    tagged_essays = processed_essays_predict_most_recent_tag(
        essays=essays, format_ana_tags=format_ana_tags)
    all_metrics = get_metrics_raw(tagged_essays, expected_tags=expected_tags, micro_only=True)

    result_rows = [all_metrics["MICRO_F1"]]
    return pd.DataFrame(result_rows)

In [104]:
def get_metrics(essays, expected_tags):
    """Return the "MICRO_F1" metrics for ``essays`` as a one-row DataFrame.

    The essays are first re-tagged with
    ``processed_essays_predict_most_recent_tag`` using its default
    arguments, then scored against ``expected_tags`` via ``get_metrics_raw``.
    """
    tagged_essays = processed_essays_predict_most_recent_tag(essays=essays)
    micro_f1_row = get_metrics_raw(
        tagged_essays, expected_tags=expected_tags, micro_only=True)["MICRO_F1"]
    return pd.DataFrame([micro_f1_row])

## Training

In [114]:
df_train = grid_search(essays=training_essays, expected_tags=all_ana_tags)
process_sort_results(df_train)

Unnamed: 0,f1_score,precision,recall
0,0.235469,0.39899,0.167019


## Test

In [113]:
df_test = grid_search(essays=test_essays, expected_tags=all_ana_tags)
process_sort_results(df_test)

Unnamed: 0,f1_score,precision,recall
0,0.294737,0.337349,0.261682


# Compute Overall Accuracy

## Training

In [118]:
df_train_all = grid_search(essays=training_essays, expected_tags=reg_tags, format_ana_tags=False)
process_sort_results(df_train_all)

Unnamed: 0,f1_score,precision,recall
0,0.819851,0.835073,0.805174


## Test

In [119]:
df_test_all = grid_search(essays=test_essays, expected_tags=reg_tags, format_ana_tags=False)
process_sort_results(df_test_all)

Unnamed: 0,f1_score,precision,recall
0,0.835622,0.841029,0.830285
