## Goals
 - The other techniques for finding anaphoric concept codes did not work well. Why not instead use the anaphora tagging model and simply take the most recently predicted concept code as the antecedent to grab the code from?

In [1]:
import dill
import pandas as pd

from Settings import Settings
from collections import defaultdict
from BrattEssay import ANAPHORA

from results_common import get_essays, validate_essays, tally_essay_attributes
from process_essays_coref import get_coref_processed_essays, processed_essays_predict_most_recent_tag
from metrics import get_metrics_raw

# progress bar widget
from ipywidgets import IntProgress
from IPython.display import display

# Dataset to evaluate; determines the sub-folder under the data directory.
DATASET = "CoralBleaching" # CoralBleaching | SkinCancer

settings = Settings()
root_folder = settings.data_directory + DATASET + "/Thesis_Dataset/"

# Candidate folders holding co-reference predictions from each parser.
stanford_coref_predictions_folder = root_folder + "CoReference/"
berkeley_coref_predictions_folder = root_folder + "CoReference/Berkeley/"

# Which algorithm? Switch this single assignment to change the parser used below.
coref_predictions_folder = berkeley_coref_predictions_folder

# Report the folder that is actually used for loading, not a hard-coded alternative,
# so the logged path always matches the files read by get_essays.
print("CoRef Data: ", coref_predictions_folder)

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/
CoRef Data:  /Users/simon.hughes/Google Drive/Phd/Data/CoralBleaching/Thesis_Dataset/CoReference/


In [2]:
# Load the "Training" essays from the selected co-reference predictions folder.
training_essays = get_essays(coref_predictions_folder, "Training")

Found file /Users/simon.hughes/Google Drive/Phd/Data/CoralBleaching/Thesis_Dataset/CoReference/Berkeley/training_processed.dill


In [3]:
# Load the "Test" essays from the same co-reference predictions folder.
test_essays = get_essays(coref_predictions_folder, "Test")

Found file /Users/simon.hughes/Google Drive/Phd/Data/CoralBleaching/Thesis_Dataset/CoReference/Berkeley/test_processed.dill


In [4]:
# Training and test essays combined, used below for the corpus-wide tag tallies.
all_essays = training_essays + test_essays

### Validate the Lengths

In [5]:
# Run the project's essay validation on each split separately
# (presumably a length/alignment check, per the section heading - confirm in results_common).
validate_essays(training_essays)
validate_essays(test_essays)

Essays validated
Essays validated


In [6]:
# Tally the predicted POS-tag attribute over all essays (the NER tally is currently disabled).
# NOTE(review): pos_tally is not referenced again in the visible cells - confirm it is still needed.
# ner_tally = tally_essay_attributes(all_essays, attribute_name="pred_ner_tags_sentences")
pos_tally = tally_essay_attributes(all_essays, attribute_name="pred_pos_tags_sentences")

## Look at the Anaphor Tags

In [7]:
from results_procesor import is_a_regular_code

# Occurrence counts per tag, gathered over every word of every essay:
#   reg_tally - tags accepted by is_a_regular_code
#   cr_tally  - anaphora tags containing "->"
#   cc_tally  - anaphora tags containing "Anaphor:[" (and no "->")
cc_tally = defaultdict(int)
cr_tally = defaultdict(int)
reg_tally = defaultdict(int)

for essay in all_essays:
    for sentence in essay.sentences:
        for _word, word_tags in sentence:
            for tag in word_tags:
                if is_a_regular_code(tag):
                    reg_tally[tag] += 1

                # Only anaphora tags that are not "other" are tallied further.
                if ANAPHORA not in tag or "other" in tag:
                    continue

                if "->" in tag:
                    cr_tally[tag] += 1
                elif "Anaphor:[" in tag:
                    cc_tally[tag] += 1

reg_tags = sorted(reg_tally)
all_ana_tags = sorted(cc_tally)
# Sanity check: the number of distinct regular codes must equal the number of anaphor tags.
assert len(reg_tags) == len(all_ana_tags)
all_ana_tags

['Anaphor:[11]',
 'Anaphor:[12]',
 'Anaphor:[13]',
 'Anaphor:[14]',
 'Anaphor:[1]',
 'Anaphor:[2]',
 'Anaphor:[3]',
 'Anaphor:[4]',
 'Anaphor:[50]',
 'Anaphor:[5]',
 'Anaphor:[5b]',
 'Anaphor:[6]',
 'Anaphor:[7]']

In [8]:
def blank_if_none(val):
    """Return "-" for a missing value, otherwise return the value unchanged.

    A value counts as missing when it is falsy (None, "", 0, an empty
    container, ...) or when its string form equals "none" ignoring case.
    """
    if not val:
        return "-"
    if str(val).lower() == "none":
        return "-"
    return val

def replace_if_blank(val, replace):
    """Return `replace` when `val` is the empty string or "-", otherwise return `val`."""
    is_blank = val == "" or val == "-"
    return replace if is_blank else val

def process_sort_results(df_results):
    """Keep only the f1_score, precision and recall columns, sorted by f1_score descending.

    Returns a new DataFrame; `df_results` itself is not modified.
    """
    metric_columns = ["f1_score", "precision", "recall"]
    return df_results[metric_columns].sort_values(by="f1_score", ascending=False)

# Grid Search With Anaphora Prediction Filters

In [9]:
def grid_search(essays, expected_tags, format_ana_tags=True):
    """Score the most-recent-tag anaphora predictions for `essays`.

    The essays are passed through processed_essays_predict_most_recent_tag
    (forwarding `format_ana_tags`), metrics are computed against
    `expected_tags` with micro_only=True, and the "MICRO_F1" entry of those
    metrics is returned as a single-row DataFrame.

    Despite the name, no parameter grid is searched here: exactly one
    configuration is evaluated.
    """
    predicted_essays = processed_essays_predict_most_recent_tag(
        essays=essays, format_ana_tags=format_ana_tags
    )
    all_metrics = get_metrics_raw(predicted_essays, expected_tags=expected_tags, micro_only=True)
    return pd.DataFrame([all_metrics["MICRO_F1"]])

In [10]:
def get_metrics(essays, expected_tags):
    """Return the "MICRO_F1" metrics for most-recent-tag predictions as a one-row DataFrame.

    Unlike grid_search, no `format_ana_tags` argument is forwarded, so
    processed_essays_predict_most_recent_tag runs with its own default for it.
    """
    predicted = processed_essays_predict_most_recent_tag(essays=essays)
    micro_f1_row = get_metrics_raw(predicted, expected_tags=expected_tags, micro_only=True)["MICRO_F1"]
    return pd.DataFrame([micro_f1_row])

## Training

In [11]:
# Evaluate the most-recent-tag predictions on the training essays against the anaphor tags,
# then display the f1/precision/recall columns.
df_train = grid_search(essays=training_essays, expected_tags=all_ana_tags)
process_sort_results(df_train)

Unnamed: 0,f1_score,precision,recall
0,0.262243,0.287197,0.241279


## Test

In [12]:
# Same evaluation as for the training split, run on the test essays.
df_test = grid_search(essays=test_essays, expected_tags=all_ana_tags)
process_sort_results(df_test)

Unnamed: 0,f1_score,precision,recall
0,0.323529,0.37931,0.282051


# Compute Overall Accuracy

## Training

In [15]:
# df_train_all = grid_search(essays=training_essays, expected_tags=reg_tags, format_ana_tags=False)
# process_sort_results(df_train_all)

## Test

In [16]:
# df_test_all = grid_search(essays=test_essays, expected_tags=reg_tags, format_ana_tags=False)
# process_sort_results(df_test_all)