In [1]:
import dill
import pandas as pd

from Settings import Settings
from collections import defaultdict
from BrattEssay import ANAPHORA

from results_common import get_essays, validate_essays
from process_essays_coref import get_coref_processed_essays, processed_essays_predict_most_recent_tag
from metrics import get_metrics_raw
from results_common import tally_essay_attributes

# Configure
- Set the values below from the hyperparameter tuning results.

In [2]:
# Dataset to evaluate. The value is used to build the data folder path and to
# choose the short dataset prefix ("CB" / "SC") further down the notebook.
DATASET = "SkinCancer" # CoralBleaching | SkinCancer

In [3]:
# Project settings object; only `data_directory` is read from it here.
settings = Settings()
# Paths are built by plain string concatenation, so this relies on
# `settings.data_directory` ending with a path separator (the saved output
# below shows "Data Dir" with a trailing "/").
root_folder = settings.data_directory + DATASET + "/Thesis_Dataset/"
# Two candidate co-reference prediction folders; a later cell picks one.
stanford_coref_predictions_folder = root_folder + "CoReference/"
berkeley_coref_predictions_folder = root_folder + "CoReference/Berkeley/"

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/


In [4]:
# Which algorithm?
# Switch the right-hand side to `stanford_coref_predictions_folder` to evaluate
# the other set of co-reference predictions instead.
coref_predictions_folder = berkeley_coref_predictions_folder
print("CoRef Data: ", coref_predictions_folder)

CoRef Data:  /Users/simon.hughes/Google Drive/Phd/Data/SkinCancer/Thesis_Dataset/CoReference/Berkeley/


## Load Essays

In [5]:
# Load the "Training" essays from the selected co-reference folder (the saved
# output shows this resolving to `training_processed.dill`).
training_essays = get_essays(coref_predictions_folder, "Training")

Found file /Users/simon.hughes/Google Drive/Phd/Data/SkinCancer/Thesis_Dataset/CoReference/Berkeley/training_processed.dill


In [6]:
# Load the "Test" essays from the same folder (the saved output shows this
# resolving to `test_processed.dill`).
test_essays = get_essays(coref_predictions_folder, "Test")

Found file /Users/simon.hughes/Google Drive/Phd/Data/SkinCancer/Thesis_Dataset/CoReference/Berkeley/test_processed.dill


In [7]:
# Training and test essays combined; used below to tally tags over the whole
# dataset. Assumes both values support `+` concatenation (e.g. lists) — TODO confirm.
all_essays = training_essays + test_essays

## Look at the Anaphor Tags

In [8]:
from results_procesor import is_a_regular_code

# Three per-tag counters built in one pass over every (word, tags) pair:
#   reg_tally - tags accepted by is_a_regular_code
#   cr_tally  - anaphora tags that contain "->"
#   cc_tally  - "Anaphor:[...]" tags that are not rhetorical
# Anaphora tags containing "other" are ignored entirely.
cc_tally = defaultdict(int)
cr_tally = defaultdict(int)
reg_tally = defaultdict(int)
for essay in all_essays:
    for sentence in essay.sentences:
        for _word, word_tags in sentence:
            for tag in word_tags:
                if is_a_regular_code(tag):
                    reg_tally[tag] += 1
                # Everything below only concerns non-"other" anaphora tags.
                if ANAPHORA not in tag or "other" in tag:
                    continue
                if "->" in tag:
                    cr_tally[tag] += 1
                elif "Anaphor:[" in tag and "rhetorical" not in tag:
                    cc_tally[tag] += 1

reg_tags = sorted(reg_tally)
all_ana_tags = sorted(cc_tally)
# Sanity check: the number of distinct regular tags must equal the number of
# distinct anaphor tags.
assert len(reg_tags) == len(all_ana_tags)
all_ana_tags

['Anaphor:[11]',
 'Anaphor:[12]',
 'Anaphor:[1]',
 'Anaphor:[2]',
 'Anaphor:[3]',
 'Anaphor:[4]',
 'Anaphor:[50]',
 'Anaphor:[5]',
 'Anaphor:[6]']

In [9]:
def blank_if_none(val):
    """Return "-" for empty-looking values, otherwise return ``val`` unchanged.

    A value counts as empty when it is falsy (``None``, ``0``, ``""``, empty
    containers, ...) or when its string form, lower-cased, equals "none".
    """
    if not val:
        return "-"
    if str(val).lower() == "none":
        return "-"
    return val

In [10]:
def get_metrics(essays, format_ana_tags, expected_tags):
    """Run the most-recent-tag predictor over ``essays`` and score the result.

    Args:
        essays: essays passed to ``processed_essays_predict_most_recent_tag``.
        format_ana_tags: forwarded unchanged to the predictor.
        expected_tags: tags forwarded to ``get_metrics_raw`` for scoring.

    Returns:
        Whatever ``get_metrics_raw`` returns when called with
        ``micro_only=True``; the notebook indexes the result by tag to read
        per-tag counts.
    """
    predicted_essays = processed_essays_predict_most_recent_tag(
        essays=essays, format_ana_tags=format_ana_tags)
    return get_metrics_raw(predicted_essays, expected_tags=expected_tags, micro_only=True)

In [15]:
def aggregate_metrics(metrics_dict, codes):
    """Sum the per-code count fields over ``codes``.

    Args:
        metrics_dict: mapping of code -> mapping that holds at least the
            'data_points', 'num_codes', 'fn', 'fp', 'tn' and 'tp' entries.
        codes: iterable of codes to include; each must be a key of
            ``metrics_dict`` (a missing code raises ``KeyError``).

    Returns:
        dict with the six summed fields. 'data_points' and 'num_codes' start
        from integer 0, the confusion-matrix fields from float 0.0.
    """
    totals = dict.fromkeys(('data_points', 'num_codes'), 0)
    totals.update(dict.fromkeys(('fn', 'fp', 'tn', 'tp'), 0.0))
    for code in sorted(codes):
        code_counts = metrics_dict[code]
        for field in tuple(totals):
            totals[field] += code_counts[field]
    return totals

def combine_metrics(metricsa, metricsb):
    """Add two metric mappings key by key.

    Only the keys of ``metricsa`` are considered; each must also be present in
    ``metricsb`` (otherwise ``KeyError``). Extra keys in ``metricsb`` are
    ignored. A new dict is returned.
    """
    combined = dict(metricsa)
    for name in tuple(combined):
        combined[name] += metricsb[name]
    return combined

In [16]:
def calc_metrics_from_counts(aggregate):
    """Derive precision, recall and F1 from aggregated confusion-matrix counts.

    Args:
        aggregate: mapping holding at least the "tp", "fp" and "fn" counts
            (for example the dict returned by ``aggregate_metrics``).

    Returns:
        dict with the keys "f1", "prec" and "rec". Any ratio whose denominator
        is zero (no predicted positives, no actual positives, or precision and
        recall both zero) is reported as 0.0 rather than raising
        ``ZeroDivisionError``.
    """
    tp, fp, fn = aggregate["tp"], aggregate["fp"], aggregate["fn"]

    def _safe_ratio(numerator, denominator):
        # An empty denominator means the metric is undefined; report it as 0.0.
        return numerator / denominator if denominator else 0.0

    prec = _safe_ratio(tp, tp + fp)
    rec = _safe_ratio(tp, tp + fn)
    f1 = _safe_ratio(2 * prec * rec, prec + rec)

    return {"f1": f1, "prec": prec, "rec": rec}


def counts_to_metrics_df(aggregate):
    """Return the metrics for ``aggregate`` as a one-row DataFrame.

    The notebook calls this helper, but no cell defines it, so a fresh
    "Restart & Run All" would fail with ``NameError``.

    NOTE(review): reconstructed from the saved cell outputs (a single row,
    index 0, with the columns f1 / prec / rec) - confirm against the original
    definition if it still exists elsewhere.
    """
    return pd.DataFrame([calc_metrics_from_counts(aggregate)])

## Get Data From Mongo

#### Get Anaphora Resolution Counts (Training Set)

In [22]:
# Score the most-recent-tag predictions for the anaphor tags.
# NOTE(review): the `vd` prefix suggests validation data, but the essays passed
# in here are `training_essays` - confirm the intended naming.
df_vd_metrics_ana = get_metrics(essays=training_essays, 
        format_ana_tags=True, expected_tags=all_ana_tags)

# Sum the per-tag counts over all anaphor tags and display the totals.
vd_ana_counts = aggregate_metrics(df_vd_metrics_ana, all_ana_tags)
vd_ana_counts

{'data_points': 1309239,
 'num_codes': 473,
 'fn': 394.0,
 'fp': 119.0,
 'tn': 1308647.0,
 'tp': 79.0}

In [23]:
# Display f1 / prec / rec for the aggregated counts computed in the previous cell.
counts_to_metrics_df(vd_ana_counts)

Unnamed: 0,f1,prec,rec
0,0.235469,0.39899,0.167019


## Test Metrics

In [24]:
# Short dataset code: "CB" for CoralBleaching, "SC" for anything else.
PREFIX = "CB" if DATASET == "CoralBleaching" else "SC"
# NOTE(review): `test_collection` is not referenced by any other visible cell;
# it may be a leftover from an earlier version of this notebook - confirm.
test_collection = "TEST_" + PREFIX + "_TAGGING_VD_RNN_MOST_COMMON_TAG"

In [26]:
# Same scoring as for the training essays, applied to the held-out test essays.
df_test_metrics_ana = get_metrics(essays=test_essays, 
        format_ana_tags=True, expected_tags=all_ana_tags)

# Sum the per-tag counts over all anaphor tags and display the totals.
test_ana_counts = aggregate_metrics(df_test_metrics_ana, all_ana_tags)
test_ana_counts

{'data_points': 318618,
 'num_codes': 107,
 'fn': 79.0,
 'fp': 55.0,
 'tn': 318456.0,
 'tp': 28.0}

In [27]:
# Display f1 / prec / rec for the aggregated test-set counts.
counts_to_metrics_df(test_ana_counts)

Unnamed: 0,f1,prec,rec
0,0.294737,0.337349,0.261682


## Get Predictions

In [30]:
from results_procesor import ResultsProcessor
from metrics import get_wd_level_preds

# Re-run the most-recent-tag predictor on the training essays, then collect the
# labels and the predictions for the anaphor tags (both keyed "by code",
# judging by the variable names - TODO confirm the exact structure).
proc_tr_essays = processed_essays_predict_most_recent_tag(essays=training_essays, format_ana_tags=True)   
tr_ysbycode  = ResultsProcessor.get_wd_level_lbs(proc_tr_essays,  expected_tags=all_ana_tags)
tr_predsbycode = get_wd_level_preds(proc_tr_essays, expected_tags=all_ana_tags)

# Show only the "MICRO_F1" entry of the computed metrics. In the saved output
# its recall / precision / f1_score agree with the values derived from the
# aggregated training counts earlier in the notebook.
ResultsProcessor.compute_mean_metrics(tr_ysbycode, tr_predsbycode)["MICRO_F1"]

{'recall': 0.16701902748414377,
 'precision': 0.398989898989899,
 'f1_score': 0.23546944858420268,
 'accuracy': 0.9996081693258451,
 'num_codes': 473.0,
 'data_points': 1309239.0}

In [31]:
# Same steps as for the training essays, applied to the test essays: predict,
# then collect the labels and predictions for the anaphor tags.
proc_test_essays = processed_essays_predict_most_recent_tag(essays=test_essays, format_ana_tags=True) 
test_ysbycode  = ResultsProcessor.get_wd_level_lbs(proc_test_essays,  expected_tags=all_ana_tags)
test_predsbycode = get_wd_level_preds(proc_test_essays, expected_tags=all_ana_tags)

# Show only the "MICRO_F1" entry of the computed metrics for the test set.
ResultsProcessor.compute_mean_metrics(test_ysbycode, test_predsbycode)["MICRO_F1"]

{'recall': 0.2616822429906542,
 'precision': 0.3373493975903614,
 'f1_score': 0.29473684210526313,
 'accuracy': 0.9995794336792021,
 'num_codes': 107.0,
 'data_points': 318618.0}

In [32]:
# Hand the training-set predictions and labels to
# ResultsProcessor.persist_predictions. The name is built from PREFIX so that
# switching DATASET to CoralBleaching no longer writes under the SkinCancer
# ("SC") name; with DATASET = "SkinCancer" the name is unchanged.
ResultsProcessor.persist_predictions("COREF_" + PREFIX + "_NEAREST_TAG_TD", tr_predsbycode, tr_ysbycode)

In [33]:
# Hand the test-set predictions and labels to
# ResultsProcessor.persist_predictions. The name is built from PREFIX so that
# switching DATASET to CoralBleaching no longer writes under the SkinCancer
# ("SC") name; with DATASET = "SkinCancer" the name is unchanged.
# NOTE(review): the "_VD" suffix is used for the *test* predictions here - confirm
# that this is the intended name.
ResultsProcessor.persist_predictions("COREF_" + PREFIX + "_NEAREST_TAG_VD", test_predsbycode, test_ysbycode)