## NOTE - Use Python 3.6

In [69]:
import numpy as np
import pandas as pd
from collections import defaultdict
import pymongo
import dill
import os

from BrattEssay import load_bratt_essays
from load_data import load_process_essays
from window_based_tagger_config import get_config
from FindFiles import find_files
from DirUtils import dir_exists
from results_procesor import ResultsProcessor, __MICRO_F1__
from Settings import Settings

In [70]:
# Folder holding the dill-serialised prediction (*_PREDS_) and label (*_YS_) files.
# Keep the trailing slash: other cells build file paths with plain `FOLDER + name`.
FOLDER = "/Users/simon.hughes/GitHub/NlpResearch/PythonNlpResearch/Predictions/"

In [71]:
def filter_by_str(s, files, exclude=False):
    """Select file names by substring.

    Returns the entries of `files` that contain `s`, in their original order.
    With `exclude=True` the selection is inverted: only entries that do NOT
    contain `s` are returned.
    """
    kept = []
    for name in files:
        contains = s in name
        if contains != exclude:
            kept.append(name)
    return kept

In [72]:
# List every file in the predictions folder, then narrow the list step by step,
# printing how many files remain after each step.
files = os.listdir(FOLDER)
print(len(files))
# Keep only the files whose name contains "_VD_".
files = filter_by_str("_VD_", files)
print(len(files))
# Drop the files whose name contains "2019".
files = filter_by_str("2019", files, exclude=True)
print(len(files))

102
50
18


In [73]:
# Display the remaining file names in sorted order (notebook cell output).
sorted(files)

['TEST_CB_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG_PREDS_.dill',
 'TEST_CB_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG_YS_.dill',
 'TEST_CB_TAGGING_VD_CRF_MOST_COMMON_TAG_FIXED_PREDS_.dill',
 'TEST_CB_TAGGING_VD_CRF_MOST_COMMON_TAG_FIXED_YS_.dill',
 'TEST_CB_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS_FIXED_PREDS_.dill',
 'TEST_CB_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS_FIXED_YS_.dill',
 'TEST_CB_TAGGING_VD_RNN_MOST_COMMON_TAG_PREDS_.dill',
 'TEST_CB_TAGGING_VD_RNN_MOST_COMMON_TAG_YS_.dill',
 'TEST_CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS_PREDS_.dill',
 'TEST_CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS_YS_.dill',
 'TEST_SC_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG_PREDS_.dill',
 'TEST_SC_TAGGING_VD_AVG_PERCEPTRON_MOST_COMMON_TAG_YS_.dill',
 'TEST_SC_TAGGING_VD_CRF_MOST_COMMON_TAG_PREDS_.dill',
 'TEST_SC_TAGGING_VD_CRF_MOST_COMMON_TAG_YS_.dill',
 'TEST_SC_TAGGING_VD_HMM_MOST_COMMON_TAG_MULTICLASS_PREDS_.dill',
 'TEST_SC_TAGGING_VD_HMM_MOST_COMMON_TAG_MULT

In [74]:
# Split the remaining files by the dataset marker embedded in the file name.
# Presumably SC = Skin Cancer and CB = Coral Bleaching, matching the dataset
# folders used when the essays are loaded — TODO confirm.
sc_files = filter_by_str("_SC_", files)
cb_files = filter_by_str("_CB_", files)
# Cell output: the number of files found for each dataset.
len(cb_files), len(sc_files)

(10, 8)

In [75]:
# Maps a display name for each algorithm to the substring that identifies that
# algorithm's files; load_predictions() selects files by these substrings.
filters = {
    "PERCEPTRON":"PERCEPTRON",
    "WINDOW_CLASSIFIER":"WINDOW_CLASSIFIER",
    "CRF": "_CRF_",
    "HMM" : "_HMM_",
    "RNN" : "_RNN_"
}

In [76]:
from pprint import pprint

def load_predictions(input_files):
    """Load the serialised predictions of every algorithm listed in `filters`.

    Args:
        input_files: file names (relative to FOLDER) to search; only names
            containing "_PREDS" are considered.

    Returns:
        dict mapping each algorithm name (a key of `filters`) to the object
        deserialised from that algorithm's prediction file.

    Raises:
        AssertionError: if an algorithm's filter string does not match exactly
            one prediction file.
    """
    algo2preds = dict()
    pred_files = filter_by_str("_PREDS", input_files)
    for algo_name, fltr in filters.items():
        f_files = filter_by_str(fltr, pred_files)
        assert len(f_files) == 1, \
            f"Expected exactly one prediction file matching '{fltr}' for {algo_name}, found: {f_files}"
        fname = os.path.join(FOLDER, f_files[0])
        # Read-only binary mode: the file is only read, so do not request
        # write access ("rb+"), which fails on read-only files or folders.
        with open(fname, "rb") as f:
            # NOTE: dill.load can execute arbitrary code; only load trusted files.
            algo2preds[algo_name] = dill.load(f)
    return algo2preds

In [77]:
# Load the per-algorithm predictions for the CB files.
cb_preds = load_predictions(cb_files)
# NOTE(review): the SC load below is disabled, yet a later cell calls
# print_comparison(sc_ysbytag, sc_preds); sc_preds must have been created
# some other way — confirm before re-running the notebook top to bottom.
# sc_preds = load_predictions(sc_files)

In [78]:
# Load the *_YS_ (label) files saved by the window classifier for each dataset.
# Presumably these hold the gold labels keyed by tag — TODO confirm.
# Opened read-only ("rb"): the files are only read, so write access is not needed.
with open(FOLDER + "TEST_CB_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS_YS_.dill", "rb") as f:
    cb_ysbytag = dill.load(f)

with open(FOLDER + "TEST_SC_TAGGING_VD_WINDOW_CLASSIFIER_MOST_COMMON_TAG_MULTICLASS_YS_.dill", "rb") as f:
    sc_ysbytag = dill.load(f)

In [79]:
# sorted(files)

In [80]:
from scipy import stats

In [81]:
def get_all_preds(predsbytag):
    """Flatten a {tag: values} mapping into a single list.

    Tags are visited in ascending sorted order and each tag's values are added
    in their existing order, so mappings that share the same tags yield
    position-aligned lists.
    """
    return [value for tag in sorted(predsbytag.keys()) for value in predsbytag[tag]]

In [82]:
def compute_p_value_binomial_test(ysbytag, predsbytaga, predsbytagb, alternative='two-sided'):
    """Compare two models' predictions with an exact binomial (sign) test.

    Only positions where the two models disagree AND exactly one of them
    matches the true label are counted. Under the null hypothesis each model
    is equally likely to be the correct one on such a position (p=0.5).

    Args:
        ysbytag: dict tag -> sequence of true labels.
        predsbytaga: dict tag -> sequence of model A's predictions.
        predsbytagb: dict tag -> sequence of model B's predictions.
        alternative: 'two-sided', 'greater' or 'less'; passed to SciPy.

    Returns:
        The p-value of observing model A's number of wins. Returns 1.0 when
        there is no informative disagreement between the models.

    Raises:
        AssertionError: if the three mappings do not share the same tags, or
            if the sequences for any tag differ in length.
    """
    assert len(ysbytag.keys()) == len(predsbytaga.keys()), (len(ysbytag.keys()),len(predsbytaga.keys()))
    assert len(ysbytag.keys()) == len(predsbytagb.keys()), (len(ysbytag.keys()),len(predsbytagb.keys()))
    # Equal key counts are not enough: the labels and both prediction sets
    # must cover the very same tags, otherwise values would be mis-paired.
    assert set(ysbytag.keys()) == set(predsbytaga.keys()) == set(predsbytagb.keys()), "Tag sets differ"

    a_wins = 0
    b_wins = 0
    # Pair the values tag by tag so that every tag is checked for alignment.
    for tag in ysbytag.keys():
        ys, aas, bbs = ysbytag[tag], predsbytaga[tag], predsbytagb[tag]
        assert len(ys) == len(aas) == len(bbs), (tag, len(ys), len(aas), len(bbs))
        for y, a, b in zip(ys, aas, bbs):
            if a == b:
                continue
            if a == y:
                a_wins += 1
            elif b == y:
                # With non-binary labels both models can be wrong; such
                # positions carry no information and are not counted.
                b_wins += 1

    n_trials = a_wins + b_wins
    if n_trials == 0:
        # No informative disagreements: no evidence of a difference.
        return 1.0

    # stats.binom_test was deprecated and later removed from SciPy; prefer
    # stats.binomtest when available and fall back on older installations.
    if hasattr(stats, "binomtest"):
        return stats.binomtest(a_wins, n_trials, p=0.5, alternative=alternative).pvalue
    return stats.binom_test(a_wins, n_trials, p=0.5, alternative=alternative)

## Get Predicted Tags from Labelled Essays

In [83]:
def get_label(tag, expected_tag_set):
    """Binary membership indicator: 1 if `tag` is in `expected_tag_set`, else 0."""
    return int(tag in expected_tag_set)

def get_wd_level_lbs(essays, expected_tags):
    """Build word-level 0/1 labels per expected tag from the essays' own tags.

    Every sentence in `e.sentences` is iterated as (word, tag_set) pairs. For
    each word and each expected tag, 1 is recorded when the tag is in the
    word's tag set and 0 otherwise.

    Returns a defaultdict(list) mapping tag -> list of labels, one per word,
    in essay / sentence / word order.
    """
    tags = set(expected_tags)
    labels_by_tag = defaultdict(list)
    word_tag_sets = (
        tag_set
        for essay in essays
        for sentence in essay.sentences
        for _, tag_set in sentence
    )
    for tag_set in word_tag_sets:
        for tag in tags:
            labels_by_tag[tag].append(get_label(tag, tag_set))
    return labels_by_tag

# for pred tags
def get_wd_level_preds(essays, expected_tags):
    """Build word-level 0/1 predictions per expected tag.

    Each essay must expose `sentences` and `pred_tagged_sentences`; the latter
    holds, per sentence, one predicted tag per word. For each word and each
    expected tag, 1 is recorded when the predicted tag equals the expected tag
    and 0 otherwise.

    Args:
        essays: iterable of essay objects.
        expected_tags: iterable of tags to produce predictions for.

    Returns:
        defaultdict(list) mapping tag -> list of 0/1 values, one per predicted
        word, in essay / sentence / word order.

    Raises:
        IndexError: if an essay has fewer predicted sentences than sentences.
    """
    expected_tags = set(expected_tags)
    ysbycode = defaultdict(list)
    for e in essays:
        # Index by the essay's own sentences so that a missing predicted
        # sentence raises instead of being silently skipped.
        for sentix in range(len(e.sentences)):
            for ptag in e.pred_tagged_sentences[sentix]:
                for exp_tag in expected_tags:
                    ysbycode[exp_tag].append(int(exp_tag == ptag))
    return ysbycode

def compute_metrics_from_essays(tagged_esssays, expected_tags):
    """Score the predicted tags stored on the essays against their actual tags.

    Word-level 0/1 label lists are built per expected tag for both the actual
    and the predicted tags. After sanity checks that both cover the same number
    of tags and words, the result of ResultsProcessor.compute_mean_metrics is
    returned.
    """
    # TODO - get predicted ccodes and anaphora labels, merge into one set of preds and filter by expected_tags.
    # This gives the flexibility to look at anaphora, cc or both.
    actual_by_code = get_wd_level_lbs(tagged_esssays, expected_tags)
    predicted_by_code = get_wd_level_preds(tagged_esssays, expected_tags)

    assert len(actual_by_code.keys()) == len(predicted_by_code.keys()) == len(expected_tags), "Miss-matched codes"

    # Spot-check the first and last tag: both sides must cover the same words.
    tag_list = list(expected_tags)
    for probe_tag in (tag_list[0], tag_list[-1]):
        assert len(actual_by_code[probe_tag]) == len(predicted_by_code[probe_tag]), "Different numbers of words"

    return ResultsProcessor.compute_mean_metrics(actual_by_code, predicted_by_code)

In [21]:
import sys
# Add the "src" sub-folder of the Causal Model notebook directory to sys.path
# so that modules stored there can be imported.
cm_folder = "/Users/simon.hughes/GitHub/NlpResearch/PythonNlpResearch/notebooks/Causal Model/"
src_path = os.path.join(cm_folder, "src")
sys.path.append(src_path)

In [56]:
# Load the serialised test essays for the CoralBleaching dataset into cb_essays.
# The file lives in the Bi-LSTM predictions folder, so the essays presumably
# carry that model's predicted tags — TODO confirm.
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
# used as inputs to parsing model
rnn_predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"
test_fname = rnn_predictions_folder + "essays_test_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(test_fname, "rb") as f:
    cb_essays = dill.load(f)

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/


In [57]:
# Load the serialised test essays for the SkinCancer dataset into sc_essays.
# NOTE: this rebinds settings, root_folder, rnn_predictions_folder and
# test_fname, so afterwards they point at the SkinCancer paths.
settings = Settings()
root_folder = settings.data_directory + "SkinCancer/Thesis_Dataset/"
# used as inputs to parsing model
rnn_predictions_folder = root_folder + "Predictions/Bi-LSTM-4-SEARN/"
test_fname = rnn_predictions_folder + "essays_test_bi_directional-True_hidden_size-256_merge_mode-sum_num_rnns-2_use_pretrained_embedding-True.dill"
with open(test_fname, "rb") as f:
    sc_essays = dill.load(f)

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/


In [62]:
# cb_preds["RNN"] = get_wd_level_preds(cb_essays, cb_ysbytag.keys())
# sc_preds["RNN"] = get_wd_level_preds(sc_essays, sc_ysbytag.keys())

In [84]:
def print_comparison(ysbytag, algo2preds):
    """Print a pairwise significance table for the given algorithms.

    For every ordered pair of distinct algorithms one line is printed with
    both algorithms' micro-F1 scores and the binomial-test p-value comparing
    their predictions against `ysbytag`. A blank line follows each
    algorithm's group of rows. Nothing is returned.
    """
    # Micro-averaged metrics per algorithm, computed once up front.
    algo2metrics = {
        algo: ResultsProcessor.compute_mean_metrics(ysbytag, preds)[__MICRO_F1__]
        for algo, preds in algo2preds.items()
    }

    matrix = {}
    for algo_name_a, preds_a in algo2preds.items():
        for algo_name_b, preds_b in algo2preds.items():
            if algo_name_a != algo_name_b:
                f1_a = algo2metrics[algo_name_a]["f1_score"]
                f1_b = algo2metrics[algo_name_b]["f1_score"]
                pval = compute_p_value_binomial_test(ysbytag, preds_a, preds_b)
                print(f"{algo_name_a.ljust(20)} {f1_a:.4f}\t{algo_name_b.ljust(20)} {f1_b:.4f}\t {pval}")
                matrix[(algo_name_a, algo_name_b)] = pval
        # Blank line between the row groups of consecutive algorithms.
        print()

In [85]:
# Print the pairwise F1 / p-value table for the CB predictions.
print_comparison(cb_ysbytag, cb_preds)

PERCEPTRON           0.8399	WINDOW_CLASSIFIER    0.8413	 0.5990715121670374
PERCEPTRON           0.8399	CRF                  0.8380	 0.3953317058722892
PERCEPTRON           0.8399	HMM                  0.7471	 1.840272414942264e-204
PERCEPTRON           0.8399	RNN                  0.8422	 0.46371986491039574

WINDOW_CLASSIFIER    0.8413	PERCEPTRON           0.8399	 0.5990715121670374
WINDOW_CLASSIFIER    0.8413	CRF                  0.8380	 0.16554440323794242
WINDOW_CLASSIFIER    0.8413	HMM                  0.7471	 6.539263608822287e-216
WINDOW_CLASSIFIER    0.8413	RNN                  0.8422	 0.23461093020583862

CRF                  0.8380	PERCEPTRON           0.8399	 0.3953317058722892
CRF                  0.8380	WINDOW_CLASSIFIER    0.8413	 0.16554440323794242
CRF                  0.8380	HMM                  0.7471	 3.4081766479273455e-186
CRF                  0.8380	RNN                  0.8422	 0.9570199756080899

HMM                  0.7471	PERCEPTRON           0.8399	 1.840272414

In [65]:
# Print the pairwise F1 / p-value table for the SC predictions.
# NOTE(review): no active statement in the visible cells assigns sc_preds
# (its load is commented out) — confirm where it is created.
print_comparison(sc_ysbytag, sc_preds)

PERCEPTRON           0.8064	WINDOW_CLASSIFIER    0.8144	 0.02561138530508483
PERCEPTRON           0.8064	CRF                  0.8043	 0.4171435908787128
PERCEPTRON           0.8064	HMM                  0.6754	 1.8570135796000486e-301
PERCEPTRON           0.8064	RNN                  0.7894	 1.508910809355667e-36

WINDOW_CLASSIFIER    0.8144	PERCEPTRON           0.8064	 0.02561138530508483
WINDOW_CLASSIFIER    0.8144	CRF                  0.8043	 0.003608884280891862
WINDOW_CLASSIFIER    0.8144	HMM                  0.6754	 0.0
WINDOW_CLASSIFIER    0.8144	RNN                  0.7894	 1.6224648439134976e-47

CRF                  0.8043	PERCEPTRON           0.8064	 0.4171435908787128
CRF                  0.8043	WINDOW_CLASSIFIER    0.8144	 0.003608884280891862
CRF                  0.8043	HMM                  0.6754	 3.4009044173226684e-281
CRF                  0.8043	RNN                  0.7894	 4.478991686048078e-32

HMM                  0.6754	PERCEPTRON           0.8064	 1.857013579600048