In [1]:
import datetime
import logging
from collections import defaultdict

import dill
import numpy as np
import pymongo
import pandas as pd
from sklearn.linear_model import LogisticRegression
from typing import Any

from CrossValidation import cross_validation
from Settings import Settings
from cost_functions import *
from crel_helper import get_cr_tags, get_tag_freq
from function_helpers import get_function_names, get_functions_by_name
from results_procesor import ResultsProcessor, __MICRO_F1__
from searn_parser import SearnModelTemplateFeatures
from template_feature_extractor import *
from window_based_tagger_config import get_config
from wordtagginghelper import merge_dictionaries

In [2]:
# Data Set Partition
CV_FOLDS = 5
MIN_FEAT_FREQ = 5

# Global settings
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"

coref_root = root_folder + "CoReference/"
coref_output_folder = coref_root + "CRel/"

config = get_config(training_folder)

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/


In [3]:
train_fname = coref_output_folder + "training_crel_anatagged_essays_most_recent_code.dill"
with open(train_fname, "rb") as f:
    pred_tagged_essays_train = dill.load(f)

test_fname = coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill"
with open(test_fname, "rb") as f:
    pred_tagged_essays_test = dill.load(f)

len(pred_tagged_essays_train),len(pred_tagged_essays_test)

(902, 226)

In [4]:
EMPTY = "Empty"
from BrattEssay import ANAPHORA

def to_is_valid_crel(tags):
    filtered = set()
    for t in tags:
        t_lower = t.lower()
        if "rhetorical" in t_lower or "change" in t_lower or "other" in t_lower:
            continue
        if "->" in t and ANAPHORA not in t:
            filtered.add(t)
    return filtered

def get_crel_tags_by_sent(essays_a):
    crels_by_sent = []
    for ea in essays_a:
        for asent in ea.sentences:
            all_atags = set()
            for awd, atags in asent:
                all_atags.update(to_is_valid_crel(atags))
            crels_by_sent.append(all_atags)
    return crels_by_sent

In [5]:
tag_freq = get_tag_freq(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test)

In [6]:
cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test)
set_cr_tags = set(cr_tags)
cr_tags[0:10]

['Causer:5->Result:50',
 'Causer:7->Result:50',
 'Causer:3->Result:4',
 'Causer:11->Result:50',
 'Causer:1->Result:50',
 'Causer:13->Result:50',
 'Causer:6->Result:50',
 'Causer:3->Result:5',
 'Causer:4->Result:14',
 'Causer:3->Result:1']

In [7]:
total = 0
for cr in cr_tags:
    l,r = cr.replace("Causer:","").replace("Result:","").split("->")
    total += tag_freq[cr]
    if l == r:
        print(cr, tag_freq[cr])
total

Causer:50->Result:50 19
Causer:11->Result:11 2


43227

In [877]:
def evaluate_model(
        folds: List[Tuple[Any, Any]],
        max_epochs: int) -> float:

    serial_results = [
        model_train_predict(essays_TD, essays_VD, max_epochs)
        for essays_TD, essays_VD in folds
    ]

    cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag = defaultdict(list), defaultdict(list)
    cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = defaultdict(list), defaultdict(list)

    # record the number of features in each fold
    number_of_feats = []

    # Parallel is almost 5X faster!!!
    cv_td_preds_by_sent = []
    cv_vd_preds_by_sent = []
    for (num_feats,
         sent_td_ys_bycode, sent_vd_ys_bycode,
         sent_td_pred_ys_bycode, sent_vd_pred_ys_bycode, td_preds_by_sent, vd_preds_by_sent) in serial_results:
        number_of_feats.append(num_feats)

        merge_dictionaries(sent_td_ys_bycode, cv_sent_td_ys_by_tag)
        merge_dictionaries(sent_vd_ys_bycode, cv_sent_vd_ys_by_tag)
        merge_dictionaries(sent_td_pred_ys_bycode, cv_sent_td_predictions_by_tag)
        merge_dictionaries(sent_vd_pred_ys_bycode, cv_sent_vd_predictions_by_tag)
        
        cv_td_preds_by_sent.append(td_preds_by_sent)
        cv_vd_preds_by_sent.append(vd_preds_by_sent)

    # print(processor.results_to_string(sent_td_objectid, CB_SENT_TD, sent_vd_objectid, CB_SENT_VD, "SENTENCE"))
    return cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_td_preds_by_sent, cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag, cv_vd_preds_by_sent

def add_cr_labels(observed_tags, ys_bytag_sent):
    global set_cr_tags
    for tag in set_cr_tags:
        if tag in observed_tags:
            ys_bytag_sent[tag].append(1)
        else:
            ys_bytag_sent[tag].append(0)

def get_label_data(tagged_essays):
    global set_cr_tags
    # outputs
    ys_bytag_sent = defaultdict(list)

    for essay in tagged_essays:
        for sentence in essay.sentences:
            unique_cr_tags = set()
            for word, tags in sentence:
                unique_cr_tags.update(set_cr_tags.intersection(tags))
            add_cr_labels(unique_cr_tags, ys_bytag_sent)
    return ys_bytag_sent

def get_label_data_essay_level(tagged_essays):
    global set_cr_tags
    # outputs
    ys_bytag_essay = defaultdict(list)

    for essay in tagged_essays:
        unique_cr_tags = set()
        for sentence in essay.sentences:
            for word, tags in sentence:
                unique_cr_tags.update(set_cr_tags.intersection(tags))
        add_cr_labels(unique_cr_tags, ys_bytag_essay)
    return ys_bytag_essay

def essay_to_crels(tagged_essays):
    global set_cr_tags
    # outputs
    name2crels = defaultdict(set)
    for essay in tagged_essays:
        unique_cr_tags = set()
        for sentence in essay.sentences:
            for word, tags in sentence:
                unique_cr_tags.update(set_cr_tags.intersection(tags))
        name2crels[essay.name] = unique_cr_tags
    return dict(name2crels)

In [9]:
from featurevectorizer import FeatureVectorizer

def metrics_to_df(metrics):
    import Rpfa

    rows = []
    for k,val in metrics.items():
        if type(val) == Rpfa.rpfa:
            d = dict(val.__dict__) # convert obj to dict
        elif type(val) == dict:
            d = dict(val)
        else:
            d = dict()
        d["code"] = k
        rows.append(d)
    return pd.DataFrame(rows)

def get_micro_metrics(df):
    return df[df.code == "MICRO_F1"][["accuracy", "f1_score", "recall", "precision"]]

In [88]:
from collections import defaultdict

from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression

from Data.featurevectorizer import FeatureVectorizer

class DependencyFeatureInputs(object):
    def __init__(self, essay_name, lsent_ix, rsent_ix, causer_tag, result_tag, causer_words, between_words,
                 result_words,
                 causer_first, between_codes, num_sentences_between):
        self.essay_name = essay_name
        self.lsent_ix = lsent_ix
        self.rsent_ix = rsent_ix
        self.num_sentences_between = num_sentences_between
        self.between_codes = between_codes
        self.causer_first = causer_first
        self.result_words = result_words
        self.between_words = between_words
        self.causer_words = causer_words
        self.result_tag = result_tag
        self.causer_tag = causer_tag
        self.crel = "Causer:{a}->Result:{b}".format(a=causer_tag, b=result_tag)

class DependencyClassifier(object):
    def __init__(self, classifier_fn=LogisticRegression, negative_label=0, sentence_span=2,
                 min_feat_freq=10, log_fn=lambda s: print(s), ):
        self.log = log_fn
        self.epoch = 0
        self.negative_label = negative_label
        self.sentence_span = sentence_span
        self.min_feat_freq = min_feat_freq
        self.vectorizer = FeatureVectorizer(min_feature_freq=min_feat_freq)
        self.clf = classifier_fn()
        self.fit_vectorizer = False
            
    def __fill_in_gaps__(self, tag_seq):
        new_tag_seq = []
        for i, tag in enumerate(tag_seq):
            if tag == EMPTY \
                    and i > 0 \
                    and tag_seq[i - 1] != EMPTY \
                    and i < len(tag_seq) - 1 \
                    and tag_seq[i - 1] == tag_seq[i + 1]:
                tag = tag_seq[i - 1]

            new_tag_seq.append(tag)
        return new_tag_seq

    def __compute_tag_2_spans__(self, essay):
        sent_tag2spans = []
        wd_ix = -1
        essay_words = []
        essay_ptags = []
        for sent_ix in range(len(essay.sentences)):
            words, tag_seq = zip(*essay.sentences[sent_ix])

            tag2spans = []  # maps to list of start and end spans for each tag
            sent_tag2spans.append(tag2spans)

            last_tag = EMPTY
            tag_start = None
            ptags_sent = self.__fill_in_gaps__(essay.pred_tagged_sentences[sent_ix])
            current_crel_tags = set()
            for i, ptag in enumerate(ptags_sent):
                wd_ix += 1
                essay_words.append(words[i])
                essay_ptags.append(ptag)
                # Tag changed
                if ptag != last_tag:
                    if last_tag != EMPTY:
                        tag2spans.append((last_tag, tag_start, wd_ix - 1, sent_ix, current_crel_tags))
                    tag_start = wd_ix
                    current_crel_tags = set()
                current_crel_tags.update(to_is_valid_crel(tag_seq[i]))
                last_tag = ptag
            if last_tag != EMPTY:
                tag2spans.append((last_tag, tag_start, wd_ix, len(essay.sentences) - 1, current_crel_tags))
        assert len(essay_words) == len(essay_ptags)
        return sent_tag2spans, essay_words, essay_ptags

    def __combine_feats__(self, ftsa, ftsb):
        fts = {}
        for a, aval in ftsa.items():
            for b, bval in ftsb.items():
                fts[a + "|" + b] = aval * bval
        return fts

    def create_features(self, feat_inp):
        feats = {}
        feats[feat_inp.crel] = 1
        feats["Causer:{tag}".format(tag=feat_inp.causer_tag)] = 1
        feats["Result:{tag}".format(tag=feat_inp.result_tag)] = 1
        cs_fts, res_fts = {}, {}
        for wd in feat_inp.causer_words:
            cs_fts["Causer:{wd}".format(wd=wd)] = 1
        feats.update(cs_fts)
        for wd in feat_inp.result_words:
            res_fts["Result:{wd}".format(wd=wd)] = 1
        feats.update(res_fts)
        feats.update(self.__combine_feats__(cs_fts, res_fts))
        btwn_fts = {}
        for wd in feat_inp.between_words:
            btwn_fts["Between:{wd}".format(wd=wd)] = 1
        feats.update(btwn_fts)
        #         feats.update(self.__combine_feats__(cs_fts, btwn_fts))
        #         feats.update(self.__combine_feats__(res_fts, btwn_fts))
        if feat_inp.causer_first:
            feats["Left2Right"] = 1
        else:
            feats["Right2Left"] = 1

        if feat_inp.num_sentences_between == 0:
            feats["SameSentence"] = 1
        feats["SentBetween"] = feat_inp.num_sentences_between
        if feat_inp.num_sentences_between <= 1:
            feats["SentBetween<=1"] = 1
        if feat_inp.num_sentences_between <= 2:
            feats["SentBetween<=2"] = 1
        else:
            feats["SentBetween>2"] = 1

        num_codes_between = len(feat_inp.between_codes)
        feats["CodesBetween"] = num_codes_between
        if num_codes_between <= 1:
            feats["CodesBetween<=1"] = 1
        if num_codes_between <= 2:
            feats["CodesBetween<=2"] = 1
        else:
            feats["CodesBetween>2"] = 1
        return feats

    def __generate_training_data__(self, essays):
        xs, ys, essay_sent_feat_inpts = [], [], []
        for essay_ix, essay in enumerate(essays):
            sent_tag2spans, essay_words, essay_ptags = self.__compute_tag_2_spans__(essay)
            for sent_ix in range(len(sent_tag2spans)):
                # tag 2 spans for sentence
                next_tag2spans = []
                # grab next few sentences' predicted tags
                for offset in range(0, self.sentence_span + 1):
                    if (sent_ix + offset) < len(sent_tag2spans):
                        next_tag2spans.extend(sent_tag2spans[sent_ix + offset])

                for ltag_ix, (ltag, lstart_ix, lend_ix, lsent_ix, lcrels) in enumerate(sent_tag2spans[sent_ix]):
                    for rtag, rstart_ix, rend_ix, rsent_ix, rcrels in next_tag2spans[ltag_ix + 1:]:
                        num_sent_between = rsent_ix - lsent_ix

                        ltag_words = essay_words[lstart_ix:lend_ix + 1]
                        between_words = essay_words[lend_ix + 1:rstart_ix]
                        rtag_words = essay_words[rstart_ix:rend_ix + 1]
                        between_codes = essay_ptags[lend_ix + 1:rstart_ix]

                        lbls = set(lcrels).union(rcrels)

                        feat_ext_inp = DependencyFeatureInputs(essay_name=essay.name, lsent_ix=lsent_ix,
                                                               rsent_ix=rsent_ix,
                                                               causer_tag=ltag, result_tag=rtag,
                                                               causer_words=ltag_words, between_words=between_words,
                                                               result_words=rtag_words, causer_first=True,
                                                               between_codes=between_codes,
                                                               num_sentences_between=num_sent_between)
                        x = self.create_features(feat_ext_inp)
                        xs.append(x)
                        ys.append(1 if feat_ext_inp.crel in lbls else self.negative_label)
                        essay_sent_feat_inpts.append(feat_ext_inp)

                        feat_ext_inp = DependencyFeatureInputs(essay_name=essay.name, lsent_ix=lsent_ix,
                                                               rsent_ix=rsent_ix,
                                                               causer_tag=rtag, result_tag=ltag,
                                                               causer_words=rtag_words, between_words=between_words,
                                                               result_words=ltag_words, causer_first=False,
                                                               between_codes=between_codes,
                                                               num_sentences_between=num_sent_between)
                        x = self.create_features(feat_ext_inp)
                        xs.append(x)
                        ys.append(1 if feat_ext_inp.crel in lbls else self.negative_label)
                        essay_sent_feat_inpts.append(feat_ext_inp)
        
        if not self.fit_vectorizer:
            xs_array = self.vectorizer.fit_transform(xs)
            self.fit_vectorizer = True
        else:            
            xs_array = self.vectorizer.transform(xs)
        return xs_array, ys, essay_sent_feat_inpts

    def train(self, train_essays):
        # Note that there are a small number of crels that span 2 sentences
        xs, ys, essay_sent_crel = self.__generate_training_data__(essays=train_essays)
        self.clf.fit(X=xs, y=ys)

    def __group_predictions_by_essay__(self, essay_sent_feat_inpts, preds, threshold):
        name2pred = defaultdict(set)
        for feat_inputs, pred in zip(essay_sent_feat_inpts, preds):
            if pred >= threshold:
                name2pred[feat_inputs.essay_name].add(feat_inputs.crel)
        return name2pred

    def __group_predictions_by_sentence__(self, essay_sent_feat_inpts, preds, threshold):
        namesent2pred = defaultdict(set)
        for feat_inputs, pred in zip(essay_sent_feat_inpts, preds):
            if pred >= threshold:
                namesent2pred[(feat_inputs.essay_name, feat_inputs.lsent_ix)].add(feat_inputs.crel)
                namesent2pred[(feat_inputs.essay_name, feat_inputs.rsent_ix)].add(feat_inputs.crel)
        return namesent2pred

    def predict_probability(self, tagged_essays, min_prob=0.1):
        # Get predicted probabilities
        xs, _, essay_sent_feat_inpts = self.__generate_training_data__(essays=tagged_essays)
        probs = self.clf.predict_proba(xs)[:, 1]
        name2pred = defaultdict(list)
        for feat_inputs, prob in zip(essay_sent_feat_inpts, probs):
            if prob >= min_prob:
                name2pred[feat_inputs.essay_name].append((feat_inputs, prob))
        return name2pred

    def evaluate(self, tagged_essays, print_classification_report=True):
        # Note that there are a small number of crels that span 2 sentences
        xs, ys, essay_sent_feat_inpts = self.__generate_training_data__(essays=tagged_essays)
        preds = self.clf.predict(xs)
        if print_classification_report:
            print(classification_report(y_true=ys, y_pred=preds))

        namesent2pred = self.__group_predictions_by_sentence__(
            essay_sent_feat_inpts=essay_sent_feat_inpts, preds=preds, threshold=1.0)

        pred_ys_bytag_sent = defaultdict(list)
        for essay in tagged_essays:
            for sent_ix, sentence in enumerate(essay.sentences):
                unique_cr_tags = namesent2pred[(essay.name, sent_ix)]
                add_cr_labels(unique_cr_tags, pred_ys_bytag_sent)
        return pred_ys_bytag_sent

    def evaluate_essay_level(self, tagged_essays, print_classification_report=True):
        # Note that there are a small number of crels that span 2 sentences
        xs, ys, essay_sent_feat_inpts = self.__generate_training_data__(essays=tagged_essays)
        preds = self.clf.predict(xs)
        if print_classification_report:
            print(classification_report(y_true=ys, y_pred=preds))

        namesent2pred = self.__group_predictions_by_essay__(
            essay_sent_feat_inpts=essay_sent_feat_inpts, preds=preds, threshold=1.0)

        pred_ys_bytag_essay = defaultdict(list)
        for essay in tagged_essays:
            unique_cr_tags = namesent2pred[essay.name]
            add_cr_labels(unique_cr_tags, pred_ys_bytag_essay)
        return pred_ys_bytag_essay


In [81]:
def compute_sentence_accuracy(parser, essays):
    ys_bytag_sent = get_label_data(tagged_essays=essays)
    pred_ys_bytag_sent = parser.evaluate(tagged_essays=essays, print_classification_report=False)
    mean_metrics = ResultsProcessor.compute_mean_metrics(ys_bytag_sent, pred_ys_bytag_sent)
    return get_micro_metrics(metrics_to_df(mean_metrics))

def compute_essay_accuracy(parser, essays):
    ys_bytag_essay = get_label_data_essay_level(tagged_essays=essays)
    pred_ys_bytag_essay = parser.evaluate_essay_level(tagged_essays=essays, print_classification_report=False)
    mean_metrics = ResultsProcessor.compute_mean_metrics(ys_bytag_essay, pred_ys_bytag_essay)
    return get_micro_metrics(metrics_to_df(mean_metrics))


## Sentence Level Accuracy is Equivalent to Parser Model (Or Very Close) When We Don't Look Across Sentences

In [89]:
parser = DependencyClassifier(sentence_span=0)
parser.train(pred_tagged_essays_train)
pred_ys_bytag_sent = parser.evaluate(pred_tagged_essays_train)
pred_ys_bytag_sent_test = parser.evaluate(pred_tagged_essays_test)

compute_sentence_accuracy(parser, pred_tagged_essays_test)

             precision    recall  f1-score   support

          0       0.97      0.96      0.97      9036
          1       0.89      0.91      0.90      2916

avg / total       0.95      0.95      0.95     11952

             precision    recall  f1-score   support

          0       0.93      0.91      0.92      1868
          1       0.73      0.78      0.75       586

avg / total       0.88      0.88      0.88      2454



Unnamed: 0,accuracy,f1_score,recall,precision
95,0.997892,0.691275,0.651899,0.735714


In [90]:
compute_essay_accuracy(parser, pred_tagged_essays_test)

Unnamed: 0,accuracy,f1_score,recall,precision
95,0.985948,0.723973,0.69287,0.758


In [1022]:
parser2 = DependencyClassifier(sentence_span=2)
parser2.train(pred_tagged_essays_train)
pred_ys_bytag_sent2 = parser2.evaluate(pred_tagged_essays_train)
pred_ys_bytag_sent_test2 = parser2.evaluate(pred_tagged_essays_test)

compute_sentence_accuracy(parser2, pred_tagged_essays_test)

             precision    recall  f1-score   support

          0       0.97      0.98      0.97     44627
          1       0.85      0.80      0.82      7487

avg / total       0.95      0.95      0.95     52114

             precision    recall  f1-score   support

          0       0.94      0.94      0.94      9280
          1       0.61      0.60      0.60      1484

avg / total       0.89      0.89      0.89     10764



Unnamed: 0,accuracy,f1_score,recall,precision
95,0.993337,0.435711,0.710443,0.314206


In [1023]:
compute_essay_accuracy(parser2, pred_tagged_essays_test)

Unnamed: 0,accuracy,f1_score,recall,precision
95,0.980891,0.6661,0.716636,0.622222


## Prepare Training Data (Predictions from First Model)

In [1025]:
essay2crels_train = essay_to_crels(pred_tagged_essays_train)
essay2crels_test  = essay_to_crels(pred_tagged_essays_test)

In [1063]:
probs_train = parser.predict_probability(pred_tagged_essays_train)
probs_test  = parser.predict_probability(pred_tagged_essays_test)

In [1064]:
probs_train2 = parser2.predict_probability(pred_tagged_essays_train)
probs_test2  = parser2.predict_probability(pred_tagged_essays_test)

# Train Structured Perceptron

In [1353]:
from collections import defaultdict
import pickle

from collections import defaultdict
import pickle

class StructuredPerceptron(object):
    '''A structured perceptron, as implemented by Matthew Honnibal.
    See more implementation details here:
        http://honnibal.wordpress.com/2013/09/11/a-good-part-of-speechpos-tagger-in-about-200-lines-of-python/
    '''

    def __init__(self, learning_rate=0.3, max_update_items=1):
        # Each feature gets its own weight
        # needs to be non zero otherwise first
        self.weights = defaultdict(lambda : 1.0)
        self.learning_rate = learning_rate
        # The accumulated values, for the averaging. These will be keyed by
        # feature/clas tuples
        self._totals = defaultdict(int)
        # The last time the feature was changed, for the averaging. Also
        # keyed by feature/clas tuples
        # (tstamps is short for timestamps)
        self._tstamps = defaultdict(int)
        # Number of instances seen
        self.i = 0
        # how many items do we use to update the weights?
        self.max_update_items = max_update_items

    def clone(self):
        p = StructuredPerceptron(self.learning_rate)
        p.weights.update(self.weights)
        p._totals.update(self._totals)
        p._tstamps.update(self._tstamps)
        p.i = self.i
        return p

    def rank(self, features_array, existence_check=True):
        '''Dot-product the features and current weights and return the best label.'''
        scores2index = {}
        for i, feats in enumerate(features_array):
            scores2index[i] = self.decision_function(feats, existence_check)
        # return a ranking of the scores, by best to worse
        return [ix for ix, score in sorted(scores2index.items(), key=lambda tpl: -tpl[-1])]

    def decision_function(self, features, existence_check=True):
        '''Dot-product the features and current weights and return the score.'''
        score = 0.0
        for feat, value in features.items():
            if value == 0:
                continue
            if existence_check and feat not in self.weights:
                continue
            score += self.weights[feat] * value
        return score

    def train(self, best_feats, other_feats_array):
        feats_array = [best_feats] + list(other_feats_array)
        ixs = self.rank(feats_array, existence_check=False)

        # go thru up to |max_update_items| items ranked above the best, and update the weights
        best_ix = ixs[0]
        if best_ix != 0:
            for rank, ix in enumerate(ixs):
                # don't update items ranked below the best parse
                if ix == 0 or rank >= self.max_update_items:
                    break

                self.update(best_feats=best_feats, highest_ranked_feats=feats_array[ix])

    def __upd_feat__(self, feat, val):
        w = self.weights[feat]
        # update the totals by the number of timestamps the current value has survived * val
        self._totals[feat] += (self.i - self._tstamps[feat]) * w
        # store latest update timestamp
        self._tstamps[feat] = self.i
        # finally, update the current weight
        self.weights[feat] = w + (self.learning_rate * val)

    def update(self, best_feats, highest_ranked_feats):
        '''Update the feature weights.'''

        self.i += 1
        for feat, weight in self.weights.items():
            val = best_feats[feat] - highest_ranked_feats[feat]
            self.__upd_feat__(feat, val)
        return None

    def average_weights(self):
        '''Average weights from all iterations.'''
        new_feat_weights = defaultdict(float)
        for feat, weight in self.weights.items():
            total = self._totals[feat]
            total += (self.i - self._tstamps[feat]) * weight
            averaged = round(total / float(self.i), 5)
            if averaged != 0.0:
                new_feat_weights[feat] = averaged
        self.weights = new_feat_weights
        return None

    def save(self, path):
        '''Save the pickled model weights.'''
        return pickle.dump(dict(self.weights), open(path, 'w'))

    def load(self, path):
        '''Load the pickled model weights.'''
        self.weights = pickle.load(open(path))
        return None

class CostSensitiveStructuredPerceptron(StructuredPerceptron):
    def __init__(self, *args, **kwargs):
        super(CostSensitiveStructuredPerceptron, self).__init__(*args, **kwargs)

    def train(self, best_feats, other_feats_array, other_costs_array):
        
        feats_array = [best_feats] + list(other_feats_array)
        costs_array = [0] + other_costs_array
        ixs = self.rank(feats_array, existence_check=False)

        # go thru up to |max_update_items| items ranked above the best, and update the weights
        best_ix = ixs[0]
        if best_ix != 0:
            for rank, ix in enumerate(ixs):
                # don't update items ranked below the best parse
                if ix == 0 or rank >= self.max_update_items:
                    break

                self.update(best_feats=best_feats, 
                            highest_ranked_feats=feats_array[ix], highest_ranked_cost=costs_array[ix])

    def update(self, best_feats, highest_ranked_feats, highest_ranked_cost):
        '''Update the feature weights.'''

        self.i += 1
        for feat, weight in self.weights.items():
            val = (best_feats[feat] - highest_ranked_feats[feat]) * highest_ranked_cost
            self.__upd_feat__(feat, val)
        return None

In [1030]:
from pprint import pprint

# look at the number of predicted items
lens = []
for ename, lst in probs_train.items():
    crels = set()
    for fts, p in lst:
        crels.add(fts.crel)
    lens.append(len(crels))

np.mean(lens), np.median(lens), np.max(lens), np.percentile(lens, 75)

(4.204109589041096, 4.0, 16, 6.0)

In [1031]:
from itertools import combinations

def get_all_combos(items):
    # enforces a consistent ordering for the resulting tuples
    items = sorted(items) 
    cbos = [()] # seed with the empty combo
    for i in range(1, len(items)+1):
        cbos.extend(combinations(items,i))
    return cbos

cbos = get_all_combos([3,2,1])
print(len(cbos)) # 2**len(items)-1
if len(cbos) < 1000:
    for cbo in sorted(cbos, key = lambda l: (len(l), l)):
        print(cbo)

8
()
(1,)
(2,)
(3,)
(1, 2)
(1, 3)
(2, 3)
(1, 2, 3)


In [1111]:
def sample_top_parses(crel2maxprobs, top_n):

    max_parses = 2**len(crel2maxprobs) # maximum parse combinations
    assert max_parses > top_n, (max_parses, top_n) # otherwise brute force it

    top_parses = set([()]) # always seed with the empty parse
    probs = []
    while len(top_parses) < top_n:
        new_parse = []
        for crel, prob in crel2maxprobs.items():
            rand_val = np.random.random() # random number >= 0 and < 1
            if rand_val < prob:
                new_parse.append(crel)
        # make hashable and enforce consistent order
        top_parses.add(tuple(sorted(new_parse)))
    
    return list(top_parses)

def get_top_parses(crel2maxprobs, threshold=0.5):

    top_parse = [crel for crel, prob in crel2maxprobs.items() if prob >= threshold]
    if top_parse:
        return [tuple(sorted(top_parse))]
    else:
        return [()]

crel_probs = {
    "1->2":   0.8,
    "2->3":   0.01,
    "5->8":   0.25,
    "10->12": 0.75,
    "12->50": 0.99,
}

# important - should see a lot more of the more probable codes
sample_top_parses(crel_probs, 8)

[('12->50',),
 ('1->2', '10->12', '12->50', '5->8'),
 ('10->12', '12->50'),
 ('10->12', '12->50', '5->8'),
 ('1->2', '12->50', '5->8'),
 (),
 ('1->2', '12->50'),
 ('1->2', '10->12', '12->50')]

In [1112]:
from NgramGenerator import compute_ngrams

def to_short_tag(tag):
    return tag.replace("Causer:","").replace("Result:", "")

def build_chains_inner(tree, l, visited, depth=0):
    chains = []
    if l not in tree:
        return chains
    for r in tree[l]:
        if r in visited:
            continue
        visited.add(r) # needed to prevent cycles, which cause infinite recursion
        extensions = build_chains_inner(tree, r, visited, depth+1)
        visited.remove(r)
        for ch in extensions:
            chains.append([r] + ch)
        if not extensions:
            chains.append([r])
    return chains

def build_chains(tree):    
    lhs_items = set(tree.keys())
    rhs_items = set()
    for l,rhs in tree.items():        
        rhs_items.update(rhs)
    
    chains = []
    # starting positions of each chain are those appearing on the lhs but not the rhs
    start_codes = lhs_items - rhs_items    
    for l in start_codes:
        rhs = tree[l]
        for r in rhs:
            for ch in build_chains_inner(tree, r, {l,r}, 0):
                chains.append([l,r] + ch)
    return chains

def extend_chains(chains):
    ext_chains = set()
    for tokens in chains:
        ext_chains.add(",".join(tokens))
        ngrams = compute_ngrams(tokens,max_len=None, min_len=3)
        for t in ngrams:
            ext_chains.add(",".join(t))
    return ext_chains

def extract_features_from_parse(parse, crel2probs):
    
    feats = defaultdict(float)
    tree = defaultdict(set) # maps causers to effects for building chains
    max_probs = []    
    code_tally = defaultdict(float)
    
    pairs = set()
    inverted_count = 0
    for crel in parse:
        probs = crel2probs[crel]
        max_p = max(probs)
        max_probs.append(max_p)
        feats["{crel}-MAX(prob)".format(crel=crel)] = max_p
        feats["{crel}-MIN(prob)".format(crel=crel)] = min(probs)
        feats["{crel}-pred-count".format(crel=crel)] = len(probs)
        feats["{crel}-pred-count={count}".format(crel=crel, count=len(probs))] = 1
        
        # with type
        l,r = crel.split("->")
        code_tally[l] +=1
        code_tally[r] +=1
        
        # without type
        l_short, r_short = to_short_tag(l), to_short_tag(r)
        code_tally[l_short] +=1
        code_tally[r_short] +=1
        # ordering of the codes, ignoring the causal direction
        feats[l_short + ":" + r_short] = 1
        
        # build tree structure so we can retrieve the chains
        tree[l_short].add(r_short)
        
        # track whether the rule exists in the opposite direction
        pairs.add((l_short,r_short))
        if (r_short,l_short) in pairs:
            inverted_count += 1
            
    if inverted_count:
        feats["inverted"] = 1
        feats["num_inverted"] = inverted_count
    else:
        feats["not_inverted"] = 1
    
    # counts
    feats.update(code_tally)
    num_crels = len(parse)
    feats["num_crels"] = num_crels
    feats["num_crels="+str(len(parse))] = 1 # includes a tag for the empty parse
    for i in range(1,11):
        if num_crels <= i:
            feats["num_crels<={i}".format(i=i)] = 1
        else:
            feats["num_crels>{i}".format(i=i)] = 1
        
    # combination of crels
    # need to sort so that order of a and b is consistent across parses
    pairs = combinations(sorted(parse), r=2)
    for a, b in pairs:
        feats["{a}|{b}".format(a=a, b=b)] = 1
        
    #chains
    causer_chains = extend_chains(build_chains(tree))
    for ch in causer_chains:
        feats["CChain:" + ch] = 1
    
    if max_probs: # might be an empty parse
        for cutoff in [0.2, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95]:
            above =  len([p for p in max_probs if p >=cutoff])
            feats["Above-{cutoff}".format(cutoff=cutoff)] = above
            feats["%-Above-{cutoff}".format(cutoff=cutoff)] = above/len(max_probs)
            if above == len(max_probs):
                feats["All-Above-{cutoff}".format(cutoff=cutoff)] = 1
        
        feats["avg-prob"] = np.mean(max_probs)
        feats["med-prob"] = np.median(max_probs)
        feats["prod-prob"]= np.product(max_probs)
        feats["min-prob"] = np.min(max_probs)
        feats["max-prob"] = np.max(max_probs)
        for p in [5, 10, 25, 75, 90, 95]:
            feats["{p}%-prob".format(p=p)] = np.percentile(max_probs, p)
        # geometric mean
        feats["geo-mean"] = np.prod(max_probs)**(1/len(max_probs))
    return feats

def additional_features(parse, feats_input):
    #TODO - ratio of number of concept codes to number of relations
    #TODO - average, min and max word distance between codes in a relation
    #TODO - average, min and sentence distance between codes in a relation
    pass

In [1210]:
def evaluate_ranker(model, xs, essay2crels, ys_bytag):
    clone = model.clone()
    if hasattr(model, "average_weights"):
        clone.average_weights()
    rank_acc = []
    pred_ys_bytag = defaultdict(list)
    ename2inps = dict()
    for parser_input in xs:
        ename2inps[parser_input.essay_name] = parser_input
    
    for ename, act_crels in essay2crels.items():        
        if ename not in ename2inps:
            # no predicted crels for this essay
            highest_ranked = set()
        else:
            parser_input = ename2inps[ename]
            ixs = clone.rank(parser_input.all_feats_array)
            highest_ranked = parser_input.all_parses[ixs[0]] # type: Tuple[str]        
            rank_acc.append(1 if highest_ranked == parser_input.opt_parse else 0)
            
        add_cr_labels(set(highest_ranked), pred_ys_bytag)

    mean_metrics = ResultsProcessor.compute_mean_metrics(ys_bytag, pred_ys_bytag)
    df = get_micro_metrics(metrics_to_df(mean_metrics))
    df["rank_acc"] = np.mean(rank_acc)
    return df

# straw man model - use predicted parse instead
def eval_use_best_parse(xs, essay2crels, prob_threshold=0.5):
    preds = []
    for parser_inp in xs:
        act_crels = essay2crels[parser_inp.essay_name]
        crel2maxprob = defaultdict(float)
        for crel, probs in parser_inp.crel2probs.items():
            crel2maxprob[crel] = max(probs)

        pred_crels = [crel for crel, p in crel2maxprob.items() if p >= prob_threshold]
        parse = tuple(sorted(pred_crels))
        opt_parse = tuple(sorted(act_crels.intersection(crel2maxprob.keys())))
        preds.append(1 if opt_parse == parse else 0)
    return np.mean(preds)

## Extract Features

In [1366]:
def compute_costs(parser_input):
    opt_parse = parser_input.opt_parse
    other_parses = parser_input.other_parses

    other_costs = []
    op = set(opt_parse)
    for p in other_parses:
        p = set(p)
        fp = p - op
        fn = op - p
        cost = len(fp) + len(fn)
        other_costs.append(cost)
    return other_costs

class ParserInputs(object):
    def __init__(self, essay_name, opt_parse, all_parses, crel2probs, dep_features):
        self.essay_name = essay_name
        self.opt_parse = opt_parse
        self.opt_features = extract_features_from_parse(opt_parse, crel2probs)
        self.crel2probs = crel2probs
        
        other_parses = []
        other_feats_array = []
        all_feats_array = []
        for p in all_parses:
            feats = extract_features_from_parse(p, crel2probs)
            all_feats_array.append(feats)
            if p != opt_parse:
                other_parses.append(p)
                other_feats_array.append(feats)
        
        self.all_parses = all_parses
        self.all_feats_array = all_feats_array
        self.other_parses = other_parses
        self.other_features_array = other_feats_array
        self.dep_features = dep_features
        self.other_costs_array = compute_costs(self)

def to_freq_feats(feats, freq_feats):
    new_feats = defaultdict(float)
    for f, v in feats.items():
        if f in freq_feats:
            new_feats[f] = v
    return new_feats

def filter_by_min_freq(xs, feat_freq, min_freq):
    if min_freq <= 1:
        return xs
    freq_feats = set((f for f, cnt in feat_freq.items() if cnt >= min_freq))
    for parser_input in xs:
        parser_input.opt_features = to_freq_feats(parser_input.opt_features, freq_feats)
        parser_input.other_features_array = [to_freq_feats(x, freq_feats)
                                             for x in parser_input.other_features_array]
    return xs

def get_features_from_probabilities(probs, essay2lbls, top_n, min_feat_freq=1):
    xs = []
    feat_freq = defaultdict(int)
    
    for ename, lst in probs.items():
        
        act_crels = essay2lbls[ename]
        crel2probs = defaultdict(list)
        crel2maxprob = defaultdict(float)
        dep_features = defaultdict(list)
        for fts, prob in lst:
            crel2probs[fts.crel].append(prob)
            crel2maxprob[fts.crel] = max(crel2maxprob[fts.crel], prob)
            dep_features[fts.crel].append((prob, fts))
        
        crel2probs = dict(crel2probs)
        crel2maxprob = dict(crel2maxprob)

        num_crels = len(crel2probs)
        max_parses = 2 ** num_crels
        if max_parses > 2 * top_n:
            #parses = sample_top_parses(crel2maxprob, top_n)
            #parses.extend(get_top_parses(crel2maxprob))  # just get the predicted parses (probability >= 0.5)
            parses = get_top_parses(crel2maxprob)  # just get the predicted parses (probability >= 0.5)
        else:
            # brute force it
            parses = get_all_combos(crel2probs.keys())

        # constrain optimal parse to only those crels that are predicted
        opt_parse = tuple(sorted(act_crels.intersection(crel2probs.keys())))
        xs.append(ParserInputs(essay_name=ename, opt_parse=opt_parse, all_parses=parses, crel2probs=crel2probs, dep_features=dep_features))

        # Get unique features for essay
        all_feats = set(opt_feats.keys())
        for fts in feats_array:
            all_feats.update(fts.keys())

        for ft in all_feats:
            feat_freq[ft] += 1

    assert len(xs) == len(probs), "Parses for all essays should be generated"
    return filter_by_min_freq(xs, feat_freq, min_feat_freq)

In [1367]:
xs_train = get_features_from_probabilities(probs_train, essay2crels_train, top_n=500, min_feat_freq=1) # better with min feat freq of 11

In [1368]:
xs_test  = get_features_from_probabilities(probs_test,  essay2crels_test,  top_n=500, min_feat_freq=1)

In [1371]:
for pi in xs_train:
    assert pi.essay_name in essay2crels_train

In [1372]:
for pi in xs_test:
    assert pi.essay_name in essay2crels_test

## Train Re-Ranker - Early Stopping

In [1393]:
from numpy.random import shuffle

def train_instance(parser_input, model):
    model.train(best_feats=parser_input.opt_features, other_feats_array=parser_input.other_features_array)

def train_cost_sensitive_instance(parser_input, model):
    model.train(best_feats=parser_input.opt_features, 
                other_feats_array=parser_input.other_features_array, other_costs_array=parser_input.other_costs_array)
        
def train_model(model, xs_train, xs_test, essay2crels_train, essay2crels_test, max_epochs=30, early_stop_iters=8, train_instance_fn=train_instance):
    test_accs = [-1]
    best_model = None
    best_test_accuracy = None
    num_declining_acc = 0

    xs_train_copy = list(xs_train)    
    for i in range(max_epochs):
        shuffle(xs_train_copy)
        for parser_input in xs_train_copy:
            if len(parser_input.other_parses) > 0:
                train_instance_fn(parser_input, model)

        train_accuracy_df = evaluate_ranker(model, xs_train, essay2crels_train, ys_by_tag_train)
        test_accuracy_df  = evaluate_ranker(model, xs_test,  essay2crels_test,  ys_by_tag_test)
        train_accuracy = train_accuracy_df.iloc[0].to_dict()["f1_score"]
        test_accuracy  = test_accuracy_df.iloc[0].to_dict()["f1_score"]
        print("Epoch: {epoch} Train Accuracy: {train_acc:.4f} Test Accuracy: {test_acc:.4f}".format(
            epoch=i,  train_acc=train_accuracy, test_acc=test_accuracy))
        if test_accuracy > max(test_accs):
            best_model = model.clone()
            best_test_accuracy = test_accuracy_df
            num_declining_acc = 0
        else:
            num_declining_acc += 1
            if num_declining_acc >= early_stop_iters:
                break
        test_accs.append(test_accuracy)
    print("Best Test Acc: {acc:.4f}".format(acc=max(test_accs)))
    return best_model, best_test_accuracy

In [1311]:
# compute what is achieved if using only most probably crels
eval_use_best_parse(xs_train, essay2crels_train), eval_use_best_parse(xs_test, essay2crels_test, prob_threshold=0.5)

(0.6287671232876713, 0.45454545454545453)

In [1382]:
ys_by_tag_train = get_label_data_essay_level(pred_tagged_essays_train)
ys_by_tag_test  = get_label_data_essay_level(pred_tagged_essays_test)

In [1385]:
model = StructuredPerceptron(learning_rate=0.1, max_update_items=1) # best if learning_rate = 0.1 (0.05 also works well)
best_model, test_acc_df = train_model(model, xs_train=xs_train, xs_test=xs_test, 
        essay2crels_train=essay2crels_train, essay2crels_test=essay2crels_test, 
        max_epochs=30, early_stop_iters=10)

Epoch: 0 Train Accuracy: 0.7700 Test Accuracy: 0.7232
Epoch: 1 Train Accuracy: 0.7766 Test Accuracy: 0.7290
Epoch: 2 Train Accuracy: 0.7828 Test Accuracy: 0.7266
Epoch: 3 Train Accuracy: 0.7871 Test Accuracy: 0.7269
Epoch: 4 Train Accuracy: 0.7906 Test Accuracy: 0.7285
Epoch: 5 Train Accuracy: 0.7940 Test Accuracy: 0.7292
Epoch: 6 Train Accuracy: 0.7965 Test Accuracy: 0.7294
Epoch: 7 Train Accuracy: 0.7988 Test Accuracy: 0.7280
Epoch: 8 Train Accuracy: 0.7993 Test Accuracy: 0.7311
Epoch: 9 Train Accuracy: 0.8002 Test Accuracy: 0.7324
Epoch: 10 Train Accuracy: 0.8026 Test Accuracy: 0.7324
Epoch: 11 Train Accuracy: 0.8038 Test Accuracy: 0.7318
Epoch: 12 Train Accuracy: 0.8050 Test Accuracy: 0.7318
Epoch: 13 Train Accuracy: 0.8074 Test Accuracy: 0.7313
Epoch: 14 Train Accuracy: 0.8085 Test Accuracy: 0.7306
Epoch: 15 Train Accuracy: 0.8086 Test Accuracy: 0.7306
Epoch: 16 Train Accuracy: 0.8089 Test Accuracy: 0.7301
Epoch: 17 Train Accuracy: 0.8100 Test Accuracy: 0.7308
Epoch: 18 Train Accu

In [1386]:
test_acc_df

Unnamed: 0,accuracy,f1_score,recall,precision,rank_acc
95,0.986531,0.732367,0.69287,0.776639,0.465241


In [1387]:
best_model.average_weights()

In [1388]:
sorted(best_model.weights.items(), key = lambda tpl: -abs(tpl[1]))[0:10]

[('num_crels', -5.7953),
 ('num_crels<=7', 2.83349),
 ('num_crels<=8', 2.55895),
 ('num_crels<=6', 2.41824),
 ('num_crels<=9', 1.95066),
 ('num_crels<=5', 1.87638),
 ('num_crels>1', 1.75379),
 ('Above-0.2', -1.66547),
 ('Causer:7->Result:50-pred-count=1', 1.64672),
 ('num_crels>2', 1.62354)]

In [1389]:
sorted(best_model.weights.items(), key = lambda tpl: abs(tpl[1]))[0:10]

[('Causer:3->Result:5-pred-count', -0.00645),
 ('4', -0.01951),
 ('6', 0.02208),
 ('%-Above-0.7', -0.0223),
 ('num_crels>9', 0.04934),
 ('Result:7', -0.06707),
 ('All-Above-0.9', 0.08252),
 ('Above-0.9', 0.08303),
 ('12', 0.09229),
 ('5b', -0.09333)]

## Cost Sensitive Perceptron

In [1394]:
model = CostSensitiveStructuredPerceptron(learning_rate=0.1, max_update_items=1) #  update_items = 1 is best
best_model, test_acc_df = train_model(model, xs_train=xs_train, xs_test=xs_test, 
        essay2crels_train=essay2crels_train, essay2crels_test=essay2crels_test, 
        max_epochs=30, early_stop_iters=10, train_instance_fn = train_cost_sensitive_instance)

Epoch: 0 Train Accuracy: 0.7915 Test Accuracy: 0.7294
Epoch: 1 Train Accuracy: 0.7970 Test Accuracy: 0.7255
Epoch: 2 Train Accuracy: 0.7999 Test Accuracy: 0.7336
Epoch: 3 Train Accuracy: 0.8051 Test Accuracy: 0.7248
Epoch: 4 Train Accuracy: 0.8063 Test Accuracy: 0.7237
Epoch: 5 Train Accuracy: 0.8072 Test Accuracy: 0.7207
Epoch: 6 Train Accuracy: 0.8083 Test Accuracy: 0.7257
Epoch: 7 Train Accuracy: 0.8110 Test Accuracy: 0.7220
Epoch: 8 Train Accuracy: 0.8113 Test Accuracy: 0.7227
Epoch: 9 Train Accuracy: 0.8130 Test Accuracy: 0.7237
Epoch: 10 Train Accuracy: 0.8128 Test Accuracy: 0.7230
Epoch: 11 Train Accuracy: 0.8133 Test Accuracy: 0.7230
Epoch: 12 Train Accuracy: 0.8148 Test Accuracy: 0.7237
Best Test Acc: 0.7336


# ALMA
- See p 175 - 176 of my structured learning book

In [1395]:
from collections import defaultdict
import pickle

class ALMA(object):
    ''' ALMA Algorithm - see pages 175-176 in my structured learning book
    '''

    def __init__(self, features, alpha=1.0, B=None, C=2**0.5):
        # Each feature gets its own weight
        # needs to be non zero otherwise first
        if B is None:
            B = 1/alpha
        self.C = C
        self.B = B
        self.alpha = alpha
        self.features = features
        self.weights = self.proj(dict([(f,1) for f in features]))
        self.k = 1

    def l2_norm(self, weights):
        return sum((v ** 2 for v in weights.values())) ** 0.5

    def to_unitl2_norm(self, fts):
        if type(fts) == dict or type(fts) == defaultdict:
            norm = self.l2_norm(fts)
            return self.update_dict(fts, norm)
        elif type(fts) == list:
            a = []
            for item in fts:
                a.append(self.to_unitl2_norm(item))
            return a
        else:
            raise Exception("Unexpected type: " + str(type(fts)))

    def proj(self, weights):
        l2_n = self.l2_norm(weights)
        denom = max(1, l2_n)
        return self.update_dict(weights, denom)

    def update_dict(self, dct, denom):
        u = defaultdict(float)
        for k, v in dct.items():
            u[k] = v / denom
        return u

    def clone(self):
        clone = ALMA(self.features)
        clone.weights.update(self.weights)
        clone.k = self.k
        return clone

    def rank(self, features_array):
        normed_array = self.to_unitl2_norm(features_array)
        '''Dot-product the features and current weights and return the best label.'''
        scores2index = {}
        for i, feats in enumerate(normed_array):
            scores2index[i] = self.decision_function(feats)
        # return a ranking of the scores, by best to worse
        return [ix for ix, score in sorted(scores2index.items(), key=lambda tpl: -tpl[-1])]

    def decision_function(self, features):
        '''Dot-product the features and current weights and return the score.'''
        score = 0.0
        for feat, value in features.items():
            if value == 0 or feat not in self.features:
                continue
            score += self.weights[feat] * value
        return score

    def weight_product(self, features):
        '''Dot-product the features and current weights and return the score.'''
        prod = defaultdict(float)
        for feat, value in features.items():
            if value == 0:
                continue
            prod[feat] = self.weights[feat] * value
        return prod

    def add_dicts(self, d1, d2):
        for k,v in d2.items():
            d1[k] += v
        return d1

    def train(self, best_feats, other_feats_array):

        feats_array = [best_feats] + list(other_feats_array)
        ixs = self.rank(feats_array)

        # go thru up to |max_update_items| items ranked above the best, and update the weights
        best_ix = ixs[0]
        if best_ix == 0:
            return

        other_feats_array = [feats_array[best_ix]]
        best_fts_prod = self.weight_product(self.to_unitl2_norm(best_feats))
        num_other_feats = len(other_feats_array)
        assert num_other_feats > 0
        # total up other features vectors
        other_feats_product = defaultdict(float)
        for fts in other_feats_array:
            product = self.weight_product(self.to_unitl2_norm(fts))
            self.add_dicts(other_feats_product, product)

        delta = dict()
        # normalize by the number of other features
        for feat in self.features:
            delta[feat] = best_fts_prod[feat] - (other_feats_product[feat] / num_other_feats) # need to normalize the other feats value

        proj_delta = self.proj(delta)
        new_weights = defaultdict(float)
        for ft in self.features:
            new_weights[ft] = self.weights[ft] + (self.C * self.k**-0.5 * proj_delta[ft])
        new_weights = self.proj(new_weights)
        self.weights = new_weights
        self.k += 1
        
    def save(self, path):
        '''Save the pickled model weights.'''
        return pickle.dump(dict(self.weights), open(path, 'w'))

    def load(self, path):
        '''Load the pickled model weights.'''
        self.weights = pickle.load(open(path))
        return None


p = ALMA(features={"a","b","c"})
best = defaultdict(float)
best.update({ "a": 1, "b": 2})

rest = defaultdict(float)
rest.update({"a": -1, "b": 3, "c": 4})

p.train(best, [rest])

In [1326]:
def get_all_feats(xs):
    fts = set()
    for parser_input in xs:
        fts.update(parser_input.opt_features.keys())
        for other_feats in parser_input.other_features_array:
            fts.update(other_feats.keys())
    return fts

In [1327]:
all_feats = get_all_feats(xs_train)

In [1329]:
model = ALMA(features=all_feats)
best_model, test_acc_df = train_model(model, xs_train=xs_train, xs_test=xs_test, 
        essay2crels_train=essay2crels_train, essay2crels_test=essay2crels_test, 
        max_epochs=30, early_stop_iters=10)

Epoch: 0 Train Accuracy: 0.7273 Test Accuracy: 0.7000
Epoch: 1 Train Accuracy: 0.7447 Test Accuracy: 0.7044
Epoch: 2 Train Accuracy: 0.7453 Test Accuracy: 0.7030
Epoch: 3 Train Accuracy: 0.7481 Test Accuracy: 0.7092
Epoch: 4 Train Accuracy: 0.7543 Test Accuracy: 0.7109
Epoch: 5 Train Accuracy: 0.7520 Test Accuracy: 0.7099
Epoch: 6 Train Accuracy: 0.7482 Test Accuracy: 0.7090
Epoch: 7 Train Accuracy: 0.7526 Test Accuracy: 0.7106
Epoch: 8 Train Accuracy: 0.7559 Test Accuracy: 0.7153
Epoch: 9 Train Accuracy: 0.7518 Test Accuracy: 0.7123
Epoch: 10 Train Accuracy: 0.7595 Test Accuracy: 0.7156
Epoch: 11 Train Accuracy: 0.7614 Test Accuracy: 0.7198
Epoch: 12 Train Accuracy: 0.7558 Test Accuracy: 0.7184
Epoch: 13 Train Accuracy: 0.7498 Test Accuracy: 0.7091
Epoch: 14 Train Accuracy: 0.7589 Test Accuracy: 0.7213
Epoch: 15 Train Accuracy: 0.7597 Test Accuracy: 0.7217
Epoch: 16 Train Accuracy: 0.7518 Test Accuracy: 0.7126
Epoch: 17 Train Accuracy: 0.7571 Test Accuracy: 0.7168
Epoch: 18 Train Accu

In [1331]:
test_acc_df

Unnamed: 0,accuracy,f1_score,recall,precision,rank_acc
95,0.986288,0.721893,0.669104,0.783726,0.433155


# MIRA

In [1396]:
from collections import defaultdict
import math
import numpy as np

class MIRA(StructuredPerceptron):
    ''' MIRA Algorithm for multi-class classification as detailed in p 569 of
        http://jmlr.csail.mit.edu/papers/volume7/crammer06a/crammer06a.pdf
    '''

    def __init__(self, C=0.01, max_update_items=1, pa_type=1, initial_weight=1):
        self.C = C
        # Each feature gets its own weight
        # needs to be non zero otherwise first
        assert initial_weight >= 0.0
        self.weights = defaultdict(lambda: initial_weight)
        # The accumulated values, for the averaging. These will be keyed by
        # feature/clas tuples
        self._totals = defaultdict(int)
        # The last time the feature was changed, for the averaging. Also
        # keyed by feature/clas tuples
        # (tstamps is short for timestamps)
        self._tstamps = defaultdict(int)
        # Number of instances seen
        self.i = 0
        # how many items do we use to update the weights?
        self.max_update_items = max_update_items
        self.pa_type = pa_type
        assert self.max_update_items >= 1, "Max update items must be at least 1"
        assert self.pa_type in {0, 1, 2}  # PA I or PA II

        # This isn't used, so set to 1
        self.learning_rate = 1
        self.train_feats = set()

    def clone(self):
        cloney = MIRA(C=self.C, max_update_items=self.max_update_items, pa_type=self.pa_type)
        cloney.weights.update(self.weights)
        cloney._totals.update(self._totals)
        cloney._tstamps.update(self._tstamps)
        cloney.i = self.i
        cloney.train_feats.update(self.train_feats)
        return cloney

    def train(self, best_feats, other_feats_array):
        if len(other_feats_array) == 0:
            return

        best_feats_score = self.decision_function(best_feats, existence_check=False)
        scores = [self.decision_function(feats, existence_check=False) for feats in other_feats_array]
        ixs = np.argsort(scores)[::-1]

        # go thru up to |max_update_items| items ranked above the best, and update the weights
        for rank, ix in enumerate(ixs):
            if rank >= self.max_update_items:
                break
            feats_score = scores[ix]
            diff = best_feats_score - feats_score
            hinge_loss = 0 if diff >= 1 else 1 - diff
            if hinge_loss > 0:
                self.update(loss=hinge_loss, best_feats=best_feats, highest_ranked_feats=other_feats_array[ix])

    def update(self, loss, best_feats, highest_ranked_feats):
        self.i += 1
        feats_union = set(best_feats.keys()).union(highest_ranked_feats.keys())
        sum_sq_diff = 0
        for ft in feats_union:
            sum_sq_diff += (best_feats[ft] - highest_ranked_feats[ft]) ** 2
        l2_norm_of_diffs = (sum_sq_diff ** 0.5)

        if sum_sq_diff == 0 and self.pa_type in {0, 1}:
            tau = self.C
        elif self.pa_type == 0:
            tau = loss / l2_norm_of_diffs
        elif self.pa_type == 1:
            tau = min(self.C, loss / l2_norm_of_diffs)
        else:
            tau = loss / (l2_norm_of_diffs + 1 / (2 * self.C))

        for feat, weight in self.weights.items():
            self.train_feats.add(feat)
            val = tau * (best_feats[feat] - highest_ranked_feats[feat])
            self.__upd_feat__(feat, val)
        return None

class CostSensitiveMIRA(MIRA):

    def __init__(self, C=0.01, max_update_items=1, pa_type=1, loss_type="pb", initial_weight=1):

        assert loss_type in {"pb","ml"}, "Unrecognized loss type: {loss_type}".format(loss_type=loss_type)

        self.loss_type = loss_type
        super(CostSensitiveMIRA, self).__init__(
            C=C, max_update_items=max_update_items, pa_type=pa_type, initial_weight=initial_weight)

    def clone(self):
        cloney = CostSensitiveMIRA(
            C=self.C, max_update_items=self.max_update_items, pa_type=self.pa_type, loss_type=self.loss_type)
        cloney.weights.update(self.weights)
        cloney._totals.update(self._totals)
        cloney._tstamps.update(self._tstamps)
        cloney.i = self.i
        cloney.train_feats.update(self.train_feats)
        return cloney

    def train(self, best_feats, other_feats_array, other_costs_array):

        if len(other_feats_array) == 0:
            return

        best_feats_score = self.decision_function(best_feats, existence_check=False)
        other_feat_scores = [self.decision_function(feats, existence_check=False) for feats in other_feats_array]
        cs_losses = np.asarray(other_feat_scores) - best_feats_score + (np.asarray(other_costs_array) ** 0.5)

        if self.loss_type == "ml":
            ixs = np.argsort(cs_losses)[::-1]
        else:
            ixs = np.argsort(other_feat_scores)[::-1]

        # go thru up to |max_update_items| items ranked above the best, and update the weights
        for rank, ix in enumerate(ixs):
            if rank >= self.max_update_items:
                break

            cost_sensitive_loss = cs_losses[ix]
            if cost_sensitive_loss > 0:
                self.update(loss=cost_sensitive_loss, best_feats=best_feats, highest_ranked_feats=other_feats_array[ix])

In [1399]:
model = MIRA(C=0.01, pa_type=1,  max_update_items=1, initial_weight=1)
best_model, test_acc_df = train_model(model, xs_train=xs_train, xs_test=xs_test, 
        essay2crels_train=essay2crels_train, essay2crels_test=essay2crels_test, 
        max_epochs=30, early_stop_iters=10)

Epoch: 0 Train Accuracy: 0.7613 Test Accuracy: 0.7189
Epoch: 1 Train Accuracy: 0.7676 Test Accuracy: 0.7218
Epoch: 2 Train Accuracy: 0.7686 Test Accuracy: 0.7221
Epoch: 3 Train Accuracy: 0.7704 Test Accuracy: 0.7222
Epoch: 4 Train Accuracy: 0.7742 Test Accuracy: 0.7256
Epoch: 5 Train Accuracy: 0.7774 Test Accuracy: 0.7293
Epoch: 6 Train Accuracy: 0.7781 Test Accuracy: 0.7293
Epoch: 7 Train Accuracy: 0.7778 Test Accuracy: 0.7338
Epoch: 8 Train Accuracy: 0.7791 Test Accuracy: 0.7353
Epoch: 9 Train Accuracy: 0.7796 Test Accuracy: 0.7342
Epoch: 10 Train Accuracy: 0.7821 Test Accuracy: 0.7311
Epoch: 11 Train Accuracy: 0.7845 Test Accuracy: 0.7323
Epoch: 12 Train Accuracy: 0.7868 Test Accuracy: 0.7312
Epoch: 13 Train Accuracy: 0.7871 Test Accuracy: 0.7324
Epoch: 14 Train Accuracy: 0.7869 Test Accuracy: 0.7293
Epoch: 15 Train Accuracy: 0.7893 Test Accuracy: 0.7290
Epoch: 16 Train Accuracy: 0.7904 Test Accuracy: 0.7304
Epoch: 17 Train Accuracy: 0.7919 Test Accuracy: 0.7304
Epoch: 18 Train Accu

In [1400]:
best_model.average_weights()

In [1404]:
best_wts = sorted([(wt,v) 
                   for (wt,v) in best_model.weights.items() 
                   if wt in best_model.train_feats and v != 0.01],
                  key = lambda tpl: -abs(tpl[1]))
best_wts[0:10]

[('num_crels', -4.65007),
 ('num_crels<=7', 2.46899),
 ('num_crels<=6', 2.45953),
 ('num_crels<=5', 2.02019),
 ('num_crels=0', 1.97637),
 ('num_crels<=8', 1.81892),
 ('Above-0.2', -1.79778),
 ('All-Above-0.2', 1.60926),
 ('num_crels=4', 1.53875),
 ('num_crels>1', 1.53757)]

In [1405]:
best_wts[-10:][::-1]

[('Result:50', -0.0144),
 ('13', -0.01768),
 ('num_crels>5', -0.02019),
 ('14', 0.04293),
 ('All-Above-0.9', -0.06717),
 ('All-Above-0.95', 0.0795),
 ('4', 0.10343),
 ('Result:7', 0.10739),
 ('7', -0.11044),
 ('50', -0.13242)]

## Cost Sensitive MIRA

In [None]:
model = CostSensitiveMIRA(C=0.01, pa_type=1, loss_type="pb", max_update_items=1, initial_weight=1)
best_model, test_acc_df = train_model(model, xs_train=xs_train, xs_test=xs_test, 
        essay2crels_train=essay2crels_train, essay2crels_test=essay2crels_test, 
        max_epochs=30, early_stop_iters=10, train_instance_fn = train_cost_sensitive_instance)

Epoch: 0 Train Accuracy: 0.7602 Test Accuracy: 0.7195


In [None]:
model = CostSensitiveMIRA(C=0.01, pa_type=1, loss_type="ml", max_update_items=1, initial_weight=1)
best_model, test_acc_df = train_model(model, xs_train=xs_train, xs_test=xs_test, 
        essay2crels_train=essay2crels_train, essay2crels_test=essay2crels_test, 
        max_epochs=30, early_stop_iters=10, train_instance_fn = train_cost_sensitive_instance)

## Test Chains Logic

In [448]:
parse = (
'Causer:1->Result:2',
'Causer:2->Result:3',
'Causer:3->Result:50',
'Causer:2->Result:4',
'Causer:3->Result:5',
'Causer:5->Result:6',
'Causer:6->Result:50',
'Causer:7->Result:11',
'Causer:11->Result:12',
)

In [450]:
tree = defaultdict(set) # maps causers to effects for building chains
for crel in parse:
    # with type
    l,r = crel.split("->")
    l_short, r_short = to_short_tag(l), to_short_tag(r)
    tree[l_short].add(r_short)

In [458]:
chains = build_chains(tree)
print(len(extend_chains(chains)))
extend_chains(chains)

14


{'1,2,3',
 '1,2,3,5',
 '1,2,3,5,6',
 '1,2,3,5,6,50',
 '1,2,3,50',
 '1,2,4',
 '2,3,5',
 '2,3,5,6',
 '2,3,5,6,50',
 '2,3,50',
 '3,5,6',
 '3,5,6,50',
 '5,6,50',
 '7,11,12'}

## TODO
- If there is a large number of predicted crels, in some cases we only have one optional parse, fix this
  - Instead try picking the top parses greedily, by going from most to least probable
- Add a feature to both models that determines if the crel(s) are same code to same code.
- If both directions of a crel are predicted for the same concept code pair, enforce only the more probable one.
- Do feature normalization using zscore
- Add in additional features for re-ranker
- Do feature selection for both algorithms
- Implement early stopping to judge optimal number of epochs
    - Change code to use this value to retrain on all the data
  

In [1152]:
16**(-0.5)

0.25