In [None]:
import sys
sys.path.append('../code')
import os
from pathlib import Path
import json
import random
import numpy as np
import spacy


from tqdm import tqdm
import pickle
import spacy
import json
import random
import re
import pandas as pd
import numpy as np
from copy import deepcopy
from sklearn import model_selection
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English
from spacy.symbols import ORTH
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import tree
import matplotlib.pyplot as plt
from spacy.language import Language
from luima_sbd import sbd_utils as luima
import math
import fasttext

from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
from sklearn import tree
import random
from joblib import dump, load

# Seed Python's and NumPy's global random number generators so that the
# shuffling and sampling done later in this notebook is repeatable.
random.seed(42)
np.random.seed(42)

%matplotlib inline

## Some necessary functions

In [None]:
def plot_confusion_matrix(y_true, y_pred, classes,
                          title=None,
                          cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map.

    Args:
        y_true: true labels, one per sample.
        y_pred: predicted labels, one per sample.
        classes: class names used as tick labels on both axes.
        title: optional title of the plot.
        cmap: matplotlib colormap used to colour the cells.

    Returns:
        The matplotlib Axes the matrix was drawn on.
    """
    classes = list(classes)
    # Pin the matrix rows/columns to `classes` whenever every observed label is
    # one of them. Without this, a class that is absent from both y_true and
    # y_pred shrinks the matrix and the tick labels no longer line up with it.
    # If the caller passed display names that are not the label values, fall
    # back to the labels inferred by scikit-learn (the previous behaviour).
    observed = set(np.asarray(y_true).tolist()) | set(np.asarray(y_pred).tolist())
    labels = classes if observed.issubset(classes) else None
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    fig, ax = plt.subplots(figsize=(8, 8))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Write each count into its cell; use white text on dark cells.
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], 'd'),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax

In [None]:
def top_tfidf_features(row, features, top_n=15):
    """Return the `top_n` largest tf-idf values in `row` with their feature names.

    Args:
        row: 1-D array of tf-idf values, one per feature.
        features: sequence of feature names indexed like `row`.
        top_n: number of features to return.

    Returns:
        DataFrame with columns 'feature' and 'tfidf', sorted by descending value.
        An empty selection yields an empty frame with the same two columns.
    """
    topn_ids = np.argsort(row)[::-1][:top_n]
    top_feats = [(features[i], row[i]) for i in topn_ids]
    # Passing the column names to the constructor also works when top_feats is empty.
    return pd.DataFrame(top_feats, columns=['feature', 'tfidf'])


def top_features_in_doc(Xtr, features, row_id, top_n=15):
    """Return the top tf-idf features of one document (matrix row `row_id`)."""
    xtr_row = Xtr[row_id]
    if type(xtr_row) is not np.ndarray:
        # Anything that is not a plain ndarray is converted via .toarray().
        xtr_row = xtr_row.toarray()
    row = np.squeeze(xtr_row)
    return top_tfidf_features(row, features, top_n)


def top_mean_features(Xtr, features, grp_ids=None, min_tfidf=0.1, top_n=25):
    """Return the `top_n` features with the highest mean tf-idf value.

    Args:
        Xtr: document-by-feature matrix (ndarray, or an object with .toarray()).
        features: sequence of feature names indexed like the columns of `Xtr`.
        grp_ids: optional row indices identifying the documents to average over;
            None means all rows.
        min_tfidf: values below this threshold count as 0 in the mean.
        top_n: number of features to return.

    Returns:
        DataFrame with columns 'feature' and 'tfidf'.

    `Xtr` is never modified; the threshold is applied to a new array.
    """
    # `is not None` rather than truthiness: a NumPy index array has no
    # unambiguous truth value.
    D = Xtr[grp_ids] if grp_ids is not None else Xtr
    if type(D) is not np.ndarray:
        D = D.toarray()
    # np.where builds a new array, so the caller's matrix is left untouched.
    D = np.where(D < min_tfidf, 0, D)
    tfidf_means = np.mean(D, axis=0)
    return top_tfidf_features(tfidf_means, features, top_n)


def top_features_by_class(Xtr, y, features, min_tfidf=0.1, top_n=25):
    """Return a dict mapping each class label to a DataFrame of its top features.

    Each frame holds `top_n` features and their mean tf-idf value computed
    across the documents carrying that label.
    """
    y = np.asarray(y)  # so that `y == label` is element-wise for list input too
    dfs = {}
    labels = np.unique(y)
    for label in labels:
        ids = np.where(y == label)
        feats_df = top_mean_features(Xtr, features, ids, min_tfidf=min_tfidf, top_n=top_n)
        # Stored as an attribute on the frame object, not as a column.
        feats_df.label = label
        dfs[label] = feats_df
    return dfs


def span_top_tfidf(spans_txt, spans_tfidf, features, index):
    """Print the text of span `index` followed by its top tf-idf features."""
    print('span text:\n'+spans_txt[index]+'\n')
    print(top_features_in_doc(spans_tfidf, features, index))

In [None]:
nlp = spacy.load("en_core_web_sm")
# Register the two reporter abbreviations as tokenizer special cases so that
# each one is kept as a single token instead of being split at the periods.
nlp.tokenizer.add_special_case('Vet. App.', [{ORTH: 'Vet. App.'}])
nlp.tokenizer.add_special_case('Fed. Cir.', [{ORTH: 'Fed. Cir.'}])

def custom_spacy_tokenize(txt):
    """Tokenize `txt` with spaCy and return a list of normalised token strings.

    Normalisation rules, applied per spaCy token in this order:
      * punctuation is dropped, unless the token looks like a dd/dd/yy(yy) date;
      * numbers become the placeholder ``<NUMk>`` where k is the number of
        word characters in the token;
      * a possessive ``'s`` is merged with the preceding token;
      * hyphenated tokens are split at the hyphens and each part is cleaned;
      * the special cases 'Vet. App.' and 'Fed. Cir.' are kept as lowercase lemma;
      * every other token becomes its lowercase lemma stripped of non-word
        characters; tokens mixing digits and letters are dropped.

    Args:
        txt: the text to tokenize.

    Returns:
        List of cleaned token strings.
    """
    # Only disable the parser if it is still active, instead of re-issuing the
    # call for every sentence.
    if 'parser' in nlp.pipe_names:
        nlp.disable_pipes('parser')
    tokens = list(nlp(txt))
    clean_tokens = []
    for i, token in enumerate(tokens):
        if token.pos_ == 'PUNCT' and not re.search("^[0-9]{2}/[0-9]{2}/([0-9]{2}|[0-9]{4})$", token.text):
            # Plain punctuation carries no signal for the classifier.
            continue

        elif token.pos_ == 'NUM':
            refined_token = re.sub(r'\W', '', token.text)
            clean_tokens.append(f'<NUM{len(refined_token)}>')

        elif token.text == "'s" and token.pos_ == 'PART':
            # Merge the possessive marker with the word it belongs to.
            # At i == 0 there is no preceding token (tokens[-1] would be the
            # LAST token of the text), so only the marker itself is emitted.
            previous_text = tokens[i - 1].text if i > 0 else ''
            # Guard the pop: the list is empty when every earlier token was
            # dropped, which used to raise IndexError.
            # NOTE(review): when the preceding token was dropped (or split at
            # hyphens) this still replaces the last *emitted* token; kept as-is
            # so features stay identical to what existing models were built on.
            if clean_tokens:
                clean_tokens.pop()
            clean_tokens.append((previous_text + token.text).lower())

        elif "-" in token.text:
            for sp_token in token.text.split("-"):
                refined_token = re.sub(r'\W', '', sp_token.lower())
                if refined_token != "":
                    if refined_token.isnumeric():
                        refined_token = f'<NUM{len(refined_token)}>'
                    clean_tokens.append(refined_token)

        elif token.text in ("Vet. App.", "Fed. Cir."):
            clean_tokens.append(token.lemma_.lower())

        else:
            refined_token = re.sub(r'\W', '', token.lemma_.lower())
            if re.search(r'\d+', refined_token) and re.search('[a-zA-Z]+', refined_token):
                # Mixed digit/letter tokens are discarded.
                continue
            elif refined_token != "" and refined_token.isnumeric():
                clean_tokens.append(f'<NUM{len(refined_token)}>')
            elif refined_token != "":
                clean_tokens.append(refined_token)

    return clean_tokens

def custom_spans_add_spacy_tokens(spans):
    """Tokenize every span's text in place.

    For each span dict, stores the output of `custom_spacy_tokenize(span['txt'])`
    under 'tokens_spacy' and the number of tokens under 'token_count'.
    Progress is shown with tqdm. Returns None.
    """
    for span in tqdm(spans):
        span_tokens = custom_spacy_tokenize(span['txt'])
        span['tokens_spacy'] = span_tokens
        span['token_count'] = len(span_tokens)

In [None]:
# get all sentences assuming every annotation is a sentence
def make_span_data(documents_by_id, types_by_id, annotations, doc_ids):
    """Build one span record per annotation for the requested documents.

    Args:
        documents_by_id: mapping document id -> document dict with a 'plainText' entry.
        types_by_id: mapping type id -> type dict with a 'name' entry.
        annotations: iterable of annotation dicts with 'document', 'start',
            'end' and 'type' entries.
        doc_ids: iterable of document ids to extract spans for.

    Returns:
        List of dicts with keys 'txt', 'document', 'type', 'start',
        'start_normalized' and 'end'. Spans are ordered by `doc_ids`, and within
        a document by the order of `annotations`.
    """
    # Group the annotations by document once instead of rescanning the whole
    # annotation list for every document id.
    annotations_by_doc = {}
    for a in annotations:
        annotations_by_doc.setdefault(a['document'], []).append(a)

    span_data = []
    for doc_id in doc_ids:
        for a in annotations_by_doc.get(doc_id, ()):
            document_txt = documents_by_id[a['document']]['plainText']
            start, end = a['start'], a['end']
            span_data.append({
                'txt': document_txt[start:end],
                'document': a['document'],
                'type': types_by_id[a['type']]['name'],
                'start': start,
                # Relative position of the span in its document; an empty
                # document would otherwise divide by zero.
                'start_normalized': start / len(document_txt) if document_txt else 0.0,
                'end': end,
            })
    return span_data

## Data loading and preparation

In [None]:
# Loading the JSON file containing the data and the annotations
CURATED_ANN_PATH = "../Data/ldsi_w21_curated_annotations_v2.json"
with open(CURATED_ANN_PATH, 'r') as j:
     data = json.loads(j.read())

# Lookup tables over the three top-level collections of the file
# ('annotations', 'documents', 'types'), keyed by '_id' or by 'name'.
annotations = data['annotations']
documents_by_id = {d['_id']: d for d in data['documents']}
types_by_id = {t['_id']: t for t in data['types']}
type_ids_by_name = {t['name']: t['_id'] for t in data['types']}
type_names_by_id = {t['_id']: t['name'] for t in data['types']}
doc_id_by_name = {d['name']: d['_id'] for d in data['documents']}
doc_name_by_id = {d['_id']: d['name'] for d in data['documents']}

# Partition the document ids by their 'outcome' field.
granted_doc_ids = set([doc['_id'] for doc in data['documents'] if doc['outcome'] == 'granted'])
denied_doc_ids = set([doc['_id'] for doc in data['documents'] if doc['outcome'] == 'denied'])
print(len(granted_doc_ids), len(denied_doc_ids))

# Collect the ids of the documents that carry at least one annotation
# (original note: 141 documents out of a total of 540).
ids_annotated_docs = set([ann['document'] for ann in data['annotations']])
print(len(ids_annotated_docs))

# Keep only annotated documents; the ids are sorted so that the seeded shuffle
# below always starts from the same order.
granted_ids = sorted(list(granted_doc_ids.intersection(ids_annotated_docs)))
denied_ids = sorted(list(denied_doc_ids.intersection(ids_annotated_docs)))
print(len(granted_ids), len(denied_ids))

# Re-seed right before shuffling so the split does not depend on any random
# calls made earlier in the session.
# NOTE(review): assigning PYTHONHASHSEED here does not change string hashing in
# the interpreter that is already running; it would only affect child processes.
os.environ['PYTHONHASHSEED'] = str(42)
random.seed(42)
np.random.seed(42)


random.shuffle(granted_ids)
random.shuffle(denied_ids)
# Granted documents are cut at 80% and 90% of their count (train / dev / test).
granted_train, granted_val, granted_test = np.split(granted_ids, [int(len(granted_ids)*0.8), int(len(granted_ids)*0.9)])
# Denied documents are cut at the fixed positions 57 and 64.
# NOTE(review): presumably chosen to approximate the same 80/10/10 proportions
# for the denied set - confirm against the printed counts above.
denied_train, denied_val, denied_test = np.split(denied_ids, [57, 64])

# Each final split is the concatenation of its granted and denied part.
train_set, dev_set, test_set = np.concatenate((granted_train, denied_train), axis=0), \
                                np.concatenate((granted_val, denied_val), axis=0), \
                                    np.concatenate((granted_test, denied_test), axis=0), \

print(train_set.shape, dev_set.shape, test_set.shape)

In [None]:
# Use the freshly computed split as the document ids for the rest of the notebook.
train_ids = train_set
dev_ids = dev_set
test_ids = test_set
(train_ids.shape, dev_ids.shape, test_ids.shape)

In [None]:
# # Loading the train, dev and test ids saved in phase 1
# train_ids, dev_ids, test_ids = np.load('../Data/train.npy'), np.load('../Data/dev.npy'), np.load('../Data/test.npy')
# train_ids.shape, dev_ids.shape, test_ids.shape

In [None]:
# Build the span records of the train, dev and test documents.
train_spans, dev_spans, test_spans = (
    make_span_data(documents_by_id, types_by_id, annotations, split_ids)
    for split_ids in (train_ids, dev_ids, test_ids)
)

# Raw text of every span, per split.
train_spans_txt, dev_spans_txt, test_spans_txt = (
    [span['txt'] for span in split_spans]
    for split_spans in (train_spans, dev_spans, test_spans)
)

# Type label of every span, per split, as NumPy arrays.
train_spans_labels, test_spans_labels, dev_spans_labels = (
    np.array([span['type'] for span in split_spans])
    for split_spans in (train_spans, test_spans, dev_spans)
)

# Sanity check: span counts and text counts per split.
(len(train_spans), len(dev_spans), len(test_spans),
 len(train_spans_txt), len(dev_spans_txt), len(test_spans_txt))

In [None]:
# Tokenize the spans of every split in place: each span dict gains a
# 'tokens_spacy' list and a 'token_count' next to its existing fields.
custom_spans_add_spacy_tokens(train_spans)
custom_spans_add_spacy_tokens(dev_spans)
custom_spans_add_spacy_tokens(test_spans)

### Create TFIDF feature vectors

In [None]:
# def make_tfidf_feature_vectors_and_labels(spans, vectorizer, train_mean, train_std):
#     # function takes long to execute
#     # note: we un-sparse the matrix here to be able to manipulate it
    
#     df = pd.DataFrame([s['token_count'] for s in spans])
#     df.columns = ['token_count']
# #     token_count_mean, token_count_std = df['token_count'].mean(), df['token_count'].std()
#     token_count_mean, token_count_std = train_mean, train_std

#     tfidf = vectorizer.transform([s['txt'] for s in spans]).toarray()
#     starts_normalized = np.array([s['start_normalized'] for s in spans])
#     token_count_normalized = np.array([(s['token_count']-token_count_mean)/token_count_std for s in spans])

#     y = np.array([s['type'] for s in spans])
#     X = np.concatenate((tfidf, np.expand_dims(starts_normalized, axis=1), np.expand_dims(token_count_normalized, axis=1)), axis=1)
#     return X, y

In [None]:
# spacy_tfidf_vectorizer = TfidfVectorizer(tokenizer=custom_spacy_tokenize,
#                                          min_df=3,
#                                          ngram_range=(1,1))

# spacy_tfidf_vectorizer = spacy_tfidf_vectorizer.fit(train_spans_txt)

In [None]:
# # Extend by adding a single float variable representing the number of tokens in the sentence, normalized
# # by subtracting the mean and dividing by the standard deviation of the sentence
# # token counts in the training data

# df = pd.DataFrame([s['token_count'] for s in train_spans])
# df.columns = ['token_count']
# train_token_count_mean, train_token_count_std = df['token_count'].mean(), df['token_count'].std()

# print(train_token_count_mean, train_token_count_std)

In [None]:
# train_X_tfidf, train_y_tfidf = make_tfidf_feature_vectors_and_labels(train_spans, spacy_tfidf_vectorizer, train_token_count_mean, train_token_count_std)
# dev_X_tfidf, dev_y_tfidf = make_tfidf_feature_vectors_and_labels(dev_spans, spacy_tfidf_vectorizer, train_token_count_mean, train_token_count_std)
# test_X_tfidf, test_y_tfidf = make_tfidf_feature_vectors_and_labels(test_spans, spacy_tfidf_vectorizer, train_token_count_mean, train_token_count_std)

# print(f'{train_X_tfidf.shape} {train_y_tfidf.shape}')
# print(f'{dev_X_tfidf.shape} {dev_y_tfidf.shape}')
# print(f'{test_X_tfidf.shape} {test_y_tfidf.shape}')

### Create Word embedding vectors

In [None]:
# Mean and standard deviation of the per-sentence token count; these values
# were calculated from the training data and are reused to normalise every split.
train_token_count_mean = 21.035180722891567
train_token_count_std = 15.719815094996603

In [None]:
def make_word_embedded_feature_vectors_and_labels(spans, vectorizer, train_mean, train_std):
    """Build the word-embedding feature matrix and the label vector for `spans`.

    Each row consists of the mean word vector of the span's tokens, followed by
    the span's normalised start position and its standardised token count.

    Args:
        spans: list of span dicts with 'tokens_spacy', 'token_count',
            'start_normalized' and 'type' entries.
        vectorizer: embedding model offering `get_word_vector(token)`; its
            `get_dimension()` is only consulted when no span has any token.
        train_mean: mean token count used for standardisation.
        train_std: standard deviation of the token count used for standardisation.

    Returns:
        Tuple `(X, y)`: X has one row per span (in input order) and
        `embedding_dim + 2` columns; y holds the span types.
    """
    # Average the token vectors of every span. A span without tokens is marked
    # with None and later receives an all-zero vector, so that X stays aligned
    # row-by-row with `spans` and `y` (dropping such spans made the final
    # concatenation fail).
    span_vectors = []
    for s in spans:
        span_tokens = s['tokens_spacy']
        if len(span_tokens):
            token_vectors = np.array([vectorizer.get_word_vector(token) for token in span_tokens])
            span_vectors.append(np.mean(token_vectors, axis=0))
        else:
            span_vectors.append(None)

    dim = next((vec.shape[0] for vec in span_vectors if vec is not None), None)
    if dim is None:
        dim = vectorizer.get_dimension()
    word_matrix = np.zeros((len(spans), dim))
    for row, vec in enumerate(span_vectors):
        if vec is not None:
            word_matrix[row] = vec

    starts_normalized = np.array([s['start_normalized'] for s in spans], dtype=float)
    token_count_normalized = np.array([(s['token_count'] - train_mean) / train_std for s in spans], dtype=float)
    y = np.array([s['type'] for s in spans])
    X = np.concatenate((word_matrix,
                        np.expand_dims(starts_normalized, axis=1),
                        np.expand_dims(token_count_normalized, axis=1)), axis=1)
    return X, y

In [None]:
# Load the fastText embedding model and report its vocabulary size.
# NOTE(review): the error-analysis section further down loads
# "ft_word_embedding_model_notebook.bin" instead - confirm which file matches
# the saved classifier.
vectorizer = fasttext.load_model("../models/ft_word_embedding_model.bin")
print(len(vectorizer.get_words(on_unicode_error='ignore')))

# Featurise the three splits, always normalising with the training statistics.
(train_X_wv, train_y_wv), (dev_X_wv, dev_y_wv), (test_X_wv, test_y_wv) = (
    make_word_embedded_feature_vectors_and_labels(
        split_spans, vectorizer, train_token_count_mean, train_token_count_std)
    for split_spans in (train_spans, dev_spans, test_spans)
)

print(f'{train_X_wv.shape} {train_y_wv.shape}')
print(f'{dev_X_wv.shape} {dev_y_wv.shape}')
print(f'{test_X_wv.shape} {test_y_wv.shape}')

### Loading saved featurized vectors of TF-IDF and Word Embedding

In [None]:
# # Loading the train, dev and test data of Word embedding featurizer
# train_X_wv, train_y_wv = np.load('../Data/train_X_wv.npy'), np.load('../Data/train_y_wv.npy')
# dev_X_wv, dev_y_wv = np.load('../Data/dev_X_wv.npy'), np.load('../Data/dev_y_wv.npy')
# test_X_wv, test_y_wv = np.load('../Data/test_X_wv.npy'), np.load('../Data/test_y_wv.npy')

In [None]:
# print(f'{train_X_wv.shape} {train_y_wv.shape}')
# print(f'{dev_X_wv.shape} {dev_y_wv.shape}')
# print(f'{test_X_wv.shape} {test_y_wv.shape}')

In [None]:
# # Loading the train, dev and test data of TF-IDF featurizer
# train_X_tfidf, train_y_tfidf = np.load('../Data/train_X_tfidf.npy'), np.load('../Data/train_y_tfidf.npy')
# dev_X_tfidf, dev_y_tfidf = np.load('../Data/dev_X_tfidf.npy'), np.load('../Data/dev_y_tfidf.npy')
# test_X_tfidf, test_y_tfidf = np.load('../Data/test_X_tfidf.npy'), np.load('../Data/test_y_tfidf.npy')

In [None]:
# print(f'{train_X_tfidf.shape} {train_y_tfidf.shape}')
# print(f'{dev_X_tfidf.shape} {dev_y_tfidf.shape}')
# print(f'{test_X_tfidf.shape} {test_y_tfidf.shape}')

## Testing the Best model on dev and test data

In [None]:
# Loading the best model on word embedding features
clf_wv = load('../models/word_embedding_best_model_svc_rbf_notebook.joblib')

train_preds = clf_wv.predict(train_X_wv)
dev_preds = clf_wv.predict(dev_X_wv)
test_preds = clf_wv.predict(test_X_wv)

print('TRAIN:\n'+classification_report(train_y_wv, train_preds, zero_division=1))
print('DEV:\n'+classification_report(dev_y_wv, dev_preds, zero_division=1))
print('TEST:\n'+classification_report(test_y_wv, test_preds, zero_division=1))

plot_confusion_matrix(train_y_wv, train_preds, classes=list(clf_wv.classes_),
                      title='Confusion matrix for train data')
# Keep a handle on the dev-set plot: it is the one written to disk below.
dev_ax = plot_confusion_matrix(dev_y_wv, dev_preds, classes=list(clf_wv.classes_),
                               title='Confusion matrix of Radial Kernel SVM\n on dev data with Word Embedding featurization')

plot_confusion_matrix(test_y_wv, test_preds, classes=list(clf_wv.classes_),
                      title='Confusion matrix for test data')

# Save through the dev figure itself. plt.savefig() writes the *current*
# figure, which at this point is the test-set matrix created last, so the
# "_dev_set" file used to contain the wrong plot.
dev_ax.figure.savefig('confusion_matrix_best_model_word_embedding_dev_set.jpg')
plt.show()

In [None]:
# # Loading the best model on TF-IDF features
# # vectorizer = fasttext.load_model("../models/ft_word_embedding_model.bin")
# # feature_vector = make_word_embedded_feature_vectors_and_labels(spans, vectorizer)
# clf_tfidf = load('../models/tfidf_best_model_svc_rbf.joblib') 

# train_preds = clf_tfidf.predict(train_X_tfidf)
# dev_preds = clf_tfidf.predict(dev_X_tfidf)
# test_preds = clf_tfidf.predict(test_X_tfidf)

# print('TRAIN:\n'+classification_report(train_y_tfidf, train_preds, zero_division=1))
# print('DEV:\n'+classification_report(dev_y_tfidf, dev_preds, zero_division=1))
# print('TEST:\n'+classification_report(test_y_tfidf, test_preds, zero_division=1))

# # plot_confusion_matrix(train_y_tfidf, train_preds, classes=list(clf.classes_),
# #                       title='Confusion matrix for TFIDF train data')

# plot_confusion_matrix(dev_y_tfidf, dev_preds, classes=list(clf.classes_),
#                       title='Confusion matrix of Radial Kernel SVM\n on dev data with TFIDF featurization')

# # plot_confusion_matrix(test_y_tfidf, test_preds, classes=list(clf.classes_),
# #                       title='Confusion matrix for TFIDF test data')

# plt.savefig('confusion_matrix_best_model_tfidf_dev_set.jpg')
# plt.show()

## Error analysis for individual classes

In [None]:
def make_word_embedded_feature_vectors_and_labels(spans, model, train_mean=None, train_std=None):
    """Build the word-embedding feature matrix and the label vector for `spans`.

    This definition replaces the earlier four-argument one of the same name in
    the notebook namespace, so it accepts both call forms: `(spans, model)` and
    `(spans, model, train_mean, train_std)`.

    Each row consists of the mean word vector of the span's tokens, followed by
    the span's normalised start position and its standardised token count.

    Args:
        spans: list of span dicts with 'tokens_spacy', 'token_count',
            'start_normalized' and 'type' entries.
        model: embedding model offering `get_word_vector(token)`; its
            `get_dimension()` is only consulted when no span has any token.
        train_mean: mean token count used for standardisation. Defaults to the
            notebook-level `train_token_count_mean`.
        train_std: standard deviation used for standardisation. Defaults to the
            notebook-level `train_token_count_std`.

    Returns:
        Tuple `(X, y)`: X has one row per span (in input order) and
        `embedding_dim + 2` columns; y holds the span types.
    """
    # Standardise with the training statistics rather than with statistics of
    # the batch being evaluated: batch statistics give features on a different
    # scale than the ones built for train/dev/test above, and their standard
    # deviation is NaN for a single span.
    if train_mean is None:
        train_mean = train_token_count_mean
    if train_std is None:
        train_std = train_token_count_std

    # A span without tokens gets an all-zero embedding so that X stays aligned
    # row-by-row with `spans` and `y`.
    span_vectors = []
    for s in spans:
        span_tokens = s['tokens_spacy']
        if len(span_tokens):
            token_vectors = np.array([model.get_word_vector(token) for token in span_tokens])
            span_vectors.append(np.mean(token_vectors, axis=0))
        else:
            span_vectors.append(None)

    dim = next((vec.shape[0] for vec in span_vectors if vec is not None), None)
    if dim is None:
        dim = model.get_dimension()
    word_matrix = np.zeros((len(spans), dim))
    for row, vec in enumerate(span_vectors):
        if vec is not None:
            word_matrix[row] = vec

    starts_normalized = np.array([s['start_normalized'] for s in spans], dtype=float)
    token_count_normalized = np.array([(s['token_count'] - train_mean) / train_std for s in spans], dtype=float)
    y = np.array([s['type'] for s in spans])
    X = np.concatenate((word_matrix,
                        np.expand_dims(starts_normalized, axis=1),
                        np.expand_dims(token_count_normalized, axis=1)), axis=1)
    return X, y

In [None]:
def prediction_errors(clf, eval_spans, vectorizer,
                      select_true_label=None,
                      select_pred_label=None):
    """Print every span in `eval_spans` that `clf` misclassifies.

    Args:
        clf: fitted classifier offering `predict`.
        eval_spans: list of tokenized span dicts to evaluate.
        vectorizer: embedding model handed to the feature builder.
        select_true_label: if set, only report errors whose true label equals it.
        select_pred_label: if set, only report errors whose predicted label equals it.

    Returns:
        None; the errors are printed.

    Raises:
        ValueError: if the classifier returns a different number of
            predictions than there are spans.
    """
    # An empty sample (e.g. a label that does not occur in the split) has
    # nothing to report and cannot be featurised.
    if len(eval_spans) == 0:
        return

    eval_X, _ = make_word_embedded_feature_vectors_and_labels(eval_spans, vectorizer)
    pred_y = clf.predict(eval_X)
    if len(pred_y) != len(eval_spans):
        raise ValueError(
            f'got {len(pred_y)} predictions for {len(eval_spans)} spans')

    for i, (span, pred_label) in enumerate(zip(eval_spans, pred_y)):
        true_label = span['type']
        if true_label == pred_label:
            continue
        if select_true_label and true_label != select_true_label:
            continue
        if select_pred_label and pred_label != select_pred_label:
            continue
        doc_name = documents_by_id[span['document']]['name']
        print('sentence # '+str(i)+' / case '+doc_name+' / @'+str(span['start']))
        # str() keeps the report working for labels that are not plain strings.
        print('pred: '+str(pred_label)+' / true: '+str(true_label))
        print(span['txt'])
        print()

In [None]:
# spacy_tfidf_vectorizer = load('../models/tfidf_featurizer.joblib')
# Load the fastText embedding model and the saved classifier used by the
# error-analysis cells below.
# NOTE(review): this reads "ft_word_embedding_model_notebook.bin", while the
# feature-building cell earlier reads "ft_word_embedding_model.bin" - confirm
# that both are meant to be used with this classifier.
wv_vectorizer = fasttext.load_model("../models/ft_word_embedding_model_notebook.bin")
clf_wv = load('../models/word_embedding_best_model_svc_rbf_notebook.joblib')

In [None]:
# Labels singled out for the error analysis.
most_misclassified_labels = ['RemandInstructions', 'PolicyBasedReasoning',
                             'LegislationAndPolicy', 'EvidenceBasedReasoning']

In [None]:
def sentence_sampler_for_testing(spans, label, no_of_samples=None, shuffle=True):
    """Return spans whose 'type' equals `label`, optionally limited in number.

    Args:
        spans: iterable of span dicts with a 'type' entry.
        label: the type to select.
        no_of_samples: maximum number of spans to return; None returns all matches.
        shuffle: when a limit applies, draw the spans with `random.sample`
            (True) or take the first `no_of_samples` matches (False).

    Returns:
        List of matching spans. If fewer than `no_of_samples` spans match, all
        matches are returned in their original order.
    """
    matching = [span for span in spans if span['type'] == label]

    # No limit requested, or not enough matches to need one.
    if no_of_samples is None or len(matching) < no_of_samples:
        return matching
    if shuffle:
        return random.sample(matching, no_of_samples)
    return matching[0:no_of_samples]
    

In [None]:
# Print every dev-set span annotated as 'PolicyBasedReasoning' together with
# its type and the id of the document it comes from.
sentences = sentence_sampler_for_testing(dev_spans, 'PolicyBasedReasoning', shuffle=True)

for sent in sentences:
    print(sent['txt'], "--->", sent['type'], '====>', sent['document'], end="\n\n")

In [None]:
# Dev-set 'LegalRule' spans that the classifier predicts as 'EvidenceBasedReasoning'

prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'LegalRule', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='EvidenceBasedReasoning')

In [None]:
# Dev-set 'LegalRule' spans that the classifier predicts as 'LegislationAndPolicy'
prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'LegalRule', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='LegislationAndPolicy')

In [None]:
# Dev-set 'EvidenceBasedReasoning' spans that the classifier predicts as
# 'EvidenceBasedOrIntermediateFinding'

prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'EvidenceBasedReasoning', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='EvidenceBasedOrIntermediateFinding')

In [None]:
# Dev-set 'Evidence' spans that the classifier predicts as 'Citation'

prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'Evidence', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='Citation')

In [None]:
# Dev-set 'CaseHeader' spans that the classifier predicts as 'Header'

prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'CaseHeader', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='Header')

In [None]:
# Dev-set 'EvidenceBasedReasoning' spans that the classifier predicts as 'LegalRule'

prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'EvidenceBasedReasoning', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='LegalRule')

In [None]:
# Dev-set 'ConclusionOfLaw' spans that the classifier predicts as 'LegalRule'

prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'ConclusionOfLaw', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='LegalRule')

In [None]:
# Dev-set 'EvidenceBasedReasoning' spans that the classifier predicts as 'Procedure'

prediction_errors(clf_wv,
#                   random.sample(train_spans, 500),
                  sentence_sampler_for_testing(dev_spans, 'EvidenceBasedReasoning', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='Procedure')

In [None]:
# Errors among 500 randomly sampled training spans (any true label) where the
# classifier predicts 'PolicyBasedReasoning'

prediction_errors(clf_wv,
                  random.sample(train_spans, 500),
                  wv_vectorizer,
                  select_pred_label='PolicyBasedReasoning')

In [None]:
# Errors among 500 randomly sampled training spans (any true label) where the
# classifier predicts 'LegislationAndPolicy'

prediction_errors(clf_wv,
                  random.sample(train_spans, 500),
                  wv_vectorizer,
                  select_pred_label='LegislationAndPolicy')

In [None]:
# Errors among 500 randomly sampled training spans (any true label) where the
# classifier predicts 'EvidenceBasedReasoning'

prediction_errors(clf_wv,
                  random.sample(train_spans, 500),
                  wv_vectorizer,
                  select_pred_label='EvidenceBasedReasoning')

In [None]:
# Training-set 'EvidenceBasedOrIntermediateFinding' spans that the classifier
# predicts as 'EvidenceBasedReasoning'

prediction_errors(clf_wv,
                  sentence_sampler_for_testing(train_spans, 'EvidenceBasedOrIntermediateFinding', shuffle=True),
                  wv_vectorizer,
                  select_pred_label='EvidenceBasedReasoning')