In [1]:
import benepar, spacy
import os
import re
from typing import List, Set, Dict, Tuple
from spacy import displacy
from spacy.matcher import Matcher
from spacy.tokenizer import Tokenizer
import warnings
import csv
import pandas as pd
import math

warnings.filterwarnings('ignore')

In [2]:
benepar.download('benepar_en3')
nlp = spacy.load('en_core_web_md')

if spacy.__version__.startswith('2'):
    nlp.add_pipe(benepar.BeneparComponent("benepar_en3"))
else:
    nlp.add_pipe("benepar", config={"model": "benepar_en3"})

[nltk_data] Downloading package benepar_en3 to
[nltk_data]     C:\Users\sivan\AppData\Roaming\nltk_data...
[nltk_data]   Package benepar_en3 is already up-to-date!
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [3]:
#process golden sets for eval

gold_sets = pd.read_json('../Annotation/golden_sets_flattened.json').transpose()

gold_annotations = []
for index, row in gold_sets.iterrows():
  entry = {'sentence': row['sentence'], 'predicate': row['predicate'], 'clause': row['clause'], 'type': row['type']}
  gold_annotations.append(entry)
  print(entry)

gold_sentences = list(dict.fromkeys([g['sentence'] for g in gold_annotations]))
print(gold_sentences)


advers_sets = pd.read_json('../Annotation/adversarials_golden_set.json').transpose()
advers_sentences = []
for index, row in advers_sets.iterrows():
  advers_sentences.append(row['sentence'])

print(advers_sentences)


{'sentence': "Luminosity Gaming may be about to recruit a new CS:GO player, as VPEsports reports that Ricardo 'boltz' Prass is set to rejoin the side to replace Gustavo 'yeL' Knittel.", 'predicate': [{'str': 'reports', 'lemma': 'report', 'POS': 'VERB'}], 'clause': "that Ricardo 'boltz' Prass is set to rejoin the side to replace Gustavo 'yeL' Knittel", 'type': 'declarative'}
{'sentence': 'I have always loved fashion and believe that it plays an integral role in self expression', 'predicate': [{'str': 'believe', 'lemma': 'believe', 'POS': 'VERB'}], 'clause': 'that it plays an integral role in self expression', 'type': 'declarative'}
{'sentence': 'Being a Conroe and Woodlands Family Photographer, I find that emotions are the most noticeable thing in my photographs (especially since I shoot with a candid approach to my sessions)', 'predicate': [{'str': 'find', 'lemma': 'find', 'POS': 'VERB'}], 'clause': 'that emotions are the most noticeable thing in my photographs (especially since I shoo

In [5]:
#get verb list from MegaAcceptability
mega = pd.read_csv('mega-acceptability-v2\mega-acceptability-v2.tsv', sep='\t', header=0)
mega_verbs = mega['verb'].unique()

mega_singles = []
mega_doubles = []

for v in mega_verbs:
    if "_" in v:
        mega_doubles.append(v.split("_"))
    else:
        mega_singles.append(v)

print(mega_singles)
print(mega_doubles)

for d in mega_doubles:
    print(d)

doubles_end = [m[1] for m in mega_doubles]
doubles_start = [m[0] for m in mega_doubles]

['abhor', 'absolve', 'accept', 'acclaim', 'accredit', 'acknowledge', 'add', 'address', 'admire', 'admit', 'admonish', 'adore', 'advertise', 'advise', 'advocate', 'affect', 'affirm', 'afford', 'affront', 'aggravate', 'aggrieve', 'agitate', 'agonize', 'agree', 'aim', 'alarm', 'alert', 'allege', 'allow', 'alter', 'amaze', 'amuse', 'analyze', 'anger', 'anguish', 'annotate', 'announce', 'annoy', 'answer', 'anticipate', 'apologize', 'appall', 'appeal', 'appear', 'appease', 'applaud', 'apply', 'appoint', 'appraise', 'appreciate', 'approach', 'approve', 'argue', 'arouse', 'arrange', 'articulate', 'ascertain', 'ask', 'assert', 'assess', 'assign', 'assume', 'assure', 'astonish', 'astound', 'attempt', 'attest', 'audit', 'authorize', 'awe', 'babble', 'back', 'badger', 'baffle', 'bandy', 'banter', 'bargain', 'bark', 'be', 'beam', 'bear', 'befuddle', 'beg', 'begin', 'believe', 'belittle', 'bellow', 'beseech', 'bet', 'bewilder', 'bicker', 'bitch', 'blame', 'blare', 'blast', 'bleat', 'bless', 'blog', 

In [8]:
#verb matching - the baseline heuristics
verb_matcher = Matcher(nlp.vocab)

verb_patterns = []
#SINGLES = ["believe", "think", "know", "wonder", "hope", "fear", "like", "regret", "see", "ask", "say", "tell", "check", "decide", "determine", "consider"]
#DOUBLES = ["certain", "happy", "curious"]
SINGLES = mega_singles
DOUBLES = mega_doubles

for sing in SINGLES:
  pattern = [{"LEMMA": sing, "POS": "VERB"}]
  verb_patterns.append(pattern)

#for doub in DOUBLES:
#  pattern = [{"LEMMA": "be", "POS": "AUX"}, {"OP": "?"}, {"OP": "?"}, {"OP": "?"}, {"LEMMA": doub}]
#  verb_patterns.append(pattern)
for doub in DOUBLES:
  pattern = [{"LEMMA": doub[0], "POS": "AUX"}, {"OP": "?"}, {"OP": "?"}, {"OP": "?"}, {"LEMMA": doub[1]}]
  verb_patterns.append(pattern)


verb_matcher.add("embedding_verb", verb_patterns)

#clause matching
clause_matcher = Matcher(nlp.vocab)
pattern_d = [{"TEXT": "that", "POS": "SCONJ"}]
clause_matcher.add("declarative", [pattern_d])

pattern_i1 = [{"TEXT": "whether", "POS": "SCONJ"}]
clause_matcher.add("polar", [pattern_i1])

pattern_c1 = [{"TEXT": "who"}]
pattern_c2 = [{"TEXT": "what"}]
pattern_c3 = [{"TEXT": "when"}]
pattern_c4 = [{"TEXT": "where"}]
pattern_c5 = [{"TEXT": "why"}]
pattern_c6 = [{"TEXT": "how"}]
pattern_c7 = [{"TEXT": "which"}]
clause_matcher.add("constituent", [pattern_c1, pattern_c2, pattern_c3, pattern_c4, pattern_c5, pattern_c6, pattern_c7])


#this function takes nlp("sentence") as input and prints annotations
def get_baseline(sent, print_negs = False):
  embeddings = []
  embedded_clauses = []

  #get embedding predicates
  verb_matches = verb_matcher(sent)
  for match_id, start, end in verb_matches:
      emb = dict.fromkeys(["sentence", "predicate", "clause", "type"])
      full_pred = []

      #default values for sentence and for predicate annotation
      emb["sentence"] = sent.text
      full_pred.append({"str": sent[start].text, "lemma": sent[start].lemma_, 'POS': sent[start].pos_})
      
      #doubles values for predicate annotation
      if (sent[start].text in doubles_start) and (sent[end-1].text in doubles_end):
        full_pred.append({"str": sent[end-1].text, "lemma": sent[end-1].lemma_, 'POS': sent[end-1].pos_})

      #edge cases where there is a preposition, adverb, pronoun, or noun between the verb and the clause
      if (len(sent) > end) and (sent[end].pos_ == "ADP"):
        full_pred.append({"str": sent[end].text, "lemma": sent[end].lemma_, 'POS': sent[end].pos_})
        end += 1

      if (len(sent) > end) and (sent[end].pos_ in ["ADV", "PRON", "PROPN"]) and (not end < len(sent)) and (sent[end+1].pos_ in ["SCONJ", "PRON"]):
        end += 1

      emb["predicate"] = full_pred


      #get the potential clause
      clause = sent[end:]
      if len(clause) < 1:
        continue
      if clause[-1].is_punct:
        clause = clause[:-1]
      emb["clause"] = clause.text

      #find clause type
      clause_matches = clause_matcher(clause)
      clause_pos = [token.pos_ for token in clause]

      for match_id, start, end in clause_matches:
        clause_type = nlp.vocab.strings[match_id]
        emb["type"] = clause_type

        #separate polars from alternatives
        if clause_type == "polar":
          if (" or " in clause.text) and not (" or not " in clause.text) and not (" or not." in sent[end:].text):
            emb["type"] = "alternative"

        break

      #type declaraties without marks
      if clause_matches == [] and (clause_pos.count("VERB") ==1 or clause_pos.count("AUX") ==1):
        emb["type"] = "declarative"

      #if it was a genuine embedded clause add to the sentence's set of annotations
      if emb["type"] != None:
        embedded_clauses.append(emb)



  if embedded_clauses != []:
    for e in embedded_clauses:
      print(e)
      embeddings.append(e)

  elif print_negs == True:
    embeddings.append({'sentence': sent, 'predicate': [], 'clause': '', 'type': ''})
    print({'sentence': sent, 'predicate': [], 'clause': '', 'type': ''})

  return embeddings

In [9]:
#running baseline on golden sets
baseline_results = []
for s in gold_sentences:
  my_sent = nlp(s)
  outs = get_baseline(my_sent, print_negs = False)
  baseline_results += outs

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


{'sentence': "Luminosity Gaming may be about to recruit a new CS:GO player, as VPEsports reports that Ricardo 'boltz' Prass is set to rejoin the side to replace Gustavo 'yeL' Knittel.", 'predicate': [{'str': 'recruit', 'lemma': 'recruit', 'POS': 'VERB'}], 'clause': "a new CS:GO player, as VPEsports reports that Ricardo 'boltz' Prass is set to rejoin the side to replace Gustavo 'yeL' Knittel", 'type': 'declarative'}
{'sentence': "Luminosity Gaming may be about to recruit a new CS:GO player, as VPEsports reports that Ricardo 'boltz' Prass is set to rejoin the side to replace Gustavo 'yeL' Knittel.", 'predicate': [{'str': 'GO', 'lemma': 'go', 'POS': 'VERB'}], 'clause': "player, as VPEsports reports that Ricardo 'boltz' Prass is set to rejoin the side to replace Gustavo 'yeL' Knittel", 'type': 'declarative'}
{'sentence': "Luminosity Gaming may be about to recruit a new CS:GO player, as VPEsports reports that Ricardo 'boltz' Prass is set to rejoin the side to replace Gustavo 'yeL' Knittel."

In [10]:
#running baseline on adversarial set
baseline_advers = []
for s in advers_sentences:
  my_sent = nlp(s)
  outs = get_baseline(my_sent, print_negs = False)
  baseline_advers += outs

{'sentence': 'Shower Doors is a relatively small company that can offer you more personalized service, which is something not offered by many companies today.', 'predicate': [{'str': 'offer', 'lemma': 'offer', 'POS': 'VERB'}], 'clause': 'you more personalized service, which is something not offered by many companies today', 'type': 'constituent'}
{'sentence': 'Hardware is very well made, offering a diverse selection of style and color', 'predicate': [{'str': 'made', 'lemma': 'make', 'POS': 'VERB'}], 'clause': ', offering a diverse selection of style and color', 'type': 'declarative'}
{'sentence': 'Now that you know your priorities and price range, use the criteria to narrow your home search', 'predicate': [{'str': 'use', 'lemma': 'use', 'POS': 'VERB'}], 'clause': 'the criteria to narrow your home search', 'type': 'declarative'}
{'sentence': 'Start with my Featured Listings to see examples of beautiful North Shore homes', 'predicate': [{'str': 'Start', 'lemma': 'start', 'POS': 'VERB'}, 

In [11]:
#stats for evaluating baseline against golden set - detection

#get predicate lists for sentences
def get_preds(annotations):
  preds = dict.fromkeys(gold_sentences, [])
  for a in annotations:
    sentence = a['sentence']
    predicate = a['predicate']
    if preds[sentence] == []:
      preds[sentence] = [predicate]
    else:
      preds[sentence] += [predicate]
  return preds

preds_gold = get_preds(gold_annotations)
preds_baseline = get_preds(baseline_results)

#print(preds_gold)
#print(preds_baseline)

#get f1
def get_f1(tp, fp, fn):
  precision = tp/(tp+fp)
  recall = tp/(tp+fn)
  f1 = (2*precision*recall)/(precision + recall)
  return [precision, recall, f1]

#detection - single clause
tp = 0
fp = 0
fn = 0

for s in gold_sentences:
  gp = preds_gold[s]
  bp = preds_baseline[s]

  if len(gp) != 1:
    continue

  if len(gp) == len(bp):
    tp += len(gp)
  elif len(gp) > len(bp):
    fn += len(gp) - len(bp)
    tp += len(bp)
  elif len(gp) < len(bp):
    fp += len(bp) - len(gp)
    tp += len(gp)

print("---------Embedded Clause Detection, Single Clause F1---------")
print("True Positives: ", tp, "\nFalse Positives: ", fp, "\nFalse Negatives: ", fn)
print("Precision, Recall, F1: ", get_f1(tp, fp, fn))
print("Accuracy: ", (tp/(tp+fp+fn)))
print()

#detection - multiple clause
tp = 0
fp = 0
fn = 0

for s in gold_sentences:
  gp = preds_gold[s]
  bp = preds_baseline[s]

  if len(gp) == 1:
    continue

  if len(gp) == len(bp):
    tp += len(gp)
  elif len(gp) > len(bp):
    fn += len(gp) - len(bp)
    tp += len(bp)
  elif len(gp) < len(bp):
    fp += len(bp) - len(gp)
    tp += len(gp)

print("---------Embedded Clause Detection, Multi Clause F1---------")
print("True Positives: ", tp, "\nFalse Positives: ", fp, "\nFalse Negatives: ", fn)
print("Precision, Recall, F1: ", get_f1(tp, fp, fn))
print("Accuracy: ", (tp/(tp+fp+fn)))
print()

#detection - overall
tp = 0
tn = len(advers_sentences) - len(baseline_advers)
fp = len(baseline_advers)
fn = 0

for s in gold_sentences:
  gp = preds_gold[s]
  bp = preds_baseline[s]

  if len(gp) == len(bp):
    tp += len(gp)
  elif len(gp) > len(bp):
    fn += len(gp) - len(bp)
    tp += len(bp)
  elif len(gp) < len(bp):
    fp += len(bp) - len(gp)
    tp += len(gp)

print("---------Embedded Clause Detection, Overall F1---------")
print("True Positives: ", tp, "\nTrue Negatives", tn, "\nFalse Positives: ", fp, "\nFalse Negatives: ", fn)
print("Precision, Recall, F1: ", get_f1(tp, fp, fn))
print("Accuracy: ", ((tp+tn)/(tp+tn+fp+fn)))
print()



---------Embedded Clause Detection, Single Clause F1---------
True Positives:  354 
False Positives:  297 
False Negatives:  24
Precision, Recall, F1:  [0.543778801843318, 0.9365079365079365, 0.6880466472303207]
Accuracy:  0.5244444444444445

---------Embedded Clause Detection, Multi Clause F1---------
True Positives:  127 
False Positives:  47 
False Negatives:  22
Precision, Recall, F1:  [0.7298850574712644, 0.8523489932885906, 0.7863777089783281]
Accuracy:  0.6479591836734694

---------Embedded Clause Detection, Overall F1---------
True Positives:  481 
True Negatives 44 
False Positives:  411 
False Negatives:  46
Precision, Recall, F1:  [0.5392376681614349, 0.9127134724857685, 0.6779422128259337]
Accuracy:  0.5346232179226069



In [12]:
#stats for evaluating baseline against golden set - accuracy

gold_per_sent = dict.fromkeys(gold_sentences, [])
for g in gold_annotations:
  if gold_per_sent[g['sentence']] == []:
    gold_per_sent[g['sentence']] = [g]
  else:
    gold_per_sent[g['sentence']] += [g]

base_per_sent = dict.fromkeys(gold_sentences, [])
for g in baseline_results:
  if base_per_sent[g['sentence']] == []:
    base_per_sent[g['sentence']] = [g]
  else:
    base_per_sent[g['sentence']] += [g]

def get_counts(my_list):
  my_list = [str(item) for item in my_list]
  counts = dict.fromkeys(my_list, 0)
  for item in my_list:
    counts[item] += 1
  return counts

#predicate identification
true_preds = 0
for b in gold_sentences:
  golds = gold_per_sent[b]
  base = base_per_sent[b]
  gpred = ["".join([gg['lemma'] for gg in g['predicate']]) for g in golds]
  bpred = ["".join([bb['lemma'] for bb in b['predicate']]) for b in base]
  gfreq = get_counts(gpred)
  bfreq = get_counts(bpred)
  for key in gfreq:
    if key in bfreq:
      if gfreq[key] == bfreq[key]:
        true_preds += gfreq[key]
      elif gfreq[key] > bfreq[key]:
        true_preds += bfreq[key]
      else:
        true_preds += gfreq[key]

print("---------Embedding Predicate Accuracy---------")
print(true_preds/tp)
print()

#clause identification
true_clause = 0
for b in gold_sentences:
  golds = gold_per_sent[b]
  base = base_per_sent[b]
  gclause = [g['clause'] for g in golds]
  bclause = [b['clause'] for b in base]
  gfreq = get_counts(gclause)
  bfreq = get_counts(bclause)
  for key in gfreq:
    if key in bfreq:
      if gfreq[key] == bfreq[key]:
        true_clause += gfreq[key]
      elif gfreq[key] > bfreq[key]:
        true_clause += bfreq[key]
      else:
        true_clause += gfreq[key]

print("---------Embedded Clause Accuracy---------")
print(true_clause/tp)
print()

#typing
true_type = 0
for b in gold_sentences:
  golds = gold_per_sent[b]
  base = base_per_sent[b]
  gtype = [g['type'] for g in golds]
  btype = [b['type'] for b in base]
  gfreq = get_counts(gtype)
  bfreq = get_counts(btype)
  for key in gfreq:
    if key in bfreq:
      if gfreq[key] == bfreq[key]:
        true_type += gfreq[key]
      elif gfreq[key] > bfreq[key]:
        true_type += bfreq[key]
      else:
        true_type += gfreq[key]

print("---------Embedded Clause Type Accuracy---------")
print(true_type/tp)
print()

---------Embedding Predicate Accuracy---------
0.7941787941787942

---------Embedded Clause Accuracy---------
0.4968814968814969

---------Embedded Clause Type Accuracy---------
0.9355509355509356

