In [1]:
#ONLY IF RUNNING IN COLAB USING TRIP_PARSES FROM DRIVE
# from google.colab import drive
# drive.mount('/content/drive')
# !cp /content/drive/MyDrive/trip_parses.json /content

Mounted at /content/drive


In [None]:
!pip install neuralcoref spacy==2.1.0 numpy
!python -m spacy download en_core_web_sm

In [None]:
!git clone https://github.com/sled-group/Verifiable-Coherent-NLU.git

In [4]:
import json
import copy
import numpy as np
import spacy
import random
import neuralcoref
from collections import Counter

nlp = spacy.load("en_core_web_sm")
neuralcoref.add_to_pipe(nlp)

attrs = ["temperature", "clean", "h_wet", "mixed", "hygiene", "wearing", "contain", "wet", "solid", "edible", "pieces", "moveable", "power", "exist", "functional", "conscious", "running", "open"]
NATTRS = len(attrs)
#"location", "h_location"

sc_to_precond = ['_', 'f', 't', 't', 'f', '_', '_', 'f', 't']
sc_to_effect = ['_', 'f', 't', 'f', 't', 'f', 't', '_', '_']

attr_defaults = {attr: ('t' if attr in {'conscious', 'functional', 'exist', 'moveable'} else '_') for attr in attrs}

100%|██████████| 40155833/40155833 [00:01<00:00, 37495232.22B/s]


In [5]:
with open('Verifiable-Coherent-NLU/all_data/www.json') as f:
  trip = json.load(f)
with open('trip_parses.json') as f:
  parses = json.load(f)

for spl in list(trip.keys()):
  for suid in list(trip[spl].keys()):
    if 'states' not in trip[spl][suid] or trip[spl][suid]['length'] != len(trip[spl][suid]['states']) or trip[spl][suid]['length'] != len(trip[spl][suid]['sentences']):
      del trip[spl][suid]

test_aug = trip['test'].copy()
negkeys = [x for x in test_aug if '-' not in x]
numneg = len(negkeys)
numpos = len(test_aug) - numneg
for i in range(numpos - numneg):
  newk = random.choice(negkeys)
  i = 0
  while 1:
    if (newk + '_' + str(i)) not in test_aug: break
    i += 1
  test_aug[newk + '_' + str(i)] = test_aug[newk].copy()

In [6]:
def filter_state_changes(dstate, obj):
  rule_sc = []
  for attr, changes in dstate.items():
    for cobj, sc in changes:
      if cobj == obj and sc != 0:
        rule_sc.append((attr, sc))
  return rule_sc

def get_sc(dstate, obj, attr):
  if attr not in dstate: return 0
  for cobj, sc in dstate[attr]:
    if obj in cobj: return sc
  return 0

def spacy_is_ancestor(anc, child):
  while 1:
    if child == anc: return True
    if child == child.head: return False
    child = child.head

#Rule Class/Prototype Generation

In [7]:
rule_protos = []
spacy_cache = {}

for suid, story in trip['train'].items():
  for sentidx in range(story['length']):
    sentence = story['sentences'][sentidx]
    dstate = story['states'][sentidx]
    parse = parses[sentence]

    tracked_objects = set(story['objects'].split(', '))
    for sc in dstate.values():
      tracked_objects |= set([x[0] for x in sc])

    if sentence in spacy_cache: 
      doc = spacy_cache[sentence]
    else:
      doc = nlp(sentence)
      spacy_cache[sentence] = doc

    head_idx_to_chunk = {x.root.i: x for x in doc.noun_chunks}
    for chunk in doc.noun_chunks:
      obj = None
      for x in tracked_objects:
        if x in chunk.text:
          obj = x
          break
      if not obj: continue #this means this chunk isn't a tracked object
      
      head_start, head_end, headdep = None,None,None
      for src, dst, dep in parse: #find predicate (assume direct head)
        if dst-1 == chunk.root.i and doc[src-1].tag_.startswith('VB'):
          head_start = src-1
          head_end = head_start+1
          while head_end < len(doc) and doc[head_end].dep_ == 'prt': head_end += 1 #expand for phrasal verbs
          headdep = dep
          break

      if not headdep: continue

      aux_deps = []
      for src, dst, dep in parse: #find auxiliary dependants of predicate
        if src-1 == head_start and dst-1 != chunk.root.i and dst-1 in head_idx_to_chunk:
          aux_deps.append((dep, head_idx_to_chunk[dst-1].text))

      rule_crit = [doc[head_start:head_end].text, headdep, aux_deps] #(pred, target_dep, [(dep, head), ...])

      rule_sc = filter_state_changes(dstate, obj)

      # print()
      # print(rule_crit)
      # print(rule_sc)
      rule_protos.append((rule_crit, rule_sc, sentence))

In [8]:
print(random.choice(rule_protos))

(['jumped', 'nsubj', [('obj', 'the tree')]], [('h_location', 2), ('conscious', 2)], 'Ann jumped off the tree.')


##Get thist and rcrules
thist[ruleclass][attr][auxpair] = [0, 0, 0, 6, 6, 3, 0...] = (list of sc's)

rcrules[ruleclass][auxpair] = {(attr1, sc1), ...} (used ONLY in non-prob version)

In [9]:
thist = {}

for crit, sc, sent in rule_protos:
  pred, hdep, auxdep = crit

  #rattrs = {a: {} for a in attrs}
  for sattr in attrs:
    #if sattr not in rattrs: rattrs[sattr] = {}
    sctl = [x[1] for x in sc if x[0] == sattr] #get sct
    if sctl: sct = sctl[0]
    else: sct = 0

    #if auxdep and pred == 'put' and hdep == 'obj': print('start')
    #hist = {}
    if (pred, hdep) not in thist: thist[(pred, hdep)] = {}
    if sattr not in thist[(pred, hdep)]: thist[(pred, hdep)][sattr] = {}
    hist = thist[(pred, hdep)][sattr]

    for adep, aobj in auxdep:
      if adep == 'nsubj': continue #usually not relevant

      hn = (adep, aobj)
      if hn not in hist: hist[hn] = []
      hist[hn].append(sct)

      if (adep, '*') not in hist: hist[(adep, '*')] = []
      hist[(adep, '*')].append(sct)

      if ('*', '*') not in hist: hist[('*', '*')] = []
      hist[('*', '*')].append(sct)

      #if pred == 'put' and hdep == 'obj': print(adep, aobj, hist)

    #if hist: rattrs[sattr] = hist
  #thist[(pred, hdep)] = rattrs

rcrules = {}
for rp, rattrs in thist.items():
  rcrule = {}
  for attr, hist in rattrs.items():
    for k, v in hist.items():
      mcsc, mccount = Counter(v).most_common(1)[0]
      if mcsc != 0: #and mccount/len(v) > 0.9
        if k not in rcrule: rcrule[k] = set()
        rcrule[k].add((attr, mcsc))

  for k in list(rcrule.keys()): #remove redundancies
      if ('*', '*') in rcrule and k != ('*', '*'):
        rcrule[k] -= rcrule[('*', '*')]
      if k[1] != '*' and (k[0], '*') in rcrule:
        rcrule[k] -= rcrule[(k[0], '*')]
      if not rcrule[k]: del rcrule[k]
  if rcrule: rcrules[rp] = rcrule

# # print([(k, set(v), scipy.stats.entropy(list(Counter(v).values())), len(v)) for k, v in hist.items() if any(v)]) #ignore those with no SC
# # print([(k, set(v), scipy.stats.entropy(list(Counter(v).values())), len(v)) for k, v in histwc.items() if any(v)])
# print([(k, set(v), {k2: x/len(v) for k2, x in Counter(v).items()}, len(v)) for k, v in hist.items() if any(v)]) #ignore those with no SC
# #print([(k, set(v), {k2: x/len(v) for k2, x in Counter(v).items()}, len(v)) for k, v in histwc.items() if any(v)])

print(thist[('put', 'obj')])
print(rcrules[('put', 'obj')])
print()

print(thist[('washed', 'obj')])
print(rcrules.keys())
print(rcrules[('washed', 'obj')])

{'temperature': {('obl:in', 'the microwave'): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6], ('obl:in', '*'): [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

##Predict prob state changes

In [10]:
def prob_scs_for_dataset(dataset, rules):

  num_total, num_correct = 0, 0

  res = {}

  for suid, story in dataset.items():
    
    tracked_objects = {x.strip() for x in story['objects'].split(',') if x.strip()} | {ob for sent in story['states'] for attrchanges in sent.values() for ob, _ in attrchanges}

    story_scs = []
    for sentidx, sentence in enumerate(story['sentences']):
      sent_scs = {}

      dstate = story['states'][sentidx]
      parse = parses[sentence]

      #doc = spacy_cache[sentence]
      if sentence in spacy_cache: 
        doc = spacy_cache[sentence]
      else:
        doc = nlp(sentence)
        spacy_cache[sentence] = doc

      if max(parse, key=lambda x: x[1])[1]-1 >= len(doc): #tokenization disagreement between spacy and corenlp (hyphen)
        story_scs.append({ob: {attr: np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0]) for attr in attrs} for ob in tracked_objects})
        continue

      head_idx_to_chunk = {x.root.i: x for x in doc.noun_chunks}

      for chunk in doc.noun_chunks:    
        obj = None
        for x in tracked_objects:
          # if (x not in chunk.text) and (chunk._.coref_cluster and x in chunk._.coref_cluster.main.text):
          #   print('successful coref:', chunk._.coref_cluster.main, chunk, sentence)
          if (x in chunk.text) or (chunk._.coref_cluster and x in chunk._.coref_cluster.main.text):
            obj = x
            break

        if not obj: continue #this means this chunk isn't a tracked object

        predicted_scs = {attr: np.zeros(9, dtype=np.float32) for attr in attrs} #for this object

        head_start, head_end, headdep = None,None,None
        for src, dst, dep in parse: #find predicate (assume direct head)
          #Below does not account for cases where larger noun phrase refers to object (eg "the rest of the milk")
          #if dst-1 != chunk.root.i: continue #edge destination is not target
          if not spacy_is_ancestor(doc[dst-1], chunk.root): continue

          head_start = src-1
          head_end = head_start+1
          while head_end < len(doc) and doc[head_end].dep_ == 'prt': head_end += 1 #expand for phrasal verbs
          pred = doc[head_start:head_end].text

          if (pred, dep) in rules: #matches rule prototype
            rcrule = rules[(pred, dep)]

            aux_match = ('*', '*') if ('*', '*') in rcrule.keys() else None #0 = no match, 1 = (*,*), 2 = (-,*), 3 = (-,-)
            for src2, dst2, dep2 in parse: #find a relevant edge for this auxdep
              for auxdep, auxval in rcrule.keys(): #FIX dep2=='*'
                if not (src2-1 == head_start and dep2 == auxdep): continue

                #if suid == '26-C0' and obj == 'milk':
                #  print((pred, dep), obj, (auxdep, auxval), head_idx_to_chunk[dst2-1].text if dst2-1 in head_idx_to_chunk else auxval == doc[dst2-1].text)

                if auxval == '*':
                  if aux_match[0] == '*': aux_match = (auxdep, auxval)
                elif (auxval == head_idx_to_chunk[dst2-1].text if dst2-1 in head_idx_to_chunk else auxval == doc[dst2-1].text): #bad workaround for tokenizing differences between corenlp and spacy
                  #if suid == '26-C0' and obj == 'milk': print('BIG MATCH!!')
                  aux_match = (auxdep, auxval)
                  break
            #if suid == '26-C0' and obj == 'milk': print(aux_match)
            if aux_match:
              for attr, ctr in rcrule[aux_match].items():
                cnts = np.array([ctr[x] for x in range(9)], dtype=np.float32)
                predicted_scs[attr] += cnts/(cnts.sum() + 1e-6)
        for attr in attrs:
          if not any(predicted_scs[attr]): del predicted_scs[attr]
          else: predicted_scs[attr] /= predicted_scs[attr].sum()
        sent_scs[obj] = predicted_scs
      story_scs.append(sent_scs)
    res[suid] = story_scs
  return res

In [11]:
#thist_cnt = {rc: {attr: {auxtup: Counter(hist) for auxtup, hist in attrhist.items() if any(hist)} for attr, attrhist in rchist.items()} for rc, rchist in thist.items()}
#thist_cnt[('bought', 'obj')]

thist_cnt = {}
for rc, rchist in thist.items():
  thist_cnt[rc] = {}
  for attr, attrhist in rchist.items():
    for auxtup, hist in attrhist.items():
      if any(hist): #ignore if all 0's/irrelevant
        if auxtup not in thist_cnt[rc]: thist_cnt[rc][auxtup] = {}
        thist_cnt[rc][auxtup][attr] = Counter(hist)

prob_scs = prob_scs_for_dataset(trip['test'], thist_cnt) #prob_scs[storyid][sentidx][obj][attr] = distribution

##Precision and recall for state change predictor

In [12]:
correct, total = 0, 0

gt = set()
gtprec, gteff = set(), set()
for suid, story in trip['test'].items():
  for i, sent in enumerate(story['states']):
    for attr, attrscs in sent.items():
      if attr not in attrs: continue
      for obj, sc in attrscs:
        if sc != 0: 
          gt.add((suid, i, obj, attr, sc))
          if sc_to_precond[sc] != '_': gtprec.add((suid, i, obj, attr, sc_to_precond[sc]))
          if sc_to_effect[sc] != '_': gteff.add((suid, i, obj, attr, sc_to_effect[sc]))

pscs = set()
pscprec, psceff = set(), set()
for suid, story in prob_scs.items():
  for i, sent in enumerate(story):
    for obj, objattrs in sent.items():
      for attr, scdist in objattrs.items():
        sc = np.argmax(scdist)
        if sc != 0: 
          pscs.add((suid, i, obj, attr, sc))
          if sc_to_precond[sc] != '_': pscprec.add((suid, i, obj, attr, sc_to_precond[sc]))
          if sc_to_effect[sc] != '_': psceff.add((suid, i, obj, attr, sc_to_effect[sc]))

fps = pscs - gt
fns = gt - pscs
tps = gt & pscs

print('overall states:')
print('\tprec:', len(tps)/(len(tps) + len(fps)))
print('\trec:', len(tps)/(len(tps) + len(fns)))

fpsprec = pscprec - gtprec
fnsprec = gtprec - pscprec
tpsprec = gtprec & pscprec

print('preconditions:')
print('\tprec:', len(tpsprec)/(len(tpsprec) + len(fpsprec)))
print('\trec:', len(tpsprec)/(len(tpsprec) + len(fnsprec)))

fpseff = psceff - gteff
fnseff = gteff - psceff
tpseff = gteff & psceff

print('effects:')
print('\tprec:', len(tpseff)/(len(tpseff) + len(fpseff)))
print('\trec:', len(tpseff)/(len(tpseff) + len(fnseff)))

overall states:
	prec: 0.8093389352762858
	rec: 0.5150525442945865
preconditions:
	prec: 0.8343577135832821
	rec: 0.5452703072793997
effects:
	prec: 0.836028158169719
	rec: 0.5372503975967485


##Prob story sim

In [13]:
def story_sim(story_scs, objs, actor):
  objs = set(objs) | set([ob for sent in story_scs for ob in sent])
  # for o in [ob for sent in story_scs for ob in sent]:
  #   if not any([x in o for x in objs]):
  #     objs.add(o)
  objs.discard('')
  #print(objs)
  
  states = [{ob: {attr: [1.0 if attr in {'conscious', 'exist', 'functional', 'moveable'} else 0.5, np.array([1.0] + [0.0]*len(story_scs)), '_', '_'] for attr in attrs} for ob in objs}] #last element = attribute was assumed (no affector)
  #below: potentially redudant??
  states[0][actor] = {attr: [1.0 if attr in {'conscious', 'exist', 'functional', 'moveable'} else 0.5, np.array([1.0] + [0.0]*len(story_scs)), '_', '_'] for attr in attrs}

  cfprobs = {}
  for i, scst in enumerate(story_scs):
    #print(states['milk']['exist'])
    states.append(copy.deepcopy(states[i]))

    for ob, ob_scs in scst.items():
      for attr, sc in ob_scs.items():
        if 'location' in attr: continue #ignore location for now

        if ob not in objs:
          for o in objs:
            if o in ob or ob in o: ob = o
        st = states[i][ob][attr][0]

        pcneg = sc[1] + sc[4] + sc[7]
        pcpos = sc[2] + sc[3] + sc[8]
        pcnone = sc[0] + sc[5] + sc[6]

        effneg = sc[1] + sc[3] + sc[5]
        effpos = sc[2] + sc[4] + sc[6]
        effunk = sc[7] + sc[8] #unknown (probability = 0.5)
        effna = sc[0] #irrelevant (probability = passthrough)

        cfprobs[(i, ob, attr)] = pcpos*(1 - st) + pcneg*st

        states[i+1][ob][attr][0] = effpos + 0.5*effunk + states[i][ob][attr][0]*effna #1.0*effpos + 0.0*effneg + 0.5*effunk + <last state>*effna
        states[i+1][ob][attr][1] *= effna #probability of affectors being previous sentences multiplied by likelihood of passthrough 
        states[i+1][ob][attr][1][i+1] = 1 - effna
        states[i+1][ob][attr][2] = ['f', 't', '_'][np.argmax([pcneg, pcpos, pcnone])]
        states[i+1][ob][attr][3] = ['f', 't', '_'][np.argmax([effneg, effpos, effunk+effna])]
  #print(states['milk']['exist'])
  return cfprobs, states

def get_best_conflict(cfprobs_src, states_src, cfprobs_other):
  if not cfprobs_src:
    return None, 0, 0
  for cfcause, cprob in sorted(cfprobs_src.items(), key=lambda x: x[1], reverse=True):
    #if (cfcause, cprob) in cfprobs_other.items(): continue #ignore if also present in other story

    bp, bpobj, bpattr = cfcause
    #if bp == 0: continue

    evidence = np.argmax(states_src[bp][bpobj][bpattr][1])
    if evidence == 0: continue #if evidence is defaults, ignore

    cf1cause, cf1max = cfcause, cprob
    cf1ev = evidence
    return cf1cause, cf1max, cf1ev

  #print('no good conflict found')
  cf1cause, cf1max = max(cfprobs_src.items(), key=lambda x: x[1])
  bp, bpobj, bpattr = cf1cause 
  ev = np.argmax(states_src[bp][bpobj][bpattr][1])
  return cf1cause, cf1max, ev

def which_is_plaus(res1, res2): #returns which one is PLAUSIBLE
  cfprobs1, states1 = res1
  cfprobs2, states2 = res2

  plaus_story = 0

  cf1cause, cf1max, cf1ev = get_best_conflict(cfprobs1, states1, cfprobs2)
  cf2cause, cf2max, cf2ev = get_best_conflict(cfprobs2, states2, cfprobs1)

  if not cf1cause: return 1, 0, 0, (None, None, '_'), (None, None, '_')
  if not cf2cause: return 2, 0, 0, (None, None, '_'), (None, None, '_')

  plaus_story = 1 if cf1max < cf2max else 2 #if max conflict likelihood is lower for cf1, say it is plausible
  i_cause, i_max, i_ev, i_states = (cf2cause, cf2max, cf2ev, states2) if cf1max < cf2max else (cf1cause, cf1max, cf1ev, states1) 
  i_bp, i_bpobj, i_bpattr = i_cause

  bpprecond = i_states[i_bp+1][i_bpobj][i_bpattr][2] #plus one because first entry is before anything - NOT ANYMORE!!
  eveffect = i_states[i_ev][i_bpobj][i_bpattr][3]

  return plaus_story, i_bp, i_ev, (i_bpobj, i_bpattr, bpprecond), (i_bpobj, i_bpattr, eveffect)

##Use ground truth state labels

In [14]:
#use ground truth state labels
#prob_scs[storyid][sentidx][obj][attr] = distribution
gt_prob_scs = {suid: [{obj: {attr: np.eye(9)[get_sc(sentstates, obj, attr)] for attr in attrs} for obj in {o for p in sentstates.values() for o, _ in p}} for sentstates in story['states']] for suid, story in trip['test'].items()}
#print(prob_scs['100'][0])

##Get set of stories with at least one evidence-breakpoint conflict

In [15]:
#determine how many sentences are actually verifiable with conflicting state changes

verf_stories = dict()

nevverf, nstoryverf, evtot, storytot = 0, 0, 0, 0
for suid, story in trip['test'].items():
  if story['plausible']: continue

  bp = story['breakpoint']
  bpscs = {attr: dict(attrscs) for attr, attrscs in story['states'][bp].items()}

  storyverf = False
  for ev in story['confl_sents']:
    evverf = False #if there is any conflict for this evidence
    for attr in attrs:
      for ob, evsc in story['states'][ev][attr]:
        if evsc == 0: continue #if doesn't affect attribute, can't be evidence
        if ob not in bpscs[attr]: continue #if breakpoint doesn't care about object, can't be evidence

        prec = sc_to_precond[bpscs[attr][ob]]
        if prec == '_': continue #if no precondition, can't be conflict for this (attr,ob)

        if sc_to_effect[evsc] != prec:
          evverf = True
          break
      if evverf: break
    if evverf:
      storyverf = True
      nevverf += 1
    evtot += 1
  if storyverf:
    nstoryverf += 1
    verf_stories[suid] = story
  storytot += 1

print(nevverf/evtot)
print(nstoryverf/storytot)

0.23859386686611816
0.2340909090909091


##Simulate!

In [16]:
USE_GROUND_TRUTH_STATE_CHANGES = False
USE_REFINED_TEST_SET = False

if USE_REFINED_TEST_SET:
  #use only stories that have a state confict between evidence and breakpoint
  spl = copy.deepcopy(verf_stories)
  spl.update({suid: story for suid, story in trip['test'].items() if story['plausible']})
else:
  spl = trip['test']

sim_prob_scs = gt_prob_scs if USE_GROUND_TRUTH_STATE_CHANGES else prob_scs

spairs = {p: [q for q in spl.keys() if q.startswith(p + '-')] for p in spl.keys() if '-' not in p}

correct, consis, verif, total = 0, 0, 0, 0

random.seed(73)

for plaus_id, confl_ids in spairs.items():
  p_res = story_sim(sim_prob_scs[plaus_id], [x.strip() for x in spl[plaus_id]['objects'].split(',')], spl[plaus_id]['actor'])
  for confl_id in confl_ids:
    c_res = story_sim(sim_prob_scs[confl_id], [x.strip() for x in spl[confl_id]['objects'].split(',')], spl[confl_id]['actor'])

    correct_answer = 1 if random.random() < 0.5 else 2

    pred_plaus, bp, ev, bpprecond, eveffect = which_is_plaus(p_res if correct_answer==1 else c_res, p_res if correct_answer==2 else c_res) 

    bpprecondobj, bpprecondattr, bpprecondstate = bpprecond
    eveffectobj, eveffectattr, eveffectstate = eveffect

    if pred_plaus == correct_answer: 
      correct += 1
      #print(spl[confl_id]['breakpoint'], bp, spl[confl_id]['confl_sents'], ev)
      if (spl[confl_id]['breakpoint'] == bp and (ev-1) in spl[confl_id]['confl_sents']) or ((ev, bp) in spl[confl_id]['confl_sents']):
        consis += 1

        #print(spl[confl_id]['states'][bp], bpprecondobj, bpprecondattr)
        #print(spl[confl_id]['states'][ev], eveffectobj, eveffectattr)

        #need to force non-default here
        if bpprecondstate == sc_to_precond[get_sc(spl[confl_id]['states'][bp], bpprecondobj, bpprecondattr)] \
          and eveffectstate == sc_to_effect[get_sc(spl[confl_id]['states'][ev-1], eveffectobj, eveffectattr)] \
          and eveffectstate != attr_defaults[attr]:
          verif += 1

    total += 1
print('accuracy: %.1f%%' % (100*correct/total))
print('consistency: %.1f%% (relative to acc: %.1f%%)' % (100*consis/total, 100*consis/correct))
print('verifiability: %.1f%% (relative to acc: %.1f%%)' % (100*verif/total, 100*verif/correct))

accuracy: 54.9%
consistency: 9.3% (relative to acc: 17.0%)
verifiability: 6.1% (relative to acc: 11.2%)
