# TARGETED ERROR ANALYSIS

This notebook contains the code used to perform a targeted error analysis on the model output. It uses the get_groupins() function as well as parts of the get_results() function code. The notebook is organised by generalisation case. For each case, the first cell defines the path to the model output file being analysed. The next cell runs a general evaluation of the results which calculates the number of cases which include index errors, missing elements, and inserted elements in the predicted LFs, as well as the overall number of incorrect predictions. This code is the same for all generalisation cases. The last cell of code is specific to the targeted focus of each gen case, checking for different specific errors depending on the requirements of the task. 

# Functions

In [1]:
def get_groupings(lf):
    """
    extract the relational groupings from an LF structure 
    - the reasoning behind this is to check indexing, which draws out these relational groupings

    lf : can be either predicted or target LF string
    """
    
    import re
    #lf = re.split(';|AND', lf_string)

    rel_list = []
    for item in lf:
        if "," in item:
            rel_list.append(item)    

    for item in rel_list:
        if item in lf:
            lf.remove(item)

    rel_groups = []
    #print(lf)
    
    for relation in rel_list:
        group = []
        role = relation.split(" (")[0]
        if '.' in role:
            role = role.split('.')
            group.append(role[0])
            group.append(role[1])
        else:
            group.append(role)

        indxs = re.findall(r'\d+', relation)

        if " ( ( " in relation:
            #print(relation)
            continue 
        if " (" not in relation:
            continue 
        if len(indxs) < 1:
            continue 
        if len(indxs) == 1:
            ind1 = indxs[0]
            #print(relation)
            name = relation.split(" (")[1].split(',')[1]
            name = re.findall("[a-zA-Z]+", name)
            if len(name) < 1:
                name = relation.split(" (")[1].split(',')[0]
                group.append(name)
            else:
                group.append(name[0])
            for item in pred:
                if f' {ind1} ' in item:
                    group.append(item.split(" (")[0])
        else:
            ind1 = indxs[0]
            ind2 = indxs[1]
            for item in lf:
                if f' {ind1} ' in item:
                    group.append(item.split(" (")[0])
                elif f' {ind2} ' in item:
                    group.append(item.split(" (")[0])
        #print(group)
                
        rel_groups.append(group)
            
    return rel_groups

# Object Modification to Subject Modification 

In [15]:
#gen_path = "/Users/marina/Desktop/RESULTS/Rpos_Transformr/SEED66/PRED_66_ende_transformer_recogspos_v1_cogs.tsv"
#gen_path = "/Users/marina/Desktop/RESULTS/C_Transformer/SEED88/PRED_88_ende_transformer_cogs_v1_cogs.tsv"
gen_path = "/Users/marina/Desktop/RESULTS/R_LSTM/SEED42/PRED_42_ende_lstm_recogs_v1_cogs.tsv"
#gen_path = "/Users/marina/Desktop/RESULTS/R_Transformer/SEED66/PRED_66_ende_transformer_recogs_v1_cogs.tsv"

In [16]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

#general checks : only predict lexical items present in input, keep track of variable naming (indexing)
inserted = 0
missing = 0
mis_index = 0
total = 0
incorrect = 0
inc_cases = []
countmult=0
nmodmis_count = 0
for line in content_t[1:-1]:
    #total+=1

    sent = line[0]
    pred = line[1]
    target = line[2]
    cat = line[3]

    if cat == ' obj_pp_to_subj_pp':
        total+=1
    
        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)
        
        p_elements = []
        t_elements = []
        
        for item in pred:
            item = item.split('(')
            p_elements.append(item[0])
        for item in target:
            item = item.split('(')
            t_elements.append(item[0])
        
        case_missing=0
        for el in t_elements:
            if el not in p_elements:
                case_missing+=1
                if 'nmod' in el:
                    nmodmis_count +=1
            
        if case_missing > 0:
            missing+=1
            

        ins_els = []
        case_inserted=0
        for el in p_elements:
            if el not in t_elements:
                case_inserted+=1
                ins_els.append(el)
                
        if case_inserted > 0:
            inserted+=1
    
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()
        
        case_misindex = 0
        for group_p, group_t in zip(ind_group_pred, ind_group_target):
            if group_p != group_t:
                count=0
                    
                case_misindex +=1
                
        
        if case_misindex > 0:
            mis_index += 1

        if case_misindex > 0 or case_inserted > 0 or case_missing > 0:
            incorrect+=1
            inc_cases.append(line)
        
            

error_pattern = {'total': total, 'incorrect': incorrect, 'missing': missing, 'inserted': inserted, 'index error': mis_index}
print(error_pattern)
print(nmodmis_count)


851
{'total': 1000, 'incorrect': 996, 'missing': 973, 'inserted': 851, 'index error': 994}
604


In [22]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

# targeted check
missing_subj = 0  ## count number of cases where main subject is missing
multiple_subj = 0 ## count number of cases where main subject is instantiated more than once 
inc_relation = 0  ## count number of cases where relevant relation incorrectly excludes main subject 
inc_nmod = 0   ## count number of cases where there's an nmod related mistake 
missing_nmod = 0 ## count number of cases where nmod element is missing 
missing_rel = 0 ## count number of cases where the relevant thematic relation is missing
countsame=0
misfirstnmod=0
missing_verb=0
nonmod_count = 0
inserted_nmod = 0
nmodmissing_subj = 0
relmissing_subj = 0
relmissing_verb = 0

for line in inc_cases:
    #total+=1
    
    sent = line[0]
    
    pred = line[1]
    target = line[2]
    cat = line[3]
    
    if cat == ' obj_pp_to_subj_pp':

        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)

        sent_list = sent.split(" ")
        main_subj = sent_list[1]

        notsubj_count = 0
        subj_count = 0
        for item in pred:
            if f'{main_subj} (' in item:
                subj_count+=1
                subj_ind = re.findall(r'\d+', item)

            else:
                notsubj_count +=1
                
        if notsubj_count == len(pred):
            missing_subj +=1 
        if subj_count > 1:
            multiple_subj += 1

        if f' {main_subj} . nmod ' not in line[1]:
            misfirstnmod+=1

        count=0
        for item in target:
            if ',' in item and 'nmod' not in item:
                count+=1
                if count==1:
                    verb_ind = re.findall(r'\d+', item.split(',')[0])
                    verb = item.split('.')[0]
                   # for item in target: ## UNCOMMENT FOR ReCOGS
                    #    if verb_ind[0] in item and ',' not in item:
                     #       print(item)
                     #       verb = item.split('(')[0]
                #print(verb)

        if verb not in line[1]:
            missing_verb+=1
        
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()

        target_nmod_count=0
        for t_group in ind_group_target:
            for item in t_group:
                if main_subj in item:
                    if ' nmod ' in t_group:
                        nmod_group = t_group
                    else:
                        rel_group = t_group
            if ' nmod ' in t_group:
                target_nmod_count+=1
        
        norel_count = 0 
        if nmod_group not in ind_group_pred:
            inc_nmod +=1
        if rel_group not in ind_group_pred:
            inc_relation +=1

        current_verbmiscount = relmissing_verb
        current_relinccount = inc_relation
        pred_nmod_count=0
        no_subj=0
        misverbcount = 0
        missubjcount = 0
        for p_group in ind_group_pred:
            if ' nmod ' in p_group:
                pred_nmod_count+=1
                if f' {main_subj}' not in p_group:
                    no_subj+=1
            if rel_group[0] in p_group:
                #print(ind_group_pred)
                if f' {main_subj}' in p_group or f' * {main_subj}' in p_group :
                   #print(rel_group)
                   if rel_group[2] not in p_group:
                       #relmissing_verb +=1  
                       misverbcount +=1
                else:
                   #relmissing_subj+=1
                    missubjcount+=1
                    if rel_group[2] not in p_group:
                       #relmissing_verb +=1
                       misverbcount+=1
                   
            else:
                norel_count+=1
                

        if misverbcount > 0:
            relmissing_verb+=1
        if missubjcount > 0:
            relmissing_subj+=1
        if no_subj == pred_nmod_count:
            nmodmissing_subj+=1
        if pred_nmod_count != target_nmod_count:
            if pred_nmod_count < target_nmod_count:
                missing_nmod+=1
            if pred_nmod_count == 0:
                nonmod_count+=1
            if pred_nmod_count > target_nmod_count:
                inserted_nmod+=1
     
        if norel_count == len(ind_group_pred):
            missing_rel+=1
        

print(f'TOTAL: {len(inc_cases)}')
print(f'SUBJECT -----------------------------')
print(f'SUBJ OMISSION: {missing_subj}')
print(f'SUBJ DUPLICATION: {multiple_subj}')
print(f'RELATION -----------------------------')
print(f'RELATION ERROR: {inc_relation}')
print(f'RELATION ROLE MISSING: {missing_rel}')
print(f'RELATION MAIN SUBJ MISSING: {relmissing_subj}')
print(f'RELATION VERB MISSING/MISPREDICTED: {relmissing_verb}')
print(f'PP ELEMENT -----------------------------')
print(f'NMOD ERROR: {inc_nmod}')
print(f'NMOD MISSING: {missing_nmod}')
print(f'NMOD INSERTED: {inserted_nmod}')
print(f'NMOD MAIN SUBJ MISSING: {nmodmissing_subj}')

print(countsame)
print(misfirstnmod)
print(missing_verb)


TOTAL: 996
SUBJECT -----------------------------
SUBJ OMISSION: 0
SUBJ DUPLICATION: 9
RELATION -----------------------------
RELATION ERROR: 702
RELATION ROLE MISSING: 29
RELATION MAIN SUBJ MISSING: 197
RELATION VERB MISSING/MISPREDICTED: 629
PP ELEMENT -----------------------------
NMOD ERROR: 970
NMOD MISSING: 524
NMOD INSERTED: 52
NMOD MAIN SUBJ MISSING: 969
0
996
996


# PP Recurssion

In [39]:
gen_path = "/Users/marina/Desktop/RESULTS/R_LSTM/SEED42/PRED_42_ende_lstm_recogs_v1_cogs.tsv"
#gen_path = "/Users/marina/Desktop/RESULTS/Rpos_LSTM/SEED88/PRED_88_ende_lstm_recogspos_v1_cogs.tsv"

In [40]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

#general checks : only predict lexical items present in input, keep track of variable naming (indexing)
inserted = 0
missing = 0
mis_index = 0
total = 0
incorrect = 0
inc_cases = []
countmult=0
total_countnmod=0
countnmod=0
count_insnmod=0
ccount=0

for line in content_t[1:-1]:
    #total+=1

    sent = line[0]
    pred = line[1]
    target = line[2]
    cat = line[3]

    if cat == ' pp_recursion':
        total+=1
    
        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)
        
        p_elements = []
        t_elements = []
        
        for item in pred:
            item = item.split('(')
            p_elements.append(item[0])
        for item in target:
            item = item.split('(')
            t_elements.append(item[0])
        
        case_missing=0
        missingnmod=0
        for el in t_elements:
            if el not in p_elements:
                case_missing+=1
                if 'nmod' in el:
                    #if 'hippo' not in line[1]:
                    total_countnmod+=1
                    missingnmod+=1
       
        if case_missing > 0:
            missing+=1
        if missingnmod > 0:
            countnmod+=1
            

        ins_els = []
        case_inserted=0
        ins_countnmod=0
        for el in p_elements:
            if el not in t_elements:
                case_inserted+=1
                ins_els.append(el)
                if 'nmod' in el:
                    ins_countnmod+=1
                else:
                    if '.' in el:
                        ccount+=1
             

       # if len(p_elements) > len(t_elements):
        #    print(p_elements)
         #   print(t_elements)
          #  print()

        if ins_countnmod > 0:
            count_insnmod+=1
        if case_inserted > 0:
            inserted+=1

        #print(ins_els)  ## to check if there are more than one inserted element in prediction (i.e. more than just the subj element)
        #if len(ins_els)>1:
         #   print(sent)
          #  print(pred)
           # print(target)
    
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()
        
        case_misindex = 0
        for group_p, group_t in zip(ind_group_pred, ind_group_target):
            if group_p != group_t:
                count=0
         
                case_misindex +=1
                
        if case_misindex > 1:
            countmult+=1
        if case_misindex > 0:
            mis_index += 1
       
        if case_misindex > 0 or case_inserted > 0 or case_missing > 0:
            incorrect+=1
            inc_cases.append(line)
       
            

error_pattern = {'total': total, 'incorrect': incorrect, 'missing': missing, 'inserted': inserted, 'index error': mis_index}
#print(countmult)
print(error_pattern)
print(total_countnmod)
print(countnmod)
print(count_insnmod)
print(ccount)

{'total': 1000, 'incorrect': 1000, 'missing': 983, 'inserted': 760, 'index error': 1000}
2302
856
2
0


In [41]:
## Overview of targeted check

## make a list of nouns in target LF (i.e. identify all elements in LF without ',', so only one index, and put in a list)
#   problem with recogs LF because verbs will also appear here, can avoid by looking for agent element, verb will always be mapped
##  to an agent element, find the agent in target LF, find the item its mapped to, remove this from the list of nouns

## check that all nouns from list are present in predicted
#   check for nmod relations by identifying all nmods (with their prepositions) in target LF, identifying the first index its mapped to
##  checking the noun this corresponds to and then making this check in predicted LF to make sure its the same

## as an extra check, check the theme element in the sentence and that its mapped to correct object
#   again, identify theme in target, identify object its mapped to, check for this mapping in predicted



In [42]:
# import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

# targeted check
noun_error = 0 ## count number of cases where there is an error with the predicted nouns
missing_noun = 0 ## count nunber of cases where a noun from the different levels of PP is missing
inserted_noun = 0 ## count number of cases where a noun not from input is inserted
nmod_error = 0 ## count number of cases where there is an error with the predicted nmod relations
missing_nmod = 0 ## count number of cases where an nmod relation is missing
inc_nmodmap = 0 ## count number of cases where an nmod relation is not mapped to correct noun
inserted_nmod = 0 ## count number of cases where an nmod relation is incorrectly inserted
theme_error = 0 ## count number of cases where the theme element is incorrect, not mapped to correct object element

correctnmod = [] ## append number of correct nmod element predictions per case
incorrectnmod = [] ## append number of incorrect nmod element predictions per case

for line in inc_cases:
    #total+=1
    
    sent = line[0]
    
    pred = line[1]
    target = line[2]
    cat = line[3]
    
    if cat == ' pp_recursion':

        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)

        noun_list = []
        nmod_list = []
        for el in target:
            if ',' not in el:
                noun_list.append(el.split('(')[0])
            elif 'agent' in el:
                if '.' in el:
                    verb = el.split('.')[0]
                else:
                    el = el.split(',')[0]
                    verb_ind = re.findall(r'\d+', el)
                    for el in target:
                        if ',' not in el and verb_ind[0] in el:
                            verb = el.split('(')[0]
            elif 'nmod' in el:
                nmod_list.append(el.split('(')[0])
            elif 'theme' in el:
                el = el.split(',')[1]
                obj_ind = re.findall(r'\d+', el)
                for el in target:
                    if ',' not in el and obj_ind[0] in el:
                        obj = el.split('(')[0]

        
        if verb[0] in noun_list:
            noun_list.remove(verb[0])

        pred_nounlist = []
        pred_nmodlist = []
        for el in pred:
            if ',' not in el:
                pred_nounlist.append(el.split('(')[0])
            elif 'agent' in el:
                if '.' in el:
                    verb = el.split('.')[0]
                else:
                    el = el.split(',')[0]
                    verb_ind = re.findall(r'\d+', el)
                    for el in pred:
                        if ',' not in el and verb_ind[0] in el:
                            verb = el.split('(')[0]
            elif 'nmod' in el:
                pred_nmodlist.append(el.split('(')[0]) 
            elif 'theme' in el:
                el = el.split(',')[1]
                mapped_obj = re.findall(r'\d+', el)
                if len(mapped_obj) > 0:
                    for el in pred:
                        if ',' not in el and mapped_obj[0] in el:
                            pred_obj = el
        if verb[0] in noun_list:
            pred_nounlist.remove(verb[0])

        pred_nounlist.sort()
        noun_list.sort()
        pred_nmodlist.sort()
        nmod_list.sort()
        
        if pred_nounlist != noun_list:
            noun_error+=1
            if len(pred_nounlist) > len(noun_list):
                inserted_noun+=1
            elif len(pred_nounlist) < len(noun_list):
                missing_noun+=1

        if pred_nmodlist != nmod_list:
            nmod_error+=1
            if len(pred_nmodlist) > len(nmod_list):
                inserted_nmod+=1
            if len(pred_nmodlist) < len(nmod_list):
                missing_nmod+=1
           
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()
        
        correct_prednmod=0
        incorrect_prednmod=0
        nmod_maperror=0
        nmodstring = ''.join(nmod_list)
        for item in pred_nmodlist:
            if item in nmodstring:
                correct_prednmod+=1
                itemlist = item.split('.') ## for COGS LF structure
                #itemlist.remove(itemlist[-1])
                #itemstring = ''.join(itemlist)
                itemstring = item
                for group in ind_group_pred:
                    groupstring = ''.join(group)
                   # if itemstring in groupstring:      
                    if len(itemlist) > 1:
                        if itemlist[0] in groupstring and itemlist[1] in groupstring:
                            p_grouping = group
                for tgroup in ind_group_target:
                    t_groupstring = ''.join(tgroup)
                    if len(itemlist) > 1:
                        if itemlist[0] in t_groupstring and itemlist[1] in t_groupstring:
                   # if itemstring in t_groupstring:
                            t_grouping = tgroup
                if len(itemlist) > 1:
                    if p_grouping != t_grouping:
                        nmod_maperror+=1
            else:
                incorrect_prednmod+=1
        if nmod_maperror > 0:
            inc_nmodmap+=1
        
        correctnmod.append(correct_prednmod)
        incorrectnmod.append(incorrect_prednmod)
        
        if obj != pred_obj:
            theme_error+=1
        if 'theme' not in line[1]:
            theme_error+=1
       
    

avg_correctnmod = sum(correctnmod)/len(correctnmod)
avg_incorrectnmod = sum(incorrectnmod)/len(incorrectnmod)

print(f'TOTAL: {len(inc_cases)}')
print(f'NP -----------------------------')
print(f'NOUN ERROR: {noun_error}')
print(f'NOUN MISSING: {missing_noun}')
print(f'NOUN INSERTED: {inserted_noun}')
print(f'NMOD RELATION -----------------------------')
print(f'NMOD ERROR: {nmod_error}')
print(f'NMOD MISSING: {missing_nmod}')
print(f'NMOD INSETED: {inserted_nmod}')
print(f'NMOD MAPPING ERROR: {inc_nmodmap}')
print(f'AVERAGE CORRECT NMOD ELEMENT {avg_correctnmod} AND INCORRECT {avg_incorrectnmod}')
print(f'THEME RELATION -----------------------------')
print(f'THEME ERROR: {theme_error}')


TOTAL: 1000
NP -----------------------------
NOUN ERROR: 969
NOUN MISSING: 919
NOUN INSERTED: 0
NMOD RELATION -----------------------------
NMOD ERROR: 1000
NMOD MISSING: 1000
NMOD INSETED: 0
NMOD MAPPING ERROR: 973
AVERAGE CORRECT NMOD ELEMENT 2.007 AND INCORRECT 0.002
THEME RELATION -----------------------------
THEME ERROR: 1000


# Theme NP to Objected Omitted Transitive Subject

In [13]:
#gen_path = "/Users/marina/Desktop/RESULTS/C_LSTM/SEED66/PRED_66_ende_lstm_cogs_v1_cogs.tsv"
gen_path = "/Users/marina/Desktop/FullReCOGS/[100Epoch]LSTM_77/PRED_77_ende_lstm_recogs_v1_cogs.tsv"

In [19]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

#general checks : only predict lexical items present in input, keep track of variable naming (indexing)
inserted = 0
missing = 0
mis_index = 0
total = 0
incorrect = 0
inc_cases = []
countmult=0
counthip=0

for line in content_t[1:-1]:
    #total+=1

    sent = line[0]
    pred = line[1]
    target = line[2]
    cat = line[3]

    if cat == ' only_seen_as_unacc_subj_as_obj_omitted_transitive_subj':
        total+=1
    
        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)
        
        p_elements = []
        t_elements = []
        
        for item in pred:
            item = item.split('(')
            p_elements.append(item[0])
        for item in target:
            item = item.split('(')
            t_elements.append(item[0])
        
        case_missing=0
        for el in t_elements:
            if el not in p_elements:
                case_missing+=1
                if 'hip' in el:
                    #if 'hippo' not in line[1]:
                    counthip+=1
        
        if case_missing > 0:
            missing+=1
            

        ins_els = []
        case_inserted=0
        for el in p_elements:
            if el not in t_elements:
                case_inserted+=1
                ins_els.append(el)
        
        if case_inserted > 0:
            inserted+=1

        #print(ins_els)  ## to check if there are more than one inserted element in prediction (i.e. more than just the subj element)
        #if len(ins_els)>1:
         #   print(sent)
          #  print(pred)
           # print(target)
    
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()
        
        case_misindex = 0
        for group_p, group_t in zip(ind_group_pred, ind_group_target):
            if group_p != group_t:
                count=0
         
                case_misindex +=1
                
        if case_misindex > 1:
            countmult+=1
        if case_misindex > 0:
            mis_index += 1
    
       
        if case_misindex > 0 or case_inserted > 0 or case_missing > 0:
            incorrect+=1
            #inc_cases.append(line)
            #print(sent)
            

error_pattern = {'total': total, 'incorrect': incorrect, 'missing': missing, 'inserted': inserted, 'index error': mis_index}
#print(countmult)
print(error_pattern)
print(counthip)

{'total': 1000, 'incorrect': 194, 'missing': 174, 'inserted': 174, 'index error': 194}
173


In [29]:
## Overview of Targeted Check

### targeted check the main subject element (which has only been seen as an unaccusative object) and its agent linked element
##  can also check for any theme elements to make sure the main subject isn't being linked to those \

## main subject always 'hippo' (only seen once in training so can except misprediction) 

# Olivia wished that a hippo dusted .	
# wish . agent ( x _ 1 , Olivia ) AND wish . ccomp ( x _ 1 , x _ 5 ) AND hippo ( x _ 4 ) AND dust . agent ( x _ 5 , x _ 4 )	 
# TARGET FOCUS: [ hippo ( x _ 4 ) ] AND dust . [ agent ] ( x _ 5 , [ x _ 4 ] )

In [18]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

# targeted check
inc_subj = 0  ## count number of cases where main subject is missing/mispredicted
subjindex_omission = 0 ## count number of cases where main subject (if present) is missing an index label
inc_agent = 0 ## count number of cases where main agent element is incorrect
missing_agentrel = 0 ## count number of cases where 'mainverb . agent' element is not present
agentrel_indexerror = 0 ## count number of cases where wrong index is used in 'mainverb . agent' element 
theme_error = 0 ## count number of cases where main subject is incorrectly linked to a theme element 
mispredicted_rel = 0 ## count number of cases where main agent element may have been mispredicted in form of a theme element


for line in inc_cases:
    #total+=1
    
    sent = line[0]
    
    pred = line[1]
    target = line[2]
    cat = line[3]
    
    if cat == ' only_seen_as_unacc_subj_as_obj_omitted_transitive_subj':

        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)

        for el in target:
            if 'hippo' in el:
                subj_ind = re.findall(r'\d+', el)
                subj_position = target.index(el)
        for el in target:
            if ' agent ' in el:
                el_ind = el.split(',')
                verb_ind = re.findall(r'\d+', el_ind[0])
              
        for el in target:
            if verb_ind[0] in el and ',' not in el:
                main_verb = el.split('(')[0]
                   
               # el_ind = el.split(',')[1]
               # if subj_ind[0] in el_ind:
                #    main_verb = el.split('.')[0]

        missing_count = 0
        for el in pred:
            if ' hippo ' not in el and ' * hippo ' not in el:
                missing_count+=1
                pred_subjind = []
            else:
                pred_subjind = re.findall(r'\d+', el) # for ReCOGS main subject index and COGS if index is wrong
                
            if main_verb in el:
                ind = re.findall(r'\d+', el)
                if len(ind) > 0:
                    pred_verb_ind = ind[0]
            
        if len(pred_subjind) == 0:
            subjindex_omission+=1
        
        if missing_count == len(pred):
            inc_subj += 1
          #  if '.' not in pred[subj_position]:
            #    possible_misprediction = pred[subj_position]  ## if hippo is not in LF, using the index of target LF to find the possible element mispredicted in its place
             #   pred_subjind = re.findall(r'\d+', possible_misprediction)
              #  print(possible_misprediction)
              #  print(pred)

        themecount=0
        indexerror=0
        countmissing_agent = 0
       # if f'{main_verb}. agent ' not in line[1]:
        #print(pred_verb_ind[0])
        if f'agent ( {pred_verb_ind}' not in line[1]:
            missing_agentrel+=1
            countmissing_agent+=1
        for item in pred:
            if f'agent ( {pred_verb_ind}' in item:
               # if subj_ind[0] not in item: ## Comment out for ReCOGS
                #    indexerror+=1
                if len(pred_subjind) > 0:
                    if pred_subjind[0] not in item:
                        indexerror+=1
            if 'theme' in item:
                if subj_ind[0] in item:
                    themecount+=1
                  
                elif len(pred_subjind) > 0:
                    if pred_subjind[0] in item:
                        themecount+=1
        
        if indexerror > 0:
            agentrel_indexerror+=1
            
        if themecount > 0:
            theme_error+=1

        if missing_agentrel > 0 and themecount > 0:
            mispredicted_rel += 1
            
        
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()


print(f'TOTAL: {len(inc_cases)}')
print(f'SUBJECT -----------------------------')
print(f'SUBJ OMISSION/MISPREDICTION: {inc_subj}')
#print(f'SUBJ MISSING INDEX: {subjindex_omission}')
print(f'AGENT RELATION -----------------------------')
print(f'AGENT ERROR: {inc_agent}')
print(f'AGENT MISSING: {missing_agentrel}')
print(f'AGENT INDEX ERROR: {agentrel_indexerror}')
#print(f'RELATION VERB MISSING/MISPREDICTED: {relmissing_verb}')
print(f'THEME RELATION -----------------------------')
print(f'THEME MAPPED TO SUBJECT: {theme_error}')
print(f'THEME POSSIBLY INSERTED IN PLACE OF AGENT: {mispredicted_rel}')

TOTAL: 0
SUBJECT -----------------------------
SUBJ OMISSION/MISPREDICTION: 0
AGENT RELATION -----------------------------
AGENT ERROR: 0
AGENT MISSING: 0
AGENT INDEX ERROR: 0
THEME RELATION -----------------------------
THEME MAPPED TO SUBJECT: 0
THEME POSSIBLY INSERTED IN PLACE OF AGENT: 0


# Object to Subject (Common Noun)

In [8]:
gen_path = "/Users/marina/Desktop/FullReCOGS/[100Epoch]LSTM_77/PRED_77_ende_lstm_recogs_v1_cogs.tsv"
#gen_path = "/Users/marina/Desktop/RESULTS/R_Transformer/SEED66/PRED_66_ende_transformer_recogs_v1_cogs.tsv"

In [1]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

#general checks : only predict lexical items present in input, keep track of variable naming (indexing)
inserted = 0
missing = 0
mis_index = 0
total = 0
incorrect = 0
inc_cases = []
countmult=0

for line in content_t[1:-1]:
    #total+=1

    sent = line[0]
    pred = line[1]
    target = line[2]
    cat = line[3]

    if cat == ' obj_to_subj_common':
        total+=1
    
        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)
        
        p_elements = []
        t_elements = []
        
        for item in pred:
            item = item.split('(')
            p_elements.append(item[0])
        for item in target:
            item = item.split('(')
            t_elements.append(item[0])
        
        case_missing=0
        for el in t_elements:
            if el not in p_elements:
                case_missing+=1
              
        if case_missing > 0:
            missing+=1
            

        ins_els = []
        case_inserted=0
        for el in p_elements:
            if el not in t_elements:
                case_inserted+=1
                ins_els.append(el)
                
        if case_inserted > 0:
            inserted+=1

        #print(ins_els)  ## to check if there are more than one inserted element in prediction (i.e. more than just the subj element)
        #if len(ins_els)>1:
         #   print(sent)
          #  print(pred)
           # print(target)
    
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()
        
        case_misindex = 0
        for group_p, group_t in zip(ind_group_pred, ind_group_target):
            if group_p != group_t:
                count=0
                for i in group_t:
                    if 'cockroach' not in i.strip(' '):
                        count+=1
                case_misindex +=1
                
        if case_misindex > 1:
            countmult+=1
    
        if case_misindex > 0:
            mis_index += 1
            
        if case_misindex > 0 or case_inserted > 0 or case_missing > 0:
            incorrect+=1
            inc_cases.append(line)

error_pattern = {'total': total, 'incorrect': incorrect, 'missing': missing, 'inserted': inserted, 'index error': mis_index}
print(countmult)
print(error_pattern)

NameError: name 'gen_path' is not defined

In [None]:
## Overview of Targeted Check

## get the groupings (since already have code for that that works on cogs+recogs indexing) and 
##  output only the groupings with 'agent' and 'theme' roles, compare target to pred, print problem cases and analyse


In [2]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

# targeted check
inc_agent_rel = 0  ## count number of incorrect agent relations
inc_theme_rel = 0  ## count number of incorrect theme relations
inc_subj = 0   ## count number of cases which omit 'cockroach' subject
counttt=0

count_agenterror = 0
count_themeerror = 0

for line in inc_cases:
    #total+=1
    
    sent = line[0]
    
    pred = line[1]
    target = line[2]
    cat = line[3]
    
    if cat == ' obj_to_subj_common':

        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)

        missing_count = 0
        for i in pred:
            if ' cockroach ' not in i:
                #print(i)
                missing_count+=1
        
        if missing_count == len(pred):
            inc_subj += 1

        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()

        count = 0
        #print()
        for t_grouping in ind_group_target:
            if ' agent' in t_grouping:
                group_ind = ind_group_target.index(t_grouping)
                if len(ind_group_pred) < (group_ind+1):
                    continue
                else:
                    pred_grouping = ind_group_pred[group_ind]
                    itemcount=0
                    if t_grouping != pred_grouping:
                        count+=1
                        inc_agent_rel+=1

            if ' theme' in t_grouping:
                group_ind = ind_group_target.index(t_grouping)
                if len(ind_group_pred) < (group_ind+1):
                    continue
                else:
                    pred_grouping = ind_group_pred[group_ind]
                    if t_grouping != pred_grouping:
                        inc_theme_rel+=1
               
        

print(f'TOTAL: {len(inc_cases)}')
print(f'SUBJ OMISSION: {inc_subj}')
print(f'AGENT ERROR: {inc_agent_rel}')
print(count_agenterror)
print(f'THEME ERROR: {inc_theme_rel}')
print(count_themeerror)


NameError: name 'gen_path' is not defined

# Active to Passive

In [2]:
gen_path = "/Users/marina/Desktop/FullReCOGS/[100Epoch]LSTM_77/PRED_77_ende_lstm_recogs_v1_cogs.tsv"

In [7]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

#general checks : only predict lexical items present in input, keep track of variable naming (indexing)
inserted = 0
missing = 0
mis_index = 0
total = 0
incorrect = 0
inc_cases = []
countmult=0

countlen = 0
for line in content_t[1:-1]:
    #total+=1

    sent = line[0]
    pred = line[1]
    target = line[2]
    cat = line[3]

    if cat == ' active_to_passive':
        total+=1
    
        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)
        
        p_elements = []
        t_elements = []
        
        for item in pred:
            item = item.split('(')
            p_elements.append(item[0])
        for item in target:
            item = item.split('(')
            t_elements.append(item[0])
        
        case_missing=0
        for el in t_elements:
            if el not in p_elements:
                case_missing+=1
            
        if case_missing > 0:
            missing+=1
            

        ins_els = []
        case_inserted=0
        for el in p_elements:
            if el not in t_elements:
                case_inserted+=1
                ins_els.append(el)
                                
        if case_inserted > 0:
            inserted+=1

        #print(ins_els)  ## to check if there are more than one inserted element in prediction (i.e. more than just the subj element)
        #if len(ins_els)>1:
         #   print(sent)
          #  print(pred)
           # print(target)
   
    
        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()
        
        case_misindex = 0
        for group_p, group_t in zip(ind_group_pred, ind_group_target):
            if group_p != group_t:
                case_misindex +=1
          
        if case_misindex > 1:
            countmult+=1
        if case_misindex > 0:
            mis_index += 1
        if case_misindex > 0 or case_inserted > 0 or case_missing > 0:
            incorrect+=1
            inc_cases.append(line)
            
print(countlen)
error_pattern = {'total': total, 'incorrect': incorrect, 'missing': missing, 'inserted': inserted, 'index error': mis_index}
print(countmult)
print(error_pattern)

0
795
{'total': 1000, 'incorrect': 960, 'missing': 960, 'inserted': 953, 'index error': 960}


In [63]:
## Overview of Targeted Check

# construction of gen test sentences mostly "x was blessed by y", but some are "x was blessed."
## idea for targeted check, first look for "was" in sentence, then find the word that comes before it (this will should always be
##  the main element that in active construction would be agent but here want to make sure its theme), the verb will always be
##   "blessed" 
##  so then go to the groupings and find all relational groupings that include bless, check that the theme grouping includes
##   the main element we checked for before, also check for agent element and make sure it does NOT include this main passive element

# SO blessed_item = x, for pred_grouping: if 'blessed' in pred_grouping and if 'theme' in pred_grouping,
##   if blessed_item not in pred_grouping: count // print pred_grouping   ALSO COUNT if theres NO THEME grouping or NO BLESSED
## then do the same but for 'agent' in pred_grouping, and if blessed_item IN pred_grouping: count  IF NO AGENT, check sentence for 'by'

## ALSO check for incorrect verb prediction, because only one case of 'bless' in training 

In [33]:
import re 
import pprint

run = gen_path.split('/')[-1].split('_') # isolates the name of the file, which includes details on the run (ex seed, model, data info), example ['PRED', '42', 'ende', 'transformer', 'recogs', 'v1', 'cogs.tsv']
seed = run[1]
model = run[3]
dataset = run[4]

with open(gen_path, 'r') as infile:
    content_t = []
    rows = infile.read().split('\n') 
    for r in rows:
        column = r.split('\t')
        content_t.append(column)

# targeted check
inc_agent_rel = 0  ## count number of incorrect agent relations which include main theme element
inc_theme_rel = 0  ## count number of theme relations which incorrectly exclude main theme element
inc_verb = 0   ## count number of cases which omit 'cockroach' subject


for line in inc_cases:
    #total+=1
    
    sent = line[0]
    
    pred = line[1]
    target = line[2]
    cat = line[3]
    
    if cat == ' active_to_passive':

        pred = re.split(';|AND', pred)
        target = re.split(';|AND', target)

        sent_list = sent.split(" ")
        theme_item_index = sent_list.index("was")-1
        theme_item = sent_list[theme_item_index]

        misprediciton_list = []
        for item in target:
            if ' bless' in item:
                bless_indx = target.index(item)
                misprediction = pred[bless_indx].split('.')[0].strip(' ')
                misprediciton_list.append(misprediction)
                               
        missing_count = 0
        for p in pred:
            if ' bless ' not in p:
                #print(i)
                missing_count+=1
            else:
                for t in target:
                    if ' bless . agent' in t and ' bless . agent' not in p:
                        missing_count+=1
        
        if missing_count == len(pred):
            inc_verb += 1

        ind_group_pred = get_groupings(pred)
        ind_group_target = get_groupings(target)
        ind_group_pred.sort()
        ind_group_target.sort()

        for p_grouping in ind_group_pred:
            if ' bless ' in p_grouping:
                if ' theme' in p_grouping:
                    if f' {theme_item}' in p_grouping or f' * {theme_item}' in p_grouping:
                        #print(p_grouping)
                        yes = 'yes' #not doing anything here, just continuing 
                    else:
                        inc_theme_rel+=1
            else:
                #inc_theme_rel+=1 ## comment this out if not not wanting to include bless misprediction
                for item in misprediciton_list:
                    if f' {item} ' in p_grouping:
                        if ' theme' in p_grouping:
                            if f' {theme_item}' in p_grouping or f' * {theme_item}' in p_grouping:
                                yes = 'yes' #not doing anything here, just continuing 
                                #print(p_grouping)
                            else:
                                inc_theme_rel+=1
                               

        count_incagent = 0
        for p_grouping in ind_group_pred:
            if ' bless ' in p_grouping:
                if ' agent' in p_grouping:
                    if f' {theme_item}' in p_grouping or f' * {theme_item}' in p_grouping:
                        inc_agent_rel+=1
            else:
                #count_incagent+=1
                #inc_agent_rel+=1 ## comment this out if not not wanting to include bless misprediction
                for item in misprediciton_list:
                    if f' {item} ' in p_grouping:
                        if ' agent' in p_grouping:
                            #count_incagent+=1
                            if f' {theme_item}' in p_grouping or f' * {theme_item}' in p_grouping:
                                inc_agent_rel+=1
        if count_incagent > 0:
            inc_agent_rel+=1
                

print(f'TOTAL: {len(inc_cases)}')
print(f'VERB OMISSION: {inc_verb}')
print(f'THEME ERROR: {inc_theme_rel}')
print(f'AGENT ERROR: {inc_agent_rel}')

TOTAL: 1000
VERB OMISSION: 1000
THEME ERROR: 0
AGENT ERROR: 0
