# 5 Argument Identification

This script provides a function that identifies, for each predicate, the tokens that are its arguments. <br>


*Input:*  
- executionMode_dict
- mode               -> ('production' / 'sample')
- model              -> ('train' / 'test')
- print_status       -> (True / False)
- sentence_limit     -> maximum number of sentences to import (default: None, i.e. no limit)

*Output:* 
- executionMode_dict 


## Preparation

In [6]:
import pandas as pd
import numpy  as np

import warnings
warnings.filterwarnings('ignore')

## Reading data in

In [2]:
def _identify_sentence_arguments(df_sentence, predicate_column):
    """Return a boolean Series (indexed like ``df_sentence``) flagging argument tokens.

    ``df_sentence`` holds every repetition of one sentence. A repetition is
    only processed when exactly one of its rows is flagged in
    ``predicate_column``; otherwise all of its tokens stay ``False``.

    Rules applied per repetition:
      1. candidates are the tokens whose ``head`` equals the predicate's ``id``
      2. tokens with ``dep == 'punct'`` are excluded
      3. tokens that are a predicate in any repetition of the sentence are excluded

    NOTE(review): rule 3 matches predicates by token ``id``; this assumes all
    repetitions of a sentence list the same tokens with the same ids.
    """
    # First pass: find the single predicate id (if any) of every repetition.
    repetitions = []
    for _, df_repetition in df_sentence.groupby('sentenceRepetition', sort=False):
        predicate_rows = df_repetition.loc[df_repetition[predicate_column].eq(True), 'id']
        predicate_id = int(predicate_rows.iloc[0]) if len(predicate_rows) == 1 else None
        repetitions.append((df_repetition, predicate_id))

    predicate_ids = [pid for _, pid in repetitions if pid is not None]

    # Second pass: apply the three rules as boolean masks.
    is_argument = pd.Series(False, index=df_sentence.index)
    for df_repetition, predicate_id in repetitions:
        if predicate_id is None:
            continue
        candidates = (
            df_repetition['head'].eq(predicate_id)            # RULE 1
            & df_repetition['dep'].ne('punct')                # RULE 2
            & ~df_repetition['id'].isin(predicate_ids)        # RULE 3
        )
        is_argument.loc[df_repetition.index] = candidates.to_numpy()

    return is_argument


def identifyArguments(executionMode_dict,
                      mode,                   #('production' / 'sample')
                      model,                  #('train' / 'test')
                      print_status   = False,
                      sentence_limit = None):
    """Identify the argument tokens of every predicate and save the result as CSV.

    Reads the feature table registered under
    ``executionMode_dict[mode]['intermediate'][model]['04_FeaturesExtracted']``,
    adds a boolean ``label_ident_prediction`` column and writes the table to
    ``../data/intermediate/<mode>_<model>_05_identifiedArguments.csv``. That
    path is registered in ``executionMode_dict`` under
    ``'05_identifiedArguments'``.

    The input table must provide the columns ``sentenceId``,
    ``sentenceRepetition``, ``id``, ``head`` and ``dep``. If it has no
    ``predicate_prediction`` column, one is created filled with ``False``
    (so no arguments are found); an existing column is used as-is.

    Parameters
    ----------
    executionMode_dict : dict
        Nested path registry; updated in place and returned.
    mode : str
        'production' or 'sample'.
    model : str
        'train' or 'test'.
    print_status : bool
        When true, show a preview of the input and a completion message.
    sentence_limit : int or None
        Keep only the first ``sentence_limit`` sentence ids (in order of
        appearance). ``None`` keeps everything.

    Returns
    -------
    dict
        The same ``executionMode_dict`` object that was passed in.
    """
    import os  # local import: only needed to create the output directory

    path_to_input = executionMode_dict[mode]['intermediate'][model]['04_FeaturesExtracted']
    path_to_save = '../data/intermediate/' + mode + '_' + model +'_05_identifiedArguments.csv'
    executionMode_dict[mode]['intermediate'][model]['05_identifiedArguments'] = path_to_save

    # read dataframe in
    df = pd.read_csv(path_to_input)

    # optionally restrict to the first N sentences
    if sentence_limit is not None:
        kept_sentences = df['sentenceId'].unique()[:sentence_limit]
        df = df[df['sentenceId'].isin(kept_sentences)].copy()

    # reference column for predicates (prediction vs. truth, e.g. 'predicate_gold')
    predicate_column = 'predicate_prediction'

    # Placeholder for predicted predicates: only create the column when the
    # input does not already supply it, so upstream predictions are not wiped.
    if predicate_column not in df.columns:
        df[predicate_column] = False

    if print_status:
        # `display` only exists inside IPython/Jupyter; fall back to print.
        try:
            show = display
        except NameError:
            show = print
        show(df.head(5))

    # Collect predictions in a Series aligned with df, so they end up in the
    # saved table instead of being lost on a temporary slice.
    predictions = pd.Series(False, index=df.index)
    for _, df_sentence in df.groupby('sentenceId', sort=False):
        sentence_predictions = _identify_sentence_arguments(df_sentence, predicate_column)
        predictions.loc[df_sentence.index] = sentence_predictions.to_numpy()
    df['label_ident_prediction'] = predictions

    # write dataframe out (create the target directory if it is missing)
    os.makedirs(os.path.dirname(path_to_save), exist_ok=True)
    df.to_csv(path_to_save, index=False)

    if print_status:
        print('\n\n#### 5 Argument Identification ####\n\n')
        print(' - completed')

    return executionMode_dict

In [3]:
# Disabled scratch cell: the triple-quoted string below is a bare expression,
# so none of the code inside it runs. Its text loads the sample/train feature
# CSV from a hard-coded path, adds an all-False 'predicate_prediction' column
# and previews the first 15 rows.
'''#path_to_input = executionMode_dict[mode]['intermediate'][model]['04_FeaturesExtracted']
path_to_input = '../data/intermediate/' + 'sample' + '_' + 'train' + '_04_ExtractedFeatures.csv'
#executionMode_dict[mode]['intermediate'][model]['05_identifiedArguments'] = path_to_save

# read dataframe in
df = pd.read_csv(path_to_input)

# insert predicted predicates here
df['predicate_prediction'] = False

display(df.head(15))
## insert here
'''

"#path_to_input = executionMode_dict[mode]['intermediate'][model]['04_FeaturesExtracted']\npath_to_input = '../data/intermediate/' + 'sample' + '_' + 'train' + '_04_ExtractedFeatures.csv'\n#executionMode_dict[mode]['intermediate'][model]['05_identifiedArguments'] = path_to_save\n\n# read dataframe in\ndf = pd.read_csv(path_to_input)\n\n# insert predicted predicates here\ndf['predicate_prediction'] = False\n\ndisplay(df.head(15))\n## insert here\n"

In [4]:
# Disabled scratch cell: the triple-quoted string below is a bare expression,
# so none of the code inside it runs. Its text is a top-level draft of the
# per-sentence / per-repetition loop, using 'predicate_gold' as the predicate
# column and displaying every repetition.
# NOTE(review): if this draft were re-enabled as-is, `predicate_id` is read in
# the RULE 1 line while both of its assignments are commented out, and
# `predicates[index_of_pred] = True` sits outside the `if`, so it would use a
# stale or undefined `index_of_pred` (unless those names exist in the session).
'''## variable
#reference column for predicates :  prediction vs truth
predicate_column = 'predicate_prediction'
predicate_column = 'predicate_gold'



# assign only subset of dataframe with 
# uncomment if you want to restrict for this or another sentence
#df = df[df.sentenceId == 1].copy()


# loop through sentences
for s_id in df.sentenceId.unique():
    
    # filter for only this sentence
    df_sentence = df[df.sentenceId == s_id]
    
    # initiate np array to insert preditioncs
    predicates = np.full(len(df_sentence.id.unique()), False)
    
    ### find all predicates of that sentence 
    
    # loop through each repetition
    for s_rep in df_sentence.sentenceRepetition.unique():
        
        # create new subframe for working within this repetition of sentence
        df_sentence_repetition = df_sentence[df_sentence.sentenceRepetition == s_rep]
        
        
        ###  identify arguments (their indices)
        
        ## 1. identify id of predicate
        #print('\n*****', df_sentence_repetition[df_sentence_repetition[predicate_column] == True].id)
        #predicate_id = int(df_sentence_repetition[df_sentence_repetition[predicate_column] == True].id)
    
        predicate_identification = np.where(df_sentence_repetition[predicate_column] == True)[0]
        if len(predicate_identification) == 1:
            index_of_pred = np.where(df_sentence_repetition[predicate_column] == True)[0][0]
        index_of_pred
        
        predicates[index_of_pred] = True

    
    # loop through each repetition
    for s_rep in df_sentence.sentenceRepetition.unique():
        
        # create new subframe for working within this repetition of sentence
        df_sentence_repetition = df_sentence[df_sentence.sentenceRepetition == s_rep]
        
        # initiate np array to insert preditioncs
        # pred = np.full(len(df_sentence_repetition), False)
        
        
        ###  identify arguments (their indices)
        
        ## 1. identify id of predicate
        #print('\n*****', df_sentence_repetition[df_sentence_repetition[predicate_column] == True].id)
        #predicate_id = int(df_sentence_repetition[df_sentence_repetition[predicate_column] == True].id)
        
        ## RULE 1
        # ->take subset which have predicate id as head
        #df_potentials = df_sentence_repetition[df_sentence_repetition['head'] == predicate_id]
        pred = np.array(df_sentence_repetition['head'] == predicate_id)
        
        # set label_intent prediction to true
        #df_potentials.label_ident_prediction = True
        #display(df_potentials)
        
        
        ## RULE 2
        # -> exclude punctuation
        punct_index = df_sentence_repetition[df_sentence_repetition.dep == 'punct'].id - 1
        pred[punct_index] = False
        
        ## RULE 3
        # -> exclude all the identified predicates from arguments
        for i in range(len(pred)):
            # if this token is a predicate
            if predicates[i] == True:
                pred[i] = False

        
        # assign value to all features
        df_sentence_repetition.label_ident_prediction = pred
        
        
        
        
        
        print  ('\n** sentence : ', s_id, 
                '\n*  sentence repetition :', s_rep)
        display( df_sentence_repetition, '\n')
       
    #break
    '''

"## variable\n#reference column for predicates :  prediction vs truth\npredicate_column = 'predicate_prediction'\npredicate_column = 'predicate_gold'\n\n\n\n# assign only subset of dataframe with \n# uncomment if you want to restrict for this or another sentence\n#df = df[df.sentenceId == 1].copy()\n\n\n# loop through sentences\nfor s_id in df.sentenceId.unique():\n    \n    # filter for only this sentence\n    df_sentence = df[df.sentenceId == s_id]\n    \n    # initiate np array to insert preditioncs\n    predicates = np.full(len(df_sentence.id.unique()), False)\n    \n    ### find all predicates of that sentence \n    \n    # loop through each repetition\n    for s_rep in df_sentence.sentenceRepetition.unique():\n        \n        # create new subframe for working within this repetition of sentence\n        df_sentence_repetition = df_sentence[df_sentence.sentenceRepetition == s_rep]\n        \n        \n        ###  identify arguments (their indices)\n        \n        ## 1. identi

In [18]:
#len(df_sentence_repetition[df_sentence_repetition[predicate_column] == True].id)

1