In [1]:
from allennlp.predictors.predictor import Predictor
import allennlp_models.tagging
from nltk import Tree
import re
import traceback
import json

In [2]:
from PyDictionary import PyDictionary
dictionary=PyDictionary()

In [3]:
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/structured-prediction-srl-bert.2020.12.15.tar.gz")


In [4]:
def process_sentenceALLEN(sentence):
    result = predictor.predict_json({"sentence": sentence})
    
    if len(result['verbs']) == 0:
        return -1
    tags = result['verbs'][0]['tags']
    words = result['words']
    if result['verbs'][0]['verb'].endswith("ed"):
        return -1
    dictResult = dictionary.meaning(result['verbs'][0]['verb'])
    if dictResult == None or not 'Verb' in dictResult:
        return -1
    return tags, words

In [5]:
import pandas as pd
recipe_file_path = "../csvs/steps2.csv"
df = pd.read_csv(recipe_file_path, delimiter=',')

In [6]:
no_columns = len(df.columns)
if no_columns >= 4:
    df.drop(df.columns[[i for i in range(4,no_columns)]], axis=1, inplace=True)
df.head()

Unnamed: 0,id,content,number,recipe_id
0,1153,Preheat oven to 200°F. Sprinkle 1 side of tuna...,1,353
1,1154,"Add butter, sliced green onions, cilantro, gin...",2,353
2,1155,Preheat oven to 350°F. Butter and flour two 9x...,1,354
3,1156,Divide batter equally between prepared pans. B...,2,354
4,1157,Preheat oven to 300°F. and grease pans. Line b...,1,355


In [7]:
steps = df['content'].tolist()

In [8]:
import subprocess

In [9]:
def runOutFile():
    with open('input.txt','r') as infile:
        subprocess.call(
            ('./a.out',),
            stdin=infile,
            universal_newlines=True)

In [10]:
def writeInput(sentence):
    f = open("input.txt", "w")
    f.write(sentence+"\n")
    f.close()

In [11]:
def getResult():
    f = open("recipe.json", "r")
    result = f.read()
    f.close()
    jsonResult = json.loads(result)
    return jsonResult

In [12]:
def extractWords(arg):
    words = []
    words.extend(arg['value'].split())
    if ('children' in arg.keys()):
        for child in arg['children']:
            words.extend(extractWords(child))
    return words

In [13]:
def wordsToLabel(tree):
    if (tree == "-1"):
        return -1
    try:
        sentence = tree['children'][0]['children'][0]
    except:
        return -1
    result = list()
    result.append(('V',[sentence['value']]))
    if ('children' in sentence.keys()):
        for child in sentence['children']:
            words = extractWords(child)
            result.append((child['label'],words))
    return result

In [14]:
def allenToLabel(solution):
    if (solution == -1):
        return -1
    tags, words = solution
    result = list()
    currentList = list()
    currentTag = 'X'
    for i in range(len(tags)):
        vals = tags[i].split("-",1)
        if (vals[0] != 'I'):
            if (currentTag != 'X'):
                result.append((currentTag,currentList))
            currentList = list()
            if (vals[0] != 'O'):
                currentTag = vals[1]
            else: 
                currentTag = 'X'
        if (vals[0] != 'O'):
            newWords = []
            if words[i].endswith("-"):
                newWords.append(words[i][:-1])
            else:
                if words[i][0].isdigit() and "-" in words[i]:
                    splitV = words[i].split("-")
                    newWords.extend(splitV)
                else:
                    newWords.append(words[i])
            currentList.extend(newWords)
    return result

In [15]:
def process_sentenceSOL(sentence):
    writeInput(sentence)
    runOutFile()
    return getResult()

In [16]:
def argXargM(label1, label2):
    if (label1 == 'ARG2' or label1 == 'ARG3') and (label2 == 'ARGM-LOC' or label2 == 'ARGM-MNR' or label2 == 'ARGM-ADV' or label2 == 'ARGM-EXT' or label2 == 'ARGM-PRP'):
            return True
    return False

In [17]:
def argSimilar(label1, label2):
    if (label1 == 'ARGM-LOC' and label2 == 'ARGM-DIR'):
        return True
    if (label1 == 'ARGM-MNR' and label2 == 'ARGM-DIR'):
        return True
    if (label1 == 'ARGM-MNR' and label2 == 'ARGM-ADV'):
        return True
    if (label1 == 'ARG2' and label2 == 'ARG3'):
        return True
    if (label1 == 'ARG2' and label2 == 'ARG4'):
        return True
    return False

In [18]:
def similarLabels(label1, label2):
    result = argXargM(label1, label2)
    if result:
        return result
    result = argXargM(label2, label1)
    if result:
        return result
    result = argSimilar(label1, label2)
    if result:
        return result
    result = argSimilar(label2, label1)
    if result:
        return result
    return False

In [19]:
def compareResult(sol1, sol2):
    if sol1 == -1 and sol2 != -1:
        return 0,1,-1
    if sol1 == -1 and sol2 == -1:
        return 0,0,-1
    if sol1 != -1 and sol2 == -1:
        return 1,0,-1

    exact = 0
    similar = 0
    total = 0
    for (label1,words1) in sol1:
        total += len(words1)
        foundLabel = 0
        for (label2,words2) in sol2:
            if label2 == label1:
                foundLabel = 1
                if label1 == 'V' and words1[0].startswith(words2[0]):
                    exact +=1
                    continue
                if label1 == 'V' and not words1[0].startswith(words2[0]):
                    return 1,1,-1
                for word1 in words1:
                    if word1 in words2:
                        exact += 1
        if foundLabel == 0:
            for (label2,words2) in sol2:
                if similarLabels(label1, label2):
                    foundLabel = 1
                    for word1 in words1:
                        if word1 in words2:
                            similar += 1
                            
#     print(sol1)
#     print(sol2)
#     print("Exact labeling: ", exact)
#     print("Similar labeling: ", similar)
    match = (exact+similar)*100/total
    totalMatch = exact*100/total
#     print("Match percent: ", match)
#     print("Total match: ", totalMatch)
#     print("\n")
    return 1,1,match

In [20]:
i_start = 0
i_end = 100

In [21]:
total = 0
matchable = 0

successA = 0
successS = 0
failBoth = 0
totalMatch = 0

for index in range(i_start,i_end): 
    try:
        step = steps[index]
        if not isinstance(step, str):
            continue
        step = re.sub(r'\([^)]*\)', ' ', step)
        sentences = re.split('(?<=[!?;.]) +',step)
        for sentence in sentences:
            sentence = sentence.strip()
            if sentence == '' or re.match('[0-9]+.',sentence):
                continue;
            sentence = sentence.lower()
            rezA = process_sentenceALLEN(sentence)
            rezS = process_sentenceSOL(sentence)

            labelsS = wordsToLabel(rezS)
            labelsA = allenToLabel(rezA)
            S, A, match = compareResult(labelsS, labelsA)
            successA += A
            successS += S
            failBoth += (A == 0 and S == 0)
            total+=1
            if (S == 1 and A == 1 and match != -1):
                matchable += 1
                totalMatch += match

    except:
        print(index, sentence)
        traceback.print_exc()
        break

pA = successA*100.0/total
pS = successS*100.0/total

print("Both fail: ", failBoth)
print("Total: ",total)
print("Success Allen: ",successA, pA)
print("Success Solution: ", successS, pS)
print("\nMatchable: ", matchable)
print("Match percent: ", totalMatch/matchable)


Both fail:  19
Total:  452
Success Allen:  328 72.56637168141593
Success Solution:  364 80.53097345132744

Matchable:  209
Match percent:  86.16593423411605
