From 0f1859fc731b956d984ad557bf4470e414b35240 Mon Sep 17 00:00:00 2001 From: Rangaraj Tirumala Date: Sun, 3 Mar 2019 18:48:46 -0500 Subject: [PATCH] added modal and verb to json --- funk.py | 5 +++++ tag_JSON.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/funk.py b/funk.py index ddf76f5..cc61bdf 100644 --- a/funk.py +++ b/funk.py @@ -172,6 +172,11 @@ def punct_in_set(pos_set:list): return True return False +def get_pos_word_in_set(word_set:list, pos:str): + for tup in word_set: + if tup[1][0].lower() == pos.lower(): + return tup[0] + return None # from nltk import word_tokenize # from nltk import pos_tag diff --git a/tag_JSON.py b/tag_JSON.py index 216a186..39d6379 100644 --- a/tag_JSON.py +++ b/tag_JSON.py @@ -23,6 +23,8 @@ def tagList(jsonList:list, whWord:str, collocate:str, context:str): tagged = pos_tag(word_tokenize(sent)) clauseType = None + modal = None + verb = None modals = ['can', 'could', 'may', 'might', 'shall', 'should', 'will', 'would', 'must'] @@ -53,15 +55,19 @@ def tagList(jsonList:list, whWord:str, collocate:str, context:str): # ELSE IF "to" exists in SET B - Non-Finite elif f.x_in_set("to", wh_collocate, is_pos=False): clauseType = "Non-Finite" + verb = f.get_pos_word_in_set(wh_collocate, 'V') # ELSE IF "gap" exists in either set - : elif f.x_in_set(":", context_wh, is_pos=True) or f.x_in_set(":", wh_collocate, is_pos=True): clauseType = ":" # ELSE IF modal exists in SET B - Modal elif f.x_in_set(modals, wh_collocate, is_pos=False): clauseType = "Modal" + modal = f.get_pos_word_in_set(wh_collocate, 'M') + verb = f.get_pos_word_in_set(wh_collocate, 'V') # ELSE - Finite else: clauseType = "Finite" + verb = f.get_pos_word_in_set(wh_collocate, 'V') except: print("BROKE HERE: ") print(obj["resNumber"]) @@ -71,5 +77,7 @@ def tagList(jsonList:list, whWord:str, collocate:str, context:str): break obj['clauseType'] = clauseType + obj['modal'] = modal + obj['verb'] = verb return jsonList