Skip to content

Commit

Permalink
algorithm errors fixed
Browse files: browse the repository at this point in the history
  • Loading branch information
rounakdatta committed Jul 10, 2018
1 parent b2d4913 commit 0fa3d39
Show file tree
Hide file tree
Showing 5 changed files with 44 additions and 15 deletions.
24 changes: 14 additions & 10 deletions research/nlp_tests.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -387,20 +387,24 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" jumps \n",
" ________|______________ \n",
" | | over \n",
" | | | \n",
" | fox dog \n",
" | ____|_____ ___|____ \n",
" . The quick brown the lazy\n",
" is \n",
" _______________________________|_________ \n",
" | | is \n",
" | | ____________________________|________________________ \n",
" | | | | | | are | | \n",
" | | | | | | _________|_______ | | \n",
" | | | | | | | | easy | do \n",
" | | | | | | | | | | ___|_____ \n",
" | downside | | | | | programs use easy | analysis \n",
" | | | | | | | | | | | _____|_______ \n",
" . The that , , it because statistical to equally to the wrong\n",
"\n"
]
},
Expand All @@ -410,7 +414,7 @@
"[None]"
]
},
"execution_count": 1,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -422,7 +426,7 @@
"\n",
"en_nlp = spacy.load('en')\n",
"\n",
"doc = en_nlp(\"The quick brown fox jumps over the lazy dog.\")\n",
"doc = en_nlp(\"The downside is that, because statistical programs are easy to use, it is equally easy to do the wrong analysis.\")\n",
"\n",
"def to_nltk_tree(node):\n",
" if node.n_lefts + node.n_rights > 0:\n",
Expand Down
8 changes: 4 additions & 4 deletions research/sentence_structure.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,25 +52,25 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"cIrnmjio\n"
"nsubjauxrootpunct\n"
]
}
],
"source": [
"import spacy\n",
"nlp = spacy.load('en_core_web_sm')\n",
"doc = nlp(\"I am going out with my family.\")\n",
"doc = nlp(\"We are walking.\")\n",
"\n",
"sent_struct = []\n",
"for token in doc:\n",
" sent_struct.append(deps_dict[token.dep_.lower()])\n",
" sent_struct.append(token.dep_.lower())\n",
"\n",
"sentence_code = ''.join(sent_struct)\n",
"print(sentence_code)"
Expand Down
27 changes: 26 additions & 1 deletion src/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def decode_coding(code):
return decoded_list

def VB_VB_VB_correction(payload, raw_text, error_count): # correct errors of type has-been-walking
if 'been' not in raw_text.split():
return raw_text, error_count
if(payload.tag_[:2] != 'VB' and payload.tag_[:2] != 'NN' and payload.tag_[:2] != 'JJ'):
return raw_text, error_count
for ch in payload.children:
Expand Down Expand Up @@ -92,14 +94,37 @@ def VB_VB_VB_correction(payload, raw_text, error_count): # correct errors of typ
def VB_VB_correction(payload, raw_text, error_count): # correct errors of type is-walking OR has-cooked
if(payload.tag_[:2] != 'VB'):
return raw_text, error_count
nounBeforeVerb = False
nounAfterVerb = False
verbFound = False
if(payload.text == 'is' or payload.text == 'was' or payload.text == 'are' or payload.text == 'were'):
return raw_text, error_count

for ch in payload.children:
if(ch.tag_[:2] == 'VB'):
verbFound = True
if((not verbFound) and (ch.dep_ == 'nsubj')):
print(ch.lower_)
nounBeforeVerb = True
if(verbFound and (ch.dep_ == 'nsubj')):
nounAfterVerb = True

ifHave = False
ifBeen = False
if(ch.tag_[:2] == 'VB'): # this might need to be removed
dummy, error_count = VB_VB_VB_correction(ch, raw_text, error_count)
try:
if(ch.lower_ == 'has') or (ch.lower_ == 'have') or (ch.lower_ == 'had'):
ifHave = True
if(ch.lower_ == 'been' or payload.text == "been"):
ifBeen = True

if(ifHave and ifBeen):
x = conjugate(verb=lemma(payload.text), tense=PAST+PARTICIPLE, mood=INDICATIVE, person=1, number=PL)
else:
elif(nounBeforeVerb and ((ch.lower_ == 'is') or (ch.lower_ == 'are') or (ch.lower_ == 'was') or (ch.lower_ == 'was') or (ch.lower_ == 'were'))):
x = conjugate(verb=lemma(payload.text), tense=PRESENT, mood=INDICATIVE, aspect=PROGRESSIVE, person=1, number=PL)
else:
x = payload.text

if(x != payload.text):
error_count += 1
Expand Down
Binary file removed test/sample1.docx
Binary file not shown.
Binary file removed test/sample2.docx
Binary file not shown.

0 comments on commit 0fa3d39

Please sign in to comment.