Skip to content

Commit

Permalink
improvements
Browse files: browse the repository at this point in the history
  • Loading branch information
rounakdatta committed Jul 17, 2018
1 parent 0fa3d39 commit b682254
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 14 deletions.
35 changes: 24 additions & 11 deletions research/nlp_tests.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -440,20 +440,21 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" walk_VB \n",
" ______|____________ \n",
" | | | down_IN \n",
" | | | | \n",
" | | | street_NN\n",
" | | | | \n",
"I_PRP am_VBP ._. the_DT \n",
" plays_ROOT_VBZ \n",
" _______________|______________ \n",
" | in_prep_IN \n",
" | | \n",
" | garden_pobj_NN\n",
" | | \n",
"children_nsubj_N the_det_DT \n",
" NS \n",
"\n"
]
},
Expand All @@ -463,7 +464,7 @@
"[None]"
]
},
"execution_count": 2,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -475,10 +476,10 @@
"\n",
"en_nlp = spacy.load('en')\n",
"\n",
"doc = en_nlp(\"I am walk down the street.\")\n",
"doc = en_nlp(\"children plays in the garden\")\n",
"\n",
"def tok_format(tok):\n",
" return \"_\".join([tok.orth_, tok.tag_])\n",
" return \"_\".join([tok.orth_, tok.dep_, tok.tag_])\n",
"\n",
"\n",
"def to_nltk_tree(node):\n",
Expand All @@ -491,6 +492,18 @@
"[to_nltk_tree(sent.root).pretty_print() for sent in doc.sents]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "raw",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
Expand Down
13 changes: 10 additions & 3 deletions src/checker.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import language_check as lc
import spacy
from nltk import Tree
from pattern.en import conjugate, lemma, lexeme, INFINITIVE, PRESENT, PAST, PARTICIPLE, FUTURE, SG, PL, INDICATIVE, IMPERATIVE, CONDITIONAL, SUBJUNCTIVE, PROGRESSIVE
from pattern.en import conjugate, lemma, lexeme, INFINITIVE, PRESENT, PAST, PARTICIPLE, FUTURE, SG, PL, INDICATIVE, IMPERATIVE, CONDITIONAL, SUBJUNCTIVE, PROGRESSIVE, singularize
import numpy as np
import os
from sympound import sympound
Expand Down Expand Up @@ -223,10 +223,17 @@ def modify(text):

matches = tool.check(text)
text = lc.correct(text,matches)
if(text[-1] != '.'):
text += '.'

doc = en_nlp(text)

# adding punctuation
punctFound = False
for token in doc:
if(token.dep_ == 'punct'):
punctFound = True
if not punctFound:
text += '.'

for sent in doc.sents:
text, e1 = VB_VB_correction(sent.root, text, 0)
text, e2 = VB_VB_VB_correction(sent.root, text, 0)
Expand Down

0 comments on commit b682254

Please sign in to comment.