From b682254db8658f2da102148f6500ff5547f4e0d8 Mon Sep 17 00:00:00 2001 From: rounakdatta Date: Tue, 17 Jul 2018 19:43:18 +0530 Subject: [PATCH] improvements --- research/nlp_tests.ipynb | 35 ++++++++++++++++++++++++----------- src/checker.py | 13 ++++++++++--- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/research/nlp_tests.ipynb b/research/nlp_tests.ipynb index c9b00fd..b1f35aa 100644 --- a/research/nlp_tests.ipynb +++ b/research/nlp_tests.ipynb @@ -440,20 +440,21 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " walk_VB \n", - " ______|____________ \n", - " | | | down_IN \n", - " | | | | \n", - " | | | street_NN\n", - " | | | | \n", - "I_PRP am_VBP ._. the_DT \n", + " plays_ROOT_VBZ \n", + " _______________|______________ \n", + " | in_prep_IN \n", + " | | \n", + " | garden_pobj_NN\n", + " | | \n", + "children_nsubj_N the_det_DT \n", + " NS \n", "\n" ] }, @@ -463,7 +464,7 @@ "[None]" ] }, - "execution_count": 2, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -475,10 +476,10 @@ "\n", "en_nlp = spacy.load('en')\n", "\n", - "doc = en_nlp(\"I am walk down the street.\")\n", + "doc = en_nlp(\"children plays in the garden\")\n", "\n", "def tok_format(tok):\n", - " return \"_\".join([tok.orth_, tok.tag_])\n", + " return \"_\".join([tok.orth_, tok.dep_, tok.tag_])\n", "\n", "\n", "def to_nltk_tree(node):\n", @@ -491,6 +492,18 @@ "[to_nltk_tree(sent.root).pretty_print() for sent in doc.sents]" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "raw", + "metadata": {}, + "source": [] + }, { "cell_type": "code", "execution_count": 3, diff --git a/src/checker.py b/src/checker.py index 8a69b30..128b362 100644 --- a/src/checker.py +++ b/src/checker.py @@ -1,7 +1,7 @@ import language_check as lc import spacy from nltk import Tree -from pattern.en import conjugate, lemma, lexeme, INFINITIVE, PRESENT, PAST, PARTICIPLE, FUTURE, SG, PL, INDICATIVE, IMPERATIVE, CONDITIONAL, SUBJUNCTIVE, PROGRESSIVE +from pattern.en import conjugate, lemma, lexeme, INFINITIVE, PRESENT, PAST, PARTICIPLE, FUTURE, SG, PL, INDICATIVE, IMPERATIVE, CONDITIONAL, SUBJUNCTIVE, PROGRESSIVE, singularize import numpy as np import os from sympound import sympound @@ -223,10 +223,17 @@ def modify(text): matches = tool.check(text) text = lc.correct(text,matches) - if(text[-1] != '.'): - text += '.' doc = en_nlp(text) + + # adding punctuation + punctFound = False + for token in doc: + if(token.dep_ == 'punct'): + punctFound = True + if not punctFound: + text += '.' + for sent in doc.sents: text, e1 = VB_VB_correction(sent.root, text, 0) text, e2 = VB_VB_VB_correction(sent.root, text, 0)