From b682254db8658f2da102148f6500ff5547f4e0d8 Mon Sep 17 00:00:00 2001
From: rounakdatta <rounakdatta12@gmail.com>
Date: Tue, 17 Jul 2018 19:43:18 +0530
Subject: [PATCH] Show dependency labels in parse tree; append period only when no punctuation is found

---
 research/nlp_tests.ipynb | 35 ++++++++++++++++++++++++-----------
 src/checker.py           | 13 ++++++++++---
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/research/nlp_tests.ipynb b/research/nlp_tests.ipynb
index c9b00fd..b1f35aa 100644
--- a/research/nlp_tests.ipynb
+++ b/research/nlp_tests.ipynb
@@ -440,20 +440,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "      walk_VB              \n",
-      "   ______|____________      \n",
-      "  |      |     |   down_IN \n",
-      "  |      |     |      |     \n",
-      "  |      |     |  street_NN\n",
-      "  |      |     |      |     \n",
-      "I_PRP  am_VBP ._.   the_DT \n",
+      "                 plays_ROOT_VBZ               \n",
+      "        _______________|______________         \n",
+      "       |                          in_prep_IN  \n",
+      "       |                              |        \n",
+      "       |                        garden_pobj_NN\n",
+      "       |                              |        \n",
+      "children_nsubj_N                  the_det_DT  \n",
+      "       NS                                     \n",
       "\n"
      ]
     },
@@ -463,7 +464,7 @@
        "[None]"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -475,10 +476,10 @@
     "\n",
     "en_nlp = spacy.load('en')\n",
     "\n",
-    "doc = en_nlp(\"I am walk down the street.\")\n",
+    "doc = en_nlp(\"children plays in the garden\")\n",
     "\n",
     "def tok_format(tok):\n",
-    "    return \"_\".join([tok.orth_, tok.tag_])\n",
+    "    return \"_\".join([tok.orth_, tok.dep_, tok.tag_])\n",
     "\n",
     "\n",
     "def to_nltk_tree(node):\n",
@@ -491,6 +492,18 @@
     "[to_nltk_tree(sent.root).pretty_print() for sent in doc.sents]"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "raw",
+   "metadata": {},
+   "source": []
+  },
   {
    "cell_type": "code",
    "execution_count": 3,
diff --git a/src/checker.py b/src/checker.py
index 8a69b30..128b362 100644
--- a/src/checker.py
+++ b/src/checker.py
@@ -1,7 +1,7 @@
 import language_check as lc
 import spacy
 from nltk import Tree
-from pattern.en import conjugate, lemma, lexeme, INFINITIVE, PRESENT, PAST, PARTICIPLE, FUTURE, SG, PL, INDICATIVE, IMPERATIVE, CONDITIONAL, SUBJUNCTIVE, PROGRESSIVE
+from pattern.en import conjugate, lemma, lexeme, INFINITIVE, PRESENT, PAST, PARTICIPLE, FUTURE, SG, PL, INDICATIVE, IMPERATIVE, CONDITIONAL, SUBJUNCTIVE, PROGRESSIVE, singularize
 import numpy as np
 import os
 from sympound import sympound
@@ -223,10 +223,17 @@ def modify(text):
 
 	matches = tool.check(text)
 	text = lc.correct(text,matches)
-	if(text[-1] != '.'):
-		text += '.'
 
 	doc = en_nlp(text)
+
+	# append a full stop when no token carries the 'punct' dependency label
+	punctFound = False
+	for token in doc:
+		if(token.dep_ == 'punct'):
+			punctFound = True
+	if not punctFound:
+		text += '.'
+
 	for sent in doc.sents:
 		text, e1 = VB_VB_correction(sent.root, text, 0)
 		text, e2 = VB_VB_VB_correction(sent.root, text, 0)