In [4]:
import nltk
import re    

from  nltk.corpus import brown
from nltk import load

# ---------------------------------
# POS TAGGER
# Relies on NLTK's default tagger for the time being
# ---------------------------------
# Example sentences fed through the tokenizer and the tagger:
sents = [
    "He want to eat ice cream",
    "a birds is laying eggs",
    "I feel interesting",
    "when I will play soccer",
    "I will played chess tomorrow",
    "before played, I need to warm up",
    "how much steps is necessary",
]

# Step 1: split each raw sentence into a list of word tokens.
sents = list(map(nltk.word_tokenize, sents))

# Step 2: attach a POS tag to every token -> list of (word, tag) pairs per sentence.
tagged_lists = list(map(nltk.pos_tag, sents))

# Step 3: flatten each tagged sentence into a single "word/TAG word/TAG ..." string.
tagged_sents = [
    ' '.join('/'.join((word, tag)) for (word, tag) in tagged_list)
    for tagged_list in tagged_lists
]

# Show the three intermediate representations.
print("tokenized sentence:\n{0}\n".format(sents))
print("tagged list:\n{0}\n".format(tagged_lists))
print("tagged sentences:\n{0}".format(tagged_sents))

tokenized sentence:
[['He', 'want', 'to', 'eat', 'ice', 'cream'], ['a', 'birds', 'is', 'laying', 'eggs'], ['I', 'feel', 'interesting'], ['when', 'I', 'will', 'play', 'soccer'], ['I', 'will', 'played', 'chess', 'tomorrow'], ['before', 'played', ',', 'I', 'need', 'to', 'warm', 'up'], ['how', 'much', 'steps', 'is', 'necessary']]

tagged list:
[[('He', 'PRP'), ('want', 'VBP'), ('to', 'TO'), ('eat', 'VB'), ('ice', 'NN'), ('cream', 'NN')], [('a', 'DT'), ('birds', 'NNS'), ('is', 'VBZ'), ('laying', 'VBG'), ('eggs', 'NNS')], [('I', 'PRP'), ('feel', 'VBP'), ('interesting', 'VBG')], [('when', 'WRB'), ('I', 'PRP'), ('will', 'MD'), ('play', 'VB'), ('soccer', 'NN')], [('I', 'PRP'), ('will', 'MD'), ('played', 'VB'), ('chess', 'NN'), ('tomorrow', 'NN')], [('before', 'IN'), ('played', 'VBN'), (',', ','), ('I', 'PRP'), ('need', 'VBP'), ('to', 'TO'), ('warm', 'VB'), ('up', 'RP')], [('how', 'WRB'), ('much', 'JJ'), ('steps', 'NNS'), ('is', 'VBZ'), ('necessary', 'JJ')]]

tagged sentences:
['He/PRP want/VBP 

In [6]:
import pickle
# ---------------------------------
# POS CHUNKER
# Use NLTK_train chunker based on corpus conll2000 for now
# ---------------------------------
# SECURITY: pickle.load can execute arbitrary code while unpickling; only
# load a chunker file that comes from a trusted source.
# NOTE(review): the recorded run of this cell failed with
# "ModuleNotFoundError: No module named 'nltk_trainer'", so the pickle
# presumably references classes from that package -- confirm it is installed
# in the kernel's environment before running.
with open("conll2000_ub.pickle", 'rb') as chunker_file:
    # The context manager closes the file handle even if unpickling fails.
    chunker = pickle.load(chunker_file)

# Run the chunker over every POS-tagged sentence, i.e. each list of
# (word, tag) pairs in tagged_lists, and print each parse result.
chunked_sents = [chunker.parse(tagged_list) for tagged_list in tagged_lists]
for chunked_sent in chunked_sents:
    print(chunked_sent)

# NOTE: chunked_sent is a NLTK tree structure

ModuleNotFoundError: No module named 'nltk_trainer'

In [7]:
# ---------------------------------
# GRAMMAR CHECKER
# ---------------------------------

# Grammar rules.
# Each rule is a regex applied to a sentence rendered as "word/TAG word/TAG ...".
# Every rule wraps its whole pattern in exactly one named group (E1..E11); that
# group name is the key used to look up the explanation in `messages` below.
# Keep a trailing comma after every entry: without it Python silently
# concatenates two adjacent string literals into a single (useless) pattern.
grammar_rules = [
    r"(?P<E1>(((a|an)/DT)|(one/CD))(\s\w+/(JJ))*\s\w+/(NNS))",          # plural noun detected after 'a', 'an' or 'one'
    r"(?P<E2>many/JJ(\s\w+/(JJ))*\s\w+/NN(?!S))",                       # singular noun detected after 'many'
    r"(?P<E3>(?<!the/DT)\s\w+/JJS)",                                    # missing 'the' before superlative
    r"(?P<E4>(feels*|felt)/VB\w\s(\w+/(RB|JJ)\s)*\w+ing/VBG)",          # "feel/felt" should not be followed by 'adj-ing'
    r"(?P<E5>\w+/MD\s\w+(((s|ed)/VB\w*)|/VB\w))",                       # modal verb should be followed by a base-form verb
    r"(?P<E6>\w{2,}/(NN|NNP|PRP)\s\w+/VB(?!Z))",                        # singular noun requires verb with '-s' or '-es'
    r"(?P<E7>(is|are|were|was|been)/VB\w\s\w+/(VB(\s|$)|VBZ|VBP))",     # passive requires verb with '-ed'
    r"(?P<E8>when/WRB\s\w+/\w+\s(will|shall)/MD)",                      # "when" clause should be in present tense
    r"(?P<E9>(before|after)/\w+\s\w+/VB(?!G))",                         # expecting a verb-ing after preposition
    r"(?P<E10>how/\w+\smuch/\w+\s\w+/(NNS|NNPS))",                      # use "many" for plural nouns instead of "much"
    # The lookahead (instead of consuming a trailing space) lets E11 also fire
    # when the noun is the last token of the sentence, and still rejects NNS/NNPS.
    r"(?P<E11>how/\w+\smany/\w+\s\w+/(NN|NNP)(?=\s|$))",                # use "much" for non-plural nouns instead of "many"
]

# Maps a rule's group name to a formatter: it receives the matched
# "word/TAG ..." fragment and returns the line reported to the user.
messages = {"E1":  (lambda x: "{} -> plural noun detected after singular determinant".format(x)),
            "E2":  (lambda x: "{} -> singular noun detected after 'many'".format(x)),
            "E3":  (lambda x: "{} -> missing 'the' before superlative".format(x)),
            "E4":  (lambda x: "{} -> 'feel/felt' should not be followed by 'adj-ing'".format(x)),
            "E5":  (lambda x: "{} -> modal verb should be followed by a base-form verb".format(x)),
            "E6":  (lambda x: "{} -> sigular noun requires verb with '-s' or '-es'".format(x)),
            "E7":  (lambda x: "{} -> passive required verb with '-ed'".format(x)),
            "E8":  (lambda x: "{} -> 'when' clause should be in present tense".format(x)),
            "E9":  (lambda x: "{} -> expecting a verb-ing after preposition".format(x)),
            "E10": (lambda x: "{} -> use 'many' for plural nouns instead of 'much'".format(x)),
            "E11": (lambda x: "{} -> use 'much' for non-plural nouns instead of 'many'".format(x)),
           }


In [8]:
# Apply every grammar rule to every tagged sentence and print one message per
# violation. Results are grouped by rule first, then by sentence order.
for rule in grammar_rules:
    pattern = re.compile(rule)
    for sent in tagged_sents:
        # finditer reports every non-overlapping occurrence in the sentence,
        # not just the first one the way a single search() call would.
        for match in pattern.finditer(sent):
            # Look up the message by the named group that actually matched,
            # rather than by concatenating all group names in the pattern.
            key = next(name for name, text in match.groupdict().items()
                       if text is not None)
            print(messages[key](match.group(0)))

a/DT cars/NNS -> plural noun detected after singular determinant
feel/VBP interesting/VBG -> 'feel/felt' should not be followed by 'adj-ing'
will/MD played/VB -> modal verb should be followed by a base-form verb
He/PRP want/VB -> sigular noun requires verb with '-s' or '-es'
when/WRB I/PRP will/MD -> 'when' clause should be in present tense
before/IN played/VB -> expecting a verb-ing after preposition
how/WRB much/JJ steps/NNS -> use 'many' for plural nouns instead of 'much'
