In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn.metrics import *

In [3]:
import nltk

In [71]:
# Lexical rules for direct word-to-POS mapping
LEXICAL_RULES = {
    "the": "DT",
    "a": "DT",
    "an": "DT",
    "and": "CC",
    "but": "CC",
    "he": "PRP",
    "she": "PRP",
    "it": "PRP",
    "they": "PRP",
    "is": "VBZ",
    "are": "VBP",
    "was": "VBD",
    "were": "VBD",
    "run": "VB",
    "runs": "VBZ",
    "running": "VBG",
    "ran": "VBD",
    "walk": "VB",
    "walking": "VBG",
    "walked": "VBD"
}

# Suffix rules for pattern-based tagging
SUFFIX_RULES = {
    "ing": "VBG",  # Gerund or Present Participle
    "ed": "VBD",   # Past Tense
    "ly": "RB",    # Adverb
    "ion": "NN",   # Noun
    "s": "NNS"     # Plural Noun
}

# Punctuation rules
PUNCTUATION_RULES = {
    ".": "PUNCT",
    "?": "PUNCT-Q",
    "!": "PUNCT-E",
    ",": "PUNCT-C",
    ":": "PUNCT-S",
    ";": "PUNCT-S",
    "\"": "PUNCT-QT",
    "'": "PUNCT-QT",
    "(": "PUNCT-P",
    ")": "PUNCT-P"
}


In [73]:
def pos_tagging(text):
    tags=[]
    tokens = text.split()
    print(tokens)
    for i,word in enumerate(tokens):
        print(word)
        lower_word = word.lower()

        if word in PUNCTUATION_RULES:
            tags.append((word, PUNCTUATION_RULES[word]))
            continue
        
        #Rule1: Lexical lookup
        if lower_word in LEXICAL_RULES:
            tags.append((word, LEXICAL_RULES[lower_word]))
            continue

        #Rule2: Suffix based 
        mached_suffix=False
        for suffix, tag in SUFFIX_RULES.items():
            if lower_word.endswith(suffix):
                tags.append((word, tag))
                mached_suffix=True
                break
        if mached_suffix:
            continue

        #Rule3: Propernoun
        if word[0].isupper() and i>0:
            tags.append((word, "NNP"))
            continue
            
        #Rule4: Default to noun    
        tags.append((word, "NN"))
        
        #Rule5: Context awareness tagging
        if i>0:
            
            prev_word, prev_tag = tags[i-1]
            #adjective and determiner followed by noun
            if prev_tag in ["DET","JJ"] and len(lower_word)>3:
                tags[-1]=(word,"NN")
            

            
    return tags
            
        
        

In [106]:
tagged_text=pos_tagging("The boy was running somewhere with the stick in hand but Nitin did not do it. He went aside .'")

['The', 'boy', 'was', 'running', 'somewhere', 'with', 'the', 'stick', 'in', 'hand', 'but', 'Nitin', 'did', 'not', 'do', 'it.', 'He', 'went', 'aside', ".'"]
The
boy
was
running
somewhere
with
the
stick
in
hand
but
Nitin
did
not
do
it.
He
went
aside
.'


In [108]:
def check_grammar(tagged_text):
    """
    Perform detailed grammar checking based on POS tagging rules, including punctuation.
    """
    errors = []
    words = [word for word, tag in tagged_text]

    # Rule 1: Sentence must end with valid punctuation
    if words[-1] not in [".", "!", "?"]:
        errors.append("Sentence does not end with valid punctuation.")

    # Rule 2: Unbalanced punctuation (parentheses, quotes, etc.)
    stack = []
    matching_pairs = {"(": ")", "[": "]", "{": "}", "\"": "\"", "'": "'"}
    for word in words:
        if word in matching_pairs.keys():  # Opening punctuation
            stack.append(word)
        elif word in matching_pairs.values():  # Closing punctuation
            if not stack or matching_pairs[stack.pop()] != word:
                errors.append(f"Unbalanced punctuation detected: '{word}'")

    # Rule 3: Commas in lists or clauses
    for i, (word, tag) in enumerate(tagged_text):
        if word == "," and (i == 0 or tagged_text[i - 1][1] == "PUNCT"):
            errors.append("Comma misused or incorrectly placed.")

    return errors


In [110]:
check_grammar(tagged_text)

['Sentence does not end with valid punctuation.']