In [1]:
from google.colab import drive

# Attach Google Drive to this Colab runtime at the path below.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Mounted at /content/drive


In [16]:
!pip install stanza


Collecting stanza
  Downloading stanza-1.9.2-py3-none-any.whl.metadata (13 kB)
Collecting emoji (from stanza)
  Downloading emoji-2.14.0-py3-none-any.whl.metadata (5.7 kB)
Downloading stanza-1.9.2-py3-none-any.whl (1.1 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m42.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading emoji-2.14.0-py3-none-any.whl (586 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m586.9/586.9 kB[0m [31m33.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji, stanza
Successfully installed emoji-2.14.0 stanza-1.9.2


In [17]:
import stanza

# Fetch the default Tamil ('ta') model package.
stanza.download('ta')

# Tokenization + POS tagging on CPU. The models were fetched by the call above,
# so download_method=None stops the pipeline from re-downloading resources.json
# a second time while it is being constructed.
nlp = stanza.Pipeline(
    'ta',
    processors='tokenize,pos',
    use_gpu=False,
    download_method=None,
)


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …

INFO:stanza:Downloaded file to /root/stanza_resources/resources.json
INFO:stanza:Downloading default packages for language: ta (Tamil) ...


Downloading https://huggingface.co/stanfordnlp/stanza-ta/resolve/v1.9.0/models/default.zip:   0%|          | 0…

INFO:stanza:Downloaded file to /root/stanza_resources/ta/default.zip
INFO:stanza:Finished downloading models and saved to /root/stanza_resources
INFO:stanza:Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES


Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json:   0%|   …

INFO:stanza:Downloaded file to /root/stanza_resources/resources.json
INFO:stanza:Loading these models for language: ta (Tamil):
| Processor | Package      |
----------------------------
| tokenize  | ttb          |
| mwt       | ttb          |
| pos       | ttb_nocharlm |

INFO:stanza:Using device: cpu
INFO:stanza:Loading: tokenize
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: mwt
  checkpoint = torch.load(filename, lambda storage, loc: storage)
INFO:stanza:Loading: pos
  checkpoint = torch.load(filename, lambda storage, loc: storage)
  data = torch.load(self.filename, lambda storage, loc: storage)
INFO:stanza:Done loading processors!


In [19]:
# Sample input for a quick look at the tagger's output; swap in any Tamil sentence.
corrected_sentence = "உங்கள் வாக்கியம் இங்கே."

# Run the pipeline, then flatten every sentence's words into (text, tag) pairs.
doc = nlp(corrected_sentence)
tagged_words = [
    (word.text, word.upos)
    for sent in doc.sentences
    for word in sent.words
]

# One "<word><TAB><UPOS tag>" line per word.
for text, upos in tagged_words:
    print(f"{text}\t{upos}")

உங்கள்	PRON
வாக்கியம்	NOUN
இங்கே.	PUNCT


In [20]:
def check_and_correct_grammar(sentence: str) -> dict:
    """Check a Tamil sentence against three heuristic rules and suggest a fix.

    The sentence is tokenized and POS-tagged with the notebook-level Stanza
    pipeline ``nlp``. The rules run in order over one shared token list, so a
    reordering made by an earlier rule is seen by the later ones:

    1. Subject-Object-Verb order: the first PRON, first NOUN and first VERB
       must appear in that order. If they do not, they are moved to the front
       in that order and all other tokens follow in their original order.
    2. Adjective-noun order: an ADJ that has a NOUN somewhere before it but no
       NOUN after it is moved directly in front of the nearest preceding NOUN.
    3. Plural agreement: if the first PRON ends in "ள்" and the first VERB does
       not end in "ோம்", every "ேன்" in the verb is replaced by "ோம்"; if the
       verb contains no "ேன்", "ோம்" is appended instead.

    NOTE(review): rule 3 treats every pronoun ending in "ள்" as requiring the
    "ோம்" verb ending. That presumably only holds for first-person plural
    pronouns -- confirm with a Tamil speaker before relying on it.

    Args:
        sentence: The Tamil sentence to check.

    Returns:
        ``{"status": "correct", "details": <str>}`` when no rule fires.
        Otherwise ``{"status": "errors", "details": <list of str>,
        "corrected_sentence": <str>}``, where the corrected sentence is the
        pipeline's tokens joined with single spaces.
    """
    doc = nlp(sentence)

    # A single token list (with its tag list kept in lockstep) is used both to
    # evaluate the rules and to build the corrected sentence. Indexing a
    # separate whitespace split with tagger indices can hit the wrong word or
    # run out of range whenever the tokenizer splits differently from
    # str.split() or an earlier rule has already reordered the words.
    tokens = []
    tags = []
    for sent in doc.sentences:
        for word in sent.words:
            tokens.append(word.text)
            tags.append(word.upos)

    errors = []

    # Rule 1: Subject-Object-Verb order.
    if 'PRON' in tags and 'NOUN' in tags and 'VERB' in tags:
        core = (tags.index('PRON'), tags.index('NOUN'), tags.index('VERB'))
        if not (core[0] < core[1] < core[2]):
            errors.append("Error: The sentence should follow Subject-Object-Verb (SOV) order.")
            # Pronoun, noun, verb first; everything else keeps its relative order.
            order = list(core) + [i for i in range(len(tokens)) if i not in core]
            tokens = [tokens[i] for i in order]
            tags = [tags[i] for i in order]

    # Rule 2: Adjective-noun order.
    for adj_index in range(len(tags)):
        if tags[adj_index] != 'ADJ':
            continue
        if 'NOUN' in tags[adj_index + 1:]:
            # A noun still follows, so the adjective already precedes a noun.
            continue
        preceding_nouns = [j for j in range(adj_index) if tags[j] == 'NOUN']
        if not preceding_nouns:
            # No noun anywhere before it either; nothing to attach it to.
            continue
        errors.append("Error: Adjectives should precede the noun they modify.")
        noun_index = preceding_nouns[-1]
        # Moving the adjective left only shifts tokens between noun_index and
        # adj_index, so positions still to be visited are unaffected.
        tokens.insert(noun_index, tokens.pop(adj_index))
        tags.insert(noun_index, tags.pop(adj_index))

    # Rule 3: Plural agreement. Indices are looked up on the current (possibly
    # reordered) lists so the edit always lands on the verb itself.
    if 'PRON' in tags and 'VERB' in tags:
        verb_index = tags.index('VERB')
        pron_word = tokens[tags.index('PRON')]
        verb_word = tokens[verb_index]
        if pron_word.endswith("ள்") and not verb_word.endswith("ோம்"):
            errors.append("Error: Plural pronoun should match plural verb form.")
            if "ேன்" in verb_word:
                tokens[verb_index] = verb_word.replace("ேன்", "ோம்")
            else:
                tokens[verb_index] = verb_word + "ோம்"  # Add plural suffix if not present

    if errors:
        corrected_sentence = " ".join(tokens)
        return {"status": "errors", "details": errors, "corrected_sentence": corrected_sentence}
    return {"status": "correct", "details": "The sentence is grammatically correct."}


In [21]:
# Deliberately ungrammatical Tamil input used to exercise the checker.
sentence = "பள்ளிக்கு நாங்கள் செல்வேன்"
result = check_and_correct_grammar(sentence)

if result["status"] != "correct":
    # Error report: one bullet per rule violation, then the suggested rewrite
    # when the checker supplied one.
    print("Grammar Errors Found:")
    for message in result["details"]:
        print(f"- {message}")
    if "corrected_sentence" in result:
        print(f"Corrected Sentence: {result['corrected_sentence']}")
else:
    # Nothing flagged: "details" holds a single confirmation message.
    print(result["details"])


Grammar Errors Found:
- Error: The sentence should follow Subject-Object-Verb (SOV) order.
- Error: Plural pronoun should match plural verb form.
Corrected Sentence: நாங்கள் பள்ளிக்கு செல்வோம்
