<a href="https://colab.research.google.com/github/muktabhumkar/Grant-Writing-Support-Tool/blob/main/error_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import nltk
# Fetch the NLTK 'punkt' tokenizer data; the recorded output shows it being
# unzipped under tokenizers/punkt in /root/nltk_data.
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [10]:
import spacy
import re
from nltk.tokenize import sent_tokenize
import nltk

# Download the 'punkt_tab' resource (required for sentence tokenization)
# sent_tokenize is called by check_unclear_objectives, so this must run first.
nltk.download('punkt_tab')


# Load the spaCy model
# `nlp` is the module-level pipeline that check_inconsistent_language calls to
# tokenize the proposal text.
# NOTE(review): spacy.load expects the "en_core_web_sm" model package to be
# installed in the environment already — confirm for fresh kernels.
nlp = spacy.load("en_core_web_sm")

# Function to check for inconsistent language (e.g., synonyms used inconsistently)
def check_inconsistent_language(text):
    """Report overused or evenly-mixed key terms in a proposal.

    The text is tokenized with the module-level spaCy pipeline ``nlp``. Only
    tokens whose lowercased text exactly equals one of the tracked terms
    ('grant', 'funding', 'proposal', 'project') are counted.

    Args:
        text: The proposal text to analyze.

    Returns:
        A list of issue strings: one "Overused term" entry for each tracked
        term that appears more than 3 times (in tracked-term order), followed
        by a single "Inconsistent use" entry when every tracked term has the
        same count and that count is greater than 1.

    NOTE(review): the "inconsistent use" rule only fires when all four counts
    are identical — confirm this is the intended heuristic.
    """
    tracked_terms = ('grant', 'funding', 'proposal', 'project')  # Example terms
    counts = dict.fromkeys(tracked_terms, 0)

    # Tally case-insensitive exact matches of the tracked terms.
    for tok in nlp(text):
        word = tok.text.lower()
        if word in counts:
            counts[word] += 1

    # Any term used more than three times is reported as overused.
    findings = [
        f"Overused term detected: '{word}' appears {n} times."
        for word, n in counts.items()
        if n > 3
    ]

    # Every tracked term shares one count, and that count exceeds 1.
    tallies = list(counts.values())
    evenly_spread = len(set(tallies)) == 1
    if evenly_spread and max(tallies) > 1:
        findings.append("Inconsistent use of related terms. Try to stick to one primary term.")

    return findings

# Function to check for unclear objectives (simple heuristic: check for vague words)
def check_unclear_objectives(text):
    """Flag sentences that contain vague objective wording.

    The text is split into sentences with NLTK's ``sent_tokenize``. A sentence
    is flagged when its lowercased form contains any of the vague words as a
    substring, so longer words that embed one (e.g. 'growth' for 'grow') match
    as well.

    Args:
        text: The proposal text to analyze.

    Returns:
        A list with one "Unclear objective detected" string per flagged
        sentence, in document order, quoting the sentence as tokenized.
    """
    vague_words = ('improve', 'better', 'increase', 'grow', 'enhance')  # Example vague words

    def _sounds_vague(sentence):
        # Case-insensitive substring test against every vague word.
        lowered = sentence.lower()
        return any(word in lowered for word in vague_words)

    return [
        f"Unclear objective detected: '{sentence}'"
        for sentence in sent_tokenize(text)
        if _sounds_vague(sentence)
    ]

# Function to check for missing information (e.g., missing sections like budget or timeline)
def check_missing_information(text):
    """List expected proposal sections that the text never mentions.

    Each expected section name is looked up as a case-insensitive substring of
    the whole text, so 'goal' alone does not satisfy 'goals'.

    Args:
        text: The proposal text to analyze.

    Returns:
        A list with one "Missing section" string per section name that does
        not occur in the text, in the order budget, timeline, goals,
        objectives, impact.
    """
    expected_sections = ('budget', 'timeline', 'goals', 'objectives', 'impact')
    haystack = text.lower()

    return [
        f"Missing section: '{name}' is not mentioned in the proposal."
        for name in expected_sections
        if name not in haystack
    ]

# Main function to check the entire proposal for errors
def detect_errors_in_grant_proposal(text):
    """Run every proposal check and collect the reported issues.

    The checks run in a fixed order: inconsistent language, unclear
    objectives, then missing information. Their results are concatenated in
    that order.

    Args:
        text: The proposal text to analyze.

    Returns:
        A list of issue strings. When no check reports anything, the list
        holds the single entry "No major issues detected.".
    """
    checks = (
        check_inconsistent_language,
        check_unclear_objectives,
        check_missing_information,
    )
    findings = [issue for check in checks for issue in check(text)]

    # An empty result is replaced by an explicit "all clear" message.
    return findings or ["No major issues detected."]

# Example usage
if __name__ == "__main__":
    # Sample grant proposal text. It mentions 'budget' and 'timeline' but not
    # 'goals', 'objectives' or 'impact', and it uses the vague words 'improve'
    # and 'enhance', so the checks below have something to report.
    # NOTE(review): "andincreasing" on the second line looks like a missing
    # space in the sample text — confirm whether it is intentional.
    proposal_text = """Our goal is to improve the community through this funding.
    We will enhance the local environment by planting trees andincreasing renewable energy use.
    The project timeline will be provided. However, the budget section is still under planning"""

    # Detect errors in the proposal
    errors = detect_errors_in_grant_proposal(proposal_text)

    # Print detected errors, one per line
    for error in errors:
        print(error)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Unclear objective detected: 'Our goal is to improve the community through this funding.'
Unclear objective detected: 'We will enhance the local environment by planting trees andincreasing renewable energy use.'
Missing section: 'goals' is not mentioned in the proposal.
Missing section: 'objectives' is not mentioned in the proposal.
Missing section: 'impact' is not mentioned in the proposal.
