In [2]:
!pip install -q --upgrade pip
!pip install -q torch sentencepiece
!pip install -q transformers
!pip install -q language-tool-python
!pip install -q nltk


In [15]:
import nltk

# Ensure required NLTK data is present
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')
nltk.download('omw-1.4')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [16]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

def paraphrase_t5(text, num_return_sequences=3, num_beams=5, max_length=256):
    """
    T5-based paraphraser. Returns a list of paraphrases.
    """
    input_text = "paraphrase: " + text + " </s>"
    encoding = tokenizer.encode_plus(input_text, padding='longest', return_tensors="pt")
    input_ids, attention_mask = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        early_stopping=True,
        no_repeat_ngram_size=2
    )
    paraphrases = [tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in outputs]
    # Deduplicate while preserving order
    seen = set()
    result = []
    for p in paraphrases:
        if p not in seen:
            seen.add(p)
            result.append(p)
    return result


Device: cuda


In [17]:
!apt-get install openjdk-17-jdk-headless -qq > /dev/null
!java -version

openjdk version "17.0.16" 2025-07-15
OpenJDK Runtime Environment (build 17.0.16+8-Ubuntu-0ubuntu122.04.1)
OpenJDK 64-Bit Server VM (build 17.0.16+8-Ubuntu-0ubuntu122.04.1, mixed mode, sharing)


In [18]:
import language_tool_python

tool = language_tool_python.LanguageTool('en-US')

def grammar_correct(text):
    matches = tool.check(text)
    corrected = language_tool_python.utils.correct(text, matches)
    issues = []
    for m in matches:
        issues.append({
            "message": m.message,
            "replacements": m.replacements,
            "offset": m.offset,
            "error_length": m.errorLength
        })
    return {"corrected": corrected, "issues": issues}


# AI Writing Assistant

This notebook demonstrates a simple AI-powered writing assistant that can:
- Check grammar and spelling
- Suggest better vocabulary
- Provide paraphrases
- Adjust tone (formal/informal)


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import nltk
from nltk import word_tokenize, pos_tag
from nltk.corpus.reader.wordnet import NOUN, VERB, ADJ, ADV
import language_tool_python

# Download required NLTK resources
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('wordnet')

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

In [23]:

model_name = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

tool = language_tool_python.LanguageTool('en-US')

def grammar_correct(text):
    matches = tool.check(text)
    corrected = language_tool_python.utils.correct(text, matches)
    issues = []
    for m in matches:
        issues.append({
            "message": m.message,
            "replacements": m.replacements,
            "offset": m.offset,
            "error_length": m.errorLength
        })
    return {"corrected": corrected, "issues": issues}

def get_wordnet_pos(treebank_tag):
    """
    Convert treebank POS tags to wordnet POS tags for better synonym lookup.
    """
    if treebank_tag.startswith('J'):
        return ADJ
    elif treebank_tag.startswith('V'):
        return VERB
    elif treebank_tag.startswith('N'):
        return NOUN
    elif treebank_tag.startswith('R'):
        return ADV
    else:
        return None

def suggest_vocabulary(text, top_n=3):
    """
    For each content word return up to top_n synonyms from WordNet.
    Returns dict[word] = [syn1, syn2...]
    """
    from nltk.corpus import wordnet as wn

    tokens = word_tokenize(text)
    pos_tags = pos_tag(tokens)
    suggestions = {}
    for word, tag in pos_tags:
        wn_pos = get_wordnet_pos(tag)
        if wn_pos is None:
            continue
        synsets = wn.synsets(word, pos=wn_pos)
        # collect lemmas
        lemmas = []
        for s in synsets:
            for l in s.lemmas():
                name = l.name().replace('_', ' ')
                if name.lower() != word.lower() and name.isalpha():
                    lemmas.append(name)
        # keep frequency-ish uniqueness
        unique = []
        for lem in lemmas:
            if lem not in unique:
                unique.append(lem)
            if len(unique) >= top_n:
                break
        if unique:
            suggestions[word] = unique
    return suggestions

def paraphrase_t5(text, num_return_sequences=3, num_beams=5, max_length=256):
    """
    T5-based paraphraser. Returns a list of paraphrases.
    """
    input_text = "paraphrase: " + text + " </s>"
    encoding = tokenizer.encode_plus(input_text, padding='longest', return_tensors="pt")
    input_ids, attention_mask = encoding["input_ids"].to(device), encoding["attention_mask"].to(device)

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        early_stopping=True,
        no_repeat_ngram_size=2
    )
    paraphrases = [tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True) for output in outputs]
    # Deduplicate while preserving order
    seen = set()
    result = []
    for p in paraphrases:
        if p not in seen:
            seen.add(p)
            result.append(p)
    return result

def decide_and_run_agent(text, request_options=None):

    if request_options is None:
        request_options = {}

    result = {"original": text}

    # 1) Always run grammar check by default
    grammar_res = grammar_correct(text)
    result["grammar"] = grammar_res

    # 2) Vocabulary suggestions (only if requested OR always - here we do always)
    vocab_res = suggest_vocabulary(grammar_res["corrected"], top_n=3)
    result["vocabulary_suggestions"] = vocab_res

    # 3) Paraphrase / rewrite (only if requested)
    if request_options.get("paraphrase", False):
        num = request_options.get("paraphrase_count", 3)
        paraphrases = paraphrase_t5(grammar_res["corrected"], num_return_sequences=num)
        result["paraphrases"] = paraphrases
    else:
        result["paraphrases"] = []

    # 4) Tone adjustment - simple rule: we'll rephrase with simple prompts for T5 if tone requested
    tone = request_options.get("tone", None)
    if tone is not None:
        # We'll generate 2 variations asking for a more formal/informal paraphrase
        prompt_text = ""
        if tone.lower() == "formal":
            prompt_text = "paraphrase (formal): " + grammar_res["corrected"]
        elif tone.lower() == "informal":
            prompt_text = "paraphrase (informal): " + grammar_res["corrected"]
        else:
            prompt_text = "paraphrase: " + grammar_res["corrected"]
        # simple call
        try:
            torch.cuda.empty_cache()
            encoding = tokenizer.encode_plus(prompt_text + " </s>", return_tensors="pt").to(device)
            outputs = model.generate(
                **encoding,
                max_length=256,
                num_beams=4,
                num_return_sequences=2,
                early_stopping=True,
                no_repeat_ngram_size=2
            )
            tone_variations = [tokenizer.decode(o, skip_special_tokens=True, clean_up_tokenization_spaces=True) for o in outputs]
        except Exception as e:
            tone_variations = []
        result["tone_variations"] = tone_variations
    else:
        result["tone_variations"] = []

    return result

# Simple interactive loop for testing in Colab (run this cell; stop when done)
print("=== Simple AI Writing Assistant ===")
print("Type your English sentence/paragraph. Type 'exit' to quit.")

while True:
    text = input("\nYour text > ").strip()
    if not text:
        print("Please enter some text.")
        continue
    if text.lower() in ["exit", "quit"]:
        print("Goodbye!")
        break

    # Ask what extra options user wants
    paraphrase_ans = input("Do you want paraphrases? (y/n) > ").strip().lower()
    tone_ans = input("Do you want tone adjustment? (none/formal/informal) > ").strip().lower()
    vocab_ans = input("Do you want vocabulary suggestions? (y/n) > ").strip().lower()

    opts = {}
    if paraphrase_ans == 'y':
        try:
            n = int(input("How many paraphrase variants (1-5)? > ").strip())
            opts["paraphrase"] = True
            opts["paraphrase_count"] = max(1, min(5, n))
        except:
            opts["paraphrase"] = True
            opts["paraphrase_count"] = 2
    if tone_ans in ["formal", "informal"]:
        opts["tone"] = tone_ans
    if vocab_ans == 'y':
        opts["vocab_suggest"] = True

    # Run agent
    out = decide_and_run_agent(text, request_options=opts)

    # Present results clearly
    print("\n--- RESULTS ---")
    print("\nOriginal:")
    print(out["original"])

    print("\nGrammar-corrected:")
    print(out["grammar"]["corrected"])

    if out["grammar"]["issues"]:
        print("\nFound issues (few examples):")
        for iss in out["grammar"]["issues"][:6]:
            print("-", iss["message"], "=> suggestions:", iss["replacements"])

    if out["vocabulary_suggestions"]:
        print("\nVocabulary suggestions (word : alternatives):")
        for w, alts in out["vocabulary_suggestions"].items():
            print(f" {w} : {alts}")

    if out["paraphrases"]:
        print("\nParaphrase suggestions:")
        for i, p in enumerate(out["paraphrases"], 1):
            print(f" {i}. {p}")

    if out["tone_variations"]:
        print("\nTone variations:")
        for i, t in enumerate(out["tone_variations"], 1):
            print(f" {i}. {t}")

    print("\n--- End of result ---\n")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Device: cuda
=== Simple AI Writing Assistant ===
Type your English sentence/paragraph. Type 'exit' to quit.

Your text > i dont know what to write. this is not good english and it seem wrong.
Do you want paraphrases? (y/n) > y
Do you want tone adjustment? (none/formal/informal) > y
Do you want vocabulary suggestions? (y/n) > y
How many paraphrase variants (1-5)? > 2

--- RESULTS ---

Original:
i dont know what to write. this is not good english and it seem wrong.

Grammar-corrected:
I don't know what to write. This is not good English and it seems wrong.

Found issues (few examples):
- This sentence does not start with an uppercase letter. => suggestions: ['I']
- Possible spelling mistake found. => suggestions: ["don't"]
- This sentence does not start with an uppercase letter. => suggestions: ['This']
- Possible spelling mistake found. => suggestions: ['English']
- After ‘it’, use the third-person verb form “seems”. => suggestions: ['seems']

Vocabulary suggestions (word : alternatives

## How to Use

1. Enter your English sentence or paragraph.
2. Choose which features you want:
   - Paraphrases
   - Tone adjustment (formal/informal)
   - Vocabulary suggestions
3. Run the cell below to see the AI assistant's suggestions.


In [24]:
# Simple interactive demo
text = "i dont know what to write. this is not good english and it seem wrong."
options = {"paraphrase": True, "paraphrase_count": 2, "tone": "formal", "vocab_suggest": True}

output = decide_and_run_agent(text, request_options=options)

print("Original:", output["original"])
print("Grammar-corrected:", output["grammar"]["corrected"])
print("Vocabulary suggestions:", output["vocabulary_suggestions"])
print("Paraphrases:", output["paraphrases"])
print("Tone variations:", output["tone_variations"])


Original: i dont know what to write. this is not good english and it seem wrong.
Grammar-corrected: I don't know what to write. This is not good English and it seems wrong.
Vocabulary suggestions: {'do': ['make', 'perform', 'execute'], 'know': ['cognize', 'cognise', 'experience'], 'write': ['compose', 'pen', 'indite'], 'is': ['be', 'exist', 'equal'], 'not': ['non'], 'good': ['full', 'estimable', 'honorable'], 'English': ['side'], 'seems': ['look', 'appear', 'seem'], 'wrong': ['incorrect', 'improper', 'amiss']}
Paraphrases: ['Paraphrase', 'Paraphrasephrase']
Tone variations: ["Paraphrase (formal): I don't know what to write.", "Paraphrase (formal): I don't know what to write, this is not good English and it seems wrong."]
