In [3]:
!pip install pyspellchecker
from transformers import pipeline
from spellchecker import SpellChecker

# Initialize spell checker
spell = SpellChecker()

# Load Hugging Face pipelines
fill_mask = pipeline("fill-mask", model="bert-base-uncased")
text_gen = pipeline("text-generation", model="gpt2")

# -------------------------------
# SPELLING CHECK
# -------------------------------
def check_spelling(text):
    words = text.split()
    misspelled = spell.unknown(words)
    return list(misspelled)

# -------------------------------
# SPELLING CORRECTION
# -------------------------------
def correct_spelling(text):
    words = text.split()
    corrected_words = [spell.correction(word) if word in spell.unknown([word]) else word for word in words]
    return ' '.join(corrected_words)

# -------------------------------
# AUTOCOMPLETE
# -------------------------------
def autocomplete_text(prefix, max_length=50):
    completions = text_gen(prefix, max_length=max_length, num_return_sequences=1, do_sample=True)
    return completions[0]['generated_text']

# -------------------------------
# EXAMPLES
# -------------------------------


Collecting pyspellchecker
  Downloading pyspellchecker-0.8.2-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.2-py3-none-any.whl (7.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m68.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.8.2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cpu


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu


NameError: name '_name_' is not defined

In [4]:
if __name__ == "__main__":
    input_text = "I realy lovve progrmming and artifical intellgence"

    print("Original Text: ", input_text)

    # 1. Spelling Check
    misspelled = check_spelling(input_text)
    print("Misspelled Words:", misspelled)

    # 2. Spelling Correction
    corrected = correct_spelling(input_text)
    print("Corrected Text:", corrected)

    # 3. Autocomplete
    prefix = "Artificial intelligence will"
    completed = autocomplete_text(prefix)
    print("Autocomplete Result:\n", completed)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Original Text:  I realy lovve progrmming and artifical intellgence
Misspelled Words: ['intellgence', 'artifical', 'realy', 'lovve', 'progrmming']
Corrected Text: I really love programming and artificial intelligence
Autocomplete Result:
 Artificial intelligence will help them make decisions for themselves. But even in their case, their problems will be a far cry from the current ones the U.K. has adopted," said Ben Sheetsch, head of research at IBM, in a


In [6]:
from transformers import pipeline
from spellchecker import SpellChecker

# Initialize tools
spell = SpellChecker()
fill_mask = pipeline("fill-mask", model="bert-base-uncased")
text_gen = pipeline("text-generation", model="gpt2")

def check_spelling(text):
    words = text.split()
    misspelled = spell.unknown(words)
    return list(misspelled)

def correct_spelling(text):
    words = text.split()
    corrected = [spell.correction(w) if w in spell.unknown([w]) else w for w in words]
    return ' '.join(corrected)

def autocomplete_text(text, max_length=50):
    result = text_gen(text, max_length=max_length, num_return_sequences=1, do_sample=True)
    return result[0]['generated_text']

def main():
    print("\n--- Spelling Check, Correction, and Autocomplete ---")
    print("Type 'exit' to quit.\n")

    while True:
        user_input = input("Enter text: ")

        if user_input.lower() == "exit":
            break

        print("\nChecking spelling...")
        misspelled = check_spelling(user_input)
        print("Misspelled words:", misspelled if misspelled else "None")

        corrected = correct_spelling(user_input)
        print("Corrected text: ", corrected)

        print("\nAutocomplete suggestion:")
        auto = autocomplete_text(corrected)
        print(auto)
        print("\n" + "-"*60)

if __name__ == "__main__":
    main()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu
Device set to use cpu



--- Spelling Check, Correction, and Autocomplete ---
Type 'exit' to quit.

Enter text: hapy


Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Checking spelling...
Misspelled words: ['hapy']
Corrected text:  happy

Autocomplete suggestion:
happy one from time to time", said the official, who spoke on condition of anonymity.

Nasa described the satellite as the first visible instance that some have speculated could be a life-saving technique for extracting organ transplants from bodies not directly

------------------------------------------------------------
Enter text: exit
