In [29]:
import re
import nltk
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [30]:
#sample dataset of correctly spelled and misspelled words
correct_words=['hello','world','python','spell','language','model','check']
misspelled_words=['helo','worl','pythoon','spl','langage','moel','chek']
print(correct_words)
print()
print(misspelled_words)


['hello', 'world', 'python', 'spell', 'language', 'model', 'check']

['helo', 'worl', 'pythoon', 'spl', 'langage', 'moel', 'chek']


In [31]:
#combile correct & misspelled into a single dataset
all_words=correct_words+misspelled_words
print(all_words)

['hello', 'world', 'python', 'spell', 'language', 'model', 'check', 'helo', 'worl', 'pythoon', 'spl', 'langage', 'moel', 'chek']


In [32]:
labels=[1] *len(correct_words)+[0] *len(misspelled_words)
labels


[1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0]

In [33]:
#Preprocess the data
def preprocess_text(text):
 text=re.sub(r'\b\w\b','',text) #remove single characters
 return text.lower()
all_words=[preprocess_text(word) for word in all_words]
print(all_words)


['hello', 'world', 'python', 'spell', 'language', 'model', 'check', 'helo', 'worl', 'pythoon', 'spl', 'langage', 'moel', 'chek']


In [34]:
#Split the dataset into train & test
xtrain,xtest,ytrain,ytest=train_test_split(all_words,labels,test_size=0.2,random_state=4)
xtrain,ytrain

(['pythoon',
  'check',
  'chek',
  'python',
  'hello',
  'moel',
  'worl',
  'world',
  'model',
  'helo',
  'spl'],
 [0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0])

In [35]:
xtest,ytest

(['language', 'spell', 'langage'], [1, 1, 0])

In [36]:

#Vectorize the words with BOW repr
cv=CountVectorizer()
xtrain_cv=cv.fit_transform(xtrain)
xtest_cv=cv.transform(xtest)
print(xtrain_cv)

  (0, 7)	1
  (1, 0)	1
  (2, 1)	1
  (3, 6)	1
  (4, 2)	1
  (5, 5)	1
  (6, 9)	1
  (7, 10)	1
  (8, 4)	1
  (9, 3)	1
  (10, 8)	1


In [23]:
!pip install language_tool_python

Collecting language_tool_python
  Downloading language_tool_python-2.7.1-py3-none-any.whl (34 kB)
Installing collected packages: language_tool_python
Successfully installed language_tool_python-2.7.1


In [37]:
print(xtest_cv)





In [39]:
#Classifier model with Naive Bayes Algorithm
clf=MultinomialNB()
clf.fit(xtrain_cv,ytrain)
#test
ypred=clf.predict(xtest_cv)
#Evaluate
accuracy=accuracy_score(ytest,ypred)
print(f"Accuracy: {accuracy*100:.2f}%")


Accuracy: 33.33%


In [42]:
#Test the spell check
def spell_check(test_word):
 test_word_vector=cv.transform([preprocess_text(test_word)])
 prediction=clf.predict(test_word_vector)

 if prediction[0]==1:
  print(f"{test_word} is spelled correctly.")
 else:
  print(f"{test_word} is likely misspelled ")



In [43]:
spell_check('helo')

helo is likely misspelled 


In [47]:
!pip install pyspellchecker






In [45]:
spell_check('python')


python is spelled correctly.


In [48]:
import language_tool_python
from spellchecker import SpellChecker

# Initialize language tool and spell checker
tool = language_tool_python.LanguageTool('en-GB')
spell_checker = SpellChecker()

# Function to detect and correct errors in a sentence
def correct_errors(sentence):
    # Detect errors using language tool
    matches = tool.check(sentence)

    # Correct errors using spell checker
    corrected_sentence = sentence
    for mistake in matches:
        start, end = mistake.offset, mistake.offset + mistake.errorLength
        incorrect_word = sentence[start:end]
        corrected_word = spell_checker.correction(incorrect_word)
        corrected_sentence = corrected_sentence[:start] + corrected_word + corrected_sentence[end:]

    return corrected_sentence

# Example usage
text = "This is an exemple sentece."
corrected_text = correct_errors(text)
print("Original Text:", text)
print("Corrected Text:", corrected_text)


Original Text: This is an exemple sentece.
Corrected Text: This is an example sentence.
