# Levenshtein Distance Spelling Correction NLP

We are going to use the [Smart Home Commands Dataset](https://www.kaggle.com/bouweceunen/smart-home-commands-dataset), a collection of spoken-style commands for smart home devices, as our vocabulary. Each word of a faulty sentence that is not in that vocabulary is replaced by the vocabulary word with the smallest [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) to it.

In [None]:
import os
from nltk import word_tokenize
import itertools
import pandas as pd

In [None]:
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [None]:
# Load the commands dataset and keep only the column holding the sentences.
dataset_path = "/kaggle/input/smart-home-commands-dataset/dataset.csv"
df = pd.read_csv(dataset_path)
sentences_df = df[['Sentence']]
# Last expression of the cell: the notebook displays the first ten rows.
sentences_df.head(10)

In [None]:
def get_plain_vocabluary():
    """Return the distinct tokens that occur in the dataset sentences.

    Every value in the ``Sentence`` column of the module-level
    ``sentences_df`` is tokenized with ``word_tokenize`` and the resulting
    tokens are de-duplicated.

    Returns:
        A list of the unique tokens in sorted order. Sorting makes the order
        reproducible between runs; a plain ``list(set(...))`` depends on
        string hashing, which would make tie-breaking between equally distant
        candidate words vary from run to run.
    """
    vocabulary = set()
    # Iterate the column directly; building a row object per sentence with
    # iterrows() is unnecessary when only one column is read.
    for sentence in sentences_df['Sentence']:
        vocabulary.update(word_tokenize(sentence))
    return sorted(vocabulary)

In [None]:
def levenshtein_distance(s1, s2):
    """Return the Levenshtein (edit) distance between two sequences.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the other.

    Args:
        s1: first sequence (typically a string).
        s2: second sequence (typically a string).

    Returns:
        The edit distance as a non-negative integer.
    """
    # Lay the shorter sequence along the row so only len(shorter) + 1 cells
    # are kept in memory at any time.
    shorter, longer = (s2, s1) if len(s1) > len(s2) else (s1, s2)

    # Row 0: distance from the empty prefix of `longer` to each prefix of
    # `shorter` is simply the prefix length.
    previous_row = list(range(len(shorter) + 1))
    for row_number, long_item in enumerate(longer, start=1):
        # First cell: deleting all `row_number` items of the `longer` prefix.
        current_row = [row_number]
        for col, short_item in enumerate(shorter):
            if short_item == long_item:
                # Matching items cost nothing: carry the diagonal value over.
                cell = previous_row[col]
            else:
                # One edit on top of the cheapest of substitution (diagonal),
                # deletion (above) or insertion (left).
                cell = 1 + min(
                    previous_row[col],
                    previous_row[col + 1],
                    current_row[-1],
                )
            current_row.append(cell)
        previous_row = current_row
    return previous_row[-1]

In [None]:
def spelling_correction(sentence):
    """Replace out-of-vocabulary words in *sentence* with their nearest match.

    The sentence is tokenized with ``word_tokenize``. Each token that is not
    in the vocabulary returned by ``get_plain_vocabluary()`` is replaced by
    the vocabulary entry with the smallest Levenshtein distance to it; on a
    tie the entry that comes first in the vocabulary list wins.

    Tokens without any alphabetic character (numbers, punctuation) are left
    untouched, since they are not misspelled words and would otherwise be
    swapped for an unrelated vocabulary entry.

    Args:
        sentence: the raw sentence to correct.

    Returns:
        The tokens, corrected where needed, joined by single spaces.
    """
    tokens = word_tokenize(sentence)
    vocwords = get_plain_vocabluary()
    if not vocwords:
        # Nothing to correct against; min() over an empty vocabulary would
        # raise ValueError.
        return ' '.join(tokens)

    # A set gives O(1) membership tests; the list is kept for ordered
    # tie-breaking when picking the closest word.
    known_words = set(vocwords)
    for i, word in enumerate(tokens):
        if word in known_words:
            continue
        if not any(ch.isalpha() for ch in word):
            # Digits and punctuation are not spelling mistakes.
            continue
        # min() returns the first entry with the smallest distance.
        tokens[i] = min(
            vocwords,
            key=lambda vocword: levenshtein_distance(word, vocword),
        )
    return ' '.join(tokens)

In [None]:
# One misspelled word in this sentence: "lihgts".
corrected = spelling_correction("Turn off the lihgts?")
print(corrected)

In [None]:
# One misspelled word in this sentence: "Opn".
corrected = spelling_correction("Opn the garage door.")
print(corrected)

In [None]:
# Two misspelled words in this sentence: "youu" and "doorr".
corrected = spelling_correction("Can youu please open the doorr.")
print(corrected)

In [None]:
# Two misspelled words in this sentence: "lihts" and "rooom".
corrected = spelling_correction("Turn off the lihts in the dining rooom.")
print(corrected)