### Using NLP for Text Data Quality
**Objective**: Enhance text data quality using NLP techniques.

**Task**: Spelling Corrections

**Steps**:
1. Data Set: Import a dataset containing text reviews with spelling errors.
2. Apply Corrections: Use a spell-checker from an NLP library to correct spelling mistakes.
3. Verify Improvements: Review the corrections to ensure data quality improvement.

In [None]:
# write your code from here


In [2]:
import pandas as pd
from textblob import TextBlob
import unittest

# Sample data: three reviews containing misspellings plus one empty review,
# so the blank-input path of the pipeline is exercised as well.
data = dict(
    review_id=[1, 2, 3, 4],
    review=[
        "This prodcut is amazng and usefull.",
        "Excellnt qulaity but slow dellivery.",
        "Horrble servce, will not recomend.",
        "",  # Empty string
    ],
)
df = pd.DataFrame(data)

# Improved spelling correction function
def correct_spelling(text):
    """Return a spell-corrected copy of ``text`` using TextBlob.

    Args:
        text: The value to correct. Anything that is not a ``str`` is
            treated as missing.

    Returns:
        The corrected string. An empty string is returned for non-string
        input and for strings that are empty or whitespace-only. If the
        correction step raises, the error is printed and the original
        ``text`` is returned unchanged.
    """
    # Nothing to correct: missing / non-text values and blank strings.
    if not (isinstance(text, str) and text.strip()):
        return ''

    try:
        return str(TextBlob(text).correct())
    except Exception as e:
        # Best-effort: report the failure and fall back to the raw input.
        print(f"Error correcting text: {e}")
    return text

# Run the spell-checker over every review and store the result next to
# the original text.
corrected_reviews = df['review'].apply(correct_spelling)
df['corrected_review'] = corrected_reviews

# Show each review beside its corrected form so the changes can be
# verified by eye.
comparison_columns = ['review_id', 'review', 'corrected_review']
print("Original vs Corrected Reviews:")
print(df[comparison_columns])


ModuleNotFoundError: No module named 'textblob'

In [3]:
class TestSpellingCorrection(unittest.TestCase):
    """Unit tests pinning the expected output of ``correct_spelling``."""

    def _assert_corrects_to(self, raw, expected):
        """Assert that ``correct_spelling(raw)`` equals ``expected``."""
        actual = correct_spelling(raw)
        self.assertEqual(actual, expected)

    def test_typical_sentence(self):
        """A sentence with two misspelled words is fully corrected."""
        self._assert_corrects_to(
            "This prodcut is amazng.",
            "This product is amazing.",
        )

    def test_empty_string(self):
        """An empty string comes back as an empty string."""
        self._assert_corrects_to("", "")

    def test_none_input(self):
        """``None`` is mapped to an empty string."""
        self._assert_corrects_to(None, "")

    def test_numeric_string(self):
        """A string made only of digits is left untouched."""
        self._assert_corrects_to("12345", "12345")

    def test_single_word(self):
        """A single misspelled word is corrected."""
        self._assert_corrects_to("reccomend", "recommend")


# Run the tests in-process. argv=[''] hands unittest a dummy program name so
# it does not try to parse the host process's own command-line arguments
# (e.g. those of a notebook kernel), and exit=False stops unittest.main from
# calling sys.exit() once the run has finished.
if __name__ == "__main__":
    unittest.main(argv=[''], exit=False)


NameError: name 'unittest' is not defined