<a href="https://colab.research.google.com/github/navneetkrc/Deep_learning_experiments/blob/master/symspell_for_dictionary_creations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install symspellpy

In [3]:
import symspellpy
from symspellpy import SymSpell, Verbosity
import pandas as pd

def create_product_dictionary(product_names, output_file="product_dictionary.txt"):
    """
    Create a dictionary file from product names for use with SymSpell

    Args:
        product_names (list): List of product names
        output_file (str): Path to output dictionary file
    """
    # Create dictionary file with product names and count of 1
    with open(output_file, "w", encoding="utf-8") as f:
        for product in product_names:
            # Format: word, frequency
            f.write(f"{product.lower()},1\n")

            # Also add individual words from multi-word product names
            words = product.lower().split()
            if len(words) > 1:
                for word in words:
                    if len(word) > 2:  # Only add words longer than 2 characters
                        f.write(f"{word},1\n")

    print(f"Dictionary created at {output_file}")
    return output_file

def initialize_symspell(dictionary_path, max_edit_distance=2):
    """
    Initialize SymSpell with the product dictionary

    Args:
        dictionary_path (str): Path to the dictionary file
        max_edit_distance (int): Maximum edit distance for spelling correction

    Returns:
        SymSpell: Initialized SymSpell object
    """
    sym_spell = SymSpell(max_dictionary_edit_distance=max_edit_distance)

    # Load dictionary with term_index=0 and count_index=1
    sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)

    return sym_spell

def test_spell_correction(sym_spell, misspelled_queries):
    """
    Test spell correction with some misspelled queries

    Args:
        sym_spell (SymSpell): Initialized SymSpell object
        misspelled_queries (list): List of misspelled queries

    Returns:
        DataFrame: Results of spell correction
    """
    results = []

    for query in misspelled_queries:
        suggestions = sym_spell.lookup(query.lower(), Verbosity.CLOSEST,
                                      max_edit_distance=2, include_unknown=True)

        if suggestions:
            for suggestion in suggestions[:3]:  # Get top 3 suggestions
                results.append({
                    "Misspelled": query,
                    "Suggestion": suggestion.term,
                    "Distance": suggestion.distance,
                    "Count": suggestion.count
                })
        else:
            results.append({
                "Misspelled": query,
                "Suggestion": "No suggestion",
                "Distance": "-",
                "Count": "-"
            })

    return pd.DataFrame(results)

# Example Samsung product names
samsung_products = [
    "Galaxy S22 Ultra",
    "Galaxy Z Fold 4",
    "Galaxy A53",
    "Galaxy Watch 5",
    "Galaxy Buds Pro",
    "Neo QLED 8K TV",
    "Odyssey G9 Monitor",
    "Galaxy Book2 Pro",
    "SmartThings Hub",
    "Bespoke Refrigerator"
]

# Example misspelled queries
misspelled_queries = [
    "galxy s22 ultr",
    "samsng neo qld",
    "galaxy wach",
    "z flod",
    "galxy buds",
    "samung book pro",
    "odysse monitor"
]

# Main execution
if __name__ == "__main__":
    # Create dictionary file
    dict_path = create_product_dictionary(samsung_products)

    # Initialize SymSpell with our dictionary
    sym_spell = initialize_symspell(dict_path)

    # Test with misspelled queries
    results_df = test_spell_correction(sym_spell, misspelled_queries)

    # Display results
    print("\nSpell Correction Results:")
    print(results_df)

    # Advanced usage: Create a more comprehensive dictionary with variants
    print("\nCreating comprehensive dictionary with variants...")
    all_terms = set()

    # Add original terms
    for product in samsung_products:
        product_lower = product.lower()
        all_terms.add(product_lower)

        # Add terms without spaces
        all_terms.add(product_lower.replace(" ", ""))

        # Add individual words
        for word in product_lower.split():
            if len(word) > 2:
                all_terms.add(word)

    # Write enhanced dictionary
    enhanced_dict_path = "enhanced_product_dictionary.txt"
    with open(enhanced_dict_path, "w", encoding="utf-8") as f:
        for term in all_terms:
            f.write(f"{term},1\n")

    # Initialize SymSpell with enhanced dictionary
    enhanced_sym_spell = initialize_symspell(enhanced_dict_path)

    # Test with the same misspelled queries
    enhanced_results_df = test_spell_correction(enhanced_sym_spell, misspelled_queries)

    # Display enhanced results
    print("\nEnhanced Spell Correction Results:")
    print(enhanced_results_df)

Dictionary created at product_dictionary.txt

Spell Correction Results:
        Misspelled       Suggestion  Distance  Count
0   galxy s22 ultr   galxy s22 ultr         3      0
1   samsng neo qld   samsng neo qld         3      0
2      galaxy wach      galaxy wach         3      0
3           z flod           z flod         3      0
4       galxy buds       galxy buds         3      0
5  samung book pro  samung book pro         3      0
6   odysse monitor   odysse monitor         3      0

Creating comprehensive dictionary with variants...

Enhanced Spell Correction Results:
        Misspelled       Suggestion  Distance  Count
0   galxy s22 ultr   galxy s22 ultr         3      0
1   samsng neo qld   samsng neo qld         3      0
2      galaxy wach      galaxy wach         3      0
3           z flod           z flod         3      0
4       galxy buds       galxy buds         3      0
5  samung book pro  samung book pro         3      0
6   odysse monitor   odysse monitor         3

