In [14]:
pip install pyspellchecker


Collecting pyspellchecker
  Downloading pyspellchecker-0.8.2-py3-none-any.whl.metadata (9.4 kB)
Downloading pyspellchecker-0.8.2-py3-none-any.whl (7.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: pyspellchecker
Successfully installed pyspellchecker-0.8.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import re
import pandas as pd
from spellchecker import SpellChecker

# A dictionary of known synonyms or forced corrections
SYNONYMS = {
    "appl": "apple",
    "appin": "apple",
    "aplin": "apple",
    "appln": "apple",
    "aapl": "apple",
    "nvda": "nvidia",
    # Add as many forced mappings as you need
}

def load_domain_dictionary(csv_file_path, text_column='ner_text_cleaned'):
    df = pd.read_csv(csv_file_path)
    texts = df[text_column].dropna()
    
    all_words = []
    for line in texts:
        line_clean = re.sub(r'[^\w\s]', '', line.lower())
        words = line_clean.split()
        all_words.extend(words)
    
    return all_words

def build_spellchecker(csv_file_path):
    # Increase distance to 3 for more flexible corrections (optional)
    spell = SpellChecker(distance=3)

    domain_words = load_domain_dictionary(csv_file_path)
    spell.word_frequency.load_words(domain_words)

    # Boost stock-related words; large frequency ensures they outrank common English words
    stock_names = ["apple", "tesla", "nvidia", "amazon", "google", "meta", "microsoft"]
    for name in stock_names:
        spell.word_frequency.add(name, 1000000)  # Very high frequency to overshadow common words
    
    return spell

def correct_query(spell, user_query):
    query_clean = re.sub(r'[^\w\s]', '', user_query.lower())
    query_words = query_clean.split()
    
    corrected_words = []
    for w in query_words:
        # 1) Force known synonyms first
        if w in SYNONYMS:
            corrected_words.append(SYNONYMS[w])
        # 2) If recognized, keep as-is
        elif w in spell:
            corrected_words.append(w)
        else:
            # 3) Otherwise, use spell.correction
            correction = spell.correction(w)
            corrected_words.append(correction if correction else w)
    
    return " ".join(corrected_words)

def search_engine_demo():
    csv_file_path = "NER_with_sentiment.csv"
    spell = build_spellchecker(csv_file_path)
    
    user_query = input("Enter search query: ")
    suggestion = correct_query(spell, user_query)
    
    if suggestion != user_query.lower():
        print(f"Did you mean: {suggestion}?")
    else:
        print("Your query looks good!")

if __name__ == "__main__":
    search_engine_demo()


  df = pd.read_csv(csv_file_path)


Did you mean: samsung galaxy?
