In [1]:
import numpy as np

def load_glove_model(glove_file):
    print("Loading Glove Model...")
    model = {}
    with open(glove_file, 'r', encoding="utf8") as f:
        for line in f:
            split_line = line.split()
            word = split_line[0]
            embedding = np.array(split_line[1:], dtype='float32')
            model[word] = embedding
    print(f"{len(model)} words loaded.")
    return model

# Usage example:
glove_file = "C:\\Users\\rohit\\OneDrive\\Desktop\\AIPALLETE\\glove.6B\\glove.6B.50d.txt"  # Path to the small-sized GloVe file (e.g., 50 dimensions)
glove_model = load_glove_model(glove_file)

# Get word embedding for a word
embedding = glove_model.get("king")
print(embedding)


Loading Glove Model...
400000 words loaded.
[ 0.50451   0.68607  -0.59517  -0.022801  0.60046  -0.13498  -0.08813
  0.47377  -0.61798  -0.31012  -0.076666  1.493    -0.034189 -0.98173
  0.68229   0.81722  -0.51874  -0.31503  -0.55809   0.66421   0.1961
 -0.13495  -0.11476  -0.30344   0.41177  -2.223    -1.0756   -1.0783
 -0.34354   0.33505   1.9927   -0.04234  -0.64319   0.71125   0.49159
  0.16754   0.34344  -0.25663  -0.8523    0.1661    0.40102   1.1685
 -1.0137   -0.21585  -0.15155   0.78321  -0.91241  -1.6106   -0.64426
 -0.51042 ]


In [2]:
import pandas as pd
result_df=pd.read_excel("3.xlsx")

In [4]:
result_df

Unnamed: 0.1,Unnamed: 0,Input Ingredient,Lens,Related Trend,doc_count,score,bg_count,_index,_type,_id,...,_source.refresh_month,_source.trend_type,_source.actual_trend_type,_source.look_up_years,_source.created_date,_source.updated_date,_source.reviewer,_source.reviewer_comment,_source.environment,_source.previous_client_justification
0,0,Almond,Ingredient,Almond Milk,8.653991e-03,262.211400,878454,trend-ai-classification-v2,_doc,ZD2ULJEB3R3lz87wGzmn,...,2024-06-30,ingredient,ingredient,2.0,2024-08-07T11:23:07.393765,2024-08-07T11:23:07.393765,,,PRODUCTION,Medium engagement with flat growth (relative t...
1,1,Almond,Ingredient,Almond,1.837933e-02,208.257100,4973237,trend-ai-classification-v2,_doc,Yj2ULJEB3R3lz87wGzmn,...,2024-06-30,ingredient,ingredient,2.0,2024-08-07T11:23:07.393765,2024-08-07T11:23:07.393765,,,PRODUCTION,High engagement with positive growth (relative...
2,2,Almond,Ingredient,Milk,1.066920e-02,24.713820,13865534,trend-ai-classification-v2,_doc,oT2ULJEB3R3lz87wGzqo,...,2024-06-30,ingredient,ingredient,2.0,2024-08-07T11:23:07.393765,2024-08-07T11:23:07.393765,,,PRODUCTION,High engagement with flat growth (relative to ...
3,3,Almond,Ingredient,Orgeat,7.963686e-05,15.208090,1319,,,,...,,,,,,,,,,
4,4,Almond,Ingredient,Cereal,1.419893e-03,11.172760,552867,trend-ai-classification-v2,_doc,xz2ULJEB3R3lz87wGzmn,...,2024-06-30,ingredient,ingredient,2.0,2024-08-07T11:23:07.393765,2024-08-07T11:23:07.393765,,,PRODUCTION,High engagement with flat growth (relative to ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1186,1186,Yoghurt,Product,Yoghurt Tea,2.142696e-06,5.941920,36,,,,...,,,,,,,,,,
1187,1187,Yoghurt,Product,Berry Yoghurt Smoothie,6.428087e-06,3.768215,309,,,,...,,,,,,,,,,
1188,1188,Yoghurt,Product,Fruit Smoothie,2.606946e-05,2.780901,5478,,,,...,,,,,,,,,,
1189,1189,Yoghurt,Product,Strawberry Milk Drink,0.000000e+00,2.640904,9,,,,...,,,,,,,,,,


In [10]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import numpy as np
import faiss

# Load model and prebuild FAISS index with keyword embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Assuming 'result_df' is your dataframe containing the 'Related Trend' column
keywords = result_df['Related Trend'].dropna().astype(str).tolist()  # Clean NaN and convert to string

# Encode keywords
keyword_embeddings = model.encode(keywords)

# Build FAISS index for fast search
dimension = keyword_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)  # L2 distance metric for similarity search
index.add(np.array(keyword_embeddings))






In [15]:
# Function for real-time suggestions with fuzzy matching
def get_autocomplete_suggestions(user_input):
    # 1. Get FAISS top-k suggestions (based on semantic similarity)
    input_embedding = model.encode([user_input])
    _, indices = index.search(np.array(input_embedding), k=10)  # Top 10 semantic matches
    faiss_suggestions = [keywords[i] for i in indices[0]]

    # 2. Apply fuzzywuzzy to refine the suggestions further
    fuzzy_matches = process.extract(user_input, faiss_suggestions, scorer=fuzz.ratio, limit=5)

    # Return top fuzzy matches along with their scores
    return fuzzy_matches

# Example usage
user_input = ""

suggestions = get_autocomplete_suggestions(user_input)
print("Fuzzy suggestions based on input:", suggestions)

Fuzzy suggestions based on input: [('Green', 100), ('Green', 100), ('Green', 100), ('Green', 100), ('Color', 20)]
