# Using Sentence Embeddings

In [15]:
# %%
from sentence_transformers import SentenceTransformer, util
import numpy as np
import torch

# %%
# Load the product catalogue from the text file; each non-empty line is
# treated as one product record.
# Surrounding whitespace (including the trailing newline) is stripped and
# blank lines are skipped, so empty strings are never embedded or returned
# as search hits.
with open('FinalProductsList.txt', 'r', encoding="utf8") as f:
    products = [line.strip() for line in f if line.strip()]

# %%
# Sentence-transformer checkpoint used to embed both the products and,
# later, the search queries
model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

# Embed every product line once, up front, and keep the result as a tensor
product_embeddings = model.encode(
    sentences=products,
    convert_to_tensor=True,
)

In [16]:
# %%
# Define a function to search for similar products
def search_products(query, k):
    """Print the ``k`` products most similar to ``query``.

    The query is embedded with the module-level ``model`` and compared with
    the module-level ``product_embeddings`` using cosine similarity. The
    best matches are printed together with their similarity scores.

    Args:
        query: Free-text search string.
        k: Maximum number of products to print. A value larger than the
            number of indexed products is clamped to that number, because
            ``torch.topk`` raises when asked for more elements than exist.

    Returns:
        None. The results are written to stdout.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # The similarity helper returns one row per query; [0] selects the row
    # for our single query.
    cosine_scores = util.pytorch_cos_sim(query_embedding, product_embeddings)[0]

    # Never request more results than there are scored products.
    top_k = min(k, cosine_scores.shape[0])
    top_results = torch.topk(cosine_scores, k=top_k)

    print("Query:", query)
    for score, idx in zip(top_results.values, top_results.indices):
        print("\nScore:", score.item())
        # Convert the 0-d index tensor to a plain int before indexing the list.
        print("Product Details:", products[idx.item()])

# %%
# Demo: print the two closest products for a sample shopper query
search_query = "Show me watches under Rs. 500"
search_products(query=search_query, k=2)

Query: Show me watches under Rs. 500

Score: 0.5022721290588379
Product Details: Product 15: Product Name = BUISNESS WATCH CLASSIC LUXURY ROUND DILE STEEL STRAPS WITH SILVER HAND|WRIST BRACELT CHAIN FOR BOYS MENS, Product Category = Watches Sunglasses Jewellery/Watches/Men/Business, Brand Name = No Brand, Seller Name = Finds Shop, URL = https://www.daraz.pk/products/-i438130449-s2118958341.html?search=1, Price Details = Original: Rs. 999, Discounted: Rs. 457 | Original: Rs. 999, Discounted: Rs. 529, Positive Seller Ratings = 82%, Ship on Time = 95%, Return Policy = 14 days free & easy return (Change of mind is not applicable)


Score: 0.4969567060470581
Product Details: Product 03: Product Name = Sports Digtal Fashion Watch Women Men Square LED Watch Silicone Electronic Watch Women's Watches Clock, Product Category = Watches Sunglasses Jewellery/Watches/Men/Sports, Brand Name = No Brand, Seller Name = Modern watch company, URL = https://www.daraz.pk/products/led-i431801201-s2265468211.

# Using a Vector Database (Chroma)

In [17]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from langchain.embeddings import GPT4AllEmbeddings

In [18]:
# Free-text shopper query; it is passed to the vector store's similarity
# search later in this notebook.
query = "Show me watches under Rs. 200"

In [19]:
# Split the catalogue on newlines into chunks of roughly 650 characters,
# with no overlap between neighbouring chunks. The splitter warns when it
# has to emit a chunk longer than the requested size.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=650,
    separator="\n",
)

In [20]:
# Read the product catalogue and cut it into chunks for the vector store.
# The encoding is given explicitly so the file is decoded the same way on
# every platform, and the same way as the earlier open(..., encoding="utf8")
# read of this file.
loader = TextLoader("FinalProductsList.txt", encoding="utf8")
docs = loader.load()
splits = text_splitter.split_documents(docs)

Created a chunk of size 675, which is longer than the specified 650
Created a chunk of size 708, which is longer than the specified 650
Created a chunk of size 715, which is longer than the specified 650
Created a chunk of size 697, which is longer than the specified 650
Created a chunk of size 751, which is longer than the specified 650


In [21]:
import os

persist_directory = 'chroma/ProductsSearch'
embedding_function = GPT4AllEmbeddings()

# Chroma.from_documents adds the given documents to whatever is already
# stored in persist_directory, so re-running this cell would index every
# chunk again and similarity searches would return duplicate hits.
# To keep the cell idempotent, an existing persisted index is reused and
# the index is only built (and persisted) when none exists yet.
# NOTE: delete the directory to force a rebuild after the product file changes.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding_function,
    )
else:
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding_function,
        persist_directory=persist_directory
    )
    vectordb.persist()

In [22]:
# Fetch the two best-matching chunks together with their scores; each entry
# of `docs` is a (document, score) pair.
docs = vectordb.similarity_search_with_score(query, k=2)
for result in docs:
    matched_doc, match_score = result
    print("\n")
    print(match_score)
    print(matched_doc.page_content)



0.9470004697188122
Product 03: Product Name = Sports Digtal Fashion Watch Women Men Square LED Watch Silicone Electronic Watch Women's Watches Clock, Product Category = Watches Sunglasses Jewellery/Watches/Men/Sports, Brand Name = No Brand, Seller Name = Modern watch company, URL = https://www.daraz.pk/products/led-i431801201-s2265468211.html?search=1, Price Details = Original: Rs. 899, Discounted: Rs. 300 | Original: Rs. 650, Discounted: Rs. 199, Positive Seller Ratings = 79%, Ship on Time = 97%, Return Policy = 14 days free & easy return (Change of mind is not applicable)


0.9470004697188122
Product 03: Product Name = Sports Digtal Fashion Watch Women Men Square LED Watch Silicone Electronic Watch Women's Watches Clock, Product Category = Watches Sunglasses Jewellery/Watches/Men/Sports, Brand Name = No Brand, Seller Name = Modern watch company, URL = https://www.daraz.pk/products/led-i431801201-s2265468211.html?search=1, Price Details = Original: Rs. 899, Discounted: Rs. 300 | Ori

In [23]:
# with open("search_results.txt", "w") as file:
#     count = 1
#     for result in docs:
#         file.write(f"response" + str(count) + " = \"" + result[0].page_content + "\"\n")  
#         count+=1

# with open("search_results.txt", "r") as file:
#     print(file.read())

In [24]:
# Print each retrieved chunk as a numbered `responseN = "..."` line.
# enumerate(..., start=1) replaces the hand-maintained counter, and a real
# f-string replaces the placeholder-less f"response" plus concatenation;
# the printed text is unchanged.
for count, result in enumerate(docs, start=1):
    print(f'response{count} = "{result[0].page_content}"')

response1 = "Product 03: Product Name = Sports Digtal Fashion Watch Women Men Square LED Watch Silicone Electronic Watch Women's Watches Clock, Product Category = Watches Sunglasses Jewellery/Watches/Men/Sports, Brand Name = No Brand, Seller Name = Modern watch company, URL = https://www.daraz.pk/products/led-i431801201-s2265468211.html?search=1, Price Details = Original: Rs. 899, Discounted: Rs. 300 | Original: Rs. 650, Discounted: Rs. 199, Positive Seller Ratings = 79%, Ship on Time = 97%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"
response2 = "Product 03: Product Name = Sports Digtal Fashion Watch Women Men Square LED Watch Silicone Electronic Watch Women's Watches Clock, Product Category = Watches Sunglasses Jewellery/Watches/Men/Sports, Brand Name = No Brand, Seller Name = Modern watch company, URL = https://www.daraz.pk/products/led-i431801201-s2265468211.html?search=1, Price Details = Original: Rs. 899, Discounted: Rs. 300 | Original: Rs. 650,

In [25]:
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from transformers.generation import GenerationConfig
# import warnings
# warnings.filterwarnings("ignore")
# import textwrap

# !pip install tiktoken
# !pip install tiktoken transformers_stream_generator einops optimum auto-gptq
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
# from transformers import pipeline
# pipe = pipeline("text-generation", model="Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True)

# response1 ="Product 03: Product Name = Sports Digtal Fashion Watch Women Men Square LED Watch Silicone Electronic Watch Women's Watches Clock, Product Category = Watches Sunglasses Jewellery/Watches/Men/Sports, Brand Name = No Brand, Seller Name = Modern watch company, URL = https://www.daraz.pk/products/led-i431801201-s2265468211.html?search=1, Price Details = Original: Rs. 899, Discounted: Rs. 300 | Original: Rs. 650, Discounted: Rs. 199, Positive Seller Ratings = 79%, Ship on Time = 97%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"
# response2 ="Product 03: Product Name = Sports Digtal Fashion Watch Women Men Square LED Watch Silicone Electronic Watch Women's Watches Clock, Product Category = Watches Sunglasses Jewellery/Watches/Men/Sports, Brand Name = No Brand, Seller Name = Modern watch company, URL = https://www.daraz.pk/products/led-i431801201-s2265468211.html?search=1, Price Details = Original: Rs. 899, Discounted: Rs. 300 | Original: Rs. 650, Discounted: Rs. 199, Positive Seller Ratings = 79%, Ship on Time = 97%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"
# response3 ="Product 15: Product Name = BUISNESS WATCH CLASSIC LUXURY ROUND DILE STEEL STRAPS WITH SILVER HAND|WRIST BRACELT CHAIN FOR BOYS MENS, Product Category = Watches Sunglasses Jewellery/Watches/Men/Business, Brand Name = No Brand, Seller Name = Finds Shop, URL = https://www.daraz.pk/products/-i438130449-s2118958341.html?search=1, Price Details = Original: Rs. 999, Discounted: Rs. 457 | Original: Rs. 999, Discounted: Rs. 529, Positive Seller Ratings = 82%, Ship on Time = 95%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"

# question = "Show me watches under Rs. 200"

# def get_completion(prompt):
#     messages = [{
#         "role": "user", 
#         "content": prompt }]
#     prompt2 = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
#     outputs = pipe(prompt2, max_new_tokens=400, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
#     return outputs[0]["generated_text"]

# prompt = f"""
# Based on the following information:\n\n
# 1. {response1}\n\n
# 2. {response2}\n\n
# 3. {response3}\n\n
# Please provide a detailed answer to the question: {question}.
# Your answer should integrate the essence of all three responses, providing a unified answer that leverages the \
# diverse perspectives or data points provided by three responses.
# """

# response = get_completion(prompt)
# print(response)