# Using Sentence Embedding

In [8]:
# %%
from sentence_transformers import SentenceTransformer, util
import numpy as np
import torch

# %%
# Load the catalogue: one product description per line of the text file.
# Surrounding whitespace (including the trailing newline) is stripped and blank
# lines are dropped, so empty strings are neither embedded nor returned as hits.
with open('ProductsList.txt', 'r', encoding="utf8") as f:
    products = [line.strip() for line in f if line.strip()]

# %%
# Sentence-transformer checkpoint used to embed both the catalogue and the queries
model = SentenceTransformer('all-MiniLM-L6-v2')

# Encode every product line up front; convert_to_tensor=True keeps the result
# as a tensor so it can be scored directly against a query embedding later.
product_embeddings = model.encode(
    products,
    convert_to_tensor=True,
)

In [9]:
# %%
# Define a function to search for similar products
def search_products(query, k):
    """Print the ``k`` products whose embeddings are most similar to ``query``.

    Args:
        query: Free-text search string; it is encoded with the module-level
            ``model``.
        k: Number of results requested. It is capped at the number of scored
            products, because ``torch.topk`` raises when ``k`` is larger than
            the number of elements it selects from.

    Returns:
        None. The query, followed by the cosine score and product text of each
        hit, is written to stdout.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # Only one query is scored, so the first row holds the score of every product.
    cosine_scores = util.pytorch_cos_sim(query_embedding, product_embeddings)[0]
    top_k = min(k, cosine_scores.shape[0])
    top_results = torch.topk(cosine_scores, k=top_k)

    print("Query:", query)
    # .tolist() yields plain Python floats/ints, so `products` is indexed with
    # an int rather than a 0-d tensor.
    hits = zip(top_results.values.tolist(), top_results.indices.tolist())
    for score, idx in hits:
        print("\nScore:", score)
        print("Product Details:", products[idx])

# %%
# Demo: print the two closest catalogue entries for a sample question
search_query = (
    "Show me watches from sellers with more than 90% positive ratings."
)
search_products(search_query, k=2)

Query: Show me watches from sellers with more than 90% positive ratings.

Score: 0.5897935628890991
Product Details: Product 12: Product Name = High Quality Wrist Watch For Men & Boys| Decent Wrist Leather Strap Attractive Dial, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = Maal-Lo, URL = https://www.daraz.pk/products/-i409485404-s1960964280.html?search=1, Price Details = Original: Rs. 1000, Discounted: Rs. 699 | Original: Rs. 1000, Discounted: Rs. 900 | Original: Rs. 1000, Discounted: Rs. 799 | Original: Rs. 1000, Discounted: Rs. 580 | Original: Rs. 1000, Discounted: Rs. 590 | Original: Rs. 1000, Discounted: Rs. 580, Positive Seller Ratings = 90%, Ship on Time = 98%, Return Policy = 14 days free & easy return (Change of mind is not applicable)


Score: 0.5287548303604126
Product Details: Product 08: Product Name = OMG's Stylish watch for men , steel Built Design , Heavy Weight Watch in Fashion and for Casual use, Product Cate

# Using VectorDB

In [10]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from langchain.embeddings import GPT4AllEmbeddings

In [11]:
# Question used for the vector-store lookup below; it is the same text as the
# `search_query` used in the sentence-embedding section.
query = "Show me watches from sellers with more than 90% positive ratings."

In [12]:
# Split on newlines with no overlap between chunks. The output recorded for the
# split cell reports chunks of 697 and 751 characters, so chunk_size=650 is not
# a hard upper bound for this input.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=650,
    chunk_overlap=0,
)

In [13]:
# Read the catalogue as UTF-8, matching how the same file is opened in the
# sentence-embedding section; without an explicit encoding the platform default
# is used, which is not UTF-8 on every system.
loader = TextLoader("ProductsList.txt", encoding="utf8")
docs = loader.load()
# Chunk the loaded document(s) with the newline-based splitter
splits = text_splitter.split_documents(docs)

Created a chunk of size 697, which is longer than the specified 650
Created a chunk of size 751, which is longer than the specified 650


In [14]:
import os

persist_directory = 'chroma/Retrieval02'
embedding_function = GPT4AllEmbeddings()

# Chroma.from_documents adds the documents to whatever is already stored in
# persist_directory, so re-running this cell would index every chunk again and
# make searches return duplicates. Reuse an existing store when there is one;
# delete the directory to force a rebuild after ProductsList.txt changes.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectordb = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding_function,
    )
else:
    vectordb = Chroma.from_documents(
        documents=splits,
        embedding=embedding_function,
        persist_directory=persist_directory,
    )
    vectordb.persist()

In [15]:
# Fetch the two nearest chunks together with their scores. In the recorded
# output the scores are listed in ascending order, so this looks like a
# distance (smaller = closer) -- TODO confirm against the Chroma documentation.
docs = vectordb.similarity_search_with_score(query, k=2)
for document, score in docs:
    print("\n")
    print(score)
    print(document.page_content)



0.8202949862541257
Product 12: Product Name = High Quality Wrist Watch For Men & Boys| Decent Wrist Leather Strap Attractive Dial, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = Maal-Lo, URL = https://www.daraz.pk/products/-i409485404-s1960964280.html?search=1, Price Details = Original: Rs. 1000, Discounted: Rs. 699 | Original: Rs. 1000, Discounted: Rs. 900 | Original: Rs. 1000, Discounted: Rs. 799 | Original: Rs. 1000, Discounted: Rs. 580 | Original: Rs. 1000, Discounted: Rs. 590 | Original: Rs. 1000, Discounted: Rs. 580, Positive Seller Ratings = 90%, Ship on Time = 98%, Return Policy = 14 days free & easy return (Change of mind is not applicable)


0.9422445279893952
Product 08: Product Name = OMG's Stylish watch for men , steel Built Design , Heavy Weight Watch in Fashion and for Casual use, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = OMGs, URL = https://www.da

In [16]:
# Echo the question and each retrieved chunk as `name = "value"` lines,
# numbering the responses from 1.
print(f'question = "{query}"')
for count, result in enumerate(docs, start=1):
    # Each entry of `docs` is indexed as (document, score); only the text is shown.
    print(f'response{count} = "{result[0].page_content}"')

question = "Show me watches from sellers with more than 90% positive ratings."
response1 = "Product 12: Product Name = High Quality Wrist Watch For Men & Boys| Decent Wrist Leather Strap Attractive Dial, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = Maal-Lo, URL = https://www.daraz.pk/products/-i409485404-s1960964280.html?search=1, Price Details = Original: Rs. 1000, Discounted: Rs. 699 | Original: Rs. 1000, Discounted: Rs. 900 | Original: Rs. 1000, Discounted: Rs. 799 | Original: Rs. 1000, Discounted: Rs. 580 | Original: Rs. 1000, Discounted: Rs. 590 | Original: Rs. 1000, Discounted: Rs. 580, Positive Seller Ratings = 90%, Ship on Time = 98%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"
response2 = "Product 08: Product Name = OMG's Stylish watch for men , steel Built Design , Heavy Weight Watch in Fashion and for Casual use, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion,

In [17]:
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from transformers.generation import GenerationConfig
# import warnings
# warnings.filterwarnings("ignore")
# import textwrap

# !pip install tiktoken
# !pip install tiktoken transformers_stream_generator einops optimum auto-gptq
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
# from transformers import pipeline
# pipe = pipeline("text-generation", model="Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True)

# question = "Show me watches from sellers with more than 90% positive ratings."
# response1 = "Product 12: Product Name = High Quality Wrist Watch For Men & Boys| Decent Wrist Leather Strap Attractive Dial, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = Maal-Lo, URL = https://www.daraz.pk/products/-i409485404-s1960964280.html?search=1, Price Details = Original: Rs. 1000, Discounted: Rs. 699 | Original: Rs. 1000, Discounted: Rs. 900 | Original: Rs. 1000, Discounted: Rs. 799 | Original: Rs. 1000, Discounted: Rs. 580 | Original: Rs. 1000, Discounted: Rs. 590 | Original: Rs. 1000, Discounted: Rs. 580, Positive Seller Ratings = 90%, Ship on Time = 98%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"
# response2 = "Product 08: Product Name = OMG's Stylish watch for men , steel Built Design , Heavy Weight Watch in Fashion and for Casual use, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = OMGs, URL = https://www.daraz.pk/products/-i433228448-s2139698887.html?search=1, Price Details = Original: Rs. 2500, Discounted: Rs. 2199, Positive Seller Ratings = 96%, Ship on Time = 100%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"

# def get_completion(prompt):
#     messages = [{
#         "role": "user", 
#         "content": prompt }]
#     prompt2 = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
#     outputs = pipe(prompt2, max_new_tokens=400, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
#     return outputs[0]["generated_text"]

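# NOTE: the template below uses {response3} and asks for "all three responses", but only
# response1 and response2 are defined in this cell; define response3 or drop item 3
# before uncommenting, otherwise the f-string raises NameError.
# prompt = f"""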
# Based on the following information:\n\n
# 1. {response1}\n\n
# 2. {response2}\n\n
# 3. {response3}\n\n
# Please provide a detailed answer to the question: {question}.
# Your answer should integrate the essence of all three responses, providing a unified answer that leverages the \
# diverse perspectives or data points provided by three responses.
# """

# response = get_completion(prompt)
# print(response)