# Using Sentence Embedding

In [2]:
# %%
from sentence_transformers import SentenceTransformer, util
import numpy as np
import torch

# %%
# Load the data from the text file
with open('DarazData02.txt', 'r', encoding="utf8") as f:
    products = f.readlines()

# %%
# Initialize the sentence transformer model for generating embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings for each product
product_embeddings = model.encode(products, convert_to_tensor=True)

In [3]:
# %%
# Define a function to search for similar products
def search_products(query, k):
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, product_embeddings)[0]
    top_results = torch.topk(cosine_scores, k=k)

    print("Query:", query)
    for score, idx in zip(top_results[0], top_results[1]):
        print("\nScore:", score.item())
        print("Product Details:", products[idx])

# %%
# Example query
search_query = "What are your refund policies?"
search_products(search_query, 2)

Query: What are your refund policies?

Score: 0.5706478953361511
Product Details: 1.19 “Returns and Refunds Policy” shall mean the applicable Company policies which govern the procedure for returns and refunds of Products by Customers on the relevant Channels located at Returns and Refunds Policy of Pakistan.


Score: 0.549439549446106
Product Details: Home & Living Bedding & Bath, Furniture & Lighting, Kitchen & Dining, Home Décor, Home Improvements, Household & Home Storage Supplies, Lawn & Garden,Other Accessories Change of mind isnot applicable for return and refund. If the item received is damaged, defective, incorrect, or incomplete, a refund will be issued based on Daraz's assessment. Note: For device-related issues after usage or expiration of return policy period, please check if the item is covered under Seller orBrand Warranty. Refer to our Warranty Policy for information on the different warranty types and ways to contact the seller/manufacturer. Items that are non-returnab

# Using VectorDB

In [4]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from langchain.embeddings import GPT4AllEmbeddings

In [5]:
query = search_query

In [6]:
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=650, chunk_overlap=0)

In [7]:
loader = TextLoader("DarazData02.txt")
docs = loader.load()
splits = text_splitter.split_documents(docs)

Created a chunk of size 26958, which is longer than the specified 650
Created a chunk of size 1422, which is longer than the specified 650
Created a chunk of size 1350, which is longer than the specified 650
Created a chunk of size 685, which is longer than the specified 650
Created a chunk of size 868, which is longer than the specified 650
Created a chunk of size 871, which is longer than the specified 650
Created a chunk of size 656, which is longer than the specified 650
Created a chunk of size 1285, which is longer than the specified 650
Created a chunk of size 720, which is longer than the specified 650
Created a chunk of size 2500, which is longer than the specified 650
Created a chunk of size 1005, which is longer than the specified 650
Created a chunk of size 743, which is longer than the specified 650
Created a chunk of size 2032, which is longer than the specified 650
Created a chunk of size 762, which is longer than the specified 650
Created a chunk of size 945, which is lo

In [8]:
persist_directory = 'chroma/RetrievalDarazData'
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=GPT4AllEmbeddings(),
    persist_directory=persist_directory
)
vectordb.persist()

In [9]:
docs = vectordb.similarity_search_with_score(query, k=2)
for result in docs:
    print("\n")
    print(result[1])
    print(result[0].page_content)



0.9012474417686462
Home & Living Bedding & Bath, Furniture & Lighting, Kitchen & Dining, Home Décor, Home Improvements, Household & Home Storage Supplies, Lawn & Garden,Other Accessories Change of mind isnot applicable for return and refund. If the item received is damaged, defective, incorrect, or incomplete, a refund will be issued based on Daraz's assessment. Note: For device-related issues after usage or expiration of return policy period, please check if the item is covered under Seller orBrand Warranty. Refer to our Warranty Policy for information on the different warranty types and ways to contact the seller/manufacturer. Items that are non-returnable: Any custom-made items


0.9291806817054749
What is the cancellation and return policy for Buy More, Save More orders? Partial cancellations are not allowed but partial returns are allowed on Buy More, Save More orders i.e. you will not be able to cancel 1 or 2 products in your order and would instead have to cancel the entire or

In [10]:
print("question = \"" + query + "\"")
count = 1
for result in docs:
    print(f"response" + str(count) + " = \"" + result[0].page_content + "\"")  
    count+=1

question = "What are your refund policies?"
response1 = "Home & Living Bedding & Bath, Furniture & Lighting, Kitchen & Dining, Home Décor, Home Improvements, Household & Home Storage Supplies, Lawn & Garden,Other Accessories Change of mind isnot applicable for return and refund. If the item received is damaged, defective, incorrect, or incomplete, a refund will be issued based on Daraz's assessment. Note: For device-related issues after usage or expiration of return policy period, please check if the item is covered under Seller orBrand Warranty. Refer to our Warranty Policy for information on the different warranty types and ways to contact the seller/manufacturer. Items that are non-returnable: Any custom-made items"
response2 = "What is the cancellation and return policy for Buy More, Save More orders? Partial cancellations are not allowed but partial returns are allowed on Buy More, Save More orders i.e. you will not be able to cancel 1 or 2 products in your order and would instead

In [11]:
# from transformers import AutoModelForCausalLM, AutoTokenizer
# from transformers.generation import GenerationConfig
# import warnings
# warnings.filterwarnings("ignore")
# import textwrap

# !pip install tiktoken
# !pip install tiktoken transformers_stream_generator einops optimum auto-gptq
# tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
# from transformers import pipeline
# pipe = pipeline("text-generation", model="Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True)

# question = "Show me watches from sellers with more than 90% positive ratings."
# response1 = "Product 12: Product Name = High Quality Wrist Watch For Men & Boys| Decent Wrist Leather Strap Attractive Dial, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = Maal-Lo, URL = https://www.daraz.pk/products/-i409485404-s1960964280.html?search=1, Price Details = Original: Rs. 1000, Discounted: Rs. 699 | Original: Rs. 1000, Discounted: Rs. 900 | Original: Rs. 1000, Discounted: Rs. 799 | Original: Rs. 1000, Discounted: Rs. 580 | Original: Rs. 1000, Discounted: Rs. 590 | Original: Rs. 1000, Discounted: Rs. 580, Positive Seller Ratings = 90%, Ship on Time = 98%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"
# response2 = "Product 08: Product Name = OMG's Stylish watch for men , steel Built Design , Heavy Weight Watch in Fashion and for Casual use, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = OMGs, URL = https://www.daraz.pk/products/-i433228448-s2139698887.html?search=1, Price Details = Original: Rs. 2500, Discounted: Rs. 2199, Positive Seller Ratings = 96%, Ship on Time = 100%, Return Policy = 14 days free & easy return (Change of mind is not applicable)"

# def get_completion(prompt):
#     messages = [{
#         "role": "user", 
#         "content": prompt }]
#     prompt2 = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
#     outputs = pipe(prompt2, max_new_tokens=400, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
#     return outputs[0]["generated_text"]

# prompt = f"""
# Based on the following information:\n\n
# 1. {response1}\n\n
# 2. {response2}\n\n
# 3. {response3}\n\n
# Please provide a detailed answer to the question: {question}.
# Your answer should integrate the essence of all three responses, providing a unified answer that leverages the \
# diverse perspectives or data points provided by three responses.
# """

# response = get_completion(prompt)
# print(response)

Based on the following information:


1. Product 12: Product Name = High Quality Wrist Watch For Men & Boys| Decent Wrist Leather Strap Attractive Dial, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = Maal-Lo, URL = https://www.daraz.pk/products/-i409485404-s1960964280.html?search=1, Price Details = Original: Rs. 1000, Discounted: Rs. 699 | Original: Rs. 1000, Discounted: Rs. 900 | Original: Rs. 1000, Discounted: Rs. 799 | Original: Rs. 1000, Discounted: Rs. 580 | Original: Rs. 1000, Discounted: Rs. 590 | Original: Rs. 1000, Discounted: Rs. 580, Positive Seller Ratings = 90%, Ship on Time = 98%, Return Policy = 14 days free & easy return (Change of mind is not applicable)


2. Product 08: Product Name = OMG's Stylish watch for men , steel Built Design , Heavy Weight Watch in Fashion and for Casual use, Product Category = Watches Sunglasses Jewellery/Watches/Men/Fashion, Brand Name = No Brand, Seller Name = OMGs, URL = https://www.daraz.pk/products/-i433228448-s2139698887.html?search=1, Price Details = Original: Rs. 2500, Discounted: Rs. 2199, Positive Seller Ratings = 96%, Ship on Time = 100%, Return Policy = 14 days free & easy return (Change of mind is not applicable)


Please provide a detailed answer to the question: Show me watches from sellers with more than 90% positive ratings..
Your answer should integrate the essence of all three responses, providing a unified answer that leverages the diverse perspectives or data points provided by three responses.
<|im_end|>
<|im_start|>assistant
Based on the given information, we can identify watches from sellers who have more than 90% positive ratings by analyzing the product details and seller ratings provided.
For example, the product "OMG's Stylish watch for men" has a price of Rs. 2500 and an average rating of 96%. Additionally, the seller "OMGs" has received positive ratings of 96% on their website.
On the other hand, the product "High Quality Wrist Watch For Men & Boys" has a price of Rs. 1000 and an average rating of 100%. The seller "Maal-Lo" has received negative ratings of 90% on their website.
Therefore, based on the analysis, watches from sellers with more than 90% positive ratings include the "OMG's Stylish watch for men" and "High Quality Wrist Watch For Men & Boys". These watches are likely to be well-received by customers due to their high quality and stylish design.