In [78]:
import sys
sys.path.append("..")
import os.path
import pandas as pd
import time
from tqdm import tqdm
import chromadb
from openai import OpenAI
import json

from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory

In [9]:
class DeepInfraEmbeddings:
    def __init__(self, api_key, base_url, model="BAAI/bge-base-en-v1.5"):
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model = model

    def embed_documents(self, texts):
        if isinstance(texts, str):
            texts = [texts]

        embeddings = self.client.embeddings.create(
            model=self.model,
            input=texts,
            encoding_format="float"
        )

        return [embedding.embedding for embedding in embeddings.data]

    def embed_query(self, text):
        return self.embed_documents([text])[0]

In [10]:
COLLECTION_NAME = "big-basket-products-all"

# Create Chroma client
# client = chromadb.Client()
client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))

# Load data
file_path = os.path.join('./data/bigBasketProducts.csv')
df = pd.read_csv(file_path)
# df = df[:1000]
metadatas = [{'source': int(df.loc[i][0]), 'row': i} for i in range(len(df))]
docs = df.apply(lambda x: x.to_json(), axis=1).tolist()

# Initialize DeepInfraEmbeddings with your API key and base URL
embeddings = DeepInfraEmbeddings(
    api_key="7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk",
    base_url="https://api.deepinfra.com/v1/openai"
)

# Create Chroma collection
vector_store = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,  # Pass the DeepInfraEmbeddings instance
    client=client,
    persist_directory = os.path.join(os.getcwd(), 'vector_stores')
)

In [11]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [12]:
docs = retriever.get_relevant_documents("what is skin care?")

In [13]:
for doc in docs:
    for k, v in doc:
        print(k, v)

id None
metadata {'row': 20544, 'source': 20545}
page_content {"index":20545,"product":"Vitamin E Face Wash","category":"Beauty & Hygiene","sub_category":"Skin Care","brand":"INATUR ","sale_price":315.0,"market_price":450.0,"type":"Face Care","rating":null,"description":"Inatur Vitamin E Face Cleanser is a mild and creamy formulation that removes dirt, impurities, and make-up gently. Being rich in anti-oxidants, it is effective in preserving the moisture balance of the skin. It leaves the skin nourished and hydrated making it look, soft, clean & healthy."}
type Document
id None
metadata {'row': 8225, 'source': 8226}
page_content {"index":8226,"product":"Face Wash - Oily Skin","category":"Beauty & Hygiene","sub_category":"Men's Grooming","brand":"USTRAA","sale_price":194.0,"market_price":199.0,"type":"Face & Body","rating":3.0,"description":"This face wash with basil and lime extracts gives a younger, fresher and oil-free appearance. This face wash checks acne and controls oil on the fa

In [93]:

class NeuralSearcher:

    def __init__(self, collection_name: str):
        self.client = chromadb.PersistentClient(path=os.path.join(os.getcwd(), 'vector_stores'))
        
        self.embeddings = DeepInfraEmbeddings(
                        api_key="7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk",
                        base_url="https://api.deepinfra.com/v1/openai"
                    )
        self.vector_store = Chroma(
                        collection_name=COLLECTION_NAME,
                        embedding_function=self.embeddings,  # Pass the DeepInfraEmbeddings instance
                        client=self.client,
                        persist_directory = os.path.join(os.getcwd(), 'vector_stores')
                    )
        
        self.llm = ChatOpenAI(
            model='meta-llama/Meta-Llama-3.1-70B-Instruct',
            api_key="7E4hdDQrPP9mLi52rX4zCkJ2rFKIadOk",
            base_url="https://api.deepinfra.com/v1/openai",
            max_tokens = 70000
        )
        
        self.memory = ConversationBufferMemory(
            llm=self.llm,
            memory_key="chat_history",
            return_messages=True,
            input_key="question",
            output_key='answer'
        )
        
        prompt_template = '''
        About: You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query. 
        User is a human who, as a customer, wants to buy a product from this application.

        Given below is the summary of conversation between you (AI) and the user (Human):
        Context: {chat_history}

        Now use this summary of previous conversations and the retrieved descriptions of products to answer the following question asked by the user:
        Question: {question}

        Note: 
        - Give your answer in a compreshenive manner in enumerated format.
        - Do not generate any information on your own, striclty stick to the provided data. 
        - Also, do not repeat the information that is already present in the context.
        - If, you feel there is redundant information (or) an product is being described twice, specify that as well in the response.
        - The tone of the answer should be like a polite and friendly AI Assistant.
        '''
        self.PROMPT = PromptTemplate(
            template=prompt_template, input_variables=["chat_history", "question"]
        )

    def search(self, question: str, num_results: int, filter_: dict = None) -> dict:
        chain = RetrievalQAWithSourcesChain.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs={'k':num_results}),
            memory=self.memory,
            return_source_documents=True,
        )

        print(self.memory.load_memory_variables({})['chat_history'])
        gen_prompt = self.PROMPT.format(question=question, 
                                        chat_history=self.memory.load_memory_variables({})['chat_history'])
        start_time = time.time()
        res = chain(gen_prompt)
        print(f"Search took {time.time() - start_time} seconds")

        ret = {}
        ret['answer'] = res['answer']

        srcs = [json.loads(row.page_content) for row in res['source_documents']]

        df = pd.DataFrame(srcs)
        df = df.fillna('null')
        # df.set_index('product', inplace=True)

        df1 = df[['product','brand', 'sale_price', 'rating', 'description']]

        # Remove duplicates
        df1 = df1.drop_duplicates()

        ret['products'] = df1.to_dict(orient='records')
        return ret
    
    def check_memory_history(self):
        return self.memory.load_memory_variables({})

In [94]:
neural_searcher = NeuralSearcher(collection_name=COLLECTION_NAME)

In [95]:
q = "Suggest me some top 5 hair products?"
num_results = 5
res = neural_searcher.search(question=q, num_results=num_results)

[]
Search took 16.609533071517944 seconds


In [96]:
print(res['answer'])

Here are the top 5 hair products that I would like to suggest to you:

1. **Professional Brush** by Salon - This is a highly rated (5.0) tool that can help make hair application effortless and optimize the performance of your hair products.
2. **Professional Brush - Roller** by Salon - Similar to the previous product, this one is also a high-quality brush (4.0 rating) that can help with hair styling.
3. **Hair Roller - Medium 20 mm** by Daiou - This is a hair styling tool with a 4.0 rating that can help you achieve the desired look.
4. **Hair Gel Super Hold** by Vi-john - This is a hair gel that can help set your hair in the style you want, and also makes your hair smell good for hours.

Note that I have not included the **Balm - Ultra Power** product in the list as it seems to be more related to health and medicine rather than hair care.

Also, I would like to mention that the **Professional Brush** and **Professional Brush - Roller** products seem to have similar descriptions, so you

In [86]:
res

{'answer': 'Here are the top 5 hair products I would like to suggest to you:\n\n1. **Professional Brush** by Salon - This is a 5-star rated product that will make your hair care routine effortless and optimize the performance of your hair care products. (',
 'products': [{'product': 'Professional Brush',
   'brand': 'Salon',
   'sale_price': 500.0,
   'rating': 5.0,
   'description': 'The bestÃƒâ€šÂ\xa0brushesÃƒâ€šÂ\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visitÂ\xa0https://bigbasket.blog/'},
  {'product': 'Professional Brush - Roller',
   'brand': 'Salon',
   'sale_price': 500.0,
   'rating': 4.0,
   'description': 'The bestÃƒ€š\xa0brushesÃƒ€š\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visit\xa0https://bigbasket.blog/'},
  {'product': 'Hair Roller - Medium 20 mm',
   'brand': 'D

In [91]:
mem = neural_searcher.check_memory_history()

In [92]:
print(mem["chat_history"])

[HumanMessage(content="\n        About: You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query. \n        User is a human who, as a customer, wants to buy a product from this application.\n\n        Given below is the summary of conversation between you (AI) and the user (Human):\n        Context: []\n\n        Now use this summary of previous conversations and the retrieved descriptions of products to answer the following question asked by the user:\n        Question: Suggest me some top 5 hair products?\n\n        Note: \n        - Give your answer in a compreshenive manner in enumerated format.\n        - Do not generate any information on your own, striclty stick to the provided data. \n        - Also, do not repeat the information that is already present in the context.\n        - If, you feel there is redundant information (or) an product is being described twice, specify that as well in 

In [70]:
mem

{'chat_history': [SystemMessage(content='Here is the new summary:\n\nThe human is a customer looking to buy a product from the application, and the AI is a Product Recommendation Agent. The human asked the AI to suggest the top 5 hair products, and the AI provided a list of recommendations: a Professional Brush by Salon, a Hair Roller by Daiou, a Professional Brush - Roller by Salon, a Hair Gel Super Hold by Vi-john, and a Balm - Ultra Power by Zandu, noting that the first and third products seem similar but have slightly different descriptions.')]}

In [74]:
q = "Yes tell me the difference"
num_results = 5
res = neural_searcher.search(question=q, num_results=num_results)

Search took 22.008066654205322 seconds


In [75]:
print(res['answer'])

Here are the differences between the two products:

1. **Index**: The index of "Professional Brush" is 10410, while the index of "Professional Brush - Roller" is 23125.
2. **Product Name**: The product name of the first product is "Professional Brush", while the product name of the second product is "Professional Brush - Roller".
3. **Rating**: The rating of the first product is 5.0, while the rating of the second product is 4.0.
4. **Description**: Although the descriptions of both products seem similar, they are not identical. The description of the first product is: "The best brushes will render application effortless and optimise the performance of your makeup products to their full potential..." while the description of the second product is: "The best brushes will render application effortless and optimise the performance of your makeup products to their full potential...". Note that the descriptions seem more relevant to makeup products, not hair care products.

It's worth notin

In [73]:
res

{'answer': 'I\'m happy to help you with your question about the first product.\n\nBased on the provided descriptions, the first product is the "Professional Brush" by Salon. Here are some additional details about this product:\n\n1. **Category and Sub-Category**: This product belongs to the "Beauty & Hygiene" category and "Hair Care" sub-category.\n2. **Brand and Price**: The brand is Salon, and the sale price is ₹500.0 (which is the same as the market price).\n3. **Type and Rating**: This product is classified as "Tools & Accessories" and has a rating of 5.0.\n4. **Description**: Unfortunately, the description provided is not very detailed and seems to be more relevant to makeup products rather than hair care. It mentions that the best brushes will render application effortless and optimize the performance of makeup products.\n\nPlease note that there is another product called "Professional Brush - Roller" by Salon, which seems similar to this product but has a slightly different desc