In [78]:
import sys
sys.path.append("..")
import os.path
import pandas as pd
import time
from tqdm import tqdm
import chromadb
from openai import OpenAI
import json

from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory

In [9]:
class DeepInfraEmbeddings:
    """Embedding adapter that calls an OpenAI-compatible embeddings endpoint.

    Exposes the two methods (`embed_documents`, `embed_query`) that the
    notebook's vector store wrapper is given as its `embedding_function`.
    """

    def __init__(self, api_key, base_url, model="BAAI/bge-base-en-v1.5"):
        """Create the API client.

        Args:
            api_key: Credential passed to the `OpenAI` client.
            base_url: Base URL of the OpenAI-compatible service.
            model: Name of the embedding model requested on every call.
        """
        self.client = OpenAI(api_key=api_key, base_url=base_url)
        self.model = model

    def embed_documents(self, texts):
        """Embed one or more texts.

        Args:
            texts: A single string or any iterable of strings.

        Returns:
            A list with one embedding (list of floats) per input text, in the
            order the service returned them. An empty input yields `[]`
            without contacting the service.
        """
        # A bare string would otherwise be treated as an iterable of characters.
        if isinstance(texts, str):
            texts = [texts]
        else:
            # Materialise tuples/generators so emptiness can be checked and the
            # payload sent to the API is a plain list.
            texts = list(texts)

        # Nothing to embed: skip the network round-trip entirely.
        if not texts:
            return []

        response = self.client.embeddings.create(
            model=self.model,
            input=texts,
            encoding_format="float"
        )

        return [item.embedding for item in response.data]

    def embed_query(self, text):
        """Embed a single query string and return its embedding vector."""
        return self.embed_documents([text])[0]

In [10]:
COLLECTION_NAME = "big-basket-products-all"

# Directory (under the current working directory) holding the persisted Chroma data.
VECTOR_STORE_DIR = os.path.join(os.getcwd(), 'vector_stores')

# Create Chroma client
# client = chromadb.Client()
client = chromadb.PersistentClient(path=VECTOR_STORE_DIR)

# Load data
file_path = os.path.join('.', 'data', 'bigBasketProducts.csv')
df = pd.read_csv(file_path)
# df = df[:1000]

# One metadata dict per row: 'source' is the value of the first CSV column,
# 'row' is the 0-based position. Positional access via .iloc avoids the
# deprecated integer fallback of `df.loc[i][0]` and does not build a Series
# per row.
metadatas = [
    {'source': int(first_value), 'row': position}
    for position, first_value in enumerate(df.iloc[:, 0])
]
# Each product row serialised as a JSON string.
# NOTE(review): `docs` and `metadatas` are not consumed by any visible cell;
# presumably they feed an ingestion step that is not shown -- confirm.
docs = df.apply(lambda x: x.to_json(), axis=1).tolist()

# Read the DeepInfra credential from the environment instead of committing it
# to the notebook.
DEEPINFRA_API_KEY = os.environ.get("DEEPINFRA_API_KEY")
if not DEEPINFRA_API_KEY:
    raise RuntimeError(
        "Set the DEEPINFRA_API_KEY environment variable before running this cell."
    )

# Initialize DeepInfraEmbeddings with your API key and base URL
embeddings = DeepInfraEmbeddings(
    api_key=DEEPINFRA_API_KEY,
    base_url="https://api.deepinfra.com/v1/openai"
)

# Create Chroma collection
vector_store = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings,  # Pass the DeepInfraEmbeddings instance
    client=client,
    persist_directory=VECTOR_STORE_DIR
)

In [11]:
# Expose the vector store through the retriever interface, asking for 5 results per query (k=5).
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [12]:
# Smoke-test retrieval with a sample query.
# NOTE(review): this rebinds `docs`, which the data-loading cell used for the
# list of JSON row strings; from here on `docs` holds retrieved documents.
docs = retriever.get_relevant_documents("what is skin care?")

In [13]:
# Dump every (field, value) pair of each retrieved document, one pair per line.
for hit in docs:
    for field_name, field_value in hit:
        print(field_name, field_value)

id None
metadata {'row': 20544, 'source': 20545}
page_content {"index":20545,"product":"Vitamin E Face Wash","category":"Beauty & Hygiene","sub_category":"Skin Care","brand":"INATUR ","sale_price":315.0,"market_price":450.0,"type":"Face Care","rating":null,"description":"Inatur Vitamin E Face Cleanser is a mild and creamy formulation that removes dirt, impurities, and make-up gently. Being rich in anti-oxidants, it is effective in preserving the moisture balance of the skin. It leaves the skin nourished and hydrated making it look, soft, clean & healthy."}
type Document
id None
metadata {'row': 8225, 'source': 8226}
page_content {"index":8226,"product":"Face Wash - Oily Skin","category":"Beauty & Hygiene","sub_category":"Men's Grooming","brand":"USTRAA","sale_price":194.0,"market_price":199.0,"type":"Face & Body","rating":3.0,"description":"This face wash with basil and lime extracts gives a younger, fresher and oil-free appearance. This face wash checks acne and controls oil on the fa

In [112]:

class NeuralSearcher:
    """Conversational product search over a persisted Chroma collection.

    Combines the Chroma vector store (embedded with `DeepInfraEmbeddings`), a
    chat model served through an OpenAI-compatible endpoint, and a
    conversation-summary memory so follow-up questions can refer to earlier
    answers.
    """

    def __init__(self, collection_name: str, api_key: str = None,
                 base_url: str = "https://api.deepinfra.com/v1/openai"):
        """Build the vector store, LLM, memory and prompt template.

        Args:
            collection_name: Name of the Chroma collection to query.
            api_key: Credential for the embeddings and chat endpoints. When
                omitted, the `DEEPINFRA_API_KEY` environment variable is used.
            base_url: Base URL of the OpenAI-compatible service.

        Raises:
            ValueError: If no API key is supplied and the environment variable
                is not set.
        """
        # Resolve the credential first so a missing key fails fast, before any
        # client or store is created. The key is never hard-coded in source.
        api_key = api_key or os.environ.get("DEEPINFRA_API_KEY")
        if not api_key:
            raise ValueError(
                "No API key provided: pass api_key or set the DEEPINFRA_API_KEY environment variable."
            )

        store_dir = os.path.join(os.getcwd(), 'vector_stores')
        self.client = chromadb.PersistentClient(path=store_dir)

        self.embeddings = DeepInfraEmbeddings(
            api_key=api_key,
            base_url=base_url
        )
        self.vector_store = Chroma(
            # Honour the constructor argument rather than a module-level global.
            collection_name=collection_name,
            embedding_function=self.embeddings,  # Pass the DeepInfraEmbeddings instance
            client=self.client,
            persist_directory=store_dir
        )

        self.llm = ChatOpenAI(
            model='meta-llama/Meta-Llama-3.1-70B-Instruct',
            api_key=api_key,
            base_url=base_url,
            max_tokens=70000
        )

        # Running summary of the dialogue; 'question'/'answer' are the chain
        # keys whose values get folded into the summary after each call.
        self.memory = ConversationSummaryMemory(
            llm=self.llm,
            memory_key="chat_history",
            return_messages=True,
            input_key="question",
            output_key='answer'
        )

        prompt_template = '''
        About: You are a Product Recommendation Agent who gets his context from the retrieved descriptions of the products that matches best with the User's query. 
        User is a human who, as a customer, wants to buy a product from this application.

        Given below is the summary of conversation between you (AI) and the user (Human):
        Context: {chat_history}

        Now use this summary of previous conversations and the retrieved descriptions of products to answer the following question asked by the user:
        Question: {question}

        Note: 
        - Give your answer in a compreshenive manner in enumerated format.
        - Do not generate any information on your own, striclty stick to the provided data. 
        - Also, do not repeat the information that is already present in the context.
        - If, you feel there is redundant information (or) an product is being described twice, specify that as well in the response.
        - The tone of the answer should be like a polite and friendly AI Assistant.
        '''
        self.PROMPT = PromptTemplate(
            template=prompt_template, input_variables=["chat_history", "question"]
        )

    def search(self, question: str, num_results: int, filter_: dict = None) -> dict:
        """Answer `question` using retrieved products and conversation memory.

        Args:
            question: The user's question.
            num_results: Number of documents the retriever should return (k).
            filter_: Optional metadata filter forwarded to the retriever; when
                falsy, no filter is applied.

        Returns:
            A dict with 'answer' (the chain's answer text) and 'products'
            (de-duplicated product records built from the source documents).
        """
        search_kwargs = {'k': num_results}
        if filter_:
            # Previously accepted but silently dropped.
            search_kwargs['filter'] = filter_

        chain = RetrievalQAWithSourcesChain.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=self.vector_store.as_retriever(search_kwargs=search_kwargs),
            memory=self.memory,
            return_source_documents=True,
        )

        # The memory returns its summary as a list of messages; tolerate an
        # empty list by falling back to an empty summary.
        history = self.memory.load_memory_variables({})['chat_history']
        summary = history[0].content if history else ''

        # NOTE(review): the fully formatted prompt (instructions + summary +
        # question) is handed to the chain as its single input, so it also
        # appears to be what the retriever searches with and what the memory
        # records as the question. Consider passing only `question` to the
        # chain and injecting the instructions via the chain's own prompt.
        gen_prompt = self.PROMPT.format(question=question, chat_history=summary)

        # perf_counter is monotonic, so the reported duration cannot be skewed
        # by system clock adjustments.
        started = time.perf_counter()
        res = chain(gen_prompt)
        print(f"Search took {time.perf_counter() - started} seconds")

        return {
            'answer': res['answer'],
            'products': self._products_from_sources(res['source_documents']),
        }

    @staticmethod
    def _products_from_sources(source_documents) -> list:
        """Turn retrieved documents (JSON in `page_content`) into product records."""
        columns = ['product', 'brand', 'sale_price', 'rating', 'description']
        records = [json.loads(doc.page_content) for doc in source_documents]
        table = (
            pd.DataFrame(records)
            # reindex (instead of df[[...]]) keeps this safe when nothing was
            # retrieved or a record lacks one of the columns.
            .reindex(columns=columns)
            .fillna('null')
            # Remove duplicates
            .drop_duplicates()
        )
        return table.to_dict(orient='records')

    def check_memory_history(self):
        """Return the memory's current variables (the conversation summary)."""
        return self.memory.load_memory_variables({})

In [113]:
# Build the searcher once; its conversation memory persists across the search calls below.
neural_searcher = NeuralSearcher(collection_name=COLLECTION_NAME)

In [114]:
# First question of the conversation; ask the retriever for 5 candidate products.
q = "Suggest me some top 5 hair products?"
num_results = 5
res = neural_searcher.search(question=q, num_results=num_results)

Search took 19.25849747657776 seconds


In [115]:
# Show only the generated answer text (rendered with real line breaks).
print(res['answer'])

Here are the top 5 hair product suggestions based on the retrieved descriptions:

1. **Professional Brush** by Salon - A high-quality brush that makes application effortless and optimizes the performance of your makeup products. (Rating: 5.0)
2. **Hair Roller - Medium 20 mm** by Daiou - A hair roller for hair stylers, perfect for achieving the desired hairstyle. (Rating: 4.0)
3. **Professional Brush - Roller** by Salon - Another variant of the professional brush, designed for effortless application and optimal performance. (Rating: 4.0)
4. **Hair Gel Super Hold** by Vi-john - A hair gel that sets your hair in the desired look and keeps it smelling good for hours. (Rating: null)
5. **Balm - Ultra Power** by Zandu - An Ayurvedic proprietary medicine for external use only, which may have benefits for hair care. (Rating: 4.4)

Note: The **Professional Brush** and **Professional Brush - Roller** seem to be similar products, with the only difference being the addition of "Roller" in the latt

In [116]:
# Inspect the full result dict returned by search(): 'answer' plus 'products'.
res

{'answer': 'Here are the top 5 hair product suggestions based on the retrieved descriptions:\n\n1. **Professional Brush** by Salon - A high-quality brush that makes application effortless and optimizes the performance of your makeup products. (Rating: 5.0)\n2. **Hair Roller - Medium 20 mm** by Daiou - A hair roller for hair stylers, perfect for achieving the desired hairstyle. (Rating: 4.0)\n3. **Professional Brush - Roller** by Salon - Another variant of the professional brush, designed for effortless application and optimal performance. (Rating: 4.0)\n4. **Hair Gel Super Hold** by Vi-john - A hair gel that sets your hair in the desired look and keeps it smelling good for hours. (Rating: null)\n5. **Balm - Ultra Power** by Zandu - An Ayurvedic proprietary medicine for external use only, which may have benefits for hair care. (Rating: 4.4)\n\nNote: The **Professional Brush** and **Professional Brush - Roller** seem to be similar products, with the only difference being the addition of 

In [126]:
# Snapshot the conversation memory.
# NOTE(review): this cell's execution count (126) is later than the follow-up
# query cells that appear further down (120-125), so the summary displayed
# below already covers all three questions. Move this cell after them so the
# notebook reproduces top-to-bottom.
mem = neural_searcher.check_memory_history()

In [127]:
# Print the list of messages stored under the memory's 'chat_history' key.
print(mem["chat_history"])

[SystemMessage(content='Here is the new summary:\n\nThe human asks the AI for top 5 hair product suggestions. The AI provides a list of 5 hair products based on the retrieved descriptions, including a professional brush, hair roller, hair gel, and an Ayurvedic balm. The AI notes that two of the products, the Professional Brush and Professional Brush - Roller, seem to be similar, with the only difference being the addition of "Roller" in the latter. The human asks for more information about the first product, which is the Professional Brush. The AI provides additional details about the Professional Brush, including its category, sub-category, brand, price, type, rating, and description, and reiterates that it seems similar to the Professional Brush - Roller product. The human then asks if there are any other similar products, and the AI responds by mentioning the Professional Brush - Roller as a similar product, noting that it has almost identical specifications and description as the P

In [128]:
# Inspect the raw memory-variables dict.
mem

{'chat_history': [SystemMessage(content='Here is the new summary:\n\nThe human asks the AI for top 5 hair product suggestions. The AI provides a list of 5 hair products based on the retrieved descriptions, including a professional brush, hair roller, hair gel, and an Ayurvedic balm. The AI notes that two of the products, the Professional Brush and Professional Brush - Roller, seem to be similar, with the only difference being the addition of "Roller" in the latter. The human asks for more information about the first product, which is the Professional Brush. The AI provides additional details about the Professional Brush, including its category, sub-category, brand, price, type, rating, and description, and reiterates that it seems similar to the Professional Brush - Roller product. The human then asks if there are any other similar products, and the AI responds by mentioning the Professional Brush - Roller as a similar product, noting that it has almost identical specifications and des

In [120]:
# Follow-up question; "the first product" is only resolvable through the
# conversation summary kept by the searcher's memory.
q = "Tell me more about the first product"
num_results = 5
res = neural_searcher.search(question=q, num_results=num_results)

Search took 26.596819162368774 seconds


In [121]:
# Show the generated answer to the follow-up question.
print(res['answer'])

I'm happy to help you with your question about the first product!

Based on our previous conversation, the first product I mentioned was the "Professional Brush". Here are some additional details about this product:

1. **Product Name**: Professional Brush
2. **Category**: Beauty & Hygiene
3. **Sub-Category**: Hair Care
4. **Brand**: Salon
5. **Sale Price**: ₹500.0
6. **Market Price**: ₹500.0
7. **Type**: Tools & Accessories
8. **Rating**: 5.0
9. **Description**: The best brushes will render application effortless and optimise the performance of your makeup products to their full potential.

Please note that this product seems to be similar to the "Professional Brush - Roller" product, with the only difference being the addition of "Roller" in the latter. If you'd like to know more about the differences between these two products, I'd be happy to help!




In [122]:
# Inspect the full result dict (answer and product records) for the follow-up.
res

{'answer': 'I\'m happy to help you with your question about the first product!\n\nBased on our previous conversation, the first product I mentioned was the "Professional Brush". Here are some additional details about this product:\n\n1. **Product Name**: Professional Brush\n2. **Category**: Beauty & Hygiene\n3. **Sub-Category**: Hair Care\n4. **Brand**: Salon\n5. **Sale Price**: ₹500.0\n6. **Market Price**: ₹500.0\n7. **Type**: Tools & Accessories\n8. **Rating**: 5.0\n9. **Description**: The best brushes will render application effortless and optimise the performance of your makeup products to their full potential.\n\nPlease note that this product seems to be similar to the "Professional Brush - Roller" product, with the only difference being the addition of "Roller" in the latter. If you\'d like to know more about the differences between these two products, I\'d be happy to help!\n\n',
 'products': [{'product': 'Professional Brush - Roller',
   'brand': 'Salon',
   'sale_price': 500.0

In [123]:
# Second follow-up; again depends on the remembered conversation for what "similar" refers to.
q = "Are there any other similar products?"
num_results = 5
res = neural_searcher.search(question=q, num_results=num_results)

Search took 21.11023235321045 seconds


In [124]:
# Show the generated answer to the second follow-up.
print(res['answer'])

Based on the provided data, here are some similar products to the Professional Brush:

1. **Professional Brush - Roller**: This product seems to be very similar to the Professional Brush, with the only difference being the addition of "Roller" in the name. The description and specifications are almost identical. (


In [125]:
# Inspect the full result dict for the second follow-up.
res

{'answer': 'Based on the provided data, here are some similar products to the Professional Brush:\n\n1. **Professional Brush - Roller**: This product seems to be very similar to the Professional Brush, with the only difference being the addition of "Roller" in the name. The description and specifications are almost identical. (',
 'products': [{'product': 'Professional Brush - Roller',
   'brand': 'Salon',
   'sale_price': 500.0,
   'rating': 4.0,
   'description': 'The bestÃƒ€š\xa0brushesÃƒ€š\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visit\xa0https://bigbasket.blog/'},
  {'product': 'Professional Brush',
   'brand': 'Salon',
   'sale_price': 500.0,
   'rating': 5.0,
   'description': 'The bestÃƒâ€šÂ\xa0brushesÃƒâ€šÂ\xa0will render application effortless and optimise the performance of your makeup products to their full potential  For Beauty tips, tricks & more visitÂ\xa0https://big