In [1]:
import chromadb
import os
import subprocess
from dotenv import load_dotenv
load_dotenv()

os.environ['LANGCHAIN_TRACING_V2'] = "true"
os.environ['LANGCHAIN_API_KEY'] = os.getenv("LANGCHAIN_API_KEY")
os.environ['TAVILY_API_KEY'] = os.getenv("TAVILY_API_KEY")
os.environ['GOOGLE_API_KEY'] = os.getenv("GEMINI_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

import pandas as pd
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_ollama import OllamaEmbeddings
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import StructuredQueryOutputParser, get_query_constructor_prompt
from langchain.retrievers.self_query.chroma import ChromaTranslator

embed_model = OllamaEmbeddings(model=os.getenv('OLLAMA_EMBED'))
llama = ChatGroq(model=os.getenv("LLAMA_70B_GROQ"))
gemini = ChatGoogleGenerativeAI(model=os.getenv("GEMINI_FLASH"))

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
sephora_data = pd.read_csv(r"C:\Users\abhis\Downloads\archive\sephora_website_dataset.csv")
sephora_data.fillna("", inplace=True)

sephora_data["combined_context"] = (
    "Brand : " + sephora_data["brand"] + "\n" +
    "Category" + sephora_data["category"] + "\n" + 
    "name" + sephora_data["name"] + "\n" +
    "details" + sephora_data["details"] + "\n" +
    "Ingridients" + sephora_data["ingredients"] + "\n" 
)

docs = list()

for i, row  in sephora_data.iterrows():

    metadata = {
        "brand": row['brand'],
        "category": row['category'],
        "price": row['price'],
        "rating": row['rating'],
        "number_of_reviews": row['number_of_reviews']
    }

    doc = Document(
        page_content= row['combined_context'],
        metadata= metadata
    )

    docs.append(doc)

docs = docs[:166]

In [3]:
embed_model = OllamaEmbeddings(model=os.getenv('OLLAMA_EMBED'))
llama = ChatGroq(model=os.getenv("LLAMA_70B_GROQ"))
gemini = ChatGoogleGenerativeAI(model=os.getenv("GEMINI_FLASH"))

In [6]:
vector_store = Chroma.from_documents(documents=docs, collection_name='sephora_store', persist_directory='chroma/sephora_store_db', embedding=embed_model,)

In [7]:
metadata_field_info = [
    AttributeInfo(
        name='brand',
        description="The brand of the product. Examples include 'sephora collection', 'Fenty Beauty' etc",
        type="string"
    ),
    AttributeInfo(
        name='category',
        description="T  he category of the product such as 'skincare', 'makeup', 'hair' etc",
        type="string"
    ),
    AttributeInfo(
        name='price',
        description="The price of the product in USD",
        type="float"
    ),
    AttributeInfo(
        name='rating',
        description="The average user rating for a product from a sacle of 1 to 5",
        type="float"
    ),
    AttributeInfo(
        name='number_of_reviews',
        description="The total number of reviews given to a product",
        type="integer"
    ),
]

document_content_description = "COmbined  textual description of the product. Including ingredients and product details"

In [8]:
prompt = get_query_constructor_prompt(document_content_description, metadata_field_info)
output_parser = StructuredQueryOutputParser.from_components()
query_constructor = prompt | llama | output_parser

# Checking that our input is rendered as final entry into FewShotPrompt Template
print(prompt.suffix)

<< Example 3. >>
Data Source:
```json
{{
    "content": "COmbined  textual description of the product. Including ingredients and product details",
    "attributes": {{
    "brand": {{
        "description": "The brand of the product. Examples include 'sephora collection', 'Fenty Beauty' etc",
        "type": "string"
    }},
    "category": {{
        "description": "T  he category of the product such as 'skincare', 'makeup', 'hair' etc",
        "type": "string"
    }},
    "price": {{
        "description": "The price of the product in USD",
        "type": "float"
    }},
    "rating": {{
        "description": "The average user rating for a product from a sacle of 1 to 5",
        "type": "float"
    }},
    "number_of_reviews": {{
        "description": "The total number of reviews given to a product",
        "type": "integer"
    }}
}}
}}
```

User Query:
{query}

Structured Request:



In [9]:
retriever = SelfQueryRetriever(
    query_constructor=query_constructor,
    vectorstore=vector_store,
    structured_query_translator=ChromaTranslator()
)

In [14]:
prompt_obj = prompt.invoke({"query": "show me products with rating higher that 4.5"})

llama.invoke(prompt_obj).content

'```json\n{\n    "query": "",\n    "filter": "gt(\\"rating\\", 4.5)"\n}\n```'

In [11]:
query_constructor.invoke("show me products with rating higher that 4.5")

StructuredQuery(query=' ', filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=4.5), limit=None)

In [12]:
# Testing the Retriever
retriever.invoke("show me products with rating higher that 4.5")

[Document(metadata={'brand': 'Algenist', 'category': 'Value & Gift Sets', 'number_of_reviews': 2, 'price': 95.0, 'rating': 5.0}, page_content="Brand : Algenist\nCategoryValue & Gift Sets\nnameAlgenist Weekender Kit\ndetailsWhat it is:  A set that includes all travel-size must-haves for an effective anti-aging skincare regimen on the go.Skin Type: Normal- Dry- Combination- and Oily Skincare Concerns: Fine Lines and Wrinkles- Dryness- Dullness and Uneven Texture Highlighted Ingredients:- Alguronic Acid: Helps to support the skin for a more youthful appearance.- Plant-Derived Collagen: Gives skin a firmer- more lifted appearance. - Algae Prebiotic: Helps the balance of skin’s natural surface ecosystem.Ingredient Callouts: Free of parabens- formaldehydes- formaldehyde-releasing agents- phthalates- oxybenzone- coal tar- hydroquinone- triclosan- and triclocarban. These products are also vegan and come in recyclable packaging. What Else You Need to Know:  This kit is formulated with alguronic