<a href="https://colab.research.google.com/github/mahesh-from-sirsi/All_My_AI_Work/blob/main/BuildFastWithAI_2_5_Self_Query_Retriever.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Self Query Retriever

In [3]:
%pip install -qU openai langchain-community langchain langchain-openai chromadb langchain_chroma

In [4]:
import os

os.environ["OPENAI_API_KEY"] = ""

In [5]:
from langchain_openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain_community.vectorstores import Chroma

embeddings = OpenAIEmbeddings()

Creating a Self Query Retriever

In [6]:
from langchain_chroma import Chroma
from langchain_core.documents import Document

docs = [
    Document(
        page_content="A poor but big-hearted man takes orphans into his home. After discovering his scientist father's invisibility device, he rises to the occasion and fights to save his children and all of India from the clutches of a greedy gangster",
        metadata={"year": 2006, "director": "Rakesh Roshan", "rating": 7.1, "genre": "science fiction"},
    ),
    Document(
        page_content="The story of six young Indians who assist an English woman to film a documentary on the freedom fighters from their past, and the events that lead them to relive the long-forgotten saga of freedom",
        metadata={"year": 2006, "director": "Rakeysh Omprakash Mehra", "rating": 9.1, "genre": "drama"},
    ),
    Document(
        page_content="A depressed wealthy businessman finds his life changing after he meets a spunky and care-free young woman",
        metadata={"year": 2007, "director": "Anurag Basu", "rating": 6.8, "genre": "romance"},
    ),
    Document(
        page_content="A schoolteacher's world turns upside down when he realizes that his former student, who is now a world-famous artist, may have plagiarized his work",
        metadata={"year": 2023, "director": "R. Balki", "rating": 7.8, "genre": "drama"},
    ),
    Document(
        page_content="A man returns to his country in order to marry his childhood sweetheart and proceeds to create misunderstanding between the families",
        metadata={"year": 1995, "director": "Aditya Chopra", "rating": 8.1, "genre": "romance"},
    ),
    Document(
        page_content="The story of an Indian army officer guarding a picket alone in the Kargil conflict between India and Pakistan",
        metadata={"year": 2003, "director": "J.P. Dutta", "rating": 7.9, "genre": "war"},
    ),
    Document(
        page_content="Three young men from different parts of India arrive in Mumbai, seeking fame and fortune",
        metadata={"year": 1975, "director": "Ramesh Sippy", "rating": 8.2, "genre": "action"},
    ),
    Document(
        page_content="A simple man from a village falls in love with his new neighbor. He enlists the help of his musical-theater friends to woo the lovely girl-next-door away from her music teacher",
        metadata={"year": 1990, "director": "Sooraj Barjatya", "rating": 7.7, "genre": "musical"},
    ),
    Document(
        page_content="A young mute girl from Pakistan loses herself in India with no way to head back. A devoted man undertakes the task to get her back to her homeland and unite her with her family",
        metadata={"year": 2015, "director": "Kabir Khan", "rating": 8.0, "genre": "drama"},
    ),
    Document(
        page_content="Three idiots embark on a quest for a lost buddy. This journey takes them on a hilarious and meaningful adventure through memory lane and gives them a chance to relive their college days",
        metadata={"year": 2009, "director": "Rajkumar Hirani", "rating": 9.4, "genre": "comedy"},
    ),
]

vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings())

In [7]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie.",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating", description="A 1-10 rating for the movie", type="float"
    ),
]
document_content_description = "Brief summary of a movie"
llm = ChatOpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
)

In [8]:
# This example only specifies a filter
retriever.invoke("I want to watch a movie rated higher than 8")

[Document(id='31c9f961-ab3a-418d-82f6-63f05285c52c', metadata={'year': 2009, 'director': 'Rajkumar Hirani', 'rating': 9.4, 'genre': 'comedy'}, page_content='Three idiots embark on a quest for a lost buddy. This journey takes them on a hilarious and meaningful adventure through memory lane and gives them a chance to relive their college days'),
 Document(id='d5b264b0-d5bb-4ef0-9e81-c73fb0a9e6eb', metadata={'year': 1995, 'genre': 'romance', 'rating': 8.1, 'director': 'Aditya Chopra'}, page_content='A man returns to his country in order to marry his childhood sweetheart and proceeds to create misunderstanding between the families'),
 Document(id='28ba3f21-f0d2-4b96-a2c3-994d2d0d74ac', metadata={'genre': 'drama', 'director': 'Rakeysh Omprakash Mehra', 'rating': 9.1, 'year': 2006}, page_content='The story of six young Indians who assist an English woman to film a documentary on the freedom fighters from their past, and the events that lead them to relive the long-forgotten saga of freedom')

In [10]:
retriever.invoke(" I want to watch a movie by Rajkumar Hirani which is about college life")

[Document(id='31c9f961-ab3a-418d-82f6-63f05285c52c', metadata={'director': 'Rajkumar Hirani', 'genre': 'comedy', 'year': 2009, 'rating': 9.4}, page_content='Three idiots embark on a quest for a lost buddy. This journey takes them on a hilarious and meaningful adventure through memory lane and gives them a chance to relive their college days')]

In [11]:
from langchain.chains.query_constructor.base import (
    StructuredQueryOutputParser,
    get_query_constructor_prompt,
)

prompt = get_query_constructor_prompt(
    document_content_description,
    metadata_field_info,
)
output_parser = StructuredQueryOutputParser.from_components()
query_constructor = prompt | llm | output_parser

In [12]:
query_constructor.invoke(
    {
        "query": "I want to watch a movie rated higher than 9.0"
    }
)

StructuredQuery(query=' ', filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=9.0), limit=None)

In [13]:
query_constructor.invoke(
    {
        "query": "I want to watch a movie rated higher than 9.0 which has suspense plot"
    }
)

StructuredQuery(query='suspense plot', filter=Comparison(comparator=<Comparator.GT: 'gt'>, attribute='rating', value=9.0), limit=None)

In [14]:
query_constructor.invoke(
    {
        "query": "I want to watch a movie by Rajkumar Hirani which is about college life"
    }
)

StructuredQuery(query='college life', filter=Comparison(comparator=<Comparator.EQ: 'eq'>, attribute='director', value='Rajkumar Hirani'), limit=None)