# Self-Query Retriever (SQR) Using Built-in Functions

In [1]:
import pandas as pd

# Sample movie records used throughout the notebook. Each dict carries the
# text to embed ("page_content") plus four metadata fields ("year", "rating",
# "genre", "director") that are later attached to each Document.
data = [
    {"page_content": "A bunch of scientists bring back dinosaurs and mayhem breaks loose", "year": 1993, "rating": 7.7, "genre": "science fiction", "director": "Steven Spielberg"},
    {"page_content": "Leo DiCaprio gets lost in a dream within a dream within a dream within a ...", "year": 2010, "rating": 8.2, "genre": "science fiction", "director": "Christopher Nolan"},
    {"page_content": "A psychologist / detective gets lost in a series of dreams within dreams within dreams", "year": 2006, "rating": 8.6, "genre": "anime", "director": "Satoshi Kon"},
    {"page_content": "A bunch of normal-sized women are supremely wholesome and some men pine after them", "year": 2019, "rating": 8.3, "genre": "drama", "director": "Greta Gerwig"},
    {"page_content": "Toys come alive and have a blast doing so", "year": 1995, "rating": 8.3, "genre": "animated", "director": "John Lasseter"},
    {"page_content": "Three men walk into the Zone, three men walk out of the Zone", "year": 1979, "rating": 9.9, "genre": "thriller", "director": "Andrei Tarkovsky"},
    {"page_content": "A superhero team-up to save the world", "year": 2012, "rating": 8.0, "genre": "action", "director": "Joss Whedon"},
    {"page_content": "A young lion prince flees his kingdom only to learn the true meaning of responsibility and bravery", "year": 1994, "rating": 8.5, "genre": "animated", "director": "Roger Allers"},
    {"page_content": "A space crew travels through a wormhole in search of a new habitable planet", "year": 2014, "rating": 8.6, "genre": "science fiction", "director": "Christopher Nolan"},
    {"page_content": "A man with short-term memory loss uses notes and tattoos to hunt for the man he thinks killed his wife", "year": 2000, "rating": 8.4, "genre": "thriller", "director": "Christopher Nolan"},
    {"page_content": "A group of intergalactic criminals must pull together to stop a fanatical warrior with plans to purge the universe", "year": 2014, "rating": 8.0, "genre": "science fiction", "director": "James Gunn"},
    {"page_content": "A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers", "year": 1999, "rating": 8.7, "genre": "science fiction", "director": "Lana Wachowski"},
    {"page_content": "A group of friends venture deep into the streets of New York on a rescue mission during a rampaging monster attack", "year": 2008, "rating": 7.0, "genre": "horror", "director": "Matt Reeves"},
    {"page_content": "A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a CEO", "year": 2010, "rating": 8.8, "genre": "science fiction", "director": "Christopher Nolan"},
    {"page_content": "A woman living in New York takes care of a girl abandoned by her parents and tries to find her biological mother", "year": 2006, "rating": 7.1, "genre": "drama", "director": "Satoshi Kon"},
    {"page_content": "A young programmer is selected to participate in a ground-breaking experiment in synthetic intelligence by evaluating the human qualities of a highly advanced humanoid A.I.", "year": 2014, "rating": 7.7, "genre": "science fiction", "director": "Alex Garland"},
    {"page_content": "A cyborg policewoman must track down a mysterious hacker known as the Puppet Master", "year": 1995, "rating": 8.0, "genre": "anime", "director": "Mamoru Oshii"},
    {"page_content": "A young African prince battles the forces of evil using his new-found superpowers", "year": 2018, "rating": 7.3, "genre": "superhero", "director": "Ryan Coogler"},
    {"page_content": "A billionaire industrialist and genius inventor becomes a superhero and fights against evil", "year": 2008, "rating": 7.9, "genre": "superhero", "director": "Jon Favreau"},
    {"page_content": "An ordinary man becomes a vigilante and fights crime in a city overrun by criminals and corruption", "year": 2005, "rating": 8.2, "genre": "action", "director": "Christopher Nolan"},
]

# Tabulate the movie records and write them to a spreadsheet, omitting the
# DataFrame index column.
df = pd.DataFrame(data=data)
df.to_excel(excel_writer="movies_data.xlsx", index=False)

# Confirmation message shown in the cell output.
print("Excel file 'movies_data.xlsx' created successfully!")


Excel file 'movies_data.xlsx' created successfully!


In [2]:
import pandas as pd
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

In [3]:
# Reload the movie table from the spreadsheet written by the first cell.
df = pd.read_excel(io="movies_data.xlsx")


In [4]:
# One Document per spreadsheet row: the summary text becomes the page content
# and the remaining four columns are carried along as filterable metadata.
docs = [
    Document(
        page_content=record["page_content"],
        metadata={
            field: record.get(field)
            for field in ("year", "rating", "genre", "director")
        },
    )
    for _, record in df.iterrows()
]

In [5]:
# Embedding model shared by the vector store at index time and at query time.
embedding = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange
Found Intel OpenMP ('libiomp') and LLVM OpenMP ('libomp') loaded at
the same time. Both libraries are known to be incompatible and this
can cause random crashes or deadlocks on Linux when loaded in the
same Python program.
Using threadpoolctl may cause crashes or deadlocks. For more
information and possible workarounds, please see
    https://github.com/joblib/threadpoolctl/blob/master/multiple_openmp.md



In [6]:
# Index the movie documents into a persisted Chroma collection.
#
# Explicit, deterministic IDs keep this cell idempotent: without them every
# re-run would add the same 20 documents again under fresh random IDs, and the
# retriever would start returning duplicates. With stable IDs a re-run
# overwrites the existing entries instead.
# NOTE(review): entries already persisted by earlier runs under random IDs are
# not removed by this change; delete ./chroma_db/SQR once to start clean.
doc_ids = [f"movie-{position}" for position in range(len(docs))]
db2 = Chroma.from_documents(
    docs,
    embedding=embedding,
    ids=doc_ids,
    persist_directory="./chroma_db/SQR",
)

In [7]:
# Re-open the persisted collection from disk using the same embedding model.
db3 = Chroma(
    embedding_function=embedding,
    persist_directory="./chroma_db/SQR",
)

In [8]:
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever

In [9]:
# Schema of the metadata fields attached to each Document. The descriptions
# are passed to the query-constructor prompt, so they must reflect the values
# actually present in the data; the genre list below matches the genres used
# in the movie records.
metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie. One of ['science fiction', 'anime', 'drama', 'animated', 'thriller', 'action', 'horror', 'superhero']",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating",
        description="A 1-10 rating for the movie",
        type="float"
    ),
]


In [10]:
# Short description of what each Document's page_content holds; passed to
# both SelfQueryRetriever.from_llm and get_query_constructor_prompt below.
document_content_description = "Brief summary of a movie"

In [11]:
from langchain_community.llms import Ollama

In [12]:
# Local Ollama model used to turn natural-language questions into structured
# queries. Temperature is pinned to 0 so the generated query (and therefore
# the retrieved documents) is as repeatable as possible across runs; sampled
# output also makes parse failures of the structured query more likely.
llm = Ollama(model="llama3", temperature=0)

In [13]:
# Build the self-query retriever in one step from the LLM, the persisted
# vector store, the content description and the metadata schema.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=db3,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
)

In [17]:
# Question combining a director constraint with a minimum rating.
nolan_question = "Can you find a movie directed by Christopher Nolan that has a rating of at least 7?"
retriever.invoke(nolan_question)

[Document(metadata={'director': 'Christopher Nolan', 'genre': 'science fiction', 'rating': 8.2, 'year': 2010}, page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...'),
 Document(metadata={'director': 'Christopher Nolan', 'genre': 'action', 'rating': 8.2, 'year': 2005}, page_content='An ordinary man becomes a vigilante and fights crime in a city overrun by criminals and corruption'),
 Document(metadata={'director': 'Christopher Nolan', 'genre': 'science fiction', 'rating': 8.8, 'year': 2010}, page_content='A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a CEO'),
 Document(metadata={'director': 'Christopher Nolan', 'genre': 'science fiction', 'rating': 8.6, 'year': 2014}, page_content='A space crew travels through a wormhole in search of a new habitable planet')]

In [18]:
# Question combining a genre, a year range and a topic ("toys").
toys_question = "an animated movie released between 1995 and 2005 that involves toys?"
retriever.invoke(toys_question)

[Document(metadata={'director': 'John Lasseter', 'genre': 'animated', 'rating': 8.3, 'year': 1995}, page_content='Toys come alive and have a blast doing so')]

# Self-Query Retriever (SQR) From Scratch

In [19]:
from langchain.chains.query_constructor.base import (
    StructuredQueryOutputParser,
    get_query_constructor_prompt,
)


In [20]:
# Prompt for the query constructor, built from the content description and
# the metadata schema defined earlier.
prompt = get_query_constructor_prompt(
    document_contents=document_content_description,
    attribute_info=metadata_field_info,
)

In [21]:
# Parser for the LLM's structured-query output. from_components() is called
# with no arguments, so the parser runs with its default settings.
output_parser = StructuredQueryOutputParser.from_components()


In [22]:
# Chain the three stages: the prompt feeds the LLM, whose output feeds the
# structured-query parser.
query_constructor = prompt | llm | output_parser

In [23]:
from langchain.retrievers.self_query.chroma import ChromaTranslator

In [25]:
# Assemble the retriever by hand: the query-constructor chain, the persisted
# vector store, and the translator for Chroma. This rebinds `retriever`,
# replacing the instance created with from_llm above.
chroma_translator = ChromaTranslator()
retriever = SelfQueryRetriever(
    vectorstore=db3,
    query_constructor=query_constructor,
    structured_query_translator=chroma_translator,
)

In [26]:
# Exercise the hand-built retriever with a year range, a topic and a genre
# preference.
toys_between_years_question = "What's a movie after 1990 but before 2005 that's all about toys, and preferably is animated"
retriever.invoke(toys_between_years_question)

[Document(metadata={'director': 'John Lasseter', 'genre': 'animated', 'rating': 8.3, 'year': 1995}, page_content='Toys come alive and have a blast doing so'),
 Document(metadata={'director': 'Roger Allers', 'genre': 'animated', 'rating': 8.5, 'year': 1994}, page_content='A young lion prince flees his kingdom only to learn the true meaning of responsibility and bravery')]