# 6.6 RetrievalQA

In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langsmith import traceable
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.embeddings import CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA


In [3]:
# Chat model shared by every RetrievalQA chain built in the cells below.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

# On-disk byte store that backs the embedding cache.
cache_dir = LocalFileStore("./.cache/")

# Split on newlines; chunk lengths are counted with the tiktoken encoder
# because the splitter is built via from_tiktoken_encoder.
splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n",
    chunk_size=600,
    chunk_overlap=100,
)

loader = TextLoader("./files/chapter_one.txt")

# Load the source text and cut it into chunks in one step.
docs = loader.load_and_split(text_splitter=splitter)

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Namespace the cache by embedding model. Without a namespace the cache key
# is derived from the text alone, so switching `model` above would silently
# reuse vectors produced by the previous model from ./.cache/.
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    namespace=embeddings.model,
)

# Embed the chunks (through the cache) and index them for retrieval.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

@traceable
def generate_response(chain, prompt):
    """Run *prompt* through *chain* and return whatever ``chain.invoke`` yields.

    The function is wrapped with ``@traceable`` so each call goes through
    LangSmith's tracing decorator.

    Args:
        chain: Object exposing an ``invoke(prompt)`` method.
        prompt: Input handed unchanged to ``chain.invoke``.

    Returns:
        The value returned by ``chain.invoke(prompt)``.
    """
    response = chain.invoke(prompt)
    return response


In [4]:
# Baseline chain: chain_type="stuff" over the vectorstore's default retriever.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
)

# Go through the @traceable helper, like the other chain-type cells, so this
# run is traced the same way; the helper returns chain.invoke(prompt) as-is.
generate_response(chain, "Describe Victory Mansions")


{'query': 'Describe Victory Mansions',
 'result': 'Victory Mansions is a building in London, Airstrip One.  The hallway smells of boiled cabbage and old rag mats.  It has a lift that is seldom working, and the electric current is cut off during daylight hours at the time of the story.  The building is seven flights high, and posters depicting Big Brother are located on each landing.  From the roof, you can see the four Ministries simultaneously.\n'}

In [5]:
# Rebuild the QA chain with chain_type="refine", keeping the same model and
# retriever, then send the same question through the traced helper.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="refine",
    llm=llm,
)

generate_response(chain, "Describe Victory Mansions")

{'query': 'Describe Victory Mansions',
 'result': "The provided text focuses on the Two Minutes Hate, a ritualistic event, not a description of Victory Mansions itself.  Therefore, the original answer remains largely accurate.  While the excerpt doesn't describe the physical structure of Victory Mansions, it reinforces the understanding that its inhabitants are subject to the Party's oppressive control, as evidenced by their participation in the orchestrated emotional outburst of the Two Minutes Hate.  The squalor implied by Winston's living conditions in Victory Mansions is further contextualized by the contrast between the visceral hatred directed at the screen and the unquestioning devotion to Big Brother displayed immediately afterward. The building, therefore, serves as a backdrop to the psychological manipulation and control exerted by the Party over its citizens, rather than being described in detail itself.\n"}

In [6]:
# Rebuild the QA chain with chain_type="map_reduce", keeping the same model
# and retriever, then send the same question through the traced helper.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="map_reduce",
    llm=llm,
)

generate_response(chain, "Describe Victory Mansions")


{'query': 'Describe Victory Mansions',
 'result': "Based on the provided text, Victory Mansions is a building with a hallway that smells of boiled cabbage and old rag mats, a large poster of a man's face on the wall, a seldom-working lift, and electricity cut off during daylight hours.  The flats are seven flights up, and from the roof, all four Ministries are visible.\n"}

In [7]:
# Rebuild the QA chain with chain_type="map_rerank", keeping the same model
# and retriever, then send the same question through the traced helper.
chain = RetrievalQA.from_chain_type(
    retriever=vectorstore.as_retriever(),
    chain_type="map_rerank",
    llm=llm,
)

generate_response(chain, "Describe Victory Mansions")




{'query': 'Describe Victory Mansions',
 'result': 'Helpful Answer: Victory Mansions is a building with glass doors, hallways that smell of boiled cabbage and old rag mats, and a lift that is seldom working (currently off due to an economy drive).  The building has stairs leading to seven flights of flats.  Posters depicting a large face of a man (Big Brother) are displayed on each landing.'}