# Retrieval-Augmented Generation

## Configure the chat model

In [22]:
import getpass
import os

# Resolve the local account name. The same value is reused below as the Postgres
# role, password and database name when the connection string is built.
# getpass.getuser() checks LOGNAME first (the variable this cell used to read
# directly) and then falls back to USER, LNAME, USERNAME and the password
# database, so `user` is never silently None when LOGNAME is not set.
user = getpass.getuser()
print(f'Hello, {user}')

Hello, mbklein


In [34]:
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import AzureChatOpenAI
from langchain_aws import ChatBedrock

In [58]:
# Azure-hosted model: the deployment name is taken from the AZURE_OPENAI_DEPLOYMENT
# environment variable. `name` is the label shown by format_response().
gpt4 = AzureChatOpenAI(azure_deployment=os.getenv('AZURE_OPENAI_DEPLOYMENT'))
gpt4.name = 'GPT-4'

# Bedrock-hosted model, selected by its model id, with its own display label.
sonnet = ChatBedrock(model_id='anthropic.claude-3-sonnet-20240229-v1:0')
sonnet.name = 'Claude Sonnet v3'

## Initialize the Vectorstore

### Import the required classes and initialize the `PGVector` vectorstore.

In [59]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_core.documents import Document
from langchain_postgres import PGVector
from langchain_postgres.vectorstores import PGVector

# Where the vectors live: a local Postgres instance in which the current user name
# is used as role, password and database name alike.
collection_name = "code4lib2024"
connection = f'postgresql+psycopg://{user}:{user}@localhost:5432/{user}'

# Embedding model used for both indexing and querying.
# NOTE(review): trust_remote_code=True lets code shipped with the model repository
# run locally -- confirm the model source is trusted.
embeddings = HuggingFaceEmbeddings(
    model_name='nomic-ai/nomic-embed-text-v1.5',
    model_kwargs={'trust_remote_code': True},
)

# NOTE(review): the collection is stored in Postgres, so documents added by earlier
# runs presumably remain unless the collection is cleared -- confirm before relying
# on a clean slate for the before/after demo further down.
vectorstore = PGVector(
    connection=connection,
    collection_name=collection_name,
    embeddings=embeddings,
    use_jsonb=True,
)

print(f"Vectorstore collection name: {vectorstore.collection_name}")

<All keys matched successfully>


Vectorstore collection name: code4lib2024


## Index your data

In [60]:
# Sample library-service statements to index, as (id, text, topic) rows. Every entry
# shares the same "location" value, so it is filled in when the Documents are built.
_doc_rows = [
    (1, "Interlibrary loan requests can be made online or at the service desk", "borrowing"),
    (2, "Course reserves are available for checkout at the circulation desk", "borrowing"),
    (3, "Study rooms can be reserved up to two weeks in advance", "reservations"),
    (4, "Library workshops on database research are held monthly", "workshops"),
    (5, "Access to digital archives is available through the library portal", "online resources"),
    (6, "Renew your borrowed items online or at any library kiosk", "borrowing"),
    (7, "Special collections can be accessed in the reading room", "borrowing"),
    (8, "Library orientation tours are available for new users", "facilities"),
    (9, "The library offers free Wi-Fi to all visitors", "facilities"),
    (10, "Photocopying and printing services are available on the ground floor", "printing services"),
]

docs = [
    Document(
        page_content=text,
        metadata={"id": doc_id, "location": "library", "topic": topic},
    )
    for doc_id, text, topic in _doc_rows
]

# Use each document's metadata id as its id in the vectorstore.
# NOTE(review): the ids are passed as ints; the vectorstore API may expect strings -- confirm.
vectorstore.add_documents(docs, ids=[entry.metadata["id"] for entry in docs])

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

## Generation

In [67]:
# Pick which of the configured chat models the RAG chain and the printed attribution use.
llm = sonnet

In [74]:
def format_docs(docs):
    """Combine the text of several documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined in order, separated by a blank line.
    """
    contents = [item.page_content for item in docs]
    blank_line = "\n\n"
    return blank_line.join(contents)

def format_response(llm, response):
    """Wrap a chain response's answer in a short attribution banner.

    Args:
        llm: Object whose ``name`` attribute is shown as the source of the answer.
        response: Mapping with an ``'answer'`` entry holding the text to display.

    Returns:
        A string made of a blank first line, the attribution line, a blank line,
        the answer, and a final line of four spaces.
    """
    attribution = f"According to {llm.name}:"
    answer = f"{response['answer']}"
    # The leading "" and trailing "    " reproduce the blank first line and the
    # indented last line of the rendered block.
    return "\n".join(["", attribution, "", answer, "    "])

In [63]:
# Retriever over the vectorstore: `k` caps how many documents come back and
# `score_threshold` is the minimum relevance score for a hit to be kept
# (see LangChain's `as_retriever` documentation for the exact scoring rules).
retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={
        "score_threshold": 0.4,
        "k": 8,
    },
)

In [68]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

# Prompt sent to the model: the retrieved text fills {context} and the user's
# question fills {question}.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
Cite the context provided in your response!

Context: {context}

Question: {question}

Helpful Answer:"""
prompt = PromptTemplate.from_template(template)

# Answer-generation chain. Its input is a mapping that already holds "context" and
# "question"; "context" is replaced by the output of format_docs(), the prompt is
# filled in, `llm` is called, and the result goes through StrOutputParser.
# `llm` is looked up when this cell runs, so rebinding `llm` later has no effect on
# the chain until this cell is executed again.
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

# Full chain. The invoked value is handed to the retriever (result stored under
# "context") and passed through unchanged as "question"; the generation chain's
# result is then added under "answer", so the response exposes both the answer and
# the retrieved documents.
rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

In [69]:
# Run retrieval and generation for one question; the result carries "context",
# "question" and "answer".
response = rag_chain_with_source.invoke("Does the library have an ILL program?")

In [75]:
# Show the answer together with the display name of the selected model.
print(format_response(llm, response))


According to Claude Sonnet v3:

Yes, the library has an interlibrary loan (ILL) program according to the context provided. This is evident from the following statement:

"Interlibrary loan requests can be made online or at the service desk"

However, it's worth noting that another piece of context mentions:

"Interlibrary loan has been temporarily suspended due to a party in the break room"

So while the library does have an ILL program, it seems to be temporarily unavailable or suspended at the moment.
    


In [31]:
# A newer notice about interlibrary loan, added so that a repeated query retrieves
# it alongside the original interlibrary-loan statement.
# NOTE(review): the saved output of the first query above already quotes this
# document, so the cells were not last run top to bottom; re-run them in order
# against a clean collection to show the before/after contrast.
updated_docs = [
    Document(
        metadata={"id": 11, "location": "library", "topic": "news"},
        page_content="Interlibrary loan has been temporarily suspended due to a party in the break room",
    ),
]
vectorstore.add_documents(updated_docs, ids=[entry.metadata["id"] for entry in updated_docs])

[11]

In [76]:
# Ask the same question again, now that the suspension notice is in the vectorstore.
response = rag_chain_with_source.invoke("Does the library have an ILL program?")

In [77]:
# Show the new answer together with the display name of the selected model.
print(format_response(llm, response))


According to Claude Sonnet v3:

Based on the context provided, yes, the library has an interlibrary loan (ILL) program. This is evidenced by the following statements:

"Interlibrary loan requests can be made online or at the service desk"

"Interlibrary loan has been temporarily suspended due to a party in the break room"

These statements directly indicate that the library offers an interlibrary loan service, where patrons can request materials from other libraries. While the second statement suggests that the ILL program is currently suspended, it confirms the existence of such a program at the library.
    


In [33]:
# List the documents the retriever supplied as context for the latest response:
# one id line, one content line, then a 40-dash separator per document.
for doc in response["context"]:
    print(
        f"Document ID: {doc.metadata['id']}",
        f"Content: {doc.page_content}",
        "-" * 40,
        sep="\n",
    )

Document ID: 8
Content: Library orientation tours are available for new users
----------------------------------------
Document ID: 1
Content: Interlibrary loan requests can be made online or at the service desk
----------------------------------------
Document ID: 7
Content: Special collections can be accessed in the reading room
----------------------------------------
Document ID: 5
Content: Access to digital archives is available through the library portal
----------------------------------------
Document ID: 9
Content: The library offers free Wi-Fi to all visitors
----------------------------------------
Document ID: 6
Content: Renew your borrowed items online or at any library kiosk
----------------------------------------
Document ID: 11
Content: Interlibrary loan has been temporarily suspended due to a party in the break room
----------------------------------------
Document ID: 2
Content: Course reserves are available for checkout at the circulation desk
----------------------------------------

In [None]:
# Ask a follow-up that depends on the current state of the interlibrary-loan
# service, and print only the generated answer (without the model attribution).
response = rag_chain_with_source.invoke("Can I head down from my office to the ILL desk right now?")
print(response["answer"])

### Let's update a document!

In [None]:
# Re-run the follow-up question through the full retrieval + generation chain.
response = rag_chain_with_source.invoke("Can I head down from my office to the ILL desk right now?")

In [None]:
# Print only the generated answer text from the response.
print(response["answer"])

In [None]:
# Show which documents backed the answer: id, content, then a 40-dash rule.
for doc in response["context"]:
    print("\n".join((
        f"Document ID: {doc.metadata['id']}",
        f"Content: {doc.page_content}",
        "-" * 40,
    )))