In [1]:
import os
from dotenv import load_dotenv

load_dotenv("../../.env")

from datasets import load_dataset
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"))

from langchain.embeddings import HuggingFaceEmbeddings
from langchain_postgres import PGVector
from langchain_postgres.vectorstores import PGVector

connection = "postgresql+psycopg://postgres:code4lib@localhost:5432/code4lib"
collection_name = "tiny_shakespeare"
embeddings = HuggingFaceEmbeddings(model_name='nomic-ai/nomic-embed-text-v1.5', model_kwargs={'trust_remote_code':True})

vectorstore = PGVector(
    embeddings=embeddings,
    collection_name=collection_name,
    connection=connection,
    use_jsonb=True,  
)

print(f"Vectorstore collection name: {vectorstore.collection_name}")

  from .autonotebook import tqdm as notebook_tqdm
<All keys matched successfully>


Vectorstore collection name: tiny_shakespeare


In [2]:
dataset = load_dataset("Trelis/tiny-shakespeare")

texts = dataset["train"]["Text"] + dataset["test"]["Text"]
print(len(texts))
print(texts[42][:100])

521
Mildly!

BRUTUS:
In this point charge him home, that he affects
Tyrannical power: if he evade us the


In [3]:
vectorstore.add_texts(texts)

['47332e2d-1c56-4333-96a7-39ba0f2ca1b6',
 'e937b21b-e01e-4beb-aad8-5e389e898032',
 'bc747a52-4241-4b6c-aa2b-397407abed67',
 'a8a68bcf-faab-490f-a9e4-483a6f910633',
 'd617ecc6-adb4-4bf0-9c7e-2cb15f9f90d1',
 '534f3158-6e9e-43ad-a8c5-b992c4490fea',
 'aa197c6e-a6fd-42a2-b74f-fa91523f30c7',
 'c12aa6f2-5dae-4ecf-b9f0-8e282aba87c4',
 'c8820f7c-aa48-4a73-98dc-acdaf0fa4326',
 '1d9ea7fc-00c4-4633-bcd5-c68701c9e570',
 '029d5a82-0fe5-4d52-974d-807ded0c2af1',
 '3a85988d-1821-48c8-958b-3487c93ee892',
 '6b821f03-d93f-49e4-8ef9-42f941311026',
 '877ed747-bca9-4521-bd67-f2f122174895',
 '725d49c3-531a-4ba5-9a2e-0448d06be5d7',
 '1caaf84e-0701-403b-b605-c340d10a77db',
 '4ca73e30-9a07-493c-acdd-11f6ac0ec628',
 '6df988d3-5935-4b82-93da-394b1a7cd3e7',
 '7b91ef7d-8404-44c8-b1e9-557cfb0b6403',
 '0ea461e9-21f9-4db3-9659-3d4fdf48e5a2',
 'a2cd600c-eec9-4559-afea-7d1c664e0fcd',
 '6c7b05a3-5fc0-4b16-8d8c-b1dcb74c2e45',
 '3744c0b4-5953-4d8a-bc37-4388c3b2ce94',
 '4b101249-e355-4b1a-b0bb-a0ce7d50fe4e',
 '37278da8-2f6c-

In [4]:
vectorstore.similarity_search_with_relevance_scores("doctors and nurses", k=5)

[(Document(page_content="Will it not be?\nWhat, dress'd! and in your clothes! and down again!\nI must needs wake you; Lady! lady! lady!\nAlas, alas! Help, help! my lady's dead!\nO, well-a-day, that ever I was born!\nSome aqua vitae, ho! My lord! my lady!\n\nLADY CAPULET:\nWhat noise is here?\n\nNurse:\nO lamentable day!\n\nLADY CAPULET:\nWhat is the matter?\n\nNurse:\nLook, look! O heavy day!\n\nLADY CAPULET:\nO me, O me! My child, my only life,\nRevive, look up, or I will die with thee!\nHelp, help! Call help.\n\nCAPULET:\nFor shame, bring Juliet forth; her lord is come.\n\nNurse:\nShe's dead, deceased, she's dead; alack the day!\n\nLADY CAPULET:\nAlack the day, she's dead, she's dead, she's dead!\nCAPULET:\nWill it not be?\nWhat, dress'd! and in your clothes! and down again!\nI must needs wake you; Lady! lady! lady!\nAlas, alas! Help, help! my lady's dead!\nO, well-a-day, that ever I was born!\nSome aqua vitae, ho! My lord! my lady!\n\nLADY CAPULET:\nWhat noise is here?\n\nNurse:\nO 

In [5]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [6]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 8})

In [7]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

template = """You are an expert at answering the linguistic and philological aspects of William Shakespeare's writing.
Use the provided Shakespeare passages to answer the question at the end.
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
Cite the context provided in your response!

Context: {context}

Question: {question}

Helpful Answer:"""
prompt = PromptTemplate.from_template(template)

rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

In [8]:
response = rag_chain_with_source.invoke("Describe Shakespeare's diction regarding doctors and nurses.")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [9]:
print(f"Context: {response['context']}")
print(f"LM Response: {response['answer']}")

for doc in response["context"]:
    print(f"Document: {doc.page_content[:200]}")
    print("-" * 40)

Context: [Document(page_content="What is yond gentleman?\n\nNurse:\nThe son and heir of old Tiberio.\n\nJULIET:\nWhat's he that now is going out of door?\n\nNurse:\nMarry, that, I think, be young Petrucio.\n\nJULIET:\nWhat's he that follows there, that would not dance?\n\nNurse:\nI know not.\n\nJULIET:\nGo ask his name: if he be married.\nMy grave is like to be my wedding bed.\n\nNurse:\nHis name is Romeo, and a Montague;\nWhat is yond gentleman?\n\nNurse:\nThe son and heir of old Tiberio.\n\nJULIET:\nWhat's he that now is going out of door?\n\nNurse:\nMarry, that, I think, be young Petrucio.\n\nJULIET:\nWhat's he that follows there, that would not dance?\n\nNurse:\nI know not.\n\nJULIET:\nGo ask his name: if he be married.\nMy grave is like to be my wedding bed.\n\nNurse:\nThe only son of your great enemy.\n\nJULIET:\nMy only love sprung from my only hate!\nToo early seen unknown, and known too late!\nProdigious birth of love it is to me,\nThat I must love a loathed enemy.\n\nNurse:\n