In [1]:
import os

from langchain.output_parsers import PydanticOutputParser
from langchain.text_splitter import CharacterTextSplitter
from langchain_community import embeddings
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import BSHTMLLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough

In [2]:
# Local chat model served by Ollama.
# Temperature is pinned to 0 so that repeated runs of the extraction chain are
# as repeatable as possible; the library default leaves sampling enabled, which
# can make the extracted fields differ between otherwise identical runs.
model_local = ChatOllama(model="mistral", temperature=0.0)

In [3]:
# Location of the HTML page to index.
# The author's original absolute path is kept as the default so existing runs
# behave the same, but it can be overridden with the RAG_HTML_PATH environment
# variable so the notebook is runnable on another machine without editing code.
HTML_PATH = os.environ.get(
    "RAG_HTML_PATH",
    "C:\\Users\\Barani\\Desktop\\local_ollama\\llm_testing_urls\\test_11.html",
)

# Parse the page with BeautifulSoup's built-in "html.parser" backend and load
# it into `data`, which the splitting cell consumes.
loader = BSHTMLLoader(HTML_PATH,
                       bs_kwargs={"features": "html.parser"})
data = loader.load()

In [4]:
# Chunk the loaded page before embedding it.
# The splitter is built through the tiktoken-based constructor, so the size and
# overlap below are presumably measured in tokens rather than characters.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=400,
    chunk_overlap=40,
)
doc_splits = text_splitter.split_documents(documents=data)

In [5]:
COLLECTION_NAME = "rag-chroma"

# Build the embedding function once so the cleanup step and the index share it.
# `OllamaEmbeddings` is read from the package's public namespace instead of via
# the `embeddings.ollama` submodule attribute, which is only available when
# that submodule happens to have been imported already.
embedding_function = embeddings.OllamaEmbeddings(model='nomic-embed-text')

# Make this cell idempotent: without a persist directory the collection lives
# in the kernel's memory, so re-running the cell would add the same chunks a
# second time and the retriever would start returning duplicates. Dropping any
# existing collection of the same name first guarantees a clean rebuild.
Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embedding_function,
).delete_collection()

# Embed the chunks and index them in a fresh collection.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name=COLLECTION_NAME,
    embedding=embedding_function,
)

# Retriever with the library's default search settings; used by the RAG chain.
retriever = vectorstore.as_retriever()

In [6]:
# Schema of the contact details the notebook asks the model to extract.
# The class is passed to JsonOutputParser(pydantic_object=format_json) in the
# RAG cell, and that parser's format instructions are injected into the prompt.
# NOTE(review): the Field descriptions are runtime text that is presumably
# surfaced to the model through those format instructions, so treat them as
# part of the prompt - confirm before rewording.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic presumably copies a docstring into the generated schema, which would
# change the prompt - confirm.
class format_json(BaseModel):
    # Name of the institution; note the description calls it "organization name".
    university_name : str = Field(description="organization name from the given context")
    # Postal/physical address as found in the retrieved context.
    address: str = Field(description="address from the given context")
    # Contact e-mail address as found in the retrieved context.
    email_address: str = Field(description="email address from the given context")
    # Phone number, kept as a string.
    contact_number: str = Field(description="contact number from the given context")

In [7]:
# 4. After RAG
print("\n########\nAfter RAG\n")


def join_page_contents(docs):
    """Concatenate retrieved chunks into one plain-text context string.

    Args:
        docs: Iterable of retrieved documents; only ``page_content`` is read.

    Returns:
        The chunks' text joined by blank lines. Without this step the prompt
        would receive the Python repr of a list of Document objects (escaped
        newlines, metadata dicts) instead of readable text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Prompt: context first, then one unambiguous instruction that names every
# field of the schema (including the university name) in schema order, and
# tells the model what to do when a detail is missing.
after_rag_template = """Answer the question based only on the following context:
{context}

Extract only these details, in this order: university name, address, email address, contact number.
Respond with a single JSON object and nothing else. Use an empty string for any detail that is not present in the context.
{format_instructions}
Question: {question}
"""

# The parser both produces the format instructions for the prompt and turns
# the model's reply into a Python dict.
parser = JsonOutputParser(pydantic_object=format_json)
after_rag_prompt = ChatPromptTemplate.from_template(
    after_rag_template,
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Chain: retrieve chunks for the question -> flatten them to text -> fill the
# prompt -> call the local model -> parse the JSON reply.
after_rag_chain = (
    {"context": retriever | join_page_contents, "question": RunnablePassthrough()}
    | after_rag_prompt
    | model_local
    | parser
)


########
After RAG



In [8]:
# Run the extraction chain once and print the parsed result.
extraction_request = "provide the metioned entities from the given context"
extracted_entities = after_rag_chain.invoke(extraction_request)
print(extracted_entities)

{'university_name': 'Makerere University College Of health Sciences (MakCHS)', 'address': '', 'email_address': '', 'contact_number': ''}
