In [1]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain_community.chat_models import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import BSHTMLLoader
from langchain_community.document_loaders import DirectoryLoader



In [2]:
model_local = ChatOllama(model="mistral")

In [3]:
loader = BSHTMLLoader("C:\\Users\\Barani\\Desktop\\local_ollama\\mayo.html")

data = loader.load()
print(data)

[Document(page_content="\n\n      Addresses and phone numbers - About Us - Mayo Clinic\n  \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nThis content does not have an English version.\nThis content does not have an Arabic version.\n\n\n\nSkip to contentCare at \n   Mayo Clinic Patient-Centered Care About Mayo Clinic Request Appointment Find a Doctor Locations Clinical Trials Connect to Support Groups Patient & Visitor Guide Insurance & Billing Departments & Centers International Services Contact UsPatient & Visitor Guide Health \n   Library Diseases & Conditions Symptoms Tests & Procedures Drugs & Supplements Healthy Lifestyle Books & SubscriptionsDiseases & Conditions For Medical \n   Professionals Medical Professional Resources Refer a Patient Continuing Medical Education AskMayoExpert Mayo Clinic Laboratories Video Center Journals & Publications Mayo Clinic Alumni AssociationContinuing Medical Education Research & Educ

In [5]:
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(chunk_size=7500, chunk_overlap=100)
doc_splits = text_splitter.split_documents(data)

In [7]:
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embeddings.ollama.OllamaEmbeddings(model='nomic-embed-text'),
)
retriever = vectorstore.as_retriever()

In [8]:
# 3. Before RAG
print("Before RAG\n")
before_rag_template = "Provide me {topic}"
before_rag_prompt = ChatPromptTemplate.from_template(before_rag_template)
before_rag_chain = before_rag_prompt | model_local | StrOutputParser()
print(before_rag_chain.invoke({"topic": "the addresses of mayo clinic which is in Rochester"}))

Before RAG

 The Mayo Clinic in Rochester, Minnesota, has multiple locations. Here are some addresses for specific clinics and departments:

1. Mayo Clinic Campus - 200 First St SW, Rochester, MN 55905, USA
   This is the main campus of Mayo Clinic, where you can find most of the medical specialties and services.

2. Mayo Clinic Hospital - 4500 St. Marie Boulevard, Rochester, MN 55901, USA
   This is the hospital building on the Mayo Clinic campus, which includes patient rooms, operating rooms, and other healthcare facilities.

3. Mayo Clinic Gonda Building for Women's Health - 200 17th St NW, Rochester, MN 55901, USA
   This building is dedicated to women's health, including obstetrics, gynecology, and related services.

4. Mayo Clinic Methodist Hospital - 3300 Mayowood Rd SW, Rochester, MN 55905, USA
   This hospital is part of the Mayo Clinic system and provides various medical services, including cardiovascular care, oncology, and neurosciences.

5. Mayo Clinic Saint Marys Hospital

In [9]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

In [10]:
class format_json(BaseModel):
    university_name : str = Field(description="organization name from the given context")
    address: str = Field(description="address from the given context")
    email_address: str = Field(description="email address from the given context")
    contact_number: str = Field(description="contact number from the given context")



In [11]:
parser = JsonOutputParser(pydantic_object=format_json)

In [14]:
# 4. After RAG
print("\n########\nAfter RAG\n")
after_rag_template = """Answer the question based only on the following context:
{context}
{format_instructions}
Question: {question}
"""
after_rag_prompt = ChatPromptTemplate.from_template(after_rag_template,partial_variables={"format_instructions": parser.get_format_instructions()},)
after_rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | after_rag_prompt
    | model_local
    | parser
)



########
After RAG



In [15]:
print(after_rag_chain.invoke("provide the address of Mayo clinic which is in Minnesota"))

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


OutputParserException: Invalid json output: To provide the specific address of Mayo Clinic located in Minnesota from the given text, you can extract it using regular expressions. Here's how to do it in Python:

```python
import re
text = """...
Mayo Clinic Hospital, Saint Marys Campus
including Mayo Eugenio Litta Children's Hospital
1216 Second St. SW
  Rochester, MN 55902
Contact Number:
General number    507-266-7890
...
Rochester, Minnesota, Office of Patient Experience
844-544-0036 (toll-free)
...
"""
pattern = r"Mayo Clinic Hospital\s+(?:including\s+[^.]+\s+)?([\w\s]+\n\d+\.\s+[\w\s]+)\n([\w\s,]+)\nContact Number:\nGeneral number\s+(\d{3}-\d{3}-\d{4})"
match = re.search(pattern, text)
if match:
    university_name, address, contact_number = match.groups()
    print("University Name:", university_name)
    print("Address:", address)
    print("Contact Number:", contact_number)
else:
    print("Unable to extract the address from the text.")
```

Output:

```
University Name: Mayo Clinic Hospital, Saint Marys Campus including Mayo Eugenio Litta Children's Hospital
Address: 1216 Second St. SW   Rochester, MN 55902
Contact Number: 507-266-7890
```