In [1]:
!pip install langchain openai qdrant-client gradio pandas tiktoken -U langchain-community

Collecting langchain
  Downloading langchain-0.2.16-py3-none-any.whl.metadata (7.1 kB)
Collecting openai
  Downloading openai-1.44.0-py3-none-any.whl.metadata (22 kB)
Collecting qdrant-client
  Downloading qdrant_client-1.11.1-py3-none-any.whl.metadata (10 kB)
Collecting gradio
  Downloading gradio-4.43.0-py3-none-any.whl.metadata (15 kB)
Collecting pandas
  Downloading pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.16-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-core<0.3.0,>=0.2.38 (from langchain)
  Downloading langchain_core-0.2.38-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.4-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=

In [2]:
from google.colab import userdata
# Read the OpenAI key from Colab's secret store instead of hard-coding it.
openai_api_key = userdata.get("openai_api_key")

In [3]:
import gradio as gr
import pandas as pd
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.chains import VectorDBQA
from langchain.llms import OpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [4]:
# Service endpoint and credentials, all read from Colab secrets.
qdrant_url = userdata.get("Qdrant")
qdrant_api_key = userdata.get("qdrant_api_key")
openai_api_key = userdata.get("openai_api_key")
# groq_api_key = userdata.get("GROQ_API_KEY")

In [25]:
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.schema import AIMessage, HumanMessage


# Load the doctors CSV into LangChain documents.
loader = CSVLoader(file_path="dataa.csv")
data = loader.load()

# Split the documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(data)

# Embedding model used when indexing the chunks into Qdrant.
embeding = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=openai_api_key,
)


#import quantization

from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient, models

from langchain.vectorstores import Qdrant

# Using the Qdrant vector database
from qdrant_client import QdrantClient, models
# Embed `texts` and upload them to the "llm_app" collection, stored with
# binary quantization kept in RAM.
#
# force_recreate=True makes this cell idempotent. Without it every run adds
# a fresh copy of all chunks (with new ids) to the existing collection, so
# the top-k retriever starts returning the same row more than once.
qdrant = Qdrant.from_documents(
    texts,
    embeding,
    url=qdrant_url,
    prefer_grpc=True,
    api_key=qdrant_api_key,
    collection_name="llm_app",
    force_recreate=True,
    quantization_config=models.BinaryQuantization(
        binary=models.BinaryQuantizationConfig(
            always_ram=True,
        )
    ),
)


# Standalone Qdrant client built from the same URL and API key.
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key, prefer_grpc=True)

from re import search
# Retriever over the Qdrant store: similarity search returning the top 2 hits.
retriver = qdrant.as_retriever(search_type="similarity", search_kwargs={"k": 2})

# Sample query used to sanity-check retrieval.
query = "show me a best darmatology doctor in peshawar  "
# `invoke` is the supported entry point; `get_relevant_documents` is
# deprecated and emits a deprecation warning on every call.
docs = retriver.invoke(query)


from langchain import PromptTemplate

# Prompt for the RAG chain. It has two placeholders: `context` (the retrieved
# documents) and `question` (the user's message, which appears twice in the
# template). The model is told to answer only from the retrieved context, to
# list the doctor's details, and to leave out Source/Row/_id metadata.
prompt = PromptTemplate(
    template="""
        # Your Role
        You are a highly skilled AI specialized in healthcare and medical information retrieval. Your expertise lies in understanding the medical needs of patients and accurately matching them with the most suitable healthcare professionals, including but not limited to surgeons, dentists, dermatologists, cardiologists, neurologists, etc., based on the user's query and the provided context.

        # Instruction
        Your task is to answer the question using the following pieces of retrieved context delimited by XML tags.

        <retrieved context>
        Retrieved Context:
        {context}
        </retrieved context>

        # Constraint
        1. Carefully analyze the user's question:
        User's question:\n{question}\n
        Your goal is to understand the user's needs and match them with the most relevant healthcare professional(s) from the provided context.
        - Reflect on why the question was asked, and deliver an appropriate response based on the context you understand.
        2. Select the most relevant information (the key details directly related to the question) from the retrieved context and use it to formulate an answer.
        3. Generate a comprehensive, logical, and medically accurate answer. When generating the answer, include the following details about the healthcare professional:
            • Name of the Professional
            • City
            • Specialization (e.g., Surgeon, Dentist, Cardiologist, etc.)
            • Qualification (e.g., MBBS, FCPS, etc.)
            • Years of Experience
            • Patient Satisfaction Rate (if available)
            • Average Time Spent with Patients (if available)
            • Wait Time (if available)
            • Hospital/Clinic Address
            • Consultation Fee
            • Profile Link (if available)
        4. If the retrieved context does not contain enough relevant information, or if the documents are irrelevant, respond with 'I can't find the answer to that question in the material I have'.
        5. Provide a complete answer to the user. Do not limit the information if there is more useful data available in the retrieved context.
        6. At the end of the response, do not include any unnecessary metadata (such as Source, Row, or _id). Only focus on the healthcare professional's information relevant to the user's query.

        # Question:
        {question}""",
    input_variables=["context", "question"]
)


# Chat model that writes the final answer.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.5,
    openai_api_key=openai_api_key,
)




def format_docs(docs):
    """Render retrieved documents as a single context string for the prompt.

    Args:
        docs: Iterable of objects exposing `page_content` (str) and
            `metadata` (dict), as returned by the retriever.

    Returns:
        One string in which each document is its `page_content`, a newline,
        then "Metadata: " followed by comma-separated "key: value" pairs.
        Documents are separated by a blank line; an empty input gives "".
    """
    # An `import StrOutputParser` line used to sit here at body indentation,
    # which made it unreachable code after `return`. It has been removed; the
    # name is already imported at the top of the notebook.
    formatted_docs = []
    for doc in docs:
        metadata_str = ", ".join(f"{key}: {value}" for key, value in doc.metadata.items())
        formatted_docs.append(f"{doc.page_content}\nMetadata: {metadata_str}")
    return "\n\n".join(formatted_docs)

from langchain_core.runnables import RunnablePassthrough
# RAG pipeline: retrieve and format the context, pass the question through
# unchanged, fill the prompt, call the chat model, and parse the reply to a string.
rag_chain = (
    {
        "context": retriver | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

from langchain.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage
import openai
import os
import gradio as gr


def reg(message, history):
    """Gradio chat callback: answer the latest user message with the RAG chain.

    Args:
        message: The user's newest message, supplied by `gr.ChatInterface`.
        history: Earlier turns supplied by Gradio. Accepted to satisfy the
            callback signature but not used, because `rag_chain` takes only
            the current question.

    Returns:
        The output of `rag_chain.invoke(message)`; the chain ends in
        `StrOutputParser`, so this is the answer text.
    """
    # The earlier version converted `history` to LangChain messages, sent them
    # to `llm`, and threw the reply away. That was one extra model call per
    # turn with no effect on the returned answer, so it has been removed.
    return rag_chain.invoke(message)
# Chat UI backed by the `reg` callback.
demo = gr.ChatInterface(fn=reg, title="Doctors Appointments Assistant", theme="soft")

# Start the app with the API docs page hidden.
demo.launch(show_api=False)



Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://9ff9f70472f890835d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
# Load the doctors CSV into LangChain documents.
loader = CSVLoader(file_path="data.csv")
data = loader.load()


In [None]:
# Peek at the first two loaded documents.
data[:2]

[Document(metadata={'source': 'data.csv', 'row': 0}, page_content='Doctor Name: Prof. Dr. Syed Shamsuddin\nCity: QUETTA\nSpecialization: Dermatologist\nDoctor Qualification: MBBS, MCPS, FCPS\nExperience(Years): 30\nTotal_Reviews: 827\nPatient Satisfaction Rate(%age): 98\nAvg Time to Patients(mins): 14\nWait Time(mins): 13\nHospital Address: National Hospital, Near Quetta Laboratory Prince Road, Quetta\nDoctors Link: https://www.marham.pk/doctors/quetta/dermatologist/prof-dr-syed-shamsuddin#reviews-scroll\nFee(PKR): 5000'),
 Document(metadata={'source': 'data.csv', 'row': 1}, page_content='Doctor Name: Dr. Jaffar Ali\nCity: QUETTA\nSpecialization: Dermatologist\nDoctor Qualification: MBBS, MCPS (Dermatology), FDV (Austria)\nExperience(Years): 32\nTotal_Reviews: 1250\nPatient Satisfaction Rate(%age): 97\nAvg Time to Patients(mins): 16\nWait Time(mins): 13\nHospital Address: Quetta Hospital, Quetta City, Quetta\nDoctors Link: https://www.marham.pk/doctors/quetta/dermatologist/dr-jaffar-al

In [None]:
# Number of documents loaded from the CSV.
len(data)

2652

In [None]:
# Split the documents into overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(data)

In [None]:
# Number of chunks after splitting; compare with len(data) to see whether any
# document was actually split.
len(texts)

2652

In [None]:
# Embedding model used when indexing the chunks into Qdrant.
embeding = OpenAIEmbeddings(
    model="text-embedding-3-small",
    openai_api_key=openai_api_key,
)


  embeding=OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-small")


In [None]:
#import quantization

from langchain.vectorstores import Qdrant
from qdrant_client import QdrantClient, models

from langchain.vectorstores import Qdrant

# Using the Qdrant vector database
from qdrant_client import QdrantClient, models
# Embed `texts` and upload them to the "llm_app" collection, stored with
# binary quantization kept in RAM.
#
# force_recreate=True rebuilds the collection on each run. Without it,
# re-running this cell (or running it after another cell that already filled
# "llm_app") appends duplicate points, and the top-k retriever then returns
# the same row more than once.
qdrant = Qdrant.from_documents(
    texts,
    embeding,
    url=qdrant_url,
    prefer_grpc=True,
    api_key=qdrant_api_key,
    collection_name="llm_app",
    force_recreate=True,
    quantization_config=models.BinaryQuantization(
        binary=models.BinaryQuantizationConfig(
            always_ram=True,
        )
    ),
)

In [None]:
# Standalone Qdrant client built from the same URL and API key.
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key, prefer_grpc=True)

In [None]:
from re import search
# Retriever over the Qdrant store: similarity search returning the top 2 hits.
retriver = qdrant.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},
)

In [None]:
# Sample query used to sanity-check retrieval.
query = "show me a best darmatology doctor in peshawar  "
# `invoke` is the supported entry point; `get_relevant_documents` is
# deprecated and emits a deprecation warning on every call.
docs = retriver.invoke(query)

  docs=retriver.get_relevant_documents(query)


In [None]:
# Print the text of each retrieved document, one after another.
for doc in docs:
    print(doc.page_content)

Doctor Name: Dr. Shahzad Rashid Awan
City: PESHAWAR
Specialization: Dermatologist
Doctor Qualification: MBBS, MCPS (Dermatology)*
Experience(Years): 12
Total_Reviews: 14
Patient Satisfaction Rate(%age): 93
Avg Time to Patients(mins): 13
Wait Time(mins): 10
Hospital Address: Rahim Medical Center And Hospital, Hasht Nagri, Peshawar
Doctors Link: https://www.marham.pk/doctors/peshawar/dermatologist/dr-shahzad-rashid-awan#reviews-scroll
Fee(PKR): 1000
Doctor Name: Dr. Fahad Faizullah
City: PESHAWAR
Specialization: Dermatologist
Doctor Qualification: MBBS, FCPS (Dermatology)
Experience(Years): 8
Total_Reviews: 3
Patient Satisfaction Rate(%age): 100
Avg Time to Patients(mins): 17
Wait Time(mins): 7
Hospital Address: Aesthetica Plastika, Khyber Pakhtunkhwa, Peshawar
Doctors Link: https://www.marham.pk/doctors/peshawar/dermatologist/dr-fahad-faizullah#reviews-scroll
Fee(PKR): 1500


In [None]:
# Inspect the metadata attached to the first retrieved document.
docs[0].metadata.items()

dict_items([('source', 'data.csv'), ('row', 1807), ('_id', 'cd86a8bf-cb80-4f70-a723-189eef020607'), ('_collection_name', 'llm_app_02')])

In [None]:
from langchain import PromptTemplate

# Prompt for the RAG chain, with `context` and `question` placeholders.
# NOTE(review): when this cell runs after the earlier `prompt` definition in
# this notebook, it rebinds `prompt` to this variant.
# NOTE(review): the bullets under constraint 3 are literal values for one
# specific doctor rather than field names. Confirm they are meant as a format
# example, since the model may copy them.
# NOTE(review): constraint 6 asks for the document metadata to be appended;
# the sample answer further down the notebook shows Source/Row/_id as a result.
prompt = PromptTemplate(
    template="""
        # Your Role
        You are a highly skilled AI specialized in healthcare and medical information retrieval. Your expertise lies in understanding the medical needs of patients and accurately matching them with the most suitable healthcare professionals based on the given context.

        # Instruction
        Your task is to answer the question using the following pieces of retrieved context delimited by XML tags.

        <retrieved context>
        Retrieved Context:
        {context}
        </retrieved context>

        # Constraint
        1. Carefully consider the user's question:
        User's question:\n{question}\n
        Analyze the intent behind the question, particularly in relation to the medical context, and provide a precise and helpful answer.
        - Reflect on why the question was asked and provide an appropriate response based on the context you understand.
        2. Select the most relevant information (the key details directly related to the question) from the retrieved context and use it to formulate an answer.
        3. Generate a concise, logical, and medically accurate answer. When generating the answer, include the following details about the doctor in a bulleted format:
            • Doctor Name: Dr. Shahzad Rashid Awan
            • City: Peshawar
            • Specialization: Dermatologist
            • Qualification: MBBS, MCPS (Dermatology)
            • Experience: 12 years
            • Patient Satisfaction Rate: 93%
            • Avg Time to Patients: 13 mins
            • Wait Time: 10 mins
            • Hospital Address: Rahim Medical Center And Hospital, Hasht Nagri, Peshawar
            • Fee: PKR 1000
            • Profile Link: https://www.marham.pk/doctors/peshawar/dermatologist/dr-shahzad-rashid-awan#reviews-scroll
        4. If the retrieved context does not contain information relevant to the question, or if the documents are irrelevant, respond with 'I can't find the answer to that question in the material I have'.
        5. Limit the answer to five sentences maximum. Ensure the answer is concise, logical, and medically appropriate.
        6. At the end of the response, provide the doctor's profile metadata as shown in the relevant documents, ensuring all bullet points are clearly mentioned.

        # Question:
        {question}""",
    input_variables=["context", "question"]
)


In [None]:
# #import conversation
# from langchain.memory import ConversationBufferMemory
# memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

In [None]:
# !pip install langchain-openai


Collecting langchain-openai
  Downloading langchain_openai-0.1.23-py3-none-any.whl.metadata (2.6 kB)
Downloading langchain_openai-0.1.23-py3-none-any.whl (51 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/52.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-openai
Successfully installed langchain-openai-0.1.23


In [None]:
#import ChatOpenAI
from langchain.chat_models import ChatOpenAI
# Deterministic (temperature 0) chat model for answer generation.
llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model_name="gpt-4o",
    temperature=0,
)

In [None]:
def format_docs(docs):
    """Build the prompt's context string from the retrieved documents.

    Each document is rendered as its `page_content`, a newline, and then
    "Metadata: " followed by its metadata as comma-separated "key: value"
    pairs. Rendered documents are joined with a blank line between them.
    """
    def _render(document):
        # One "key: value" entry per metadata item, in the dict's own order.
        pairs = [f"{name}: {val}" for name, val in document.metadata.items()]
        return f"{document.page_content}\nMetadata: " + ", ".join(pairs)

    return "\n\n".join(map(_render, docs))

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# RAG pipeline: retrieve and format the context, pass the question through
# unchanged, fill the prompt, call the chat model, and parse the reply to a string.
rag_chain = (
    {
        "context": retriver | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Smoke-test the chain end to end with a sample question.
rag_chain.invoke("show me a best darmatology doctor in lahore  ")

'Based on the information retrieved, here is a highly recommended dermatologist in Lahore:\n\n- **Doctor Name:** Dr. Muhammad Amjad\n- **City:** Lahore\n- **Specialization:** Dermatologist\n- **Qualification:** MBBS, MCPS (Dermatology), FCPS (Dermatology)\n- **Experience:** 35 years\n- **Patient Satisfaction Rate:** 99%\n- **Avg Time to Patients:** 13 mins\n- **Wait Time:** 4 mins\n- **Hospital Address:** Zarar Shaheed Trust Hospital, Barki, Lahore\n- **Fee:** PKR 1000\n- **Profile Link:** [Dr. Muhammad Amjad](https://www.marham.pk/doctors/lahore/dermatologist/dr-muhammad-amjad#reviews-scroll)\n\n**Profile Metadata:**\n- **Source:** data.csv\n- **Row:** 1497\n- **_id:** 4e2695f8-f1df-4bac-bdd3-aec0ae966eed\n- **_collection_name:** llm_app'

In [None]:
# import random
# import gradio as gr

# # Gradio Interface
# def search_doctor(input_text):
#     return rag_chain.invoke(input_text)

# # Create the Gradio interface
# iface = gr.Interface(
#     fn=search_doctor,
#     inputs=gr.Textbox(lines=1, label="Ask a medical question"),
#     outputs=gr.Textbox(label="Answer"),
#     title="Medical Assistant",
#     description="Find the best doctors based on your medical needs.",
#     allow_flagging="never",
#     theme="default",
#     css=".gradio-container {border-radius: 10px; padding: 10px; background-color: #f9f9f9;} .gr-button {visibility: hidden;}"
# )

# # Launch the interface without the Gradio logo
# iface.launch(show_api=False)

In [None]:
# import gradio as gr

# # Example RAG model invocation function (replace with your actual function)
# def rag_model_query(query):
#     # Replace with actual RAG model invocation
#     return rag_chain.invoke(query)

# # Define the Gradio function to handle both echo and RAG queries
# def handle_message(message, history):
#     # Check if the message contains a keyword to trigger RAG model
#     if "doctor" in message["text"].lower():
#         response = rag_model_query(message["text"])
#     else:
#         response = message["text"]
#     return response

# # Create the Gradio interface
# demo = gr.ChatInterface(
#     fn=handle_message,
#     title="Medical Assistant",
#     multimodal=True,
# )

# demo.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://902c09d5057fe29f2c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage
import openai
import os
import gradio as gr
# os.environ["OPENAI_API_KEY"] = openai_api_key  # Replace with your key

# Rebind `llm` to a temperature-1.0 chat model.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=1.0,
    openai_api_key=openai_api_key,
)
# llm = ChatOpenAI(model_name="gpt-4o", temperature=0, openai_api_key=openai_api_key, memory=memory)

def reg(message, history):
    """Gradio chat callback: answer the latest user message with the RAG chain.

    Args:
        message: The user's newest message, supplied by `gr.ChatInterface`.
        history: Earlier turns supplied by Gradio. Accepted to satisfy the
            callback signature but not used, because `rag_chain` takes only
            the current question.

    Returns:
        The output of `rag_chain.invoke(message)`.
    """
    # The earlier version converted `history` to LangChain messages, sent them
    # to `llm`, and threw the reply away. That was one extra model call per
    # turn with no effect on the returned answer, so it has been removed.
    return rag_chain.invoke(message)
# # Gradio ChatInterface
# demo = gr.ChatInterface(
#     fn=reg,
#     title="Medical Assistant",
#     # theme="soft",
# )

# # Apply custom CSS and launch the interface
# demo.launch(show_api=False)
# Serve the chat UI with the `reg` callback defined in this cell. The previous
# argument, `predict`, is not defined anywhere in this notebook and raised
# NameError.
gr.ChatInterface(reg).launch()
