In [28]:
import os

from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
from langchain.schema import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.pgvector import PGVector
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import UnstructuredWordDocumentLoader
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
# Load environment variables (load_dotenv() is called with no arguments).
# NOTE(review): presumably this supplies the OpenAI API key used by
# OpenAIEmbeddings / ChatOpenAI — confirm against the project's .env.
load_dotenv()
# Shared embedding model; it is passed to both PGVector stores created below.
embeddings = OpenAIEmbeddings()

In [12]:
def file_loader(file_path: str):
    """Pick the document loader that matches a file's extension.

    The extension check is case-insensitive, so ``report.PDF`` and
    ``report.pdf`` are handled the same way.

    Args:
        file_path: Path of the document to load.

    Returns:
        An ``UnstructuredWordDocumentLoader`` for ``.docx`` files, a
        ``PyPDFLoader`` for ``.pdf`` files, a ``TextLoader`` for ``.txt``
        files, or ``None`` when the extension is not supported. The loader
        is only constructed here; nothing is loaded yet.
    """
    # Compare against a lower-cased copy only; the loader itself still
    # receives the path exactly as the caller passed it.
    normalized_path = file_path.lower()
    if normalized_path.endswith(".docx"):
        return UnstructuredWordDocumentLoader(file_path)
    if normalized_path.endswith(".pdf"):
        return PyPDFLoader(file_path)
    if normalized_path.endswith(".txt"):
        return TextLoader(file_path)
    # Unsupported file type: callers must check for None before calling .load()
    return None

In [5]:
# The database password is read from the environment (load_dotenv() is called in
# the first cell) so that it is not stored in the notebook itself.
# PGVECTOR_PASSWORD must be set, e.g. in .env; a missing value raises KeyError
# here instead of failing later when the connection is opened.
CONNECTION_STRING = (
    f"postgresql+psycopg2://postgres:{os.environ['PGVECTOR_PASSWORD']}"
    "@localhost:5432/doctor_vector_db"
)
COLLECTION_NAME = 'doctor_lib_vectors'

# Open the vector store for the doctor-library collection.
# No documents are added in this cell; it only binds the store to `embeddings`.
db = PGVector(
    embedding_function=embeddings,
    collection_name=COLLECTION_NAME,
    connection_string=CONNECTION_STRING,
)

In [6]:
# Retriever over the doctor-library store, created with as_retriever() defaults
# (no search arguments are passed here).
retriever = db.as_retriever()

In [11]:
# Display the retriever's repr to confirm which vector store and embeddings it wraps.
retriever

VectorStoreRetriever(tags=['PGVector', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.pgvector.PGVector object at 0x000001B9E86D7FD0>)

In [7]:
# Free-text patient request used to find candidate doctors
query = "I speak portugese, I need to visit a Orthodontist"
# Retrievers are Runnables: call invoke(), the same entry point used for
# rag_chain later in the notebook, rather than the deprecated
# get_relevant_documents().
docs_retrieved = retriever.invoke(query)
docs_retrieved

[Document(page_content='optimal oral health and a harmonious smile. If the time slots are not suitable, please call the practice.  \nSpoken languages  \nEnglish, Spanish, French and Portuguese  \n \nWebsite  \nSee the site  \n________________________________________  \nNational and university diplomas', metadata={'source': './datasource/Dr Catherine Lasvergnas Buffet Orthodontist.pdf', 'page': 0}),
 Document(page_content="mouth and to make patients' smiles more harmonious. The practi ce offers treatments for children, \nadolescents and adults (orthopedic appliances, vestibular technique, lingual technique and aligner \ntreatments).  \nNational and university diplomas  \nState diploma of doctor in dental surgery - UFR of odontology Garancière - Paris -Cité University  \nOthers formations", metadata={'source': './datasource/Dr Lassaad BEN HAOUIA, Orthodontist.pdf', 'page': 0}),
 Document(page_content="Doctor Lassaad Ben Haouia welcomes you to his office in Poissy. He is a qualified speci

In [10]:
# Source file of the first retrieved chunk
docs_retrieved[0].metadata['source']

'./datasource/Dr Catherine Lasvergnas Buffet Orthodontist.pdf'

In [9]:
# Source file of the last retrieved chunk, for comparison with the first one
docs_retrieved[-1].metadata['source']

'./datasource/Dr Catherine Lasvergnas Buffet Orthodontist.pdf'

In [8]:
# An empty result list would otherwise surface as a bare IndexError on [0]
if not docs_retrieved:
    raise ValueError("Retriever returned no documents; cannot pick a source file")
# Use the source file of the first retrieved chunk as the chosen doctor's document
file_url = docs_retrieved[0].metadata['source']
file_url

'./datasource/Dr Catherine Lasvergnas Buffet Orthodontist.pdf'

In [20]:
# Pick a loader based on the chosen file's extension
chosen_loader = file_loader(file_url)
# file_loader returns None for unsupported extensions; stop with a clear message
# here instead of an AttributeError from calling .load() on None.
if chosen_loader is None:
    raise ValueError(f"Unsupported file type for chosen document: {file_url!r}")
chosen_documents = chosen_loader.load()
# Split the chosen document into chunks (chunk_size=200, no overlap)
chosen_text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
chosen_texts = chosen_text_splitter.split_documents(chosen_documents)

In [22]:
# The database password is read from the environment (load_dotenv() is called in
# the first cell) so that it is not stored in the notebook itself.
# PGVECTOR_PASSWORD must be set, e.g. in .env; a missing value raises KeyError here.
CHOSEN_CONNECTION_STRING = (
    f"postgresql+psycopg2://postgres:{os.environ['PGVECTOR_PASSWORD']}"
    "@localhost:5432/chosen_vector_db"
)
CHOSEN_COLLECTION_NAME = 'chosen_doctor_vectors'
# Embed and store only the chunks of the chosen document.
# pre_delete_collection=True makes this cell idempotent: without it every re-run
# appends the same chunks again, and chunks from a previously chosen document
# would stay in the collection and leak into later answers.
chosen_db = PGVector.from_documents(
    documents=chosen_texts,
    embedding=embeddings,
    collection_name=CHOSEN_COLLECTION_NAME,
    connection_string=CHOSEN_CONNECTION_STRING,
    pre_delete_collection=True,
)

In [23]:
# Retriever restricted to the chosen document's collection; it supplies the
# "context" input of rag_chain below.
chosen_retriever = chosen_db.as_retriever()

In [29]:
# Initialize the OpenAI chat model (temperature=0 is passed explicitly)
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Define the prompt template for the chatbot
prompt_template = """
You are an healthcare assistant chatbot. Based on the retrieved context, answer the question. 
Explain why the information in the context can and, or cannot answer the question. 
The response is in the following format:
Recommended doctor is:
Why it's a good choice:
What's missing:
Source is:
If you don't know the answer, just say that you don't know. 
Context: {context}
Question: {question}  
Answer:
"""

# Create the prompt template for the chatbot
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


def format_docs(docs):
    """Render retrieved documents as plain text for the prompt's {context} slot.

    Args:
        docs: Iterable of retrieved documents exposing ``page_content`` and
            ``metadata`` (as shown in the retrieval output earlier in the notebook).

    Returns:
        One string with the chunks separated by blank lines. Each chunk is
        preceded by its source path so the model can still fill in "Source is:".
    """
    return "\n\n".join(
        f"Source: {doc.metadata.get('source', 'unknown')}\n{doc.page_content}"
        for doc in docs
    )


# Create the RAG chain.
# The retriever output is piped through format_docs so the prompt receives
# readable text rather than the Python repr of a list of Document objects
# (which carries escaped newlines and constructor noise).
rag_chain = (
    {
        "context": chosen_retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [30]:
# Same question text as the one used for the initial retrieval above
query = "I speak portugese, I need to visit a Orthodontist"
# Run the chain: the query is passed through as "question" and also sent to
# chosen_retriever to build "context"; the result is the model's text reply.
llm_response = rag_chain.invoke(query)

# print() so the newlines in the reply are rendered rather than shown as a repr
print(llm_response)

Recommended doctor is: Dr Catherine Lasvergnas Buffet

Why it's a good choice: Dr Catherine Lasvergnas Buffet is an orthodontist who offers a full range of orthodontic treatments for children, adolescents, and adults. She is a member of the European College of Orthodontics and the French Society of Lingual Orthodontics. She also has extensive experience as a clinical research associate at Bretonneau Hospital in Paris.

What's missing: The context does not provide information about whether Dr Catherine Lasvergnas Buffet speaks Portuguese. 

Source is: ./datasource/Dr Catherine Lasvergnas Buffet Orthodontist.pdf
