In [None]:
!pip install -U langchain langchain-community langchain-huggingface huggingface_hub transformers
!pip install langchain langchain-community langchain-huggingface faiss-cpu
!pip install pandas huggingface_hub tiktoken




In [None]:
# Mount Google Drive so the dataset archive under MyDrive is reachable
# from the Colab filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
import os


# Never store an API token in the notebook source: anyone who can read the
# file can use it. Reuse a token already present in the environment, and only
# prompt (without echoing) when none is set.
# NOTE(review): the token that used to be hardcoded here should be treated as
# leaked and revoked in the Hugging Face account settings.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    from getpass import getpass

    os.environ["HUGGINGFACEHUB_API_TOKEN"] = getpass("Hugging Face API token: ")

In [None]:
import os
import zipfile

# Location of the dataset archive on the mounted Drive.
zip_path = '/content/drive/MyDrive/medical_chatbot/archive.zip'

# The archive is unpacked into the same Drive folder that holds it
# (this is persistent Drive storage, not a temporary Colab directory).
extract_path = '/content/drive/MyDrive/medical_chatbot'
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive; the context manager closes the file.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)


In [None]:
import pandas as pd
# Load the dataset and keep only rows where all three text fields are present,
# since later cells call .strip() on each of them.
required_columns = ["Description", "Patient", "Doctor"]
df = (
    pd.read_csv('/content/drive/MyDrive/medical_chatbot/ai-medical-chatbot.csv')
    .dropna(subset=required_columns)
)

# Preview the first rows.
df.head()

Unnamed: 0,Description,Patient,Doctor
0,Q. What does abutment of the nerve root mean?,"Hi doctor,I am just wondering what is abutting...",Hi. I have gone through your query with dilige...
1,Q. What should I do to reduce my weight gained...,"Hi doctor, I am a 22-year-old female who was d...",Hi. You have really done well with the hypothy...
2,Q. I have started to get lots of acne on my fa...,Hi doctor! I used to have clear skin but since...,Hi there Acne has multifactorial etiology. Onl...
3,Q. Why do I have uncomfortable feeling between...,"Hello doctor,I am having an uncomfortable feel...",Hello. The popping and discomfort what you fel...
4,Q. My symptoms after intercourse threatns me e...,"Hello doctor,Before two years had sex with a c...",Hello. The HIV test uses a finger prick blood ...


In [None]:
from langchain.schema import Document

def _row_to_document(row_id, row):
    """Build one Document from a dataframe row.

    The page content is the question, the patient message and the doctor
    reply (each stripped of surrounding whitespace) on three labelled lines;
    the row's index label is stored in the metadata under "row_id".
    """
    labelled_lines = [
        "Question: " + row['Description'].strip(),
        "Patient: " + row['Patient'].strip(),
        "Doctor: " + row['Doctor'].strip(),
    ]
    return Document(
        page_content="\n".join(labelled_lines),
        metadata={"row_id": row_id},
    )


# Only the first five rows of the dataframe are converted.
documents = [
    _row_to_document(row_id, row) for row_id, row in df.head(5).iterrows()
]


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configuration: chunks of at most 500 characters with a 100-character
# overlap, trying paragraph breaks first, then lines, sentences, words, and
# finally individual characters.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ".", " ", ""],
    chunk_overlap=100,
    chunk_size=500,
)

# Break every conversation document into chunks.
split_docs = text_splitter.split_documents(documents)

# Report how many chunks the documents produced.
n_original, n_chunks = len(documents), len(split_docs)
print(f"Original docs: {n_original} → Split docs: {n_chunks}")


Original docs: 5 → Split docs: 23


In [None]:
import os

from huggingface_hub import InferenceClient

# Read the token from the environment instead of embedding the secret in the
# notebook; HUGGINGFACEHUB_API_TOKEN is populated by the credentials cell above.
embedding_model = InferenceClient(
    model="BAAI/bge-small-en-v1.5",
    token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
)


def embed_text(texts, client=embedding_model):
    """Embed each text with the Hugging Face Inference API.

    Args:
        texts: Iterable of strings to embed; one API request is made per item.
        client: Object exposing ``feature_extraction(text)``. Defaults to the
            InferenceClient created above. It is bound at definition time
            because a later cell rebinds the global ``embedding_model`` to a
            different wrapper type that has no ``feature_extraction`` method.

    Returns:
        A list with one embedding per input text, in input order, exactly as
        returned by ``client.feature_extraction``.
    """
    return [client.feature_extraction(text) for text in texts]


# Get raw text from each chunk
texts = [doc.page_content for doc in split_docs]

# Get embeddings from API (one request per chunk, so this may take a while)
embeddings = embed_text(texts)


In [None]:
# ✅ 1. Custom embedding wrapper using Hugging Face Inference API
from typing import List
from langchain.embeddings.base import Embeddings
from huggingface_hub import InferenceClient

class HFInferenceEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by the Hugging Face Inference API.

    Every text is embedded with a separate ``feature_extraction`` request, so
    no model weights are downloaded locally.
    """

    def __init__(self, model: str, token: str):
        """Create the API client.

        Args:
            model: Hugging Face model id used for feature extraction.
            token: Hugging Face API token used to authenticate requests.
        """
        self.client = InferenceClient(model=model, token=token)

    @staticmethod
    def _as_list(vector):
        """Convert an API result into plain Python lists.

        The methods below are annotated as returning lists of floats, so an
        array-like result (anything exposing ``tolist``) is converted rather
        than passed through unchanged.
        """
        if hasattr(vector, "tolist"):
            return vector.tolist()
        return list(vector)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed each text in ``texts`` and return the vectors in input order."""
        return [self._as_list(self.client.feature_extraction(text)) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string.

        NOTE(review): assumes the model returns one flat vector per text —
        confirm for models other than the one used in this notebook.
        """
        return self._as_list(self.client.feature_extraction(text))

# ✅ 2. Initialize the embedding model (API-only, no download)
import os

# The token comes from the environment (populated by the credentials cell near
# the top of the notebook) so that no secret is stored in the notebook source.
HUGGINGFACE_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]

embedding_model = HFInferenceEmbeddings(
    model="BAAI/bge-small-en-v1.5",
    token=HUGGINGFACE_API_TOKEN
)



In [None]:
# Build the vectorstore
# FAISS must be imported before it is used here: the only other executed import
# of it sits in a later cell, so a fresh-kernel "Run All" would otherwise stop
# on this cell with a NameError.
from langchain_community.vectorstores import FAISS

# Embed every chunk with the embedding wrapper and index the vectors in FAISS.
vectorstore = FAISS.from_documents(
    documents=split_docs,
    embedding=embedding_model
)


In [None]:
# NOTE(review): everything inside the triple-quoted string below is disabled
# code. It is a bare string expression, so none of it executes; the cell only
# echoes the text back as its output. Consider deleting this cell.
'''from langchain.vectorstores import FAISS

# 1. Extract raw text from the Document objects
texts = [doc.page_content for doc in split_docs]

# 2. Create (text, vector) pairs
text_vector_pairs = list(zip(texts, embeddings))

# 3. Create FAISS vectorstore
vectorstore = FAISS.from_documents(
    documents=split_docs,
    embedding=embedding_model
)
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# 1. Correct embedding wrapper for BAAI model
embedding_model = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5"
)

# 2. Vector store from Documents using embedding model
vectorstore = FAISS.from_documents(
    documents=split_docs,
    embedding=embedding_model
)

# 4. Confirm
#print(f"✅ Vectorstore created with {len(text_vector_pairs)} entries.")

'''


'from langchain.vectorstores import FAISS\n\n# 1. Extract raw text from the Document objects\ntexts = [doc.page_content for doc in split_docs]\n\n# 2. Create (text, vector) pairs\ntext_vector_pairs = list(zip(texts, embeddings))\n\n# 3. Create FAISS vectorstore\nvectorstore = FAISS.from_documents(\n    documents=split_docs,\n    embedding=embedding_model\n)\nfrom langchain.embeddings import HuggingFaceEmbeddings\nfrom langchain.vectorstores import FAISS\n\n# 1. Correct embedding wrapper for BAAI model\nembedding_model = HuggingFaceEmbeddings(\n    model_name="BAAI/bge-small-en-v1.5"\n)\n\n# 2. Vector store from Documents using embedding model\nvectorstore = FAISS.from_documents(\n    documents=split_docs,\n    embedding=embedding_model\n)\n\n# 4. Confirm\n#print(f"✅ Vectorstore created with {len(text_vector_pairs)} entries.")\n\n'

In [None]:
# Sanity check of retrieval: embed one question and look up its nearest chunks.
query = "What is the treatment for nerve root abutment?"

# Turn the question into a vector with the same wrapper used for indexing.
query_embedding = embedding_model.embed_query(query)

# Fetch the three closest chunks together with their scores.
# NOTE(review): the scores look like distances (the most relevant hit in the
# recorded output has the lowest score) — confirm against the FAISS settings.
results_with_scores = vectorstore.similarity_search_with_score_by_vector(
    query_embedding,
    k=3,
)

# Show each hit with its 1-based rank and score.
for rank, hit in enumerate(results_with_scores, start=1):
    doc, score = hit
    print(f"Result {rank} (Score: {score:.4f}):\n{doc.page_content}\n")


Result 1 (Score: 0.5440):
Question: Q. What does abutment of the nerve root mean?
Patient: Hi doctor,I am just wondering what is abutting and abutment of the nerve root means in a back issue. Please explain. What treatment is required for annular bulging and tear?
Doctor: Hi. I have gone through your query with diligence and would like you to know that I am here to help you. For further information consult a neurologist online -->

Result 2 (Score: 0.9038):
. I bought an acne soap and have been using it for a month now but I'm not sure if it works. I hope you can help me because it has been affecting my mental state lately :((((

Result 3 (Score: 0.9416):
Doctor: Hi there Acne has multifactorial etiology. Only acne soap does not improve if ypu have grade 2 or more grade acne. You need to have oral and topical medications. This before writing medicines i need to confirm your grade of acne. For mild grade topical clindamycin or retenoic acud derivative would suffice whereas for higher gr

In [None]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate



# Settings for the hosted text-generation endpoint, collected in one place.
endpoint_options = {
    "repo_id": "mistralai/Mistral-7B-Instruct-v0.2",
    "task": "text-generation",
    "max_new_tokens": 512,
    "do_sample": False,
    "repetition_penalty": 1.03,
    "provider": "auto",  # let Hugging Face choose the best provider for you
}
llm = HuggingFaceEndpoint(**endpoint_options)

# Chat-style wrapper around the endpoint, used as the chain's LLM below.
chat_model = ChatHuggingFace(llm=llm)

# Retriever over the FAISS index, with the vector store's default settings.
retriever = vectorstore.as_retriever()




# Prompt for the answer-generation step. It has three placeholders:
# {chat_history}, {context} (the retrieved chunks) and {question}.
MEDICAL_QA_TEMPLATE = """
You are a professional medical assistant. Your role is to answer user health-related questions **briefly and accurately**, using the information provided.

Instructions:
- Use **only** the information from the retrieved context **OR** your verified internal medical knowledge.
- If neither provides enough information, respond with: "I'm not certain based on the provided information."
- **Do not guess, assume, or fabricate any part of the answer.**
- Always respond in **one concise and medically sound short sentence**.
-  **always mention if the answer is from retrieved context or from your trusted knowledge**.

Chat History:
{chat_history}

Retrieved Context:
{context}

User Question:
{question}

Answer:
"""

custom_prompt = PromptTemplate.from_template(MEDICAL_QA_TEMPLATE)




# Conversation memory: stores the running dialogue under the "chat_history"
# key and hands it back as message objects.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

# Arguments forwarded to the document-combining step, so that it formats its
# input with the custom prompt defined above.
combine_docs_kwargs = {"prompt": custom_prompt}

# Retrieval-augmented conversational chain tying together the chat model,
# the retriever and the memory.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=chat_model,
    retriever=retriever,
    memory=memory,
    verbose=True,  # optional: prints internal logic
    combine_docs_chain_kwargs=combine_docs_kwargs,
)
from huggingface_hub.errors import HfHubHTTPError

# Interactive console chat. Type "exit" or "quit" (any case, surrounding
# whitespace ignored) to stop; interrupting the cell or closing stdin also
# ends the loop instead of raising.
while True:
    try:
        query = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        break

    # Ignore blank lines instead of spending an API call on an empty question.
    if not query:
        continue
    if query.lower() in ("exit", "quit"):
        break

    try:
        # `invoke` replaces the deprecated `run`; the chain takes its input
        # under "question" and returns the reply under "answer".
        response = qa_chain.invoke({"question": query})["answer"]
    except HfHubHTTPError as err:
        # Raised for failed inference requests, e.g. the 402 "Payment
        # Required" seen when the monthly credits are used up. Report it and
        # stop, since retrying would most likely hit the same error.
        print("Bot: the inference request failed:", err)
        break

    print("Bot:", response)



You: what is a neurolgist


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a professional medical assistant. Your role is to answer user health-related questions **briefly and accurately**, using the information provided.

Instructions:
- Use **only** the information from the retrieved context **OR** your verified internal medical knowledge.
- If neither provides enough information, respond with: "I'm not certain based on the provided information."
- **Do not guess, assume, or fabricate any part of the answer.**
- Always respond in **one concise and medically sound sentence**.
-  **always mention if the answer is from retrieved context or from your trusted knowledge**.

Chat History:


Retrieved Context:
Question: Q. What does abutment of the nerve root mean?
Patient: Hi doctor,I am just wondering what is abutting and abutment of the nerve root means in a back issue. Please explain. What trea

HfHubHTTPError: 402 Client Error: Payment Required for url: https://router.huggingface.co/featherless-ai/v1/chat/completions (Request ID: Root=1-689097bb-650da13f42bf402f434383f3;bef767cc-afc4-42a1-ad4d-1c20b203ce94)

You have exceeded your monthly included credits for Inference Providers. Subscribe to PRO to get 20x more monthly included credits.