In [1]:
%pwd

'c:\\Users\\vdine\\Medicalchatbot\\resources'

In [2]:
import os
# The kernel starts in the `resources` folder (see the `%pwd` output above), while
# later cells use paths relative to the project root (e.g. "data").
# Only step up when we are still in `resources`, so that re-running this cell
# does not keep climbing one directory higher each time.
if os.path.basename(os.getcwd()) == "resources":
    os.chdir("../")

In [3]:
%pwd

'c:\\Users\\vdine\\Medicalchatbot'

In [4]:
from langchain.document_loaders import PyPDFLoader,DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
def load_pdf_files(data):
    """Load the PDF files matched by ``*.pdf`` under a directory.

    Args:
        data: Directory path handed to ``DirectoryLoader``.

    Returns:
        The result of ``DirectoryLoader.load()``, with each matched file
        read through ``PyPDFLoader``.
    """
    return DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
    ).load()


In [6]:
# Load the PDFs from the relative "data" directory.
extract_data=load_pdf_files("data")

In [7]:
# Count the loaded documents.
len(extract_data)

637

In [8]:
from typing import List
from langchain.schema import Document
def filter_to_minimaldocs(docs:List[Document])->List[Document]:
    """Copy documents, keeping only their text and ``source`` metadata.

    Args:
        docs: Documents whose ``page_content`` and ``metadata`` are read.

    Returns:
        A new list with one ``Document`` per input, in the same order. Each
        carries the original ``page_content`` and a metadata dict holding only
        ``"source"`` (``None`` when the input metadata has no such key).
    """
    return [
        Document(
            page_content=original.page_content,
            metadata={"source": original.metadata.get("source")},
        )
        for original in docs
    ]

In [9]:
# Strip every loaded document down to its text plus the "source" metadata key.
minimal_docs=filter_to_minimaldocs(extract_data)

In [10]:
def text_split(minimal_docs, chunk_size=500, chunk_overlap=20):
    """Split documents into overlapping chunks.

    Args:
        minimal_docs: Documents passed to ``split_documents``.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            20, the value that used to be hard-coded.

    Returns:
        The chunks returned by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter=RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    return text_splitter.split_documents(minimal_docs)

In [11]:
# Split the trimmed documents into chunks for embedding.
chunked_data=text_split(minimal_docs)

In [12]:
# Count the chunks produced by the splitter.
len(chunked_data)

5859

In [13]:
from langchain_huggingface import HuggingFaceEmbeddings

import torch
def download_embeddings():
    """Create the embedding model used throughout the notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance for
        ``sentence-transformers/all-MiniLM-L6-v2``, configured with device
        ``"cuda"`` when ``torch.cuda.is_available()`` is true and ``"cpu"``
        otherwise.
    """
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={"device": device},
    )

# Instantiate the embedding model once; later cells reuse this object.
embedding=download_embeddings()

In [14]:
# Sanity check: embed a short query; the bare name on the last line displays the vector.
vector=embedding.embed_query("hello")
vector

[-0.0627717599272728,
 0.05495879426598549,
 0.052164845168590546,
 0.0857900083065033,
 -0.0827488973736763,
 -0.07457297295331955,
 0.06855470687150955,
 0.018396394327282906,
 -0.08201129734516144,
 -0.03738484904170036,
 0.012124893255531788,
 0.0035183229483664036,
 -0.004134293645620346,
 -0.04378441721200943,
 0.021807299926877022,
 -0.00510272104293108,
 0.019546564668416977,
 -0.0423487089574337,
 -0.11035966873168945,
 0.005424527917057276,
 -0.055734783411026,
 0.02805238962173462,
 -0.02315872348845005,
 0.028481375426054,
 -0.05370955914258957,
 -0.052601538598537445,
 0.03393925353884697,
 0.045388635247945786,
 0.023718422278761864,
 -0.07312081754207611,
 0.05477774888277054,
 0.0170473363250494,
 0.08136039972305298,
 -0.0028627056162804365,
 0.011958062648773193,
 0.07355856150388718,
 -0.09423744678497314,
 -0.0813620463013649,
 0.04001539200544357,
 0.0006921913009136915,
 -0.01339334063231945,
 -0.05453811213374138,
 0.005151401273906231,
 -0.026139769703149796,
 0

In [15]:
# Length of the embedding vector returned for the test query.
len(vector)

384

In [16]:
from dotenv import load_dotenv
# Load variables from the project's .env file into the process environment (python-dotenv).
load_dotenv()
import os


In [17]:
# Read the service credentials from the process environment.
# Each name is None when the corresponding variable is not set.
weaviate_api_key = os.environ.get("weaviate_api_key")
openrouter_api_key = os.environ.get("OPENROUTER_API_KEY")
weaviate_url = os.environ.get("weaviate_url")

In [18]:
# Re-export the credentials under the upper-case names that the later cells read.
# os.environ accepts only strings, so a missing value would otherwise surface as
# a bare TypeError that does not say which variable is absent; fail early and
# name it instead.
for _env_name, _env_value in (
    ("WEAVIATE_API_KEY", weaviate_api_key),
    ("OPENROUTER_API_KEY", openrouter_api_key),
    ("WEAVIATE_URL", weaviate_url),
):
    if _env_value is None:
        raise RuntimeError(
            f"{_env_name} is not set; define it in the environment or the .env file"
        )
    os.environ[_env_name] = _env_value

In [19]:
from weaviate import connect_to_weaviate_cloud
import os
from dotenv import load_dotenv

# Refresh the environment from the .env file before reading the credentials.
load_dotenv()

# Open the Weaviate Cloud client from WEAVIATE_URL / WEAVIATE_API_KEY.
# The same `client` object is reused by every later cell.
client = connect_to_weaviate_cloud(
    cluster_url=os.getenv("WEAVIATE_URL"),
    auth_credentials=os.getenv("WEAVIATE_API_KEY"),
)

# Report whether the server answers the readiness probe.
print(
    "✅ Connected to Weaviate successfully!"
    if client.is_ready()
    else "❌ Connection failed."
)


✅ Connected to Weaviate successfully!


In [21]:
from weaviate.classes.config import Property, DataType

collection_name = "MedicalChatbot"

# Ask the server directly whether the collection exists, instead of fetching
# the configuration of every collection with list_all() for a membership test.
if not client.collections.exists(collection_name):
    client.collections.create(
        name=collection_name,
        vectorizer_config=None,  # no server-side vectorizer: vectors come from the local HuggingFace model
        properties=[
            # "text" is the text_key used by the LangChain vector store in this
            # notebook, so the declared schema matches what is actually written.
            Property(name="text", data_type=DataType.TEXT),
            Property(name="source", data_type=DataType.TEXT)
        ]
    )
    print(f"✅ Created collection '{collection_name}'")
else:
    print(f"ℹ️ Collection '{collection_name}' already exists")

# Handle to the collection (new or pre-existing).
collection = client.collections.get(collection_name)


ℹ️ Collection 'MedicalChatbot' already exists


In [22]:
from langchain_weaviate import WeaviateVectorStore


# Embed every chunk with the local embedding model and write it to the
# "MedicalChatbot" collection; `docsearch` is the resulting vector-store handle.
# NOTE(review): re-running this cell presumably inserts the same chunks again — confirm
# before executing it against a collection that is already populated.
docsearch = WeaviateVectorStore.from_documents(
    chunked_data,
    embedding,
    client=client,
    index_name="MedicalChatbot",
)

print("✅ Documents added to Weaviate collection successfully!")


✅ Documents added to Weaviate collection successfully!


In [32]:
from langchain_weaviate import WeaviateVectorStore


# Wrap the "MedicalChatbot" collection in a LangChain vector store without
# ingesting anything; document text is read from the "text" property.
docsearch = WeaviateVectorStore(
    embedding=embedding,
    client=client,
    text_key="text",
    index_name="MedicalChatbot",
)

print("✅ Connected to existing Weaviate collection successfully!")


✅ Connected to existing Weaviate collection successfully!


In [33]:
from langchain.schema import Document

# A single hand-written document, tagged with its own source label.
new_doc = Document(
    metadata={"source": "Medical Tips"},
    page_content="Skin hydration plays a key role in preventing acne formation.",
)

# Append it to the collection behind `docsearch`.
docsearch.add_documents(documents=[new_doc])
print("✅ Added new document successfully!")


✅ Added new document successfully!


In [34]:
# Similarity-search retriever over the vector store, configured with k=3.
retriever = docsearch.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

In [38]:
# Smoke-test the retriever: fetch the matches for one question and print the
# first 200 characters of each hit together with its "source" metadata.
query = "What causes acne?"
retrieved_docs = retriever.invoke(query)
for doc in retrieved_docs:
    print(f"📄 {doc.page_content[:200]}...\nSource: {doc.metadata.get('source')}")


📄 shoulders, and back since these are the parts of the body
where the most sebaceous follicles are found.
Causes and symptoms
The exact cause of acne is unknown. Several risk
factors have been identifie...
Source: data\Medical_book.pdf
📄 Acidosis see Respiratory acidosis; Renal
tubular acidosis; Metabolic acidosis
Acne
Definition
Acne is a common skin disease characterized by
pimples on the face, chest, and back. It occurs when the
po...
Source: data\Medical_book.pdf
📄 GALE ENCYCLOPEDIA OF MEDICINE 2 25
Acne
Acne vulgaris affecting a woman’s face. Acne is the general
name given to a skin disorder in which the sebaceous
glands become inflamed. (Photograph by Biophoto...
Source: data\Medical_book.pdf


In [75]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_weaviate import WeaviateVectorStore
from langchain.schema import Document

# ✅ Load environment variables
load_dotenv()

# ✅ Initialize Chat model using OpenRouter
# OPENROUTER_API_BASE is optional: when it is unset or empty, fall back to
# OpenRouter's OpenAI-compatible endpoint. Without the fallback the client would
# target the default OpenAI endpoint with an OpenRouter key and model name.
chat_model = ChatOpenAI(
    model="mistralai/mistral-nemo:free",  # OpenRouter model
    temperature=0.2,
    max_tokens=1024,
    openai_api_key=os.getenv("OPENROUTER_API_KEY"),
    openai_api_base=os.getenv("OPENROUTER_API_BASE") or "https://openrouter.ai/api/v1"
)

# System instructions for the answering model. The "{context}" placeholder is
# left unformatted here so the prompt template can fill it in later.
system_prompt = "".join(
    [
        "You are a medical assistant for question-answering tasks. ",
        "Use the following context to answer the question in a concise way (max 3 sentences). ",
        "If you don't know the answer, say you don't know.\n\n",
        "Context:\n{context}",
    ]
)

# Two-message chat template: the system instructions, then the user's question
# supplied through the "{input}" variable.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)


In [77]:
# Chain that passes the retrieved documents and the question to the chat model.
question_answer_chain = create_stuff_documents_chain(
    llm=chat_model,
    prompt=prompt,
)
# Full RAG pipeline: the retriever output feeds the question-answer chain.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [79]:
# Run the RAG chain on a sample question and print only the "answer" field of the result.
response = rag_chain.invoke({"input": "what is Acromegaly and gigantism?"})
print(response["answer"])

Acromegaly is a hormonal disorder caused by excess growth hormone (GH) from the pituitary gland, leading to enlargement of bones and soft tissues, particularly in the hands, feet, and face. Gigantism is a similar condition but occurs when excess GH is produced during childhood, resulting in excessive growth in height.


In [80]:
# Second sample question; `response` is overwritten with the new result before printing its "answer".
response = rag_chain.invoke({"input": "what is Acne?"})
print(response["answer"])

Acne is a common skin condition caused by clogged pores with oil, dead skin cells, and bacteria, leading to pimples.
