# **Conversational Medicine Assistant**

---



# Prerequisite

In [1]:
!pip install -U gradio langchain langchain-core langchain-community langchain-google-genai langchain-neo4j neo4j pydantic langchain-experimental

Collecting gradio
  Downloading gradio-5.34.0-py3-none-any.whl.metadata (16 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.25-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.5-py3-none-any.whl.metadata (5.2 kB)
Collecting langchain-neo4j
  Downloading langchain_neo4j-0.4.0-py3-none-any.whl.metadata (4.5 kB)
Collecting neo4j
  Downloading neo4j-5.28.1-py3-none-any.whl.metadata (5.9 kB)
Collecting pydantic
  Downloading pydantic-2.11.7-py3-none-any.whl.metadata (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.0/68.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting gradio-client==1.10.3 (from gradio)
  Downloading gradio_client-1.10.3-py3-none-any.whl.metadata (7.1 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading data

In [2]:
import json
import os
import re
from getpass import getpass

import gradio as gr
import pandas as pd
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_neo4j import Neo4jGraph, GraphCypherQAChain
from neo4j import GraphDatabase
from neo4j.exceptions import ClientError

# LLM and Database Setup


In [None]:
# Keep the secret out of the saved notebook: reuse a key that is already
# exported in the environment, otherwise ask for it interactively.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")

In [None]:
# Gemini model shared by the Cypher-generation, answer and evaluation steps.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

# Connection settings are read from the environment so real credentials never
# have to be written into the notebook. The literals are placeholders only:
# export NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD (or edit them) first.
url = os.environ.get("NEO4J_URI", "bolt://ip:port")
username = os.environ.get("NEO4J_USERNAME", "username")
password = os.environ.get("NEO4J_PASSWORD", "password")

# LangChain wrapper: provides the schema and is the graph handed to the QA chain.
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password
)

# Raw driver: used by the GDS projection and similarity cells below.
driver = GraphDatabase.driver(url, auth=(username, password))

In [28]:
# Re-read the database schema and print it; this same `graph` object is what
# the QA chain is built on further down.
graph.refresh_schema()
print(graph.schema)

Node properties:
Medicine {name: STRING, image_url: STRING, excellent_review: STRING, average_review: STRING, poor_review: STRING}
Manufacturer {name: STRING}
Composition {name: STRING}
Use {name: STRING}
SideEffect {name: STRING}
Relationship properties:

The relationships:
(:Medicine)-[:MADE_BY]->(:Manufacturer)
(:Medicine)-[:HAS_COMPOSITION]->(:Composition)
(:Medicine)-[:USED_FOR]->(:Use)
(:Medicine)-[:CAUSES]->(:SideEffect)


# Graph Projection

In [30]:
# (Re)build the in-memory GDS projection linking medicines to compositions.
with driver.session(database="neo4j") as session:

    # Second argument `false`: do not fail when the projection does not exist
    # yet, which keeps the cell re-runnable.
    # consume() exhausts the result so a server-side error is raised in this
    # cell rather than being left on a pending result.
    session.run(
        "CALL gds.graph.drop('compositionGraph', false) YIELD graphName"
    ).consume()
    session.run("""
    CALL gds.graph.project(
        'compositionGraph',
        ['Medicine', 'Composition'],
        {
            HAS_COMPOSITION: {
                type: 'HAS_COMPOSITION',
                orientation: 'UNDIRECTED'
            }
        }
    )
    """).consume()

In [31]:
# (Re)build the in-memory GDS projection linking medicines to side effects.
with driver.session(database="neo4j") as session:

    # Second argument `false`: do not fail when the projection does not exist
    # yet, which keeps the cell re-runnable.
    # consume() exhausts the result so a server-side error is raised in this
    # cell rather than being left on a pending result.
    session.run(
        "CALL gds.graph.drop('sideeffectGraph', false) YIELD graphName"
    ).consume()
    session.run("""
    CALL gds.graph.project(
        'sideeffectGraph',
        ['Medicine', 'SideEffect'],
        {
            CAUSES: {
                type: 'CAUSES',
                orientation: 'UNDIRECTED'
            }
        }
    )
    """).consume()

In [32]:
# (Re)build the in-memory GDS projection linking medicines to their uses.
with driver.session(database="neo4j") as session:

    # Second argument `false`: do not fail when the projection does not exist
    # yet, which keeps the cell re-runnable.
    # consume() exhausts the result so a server-side error is raised in this
    # cell rather than being left on a pending result.
    session.run(
        "CALL gds.graph.drop('useGraph', false) YIELD graphName"
    ).consume()
    session.run("""
    CALL gds.graph.project(
        'useGraph',
        ['Medicine', 'Use'],
        {
            USED_FOR: {
                type: 'USED_FOR',
                orientation: 'UNDIRECTED'
            }
        }
    )
    """).consume()

In [33]:
# Print one summary line per GDS projection currently registered on the server.
with driver.session(database="neo4j") as session:
    result = session.run("""
        CALL gds.graph.list()
        YIELD graphName, nodeCount, relationshipCount
        RETURN graphName, nodeCount, relationshipCount
    """)

    print("List Graph Projection:")
    # Records are read while the session is still open.
    for record in result:
        graph_name = record["graphName"]
        node_count = record["nodeCount"]
        rel_count = record["relationshipCount"]
        print(f"- {graph_name} (Nodes: {node_count}, Rels: {rel_count})")


List Graph Projection:
- sideeffectGraph (Nodes: 12263, Rels: 160016)
- compositionGraph (Nodes: 12559, Rels: 35252)
- medicineGraph (Nodes: 14041, Rels: 218318)
- useGraph (Nodes: 12215, Rels: 23050)


# Jaccard Similarity

In [46]:
# Write SIMILAR_COMPOSITION relationships (with a `score` property) computed by
# GDS node similarity on the composition projection, keeping the top 5 per node.
# NOTE(review): re-running this cell may write a second set of
# SIMILAR_COMPOSITION relationships -- confirm, and clear the old ones first if so.
with driver.session(database="neo4j") as session:
    # consume() exhausts the result so a failure of the procedure is raised in
    # this cell rather than being left on a pending result.
    session.run("""
    CALL gds.nodeSimilarity.write('compositionGraph', {
        writeRelationshipType: 'SIMILAR_COMPOSITION',
        writeProperty: 'score',
        topK: 5
    })
    YIELD nodesCompared, relationshipsWritten, similarityDistribution
    """).consume()

In [52]:
# Write SIMILAR_SIDE_EFFECT relationships (with a `score` property) computed by
# GDS node similarity on the side-effect projection, keeping the top 5 per node.
# NOTE(review): re-running this cell may write a second set of
# SIMILAR_SIDE_EFFECT relationships -- confirm, and clear the old ones first if so.
with driver.session(database="neo4j") as session:
    # consume() exhausts the result so a failure of the procedure is raised in
    # this cell rather than being left on a pending result.
    session.run("""
    CALL gds.nodeSimilarity.write('sideeffectGraph', {
        writeRelationshipType: 'SIMILAR_SIDE_EFFECT',
        writeProperty: 'score',
        topK: 5
    })
    YIELD nodesCompared, relationshipsWritten, similarityDistribution
    """).consume()

In [53]:
# Write SIMILAR_USE relationships (with a `score` property) computed by GDS
# node similarity on the use projection, keeping the top 5 per node.
# NOTE(review): re-running this cell may write a second set of SIMILAR_USE
# relationships -- confirm, and clear the old ones first if so.
with driver.session(database="neo4j") as session:
    # consume() exhausts the result so a failure of the procedure is raised in
    # this cell rather than being left on a pending result.
    session.run("""
    CALL gds.nodeSimilarity.write('useGraph', {
        writeRelationshipType: 'SIMILAR_USE',
        writeProperty: 'score',
        topK: 5
    })
    YIELD nodesCompared, relationshipsWritten, similarityDistribution
    """).consume()

In [56]:
# Build one table of medicine pairs: every SIMILAR_COMPOSITION pair, with the
# side-effect and use scores for the same pair filled in (0.0 when absent).
with driver.session(database="neo4j") as session:
    result = session.run("""
    MATCH (m1:Medicine)-[r1:SIMILAR_COMPOSITION]->(m2:Medicine)
    OPTIONAL MATCH (m1)-[r2:SIMILAR_SIDE_EFFECT]->(m2)
    OPTIONAL MATCH (m1)-[r3:SIMILAR_USE]->(m2)
    RETURN
      m1.name AS medicine1,
      m2.name AS medicine2,
      r1.score AS composition_score,
      coalesce(r2.score, 0.0) AS sideeffect_score,
      coalesce(r3.score, 0.0) AS use_score
    ORDER BY composition_score DESC
    """)

    # Convert each record to a plain dict while the session is still open.
    data = []
    for row in result:
        data.append(row.data())
    df = pd.DataFrame(data)

df

Unnamed: 0,medicine1,medicine2,composition_score,sideeffect_score,use_score
0,Amoxyclav 625 Tablet,Augmentin 625 Duo Tablet,1.000000,1.0,1.0
1,Augmentin Duo Oral Suspension,Augmentin 625 Duo Tablet,1.000000,0.0,0.0
2,Augmentin DDS Suspension,Augmentin 625 Duo Tablet,1.000000,0.0,0.0
3,Augmentin 1000 Duo Tablet,Augmentin 625 Duo Tablet,1.000000,1.0,1.0
4,Augmentin 1.2gm Injection,Augmentin 625 Duo Tablet,1.000000,1.0,1.0
...,...,...,...,...,...
54948,Mibcal Soft Gelatin Capsule,Mecofol 10 Tablet For Vitamin B12 Deficiency,0.142857,0.0,0.0
54949,Argifizz 1500mg Tablet,Ovafuze Tablet,0.125000,0.0,0.0
54950,Otski Injection,New Alene Tablet,0.100000,0.0,0.0
54951,Otski Injection,Nuhenz Capsule,0.100000,0.0,0.0


# Prompt Configuration

In [16]:
# Prompt for the Cypher-generation step. Inputs: {question} and {schema}.
# Doubled braces are template escapes, so "{{medicine_name}}" reaches the model
# as the literal placeholder {medicine_name}.
# The similarity templates compare names through toLower() so they follow the
# prompt's own case-insensitivity rule; an exact-match pattern would return
# nothing for a differently-cased medicine name.
cypher_prompt = PromptTemplate.from_template("""
You are a Cypher expert. Generate Cypher queries to retrieve information from a medical knowledge graph.
Always use `toLower()` on string comparisons to make them case-insensitive.

If the user asks for medicines that are similar to a given medicine (e.g. "similar to paracetamol", "related to X", "alternative to Y"), use the appropriate similarity relationship based on the user's intent:

- If the user refers to **composition**, use SIMILAR_COMPOSITION.
- If the user refers to **side effects**, use SIMILAR_SIDE_EFFECT.
- If the user refers to **uses or indications**, use SIMILAR_USE.
- If the user simply asks for "similar medicine" without mentioning the type, return results from all three types, combined.

Use the following query formats:

### Similar by Composition
MATCH (m1:Medicine)-[r:SIMILAR_COMPOSITION]->(m2:Medicine)
WHERE toLower(m1.name) = toLower("{{medicine_name}}")
RETURN m2.name AS similar_medicine, r.score AS score, "composition" AS source
ORDER BY r.score DESC
LIMIT 10

### Similar by Side Effect
MATCH (m1:Medicine)-[r:SIMILAR_SIDE_EFFECT]->(m2:Medicine)
WHERE toLower(m1.name) = toLower("{{medicine_name}}")
RETURN m2.name AS similar_medicine, r.score AS score, "side_effect" AS source
ORDER BY r.score DESC
LIMIT 10

### Similar by Use
MATCH (m1:Medicine)-[r:SIMILAR_USE]->(m2:Medicine)
WHERE toLower(m1.name) = toLower("{{medicine_name}}")
RETURN m2.name AS similar_medicine, r.score AS score, "use" AS source
ORDER BY r.score DESC
LIMIT 10

### Fallback: Combine all types if user is unclear
MATCH (m1:Medicine)-[r1:SIMILAR_COMPOSITION]->(m2:Medicine)
WHERE toLower(m1.name) = toLower("{{medicine_name}}")
RETURN m2.name AS similar_medicine, r1.score AS score, "composition" AS source
ORDER BY r1.score DESC
LIMIT 5

UNION

MATCH (m1:Medicine)-[r2:SIMILAR_SIDE_EFFECT]->(m2:Medicine)
WHERE toLower(m1.name) = toLower("{{medicine_name}}")
RETURN m2.name AS similar_medicine, r2.score AS score, "side_effect" AS source
ORDER BY r2.score DESC
LIMIT 3

UNION

MATCH (m1:Medicine)-[r3:SIMILAR_USE]->(m2:Medicine)
WHERE toLower(m1.name) = toLower("{{medicine_name}}")
RETURN m2.name AS similar_medicine, r3.score AS score, "use" AS source
ORDER BY r3.score DESC
LIMIT 2

If no similarity is requested, generate an appropriate Cypher query based on the question and schema.

Question: {question}
Schema: {schema}
""")

In [6]:
# Prompt for the answer-writing step. Inputs: {question} and {context}, where
# {context} is filled into the "Data:" slot of the template.
# It tells the model to describe similarity in words (no numeric scores) and
# to always append a health disclaimer.
qa_prompt = PromptTemplate.from_template("""
You are a helpful medical assistant answering based on a graph database.
The graph contains information about medicines, their compositions, uses, manufacturers, and side effects.

If the question asks about what a medicine is used for (e.g., "What is Paracetamol used for?"), answer by listing its uses from the graph.
Present them as clear, readable terms such as "Paracetamol is commonly used to treat...".

If the question asks for similar medicines, use the data provided and respond with a list or paragraph of medicines.
If a medicine appears in both composition-based similarity and similarity data (e.g., from embeddings or side effects), mention that medicine first.
Do not include any numeric similarity scores in your response.
Instead, describe similarities using natural phrases like "based on composition", "based on side effects", "based on use", or "based on both".
Avoid mentioning technical terms such as "score", "ranking", or numerical values.

Always give a general health disclaimer such as:
"Please consult a healthcare professional before taking any medication."

Present your answer as a clear, readable list or paragraph, depending on the number of medicines or uses.

Question: {question}
Data: {context}

Answer:
""")

In [7]:
# Prompt for the LLM-as-judge step. Inputs: {question}, {context}, {answer}.
# The reply format is spelled out explicitly because the rating and the
# justification are later extracted from the reply by their "Rating:" and
# "Justification:" labels.
evaluation_prompt = PromptTemplate.from_template("""
You are an expert medical QA evaluator. Your job is to assess the quality of an assistant's answer to a question based on information from a medical knowledge graph.

Evaluate the answer using the following criteria:
- Relevance: Does the answer directly respond to the question?
- Completeness: Are all key pieces of information from the context included?
- Clarity: Is the answer well-written and understandable to non-experts?
- Factual accuracy: Does the answer only mention information present in the context?

Question: {question}
Context: {context}
Answer: {answer}

Now rate the answer on a scale of 1 to 5:
- 5: Excellent – fully accurate, relevant, complete, and clearly written
- 4: Good – mostly accurate and clear, with minor omissions or vague wording
- 3: Fair – partially correct, but some key information is missing or unclear
- 2: Poor – mostly incorrect or hard to understand
- 1: Very poor – wrong or irrelevant answer

Respond in exactly this format, with nothing before it:
Rating: <integer from 1 to 5>
Justification: <a short justification>
""")

# QA Chain Setup

In [17]:
# Question -> Cypher -> graph query -> natural-language answer.
chain = GraphCypherQAChain.from_llm(
    llm=llm,
    graph=graph,
    cypher_prompt=cypher_prompt,
    qa_prompt=qa_prompt,
    verbose=True,
    # Also return the generated Cypher and the retrieved rows under
    # "intermediate_steps"; without this the output holds only the final
    # answer, so nothing downstream can inspect what the graph returned.
    return_intermediate_steps=True,
    # The chain executes model-generated Cypher against the database.
    # NOTE(review): connect with a read-only database user -- confirm.
    allow_dangerous_requests=True,
)

In [9]:
# invoke() replaces the deprecated Chain.run(); the answer text is stored
# under the "result" key of the returned dict.
response = chain.invoke({"query": "Which medicines contain amoxycillin?"})["result"]
print(response)

  response = chain.run("Which medicines contain amoxycillin?")




[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Medicine)-[:HAS_COMPOSITION]->(c:Composition)
WHERE toLower(c.name) = "amoxycillin"
RETURN m.name AS medicine
LIMIT 10
[0m
Full Context:
[32;1m[1;3m[{'medicine': 'Augmentin 625 Duo Tablet'}, {'medicine': 'Amoxyclav 625 Tablet'}, {'medicine': 'Augmentin Duo Oral Suspension'}, {'medicine': 'Augmentin DDS Suspension'}, {'medicine': 'Almox 500 Capsule'}, {'medicine': 'Augmentin 1000 Duo Tablet'}, {'medicine': 'Augmentin 1.2gm Injection'}, {'medicine': 'Augmentin 375 Tablet'}, {'medicine': 'Advent Forte 457mg Syrup Tangy Orange'}, {'medicine': 'Advent 625 Tablet'}][0m

[1m> Finished chain.[0m
The following medicines contain amoxycillin:

*   Augmentin 625 Duo Tablet
*   Amoxyclav 625 Tablet
*   Augmentin Duo Oral Suspension
*   Augmentin DDS Suspension
*   Almox 500 Capsule
*   Augmentin 1000 Duo Tablet
*   Augmentin 1.2gm Injection
*   Augmentin 375 Tablet
*   Advent Forte 457mg Syrup 

In [10]:
# invoke() replaces the deprecated Chain.run(); the answer text is stored
# under the "result" key of the returned dict.
response = chain.invoke({"query": "Which medicines are similar to Amoxyclav 625 Tablet?"})["result"]
print(response)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m1:Medicine {name: "Amoxyclav 625 Tablet"})-[r:SIMILAR_COMPOSITION]->(m2:Medicine)
RETURN m2.name AS similar_medicine, r.score AS score, "composition" AS source
ORDER BY r.score DESC
LIMIT 10

UNION

MATCH (m1:Medicine {name: "Amoxyclav 625 Tablet"})-[r:SIMILAR_SIDE_EFFECT]->(m2:Medicine)
RETURN m2.name AS similar_medicine, r.score AS score, "side_effect" AS source
ORDER BY r.score DESC
LIMIT 10

UNION

MATCH (m1:Medicine {name: "Amoxyclav 625 Tablet"})-[r:SIMILAR_USE]->(m2:Medicine)
RETURN m2.name AS similar_medicine, r.score AS score, "use" AS source
ORDER BY r.score DESC
LIMIT 10
[0m
Full Context:
[32;1m[1;3m[{'similar_medicine': 'Augmentin 625 Duo Tablet', 'score': 1.0, 'source': 'composition'}, {'similar_medicine': 'Augmentin Duo Oral Suspension', 'score': 1.0, 'source': 'composition'}, {'similar_medicine': 'Augmentin DDS Suspension', 'score': 1.0, 'source': 'composition'}, {'simil

# Evaluation

In [18]:
def parse_rating_justification(text):
    """Split an evaluator reply into ``(rating, justification)``.

    The preferred reply shape is ``Rating: 4`` followed by
    ``Justification: ...``. Model output is free-form, so the parser also
    accepts markdown-bold or differently-cased labels and a bare leading
    score such as ``4`` or ``4/5``.

    Args:
        text: Raw evaluator reply. ``None`` is treated as an empty string.

    Returns:
        tuple: ``(rating, justification)``. ``rating`` is an ``int``, or
        ``None`` when no score is found. ``justification`` is the text after
        the ``Justification`` label; without that label it is the text that
        follows the score, or the whole stripped reply when nothing follows.
    """
    text = text or ""

    # Labelled score: "Rating: 4", "**Rating:** 4", "rating 4/5".
    rating_match = re.search(
        r"Rating[:*\s]*(\d+)(?:\s*/\s*\d+)?", text, re.IGNORECASE
    )
    if rating_match is None:
        # Unlabelled reply that starts with the score ("4", "**4**", "5/5").
        # Multi-digit numbers and decimals are rejected.
        rating_match = re.search(
            r"\A\W*([1-5])(?!\d)(?!\.\d)(?:\s*/\s*5)?", text
        )

    # The colon is required so the word "justification" inside ordinary prose
    # is not mistaken for the label.
    justification_match = re.search(
        r"Justification\**\s*:[*\s]*(.*)", text, re.DOTALL | re.IGNORECASE
    )

    rating = int(rating_match.group(1)) if rating_match else None

    if justification_match:
        justification = justification_match.group(1).strip()
    else:
        # No label: use what follows the score, minus separator punctuation.
        remainder = text[rating_match.end():] if rating_match else ""
        remainder = remainder.lstrip(" \t\r\n:*-\u2013\u2014.)]").rstrip()
        justification = remainder or text.strip()

    return rating, justification

In [19]:
def ask_and_evaluate(question: str):
    """Answer ``question`` with the QA chain, then have the LLM grade the answer.

    The retrieved graph data given to the evaluator is read from the chain
    output: a top-level ``"context"`` entry if there is one, otherwise the
    first ``{"context": ...}`` entry under ``"intermediate_steps"`` (present
    when the chain returns intermediate steps). If neither exists the
    evaluator is told the context is ``"Not available"``.

    Args:
        question: Natural-language question for the chain.

    Returns:
        tuple: ``(result, evaluation_data)``. ``result`` is the raw chain
        output. ``evaluation_data`` is a dict with ``rating``,
        ``justification`` and a ``metadata`` dict copied from the evaluator's
        response.

    Side effects:
        Prints the question, the answer and the evaluation as JSON.
    """
    result = chain.invoke({"query": question})

    context = None
    if isinstance(result, dict):
        answer = result.get("result", result)
        context = result.get("context")
        if context is None:
            for step in result.get("intermediate_steps") or []:
                if isinstance(step, dict) and "context" in step:
                    context = step["context"]
                    break
    elif isinstance(result, str):
        answer = result
    else:
        answer = str(result)

    if context is None:
        context = "Not available"

    evaluation_input = evaluation_prompt.format(
        question=question,
        context=context,
        answer=answer
    )

    evaluation_result = llm.invoke(evaluation_input)

    rating, justification = parse_rating_justification(evaluation_result.content)

    evaluation_data = {
        "rating": rating,
        "justification": justification,
        "metadata": {
            "additional_kwargs": getattr(evaluation_result, "additional_kwargs", {}),
            "response_metadata": getattr(evaluation_result, "response_metadata", {}),
            "id": getattr(evaluation_result, "id", None),
            "usage_metadata": getattr(evaluation_result, "usage_metadata", {})
        }
    }

    print(question)
    print(answer)
    # default=str keeps the dump from failing if the provider metadata holds
    # values that are not JSON-serialisable.
    print(json.dumps(evaluation_data, indent=2, ensure_ascii=False, default=str))

    return result, evaluation_data

In [13]:
# Run the answer-then-evaluate pipeline on one sample question; the call
# prints the question, the answer and the evaluation.
question = "List medicines used for dandruff in the database"
result, evaluation_data = ask_and_evaluate(question)



[1m> Entering new GraphCypherQAChain chain...[0m
Generated Cypher:
[32;1m[1;3mcypher
MATCH (m:Medicine)-[:USED_FOR]->(u:Use)
WHERE toLower(u.name) CONTAINS "dandruff"
RETURN m.name AS medicine
LIMIT 10
[0m
Full Context:
[32;1m[1;3m[{'medicine': 'AF-K Lotion'}, {'medicine': 'Anti Druf Lotion'}, {'medicine': 'Danclear  Cream'}, {'medicine': 'Dencross Scalp Lotion'}, {'medicine': 'Danblock Lotion'}, {'medicine': 'Dan Shine Lotion'}, {'medicine': 'Hairshine Ktz Soap'}, {'medicine': 'IZ K Lotion'}, {'medicine': 'Ketostar Anti Dandruff Lotion'}, {'medicine': 'KZ Plus Lotion'}][0m

[1m> Finished chain.[0m
List medicines used for dandruff in the database
The following medicines are used for dandruff based on the data in the database:

*   AF-K Lotion
*   Anti Druf Lotion
*   Danclear Cream
*   Dencross Scalp Lotion
*   Danblock Lotion
*   Dan Shine Lotion
*   Hairshine Ktz Soap
*   IZ K Lotion
*   Ketostar Anti Dandruff Lotion
*   KZ Plus Lotion

Please consult a healthcare profess

# Live Demo

In [20]:
def answer_question(question):
    """Gradio callback: answer ``question`` and grade the answer with the LLM.

    The retrieved graph data given to the evaluator is read from the chain
    output: a top-level ``"context"`` entry if there is one, otherwise the
    first ``{"context": ...}`` entry under ``"intermediate_steps"`` (present
    when the chain returns intermediate steps). If neither exists the
    evaluator is told the context is ``"Not available"``.

    Args:
        question: Text typed into the question box.

    Returns:
        tuple: ``(answer, evaluation)`` strings for the two output boxes.
        ``evaluation`` is ``"(<rating>) <justification>"``, with ``n/a`` as
        the rating when none could be parsed. On failure the answer is
        ``"Error: <message>"`` and the evaluation is empty.
    """
    # Do not spend two LLM calls on an empty submission.
    if not question or not question.strip():
        return "Please enter a question.", ""

    try:
        result = chain.invoke({"query": question})

        context = None
        if isinstance(result, dict):
            answer = result.get("result", str(result))
            context = result.get("context")
            if context is None:
                for step in result.get("intermediate_steps") or []:
                    if isinstance(step, dict) and "context" in step:
                        context = step["context"]
                        break
        else:
            answer = str(result)

        if context is None:
            context = "Not available"

        evaluation_input = evaluation_prompt.format(
            question=question,
            context=context,
            answer=answer
        )

        evaluation_result = llm.invoke(evaluation_input)
        rating, justification = parse_rating_justification(evaluation_result.content)

        shown_rating = rating if rating is not None else "n/a"
        evaluation_str = f"({shown_rating}) {justification}"
        return answer, evaluation_str

    except Exception as e:
        # UI boundary: show the failure in the answer box instead of letting
        # the callback raise.
        return f"Error: {e}", ""

In [21]:
# Gradio form: one question box in; the chain's answer and the LLM evaluation out.
question_box = gr.Textbox(label="Question")
answer_box = gr.Textbox(label="Answer")
evaluation_box = gr.Textbox(label="Evaluation (1-5)")

demo = gr.Interface(
    fn=answer_question,
    inputs=question_box,
    outputs=[answer_box, evaluation_box],
    title="Conversational Medicine Assistant",
)

demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://084e4499f2d1d0beb6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


