In [1]:
import os
from dotenv import load_dotenv
from pprint import pprint
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['NEO4J_URI'] = os.getenv('NEO4J_URI')
os.environ['NEO4J_USERNAME'] = os.getenv('NEO4J_USERNAME')
os.environ['NEO4J_PASSWORD'] = os.getenv('NEO4J_PASSWORD')
os.environ['together_api_key'] = os.getenv('TOGETHER_API_KEY')


In [5]:
from langchain_text_splitters import MarkdownHeaderTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.document_loaders import DirectoryLoader



In [6]:
# loader = TextLoader("disease_text/Asthma.txt")
# markdown_document = loader.load()
loader = DirectoryLoader('disease_text', glob="*.txt", show_progress=True, loader_cls=TextLoader)
docs = loader.load()

100%|██████████| 77/77 [00:00<00:00, 505.99it/s]


In [8]:
(docs[0].page_content)
markdown_document = docs

In [12]:
array = [len(x.page_content.split(' ') )for x in markdown_document]

In [23]:

headers_to_split_on = [
    ("##", "Header 1"),
    # ("###", "Header 2"),
    # ("####", "Header 3"),
    # ("#####", "Header 4"),
]

markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
md_header_splits = []

for doc in markdown_document:
    md_header_splits+=(markdown_splitter.split_text(doc.page_content))


In [17]:

from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

llm=ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-0125")
# Nodes
nodes = [
    "Disease",
    "Symptom",
    "Treatment",
    "Risk Factor",
    "Prevention",
    "Complication",
    "Epidemiology",
    "Research",
    "Diagnosis",
    "Pathophysiology",
    "Genetics",
    "Prognosis",
    "Prevalence",
    "Age Group"
]

# Relationships
relationships = [
    "HAS_SYMPTOM",
    "HAS_TREATMENT",
    "HAS_RISK_FACTOR",
    "HAS_PREVENTION",
    "CAN_LEAD_TO",
    "HAS_EPIDEMIOLOGY",
    "HAS_RESEARCH",
    "DIAGNOSED_BY",
    "AFFECTS",
    "INHERITS",
    "PROGNOSIS_FOR",
    "COMORBID_WITH",
    "LEADS_TO",
    "EPIDEMIC_IN"
]

# Example usage:
print("Nodes:", nodes)
print("Relationships:", relationships)


llm_transformer = LLMGraphTransformer(llm=llm, allowed_nodes = nodes, allowed_relationships=relationships)

  1%|▏         | 1/77 [09:10<11:37:26, 550.61s/it]


Nodes: ['Disease', 'Symptom', 'Treatment', 'Risk Factor', 'Prevention', 'Complication', 'Epidemiology', 'Research', 'Diagnosis', 'Pathophysiology', 'Genetics', 'Prognosis', 'Prevalence', 'Age Group']
Relationships: ['HAS_SYMPTOM', 'HAS_TREATMENT', 'HAS_RISK_FACTOR', 'HAS_PREVENTION', 'CAN_LEAD_TO', 'HAS_EPIDEMIOLOGY', 'HAS_RESEARCH', 'DIAGNOSED_BY', 'AFFECTS', 'INHERITS', 'PROGNOSIS_FOR', 'COMORBID_WITH', 'LEADS_TO', 'EPIDEMIC_IN']


In [None]:
import pickle
graph_documents = llm_transformer.convert_to_graph_documents(md_header_splits)
with open('graph_documents.pkl', 'wb') as file:
    pickle.dump(graph_documents, file)

In [27]:
from langchain_community.graphs import Neo4jGraph
graph = Neo4jGraph()


In [30]:
import pickle
with open('graph_documents.pkl', 'rb') as file:
    graph_documents = pickle.load(file)
graph.add_graph_documents(
    graph_documents,
    baseEntityLabel=True,
    include_source=True
)

In [31]:
from langchain.vectorstores.neo4j_vector import Neo4jVector

vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    search_type="hybrid",
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding"
)



In [32]:

from langchain_core.prompts import ChatPromptTemplate
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import CommaSeparatedListOutputParser, StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

In [57]:
template = """
Extract all entities related to persons, diseases, medical conditions, symptoms, and diagnoses from the given USER_QUERY. 
{format_instructions}

USER_QUERY:
{user_query}
"""

parser = CommaSeparatedListOutputParser()
prompt = PromptTemplate(
    input_variables=['history','user_query'],
    template = template,
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

In [60]:
input_text = "what are some Control methods in Asthma, Vibhanshu has High blood pressure also he lives in delhi"

structured_llm = prompt | llm | parser
structured_llm.invoke({'user_query': input_text})

['Asthma', 'High blood pressure', 'Delhi']

In [108]:
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

def generate_full_text_query(input: str) -> str:
    """
    Generate a full-text search query for a given input string.

    This function constructs a query string suitable for a full-text
    search. It processes the input string by splitting it into words and 
    appending a similarity threshold (~2 changed characters) to each
    word, then combines them using the AND operator. Useful for mapping
    entities from user questions to database values, and allows for some 
    misspelings.
    """
    full_text_query = ""
    words = [el for el in remove_lucene_chars(input).split() if el]
    for word in words[:-1]:
        full_text_query += f" {word}~2 AND"
    full_text_query += f" {words[-1]}~2"
    return full_text_query.strip()

In [109]:
def structured_retriever(question: str) -> str:
    """
    Collects the neighborhood of entities mentioned
    in the question
    """
    result = ""
    entities = structured_llm.invoke({'user_query': question, 'history':""})
    print(entities)
    for entity in entities:
        response = graph.query(
            """CALL db.index.fulltext.queryNodes('entity', $query, 
            {limit:1})
            YIELD node,score
            CALL {
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS 
              output
              UNION
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' +  node.id AS 
              output
            }
            RETURN output LIMIT 6
            """,
            {"query": generate_full_text_query(entity)},
        )
        result += "\n".join([el['output'] for el in response])
    return result

In [110]:
easy_questions = [
    "What lifestyle factors can trigger asthma symptoms?",
    "Why is it important to avoid cigarette smoke for people with asthma?",
    "Name one recommended lifestyle modification for improving asthma control."
]

medium_questions = [
    "Compare the effectiveness of short-acting and long-acting medications for asthma management.",
    "Discuss the role of corticosteroids in the long-term control of asthma.",
    "Explain how avoiding allergens can help in managing asthma symptoms."
]

hard_questions = [
    "Evaluate the effectiveness of cognitive behavioral therapy in improving asthma control and quality of life.",
    "Discuss the controversies surrounding the use of LABA (Long-Acting Beta Agonists) in children's asthma treatment.",
    "Explain the potential benefits and risks associated with using macrolide antibiotics in treating severe, refractory asthma."
]

# Combine all questions into one list
all_questions = easy_questions + medium_questions + hard_questions

In [111]:
print(structured_retriever('I am having Wheezing treatment'))


['Wheezing', 'treatment']
Asthma - HAS_SYMPTOM -> Symptoms
Asthma - HAS_SYMPTOM -> Airflow Obstruction
Asthma - HAS_SYMPTOM -> Bronchospasms
Asthma - HAS_SYMPTOM -> Wheezing
Asthma - HAS_SYMPTOM -> Coughing
Asthma - HAS_SYMPTOM -> Chest Tightness


In [125]:
def retriever(question: str):
    print(f"Search query: {question}")
    structured_data = structured_retriever(question)
    unstructured_data = [el.page_content for el in vector_index.similarity_search(question)]
    final_data = f"""You are a helpful Medical assistant and you have to answer user queries from the given context in the form of Structured and Unstructured data.
    Structured data:
{structured_data}
Unstructured data:
{"#Document ". join(unstructured_data)}
    """
    return final_data

In [113]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage

In [114]:
_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question,
in its original language.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""  # noqa: E501
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

_search_query = RunnableBranch(
    # If input includes chat_history, we condense it with the follow-up question
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
            run_name="HasChatHistoryCheck"
        ),  # Condense follow-up question and chat into a standalone_question
        RunnablePassthrough.assign(
            chat_history=lambda x: _format_chat_history(x["chat_history"])
        )
        | CONDENSE_QUESTION_PROMPT
        | ChatOpenAI(temperature=0)
        | StrOutputParser(),
    ),
    # Else, we have no chat history, so just pass through the question
    RunnableLambda(lambda x : x["question"]),
)

In [115]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    RunnableParallel(
        {
            "context": _search_query | retriever,
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [122]:
CHAT_HISTORY =[]
def chat_bot(question, history):
    result = chain.invoke({"question": question})
    return result


In [126]:
import gradio as gr

def yes_man(message, history):
    if message.endswith("?"):
        return "Yes"
    else:
        return "Ask me anything!"

gr.ChatInterface(
    chat_bot,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Your personal medical assistant", container=False, scale=7),
    title="Mediassist 🧑‍⚕️🖨️🤖",
    theme="soft",
    cache_examples=True,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
).launch()

Running on local URL:  http://127.0.0.1:7864

To create a public link, set `share=True` in `launch()`.




Search query: hi
['No entities related to persons', 'diseases', 'medical conditions', 'symptoms', 'or diagnoses were mentioned in the USER_QUERY.']
Search query: how many people have asthma?
['asthma', 'people']
Search query: what are common symptoms of it?
['common symptoms']
Search query: who are you?
['No entities related to persons', 'diseases', 'medical conditions', 'symptoms', 'or diagnoses were mentioned in the given USER_QUERY.']
