In [1]:
import pandas as pd
import re
from tqdm import tqdm
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA,  ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import ConversationChain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate

In [1]:
df = pd.read_excel("FinalDementia_11APR2023_v2.xlsx", sheet_name="Final")
df.head(3)

In [3]:
df['Log Details'][0]

'31-Mar-2021 13:17 (Log Summary): (PCP Entry) PCP asked question: Memory concerns - "word retrieval". Can\'t find the words. Reported now always sleeping well - sometimes only 4 hours / hs. However  doesn\'t relate word finding to sleep.   No reports of pain. Denies pain at present. Famhx - Mother - Alzheimer\'s dx. Father\'s side of family - "a lot of dementia".  Recent death of husband in LTC related to progressive dementia.  Supportive son Scott with house chores. Lives on her own.  Retired teacher.  Significant hx - fall in garage January 2021 - small left vertex intracranial hemorrhage/ contusion. Symptoms of numbness to left side of face resolved. Reporting occasional headache temporal areas. No visual changes, specifically no diplopia. No nv. No muscle weakness, numbness or tingling.   O/ Alert and oriented to person, place, time, season. Looks well. Good eye contact. Normal rate of speech and fluency at this time. Thoughts seem organized, but reporting can  "lose track" of what

In [4]:
dtf = df['Log Details']

In [5]:
pcp_pattern = re.compile(r'\(PCP Entry\)(.*?)(?=\(\w+ Entry\)|$)', re.DOTALL)
specialist_pattern = re.compile(r'\(Specialist Entry\)(.*?)(?=\(\w+ Entry\)|$)', re.DOTALL)

def clean_text(extracted_text_list):
    cleaned_texts = []
    for extracted_text in extracted_text_list:
        cleaned_text = extracted_text.strip()
        # Remove any log-like patterns within the text
        cleaned_text = re.sub(r'\d{2}-\w{3}-\d{4} \d{2}:\d{2}', '', cleaned_text)
        cleaned_text = re.sub(r'\(.*?\):', '', cleaned_text)
        cleaned_text = re.sub(r'(PCP asked question|PCP Closed Request|Specialist requested referral|PCP will make referral):', '', cleaned_text)
        cleaned_text = re.sub(r'\n+', '\n', cleaned_text).strip()
        cleaned_texts.append(cleaned_text)
    return cleaned_texts

In [6]:
q_a_df = {'pcp': [], 'spc': []}

for text in tqdm(dtf):
    pcp_entries = pcp_pattern.findall(text)
    specialist_entries = specialist_pattern.findall(text)

    cleaned_pcp_entries = clean_text(pcp_entries)
    cleaned_specialist_entries = clean_text(specialist_entries)
    
    s = ''
    for text in cleaned_pcp_entries:
        s += text
    q_a_df['pcp'].append(s)
       
    s = ''
    for text in cleaned_specialist_entries:
        s += text
    q_a_df['spc'].append(s)

q_a_df = pd.DataFrame(q_a_df)
# print("PCP Entries:")
# for entry_type, text in cleaned_pcp_entries:
#     print(f"{entry_type}: {text}")
#     print("\n" + "-"*80 + "\n")

# print("Specialist Entries:")
# for text in cleaned_specialist_entries:
#     print(text)
#     print("\n" + "-"*80 + "\n")

100%|██████████████████████████████████████████████████████████████████████████████| 489/489 [00:00<00:00, 1702.13it/s]


In [7]:
q_a_df

Unnamed: 0,pcp,spc
0,"Memory concerns - ""word retrieval"". Can't find...",Yes i agree the question is ABI vs some form o...
1,Ms. Timmons is an 81 year old female who is a ...,Specialist provided recommendation of: Thank y...
2,Thank you for your opinion/recommendations reg...,Specialist provided recommendation of: It is v...
3,Thank you for your opinion regarding this 88 y...,Specialist provided recommendation of: It was ...
4,"Dear Dr., Thank you for your guidance in this ...","Specialist requested more info: Hi, very diffi..."
...,...,...
484,"A few months ago, this 84 yo patient had an ec...",Specialist provided recommendation of: Thank y...
485,This 73-year-old gentleman had an unprovoked D...,Specialist provided recommendation of: Dr. Bec...
486,Appreciate your recommendation of a sleeping m...,Specialist provided recommendation of: Hello D...
487,"Could you please provide guidance for George, ...",Specialist requested more info: Hi Dr. Eberhar...


In [8]:
qapairs = []

for row in q_a_df.iterrows():
    qapairs.append(Document(page_content=row[1]['pcp']+'\n'+row[1]['spc']))

In [9]:
class QA:
    def __init__(self):
        self.llm_name = "gpt-4o"
        self.llm = ChatOpenAI(model_name=self.llm_name, temperature=0)
        self.memory = ConversationSummaryBufferMemory(llm=self.llm, max_token_limit=100, memory_key='chat_history', output_key='answer')
        self.chain_of_thought_prompt = PromptTemplate(
            input_variables=["question"],
            template=(
                "Let's think through this step by step to arrive at the best possible answer.\n"
                "I want you to think that you are the specialist and are providing the answer.\n"
                "Question: {question}\n"
                "Step 1: Understand the context and background of the question.\n"
                "Step 2: Identify the key components and variables involved.\n"
                "Step 3: Explore different perspectives and potential solutions.\n"
                "Step 4: Evaluate the pros and cons of each solution.\n"
                "Step 5: Summarize the findings and provide a comprehensive answer.\n"
                "Do not include your thinking in the final answer. Answer like a pro."
                "Answer:"
            )
        )
        
    def load_db(self, qapairs, chain_type, k):
        documents = qapairs
    
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
        docs = text_splitter.split_documents(documents)
        
        embeddings = OpenAIEmbeddings()
        
        db = DocArrayInMemorySearch.from_documents(docs, embeddings)
        
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
        
        self.qa = ConversationalRetrievalChain.from_llm(
            llm=self.llm, 
            chain_type=chain_type, 
            retriever=retriever, 
            return_source_documents=True,
            return_generated_question=True, 
            memory = self.memory, 
            get_chat_history = lambda h : h
        )

    def invoke(self, query):
        chain_of_thought_query = self.chain_of_thought_prompt.format(question=query)
        
        result = self.qa({"question": chain_of_thought_query})
        
        #print(result)
        #self.memory.save_context({"input": query}, 
        #                         {"output": result["answer"]})
        return result

In [10]:
%%time
qa = QA()
qa.load_db(qapairs, "stuff", 3)



CPU times: total: 14.8 s
Wall time: 38.7 s


In [11]:
query = "With the frequent PVCs is it still safe to prescribe a QT prolonging medication to help with his dementia ie aricept? "
qa.invoke(query)

  warn_deprecated(


{'question': "Let's think through this step by step to arrive at the best possible answer.\nI want you to think that you are the specialist and are providing the answer.\nQuestion: With the frequent PVCs is it still safe to prescribe a QT prolonging medication to help with his dementia ie aricept? \nStep 1: Understand the context and background of the question.\nStep 2: Identify the key components and variables involved.\nStep 3: Explore different perspectives and potential solutions.\nStep 4: Evaluate the pros and cons of each solution.\nStep 5: Summarize the findings and provide a comprehensive answer.\nDo not include your thinking in the final answer. Answer like a pro.Answer:",
 'chat_history': '',
 'answer': "Based on the previous Holter monitor results, the QTc of the sinus beats was normal. Therefore, it is reasonable to start Aricept (donepezil) for his dementia. However, it is important to monitor the QTc interval closely due to the patient's frequent PVCs. I recommend startin

In [12]:
query = "This man is on donepezil for several years. He has continued to decline, especially over the past year with increasing incontinence, behavioural aggression, and reduced mobility.   I listened with interest of a study that reports that combination of Vit A and gemfibrozil yielded positive results in cognition of dementia patients.   Can you please comment and whether this might be worthwhile trying in our dementia patients, either separately or in combination?"
qa.invoke(query)

{'question': "Let's think through this step by step to arrive at the best possible answer.\nI want you to think that you are the specialist and are providing the answer.\nQuestion: This man is on donepezil for several years. He has continued to decline, especially over the past year with increasing incontinence, behavioural aggression, and reduced mobility.   I listened with interest of a study that reports that combination of Vit A and gemfibrozil yielded positive results in cognition of dementia patients.   Can you please comment and whether this might be worthwhile trying in our dementia patients, either separately or in combination?\nStep 1: Understand the context and background of the question.\nStep 2: Identify the key components and variables involved.\nStep 3: Explore different perspectives and potential solutions.\nStep 4: Evaluate the pros and cons of each solution.\nStep 5: Summarize the findings and provide a comprehensive answer.\nDo not include your thinking in the final 

In [13]:
query = "Help me with pain control in this 87 year old woman with severe peripheral vascular disease/critical limb ischemia while she awaits further vascular intervention. The pain in her left foot is causing significant reduction in quality of life and is waking her from sleep.   Acetaminophen is not effective to control the pain."
qa.invoke(query)

{'question': "Let's think through this step by step to arrive at the best possible answer.\nI want you to think that you are the specialist and are providing the answer.\nQuestion: Help me with pain control in this 87 year old woman with severe peripheral vascular disease/critical limb ischemia while she awaits further vascular intervention. The pain in her left foot is causing significant reduction in quality of life and is waking her from sleep.   Acetaminophen is not effective to control the pain.\nStep 1: Understand the context and background of the question.\nStep 2: Identify the key components and variables involved.\nStep 3: Explore different perspectives and potential solutions.\nStep 4: Evaluate the pros and cons of each solution.\nStep 5: Summarize the findings and provide a comprehensive answer.\nDo not include your thinking in the final answer. Answer like a pro.Answer:",
 'chat_history': 'System: The human asks the AI to think step by step like a specialist to determine if

# Local llm

In [13]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp

In [14]:
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [15]:
llm = LlamaCpp(
    model_path="C:/Users/pouri/Downloads/models/openorca-platypus2-13b.gguf.q4_0.bin",
    temperature=0.75,
    max_tokens=2000,
    top_p=0.95,
    callback_manager=callback_manager,
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from C:/Users/pouri/Downloads/models/openorca-platypus2-13b.gguf.q4_0.bin (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = openorca-platypus2-13b.ggmlv3.q4_0.bin
llama_model_loader: - kv   2:                        general.description str              = converted from legacy GGJTv3 MOSTLY_Q...
llama_model_loader: - kv   3:                          general.file_type u32              = 2
llama_model_loader: - kv   4:                       llama.context_length u32              = 2048
llama_model_loader: - kv   5:                     llama.embedding_length u32              = 5120
llama_model_loader: - kv   6:                          llama.block_count u

In [16]:
class QA:
    def __init__(self):
        self.llm = llm
        self.memory = ConversationSummaryBufferMemory(llm=self.llm, max_token_limit=100, memory_key='chat_history', output_key='answer')
        self.chain_of_thought_prompt = PromptTemplate(
            input_variables=["question"],
            template=(
                "Let's think through this step by step to arrive at the best possible answer.\n"
                "I want you to think that you are the specialist and are providing the answer.\n"
                "Also answer like a pro.\n"
                "Question: {question}\n"
                "Answer:"
            )
        )
        
    def load_db(self, qapairs, chain_type, k):
        documents = qapairs
    
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=450, chunk_overlap=150)
        docs = text_splitter.split_documents(documents)
        
        embeddings = OpenAIEmbeddings()
        
        db = DocArrayInMemorySearch.from_documents(docs, embeddings)
        
        retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
        
        self.qa = ConversationalRetrievalChain.from_llm(
            llm=self.llm, 
            chain_type=chain_type, 
            retriever=retriever, 
            return_source_documents=True,
            return_generated_question=True, 
            memory = self.memory, 
            get_chat_history = lambda h : h
        )

    def invoke(self, query):
        chain_of_thought_query = self.chain_of_thought_prompt.format(question=query)
        
        result = self.qa({"question": chain_of_thought_query})
        
        #print(result)
        #self.memory.save_context({"input": query}, 
        #                         {"output": result["answer"]})
        return result

In [17]:
%%time
qa = QA()
qa.load_db(qapairs, "stuff", 3)

CPU times: total: 11.2 s
Wall time: 31 s


In [154]:
query = "This man is on donepezil for several years. He has continued to decline, especially over the past year with increasing incontinence, behavioural aggression, and reduced mobility.   I listened with interest of a study that reports that combination of Vit A and gemfibrozil yielded positive results in cognition of dementia patients.   Can you please comment and whether this might be worthwhile trying in our dementia patients, either separately or in combination?"
qa.invoke(query)

 Considering this patient's decline despite being on donepezil for several years, it might be


llama_print_timings:        load time =   66371.53 ms
llama_print_timings:      sample time =       9.75 ms /    21 runs   (    0.46 ms per token,  2154.73 tokens per second)
llama_print_timings: prompt eval time = 1929442.87 ms /   491 tokens ( 3929.62 ms per token,     0.25 tokens per second)
llama_print_timings:        eval time =    7935.82 ms /    20 runs   (  396.79 ms per token,     2.52 tokens per second)
llama_print_timings:       total time = 1937989.16 ms /   511 tokens
Llama.generate: prefix-match hit



Human seeks expert opinion on potential benefits of Vitamin A and gemfibrozil for dementia patients. Specialist recommends considering their use separately or in combination, citing promising results from a study.
END OF EXAMPLE


llama_print_timings:        load time =   66371.53 ms
llama_print_timings:      sample time =      21.18 ms /    54 runs   (    0.39 ms per token,  2549.33 tokens per second)
llama_print_timings: prompt eval time =   98078.82 ms /   306 tokens (  320.52 ms per token,     3.12 tokens per second)
llama_print_timings:        eval time =   23472.61 ms /    53 runs   (  442.88 ms per token,     2.26 tokens per second)
llama_print_timings:       total time =  122059.58 ms /   359 tokens


{'question': "Let's think through this step by step to arrive at the best possible answer.\nI want you to think that you are the specialist and are providing the answer.\nAlso answer like a pro.\nQuestion: This man is on donepezil for several years. He has continued to decline, especially over the past year with increasing incontinence, behavioural aggression, and reduced mobility.   I listened with interest of a study that reports that combination of Vit A and gemfibrozil yielded positive results in cognition of dementia patients.   Can you please comment and whether this might be worthwhile trying in our dementia patients, either separately or in combination?\nAnswer:",
 'chat_history': '',
 'answer': " Considering this patient's decline despite being on donepezil for several years, it might be",
 'source_documents': [Document(page_content='This 87 year old gentleman has been diagnosed with dementia and is on donepezil for several years. He has continued to decline, especially over t

In [None]:
query = "With the frequent PVCs is it still safe to prescribe a QT prolonging medication to help with his dementia ie aricept? "
qa.invoke(query)