In [1]:
from glob import glob
import re

import ipywidgets as widgets
from IPython.display import display, HTML, Javascript, clear_output

from langchain.document_loaders import DirectoryLoader
from langchain.schema import Document
from langchain.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import LlamaForCausalLM, LlamaTokenizer, GenerationConfig
from peft import PeftModel


Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 7.5
CUDA SETUP: Detected CUDA version 110
CUDA SETUP: Loading binary /opt/conda/envs/py310/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda110.so...


  warn(msg)


In [5]:
# Identifiers of the pretrained artefacts, named once so the tokenizer and the
# base model are always loaded from the same checkpoint.
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
BASE_MODEL = "eachadea/vicuna-13b-1.1"
ADAPTER = "kmnis/DocScribe"

# Sentence-transformer embeddings, used later to index transcript chunks.
print("Loading transformer embeddings...")
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

print("Loading Vicuna tokenizer")
tokenizer = LlamaTokenizer.from_pretrained(BASE_MODEL)

# The base model is loaded in 8-bit with automatic device placement and then
# wrapped with the PEFT adapter; `model` is the wrapped model used for generation.
print("Loading Vicuna model")
base_model = LlamaForCausalLM.from_pretrained(BASE_MODEL, load_in_8bit=True, device_map="auto")
model = PeftModel.from_pretrained(base_model, ADAPTER)

# clear_output()

In [2]:
def run_all_cells_below(ev):
    """Button callback that executes every cell after the currently selected one.

    The work happens in the browser: a JavaScript snippet calling
    ``IPython.notebook.execute_cell_range`` is emitted as cell output.

    Args:
        ev: Value supplied by the widget's ``on_click`` hook; not used.
    """
    # NOTE(review): depends on the front end exposing a global `IPython.notebook`
    # object -- presumably classic Notebook only; confirm before using in JupyterLab.
    first_cell = 'IPython.notebook.get_selected_index()+1'
    last_cell = 'IPython.notebook.ncells()'
    script = 'IPython.notebook.execute_cell_range(' + first_cell + ', ' + last_cell + ')'
    display(Javascript(script))

In [3]:
def custom_splitter(documents):
    """Split each transcript document into one chunk per section heading.

    A heading is a comma-introduced run of capital letters/spaces ending in a
    colon (e.g. ``",PAST MEDICAL HISTORY:, "``). Every heading starts a new
    chunk whose text is ``"<HEADING>: <body>"``, where the heading is reduced
    to its letters and spaces. Text before the first heading becomes its own
    leading chunk.

    Args:
        documents: Iterable of objects exposing ``page_content`` (str) and
            ``metadata``; each produced chunk reuses its source's ``metadata``.

    Returns:
        list: ``Document`` chunks in reading order across all inputs. A
        document without any heading is returned as a single chunk instead of
        being dropped, and an empty leading chunk is not emitted.
    """
    heading_re = re.compile("[,]*[A-Z ]*[,]+[A-Z ]+:[ ]*[,]*[ ]*")
    docs = []
    for d in documents:
        text = d.page_content
        headings = list(heading_re.finditer(text))

        # Everything before the first heading; the whole text when there is none.
        preamble_end = headings[0].start() if headings else len(text)
        preamble = text[:preamble_end].strip()
        if preamble:
            docs.append(Document(page_content=preamble, metadata=d.metadata))

        for idx, heading in enumerate(headings):
            # A section body runs up to the next heading, or to the end of the text.
            body_end = headings[idx + 1].start() if idx + 1 < len(headings) else len(text)
            title = re.sub("[^a-zA-Z ]", "", heading.group()).strip()
            docs.append(Document(page_content=title + ": " + text[heading.end():body_end],
                                 metadata=d.metadata))
    return docs

In [5]:
# Free-text box holding the question to ask about the transcript.
# `options` is not a Textarea trait and `patient_names` is not defined by any
# cell in this notebook, so that argument is not passed here (it raised
# NameError on a fresh kernel).
user_query = widgets.Textarea(
    value='Describe the medical history of the patient',
    placeholder='Ask a question',
    description='',
    layout=widgets.Layout(width='50%', height='200px'),
    rows=5,
)

# Clicking this button runs every cell below this one (see `run_all_cells_below`).
analyze_btn = widgets.Button(
    description='Analyze Transcript',
    disabled=False,
    button_style='info',
    icon='submit'
)

# Upload is rendered but disabled.
upload_btn = widgets.Button(
    description='Upload Transcript',
    disabled=True,
    button_style='info',
    icon='submit'
)

h1 = widgets.HTML("<h2><center>Patient Medical Transcript Analyzer</center></h2>")
h2 = widgets.HTML("<i><center>Upload a pdf transcript to analyze.</center></i>")

analyze_btn.on_click(run_all_cells_below)

# NOTE(review): the caption asks for the patient's name, but no name widget is
# added to the form; the query cell below reads `user_name` -- confirm where
# that widget is meant to be created.
h3 = widgets.HTML("<b>Please enter the patient's name and your query</b>")
display(widgets.VBox([h1, h2, h3, user_query, widgets.HBox([upload_btn, analyze_btn])]))

VBox(children=(HTML(value='<h2><center>Patient Medical Transcript Analyzer</center></h2>'), HTML(value="<i><ce…

In [9]:
# Validate the form inputs, then answer the query from the selected patient's
# transcripts: split them into sections, index the sections in an in-memory
# Qdrant collection, retrieve the 3 closest sections and prompt the model.
# NOTE(review): `user_name` and `patient_names` are not defined in any cell of
# this notebook -- presumably they came from a removed cell; confirm before
# relying on Restart & Run All.
if user_name.value and user_name.value.capitalize() not in patient_names:
    clear_output()
    # Fixed: this branch previously called the non-existent `str.vapitalize()`.
    display(HTML(f"<b><p style='color: red'>No records found for the patient {user_name.value.capitalize()}</p></b>"))
elif (not user_name.value) and (not user_query.value):
    clear_output()
    display(HTML("<b><p style='color: red'>Please select a patient and enter a query</p></b>"))
elif not user_query.value:
    clear_output()
    display(HTML("<b><p style='color: red'>Please enter a query</p></b>"))
elif not user_name.value:
    # A query without a patient used to fall through every branch silently.
    clear_output()
    display(HTML("<b><p style='color: red'>Please select a patient</p></b>"))
else:
    loader = DirectoryLoader('transcript-docs/', glob=f"{user_name.value.capitalize()}/*.txt")
    documents = loader.load()
    docs = custom_splitter(documents)

    qdrant = Qdrant.from_documents(
        docs, embeddings,
        location=":memory:",  # Local mode with in-memory storage only
        collection_name=user_name.value,
    )

    # Each search result is a (document, score) pair; only the document is used.
    search_results = qdrant.similarity_search_with_score(user_query.value, k=3)
    relevant_prompts = " ".join([r[0].page_content for r in search_results])
    # Source paths of the retrieved sections; kept for inspection, not used in the prompt.
    relevant_docs = [r[0].metadata["source"] for r in search_results]

    prompt = f"""### Instruction:
Below is an abstract from the medical transcription of a patient. Write a response to answer the question. Answer only from the given abstract. If you can't find the answer in the abstract, say answer not found.

### Abstract:
{relevant_prompts}

### Question:
{user_query.value}

### Response:
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()

    generation_config = GenerationConfig(temperature=0.6, top_p=0.95, repetition_penalty=1.15)

    generation_output = model.generate(input_ids=input_ids, generation_config=generation_config,
                                       return_dict_in_generate=True, output_scores=False, max_new_tokens=100)

    # The decoded sequence contains the prompt followed by the generated answer.
    for s in generation_output.sequences:
        print(tokenizer.decode(s))
        print("---------------------------------------------------------\n")

<s>### Instruction:
Below is an abstract from the medical transcription of a patient. Write a response to answer the question. Answer only from the given abstract. If you can't find the answer in the abstract, say answer not found.

### Abstract:
PAST MEDICAL HISTORY: He has difficulty climbing stairs, difficulty with airline seats, tying shoes, used to public seating, difficulty walking, high cholesterol, and high blood pressure.  He has asthma and difficulty walking two blocks or going eight to ten steps.  He has sleep apnea and snoring.  He is a diabetic, on medication.  He has joint pain, knee pain, back pain, foot and ankle pain, leg and foot swelling.  He has hemorrhoids. CULTURES TAKEN: None.,PATIENT'S CONDITION: , Stable. REVIEW OF SYSTEMS: Negative for head, neck, heart, lungs, GI, GU, orthopedic, or skin.  He also is positive for gout.  He denies chest pain, heart attack, coronary artery disease, congestive heart failure, arrhythmia, atrial fibrillation, pacemaker, pulmonary 

### Transcript Analyzer

In [12]:
# Read the sample transcript analysed by the cells below. The context manager
# closes the file handle as soon as the text has been read.
with open("transcript-docs/Shannon/Visit-1.txt") as transcript_file:
    transcript = transcript_file.read()

In [7]:
# Report sections to look for, in the order they are summarised.
sections = [
    "Patient Demographic or Personal Information",
    "Chief Complaint",
    "History of Present Illness",
    "Medical History",
    "Review of Systems (ROS)",
    "Physical Examination",
    "Diagnostic Tests",
    "Assessment and Plan",
    "Discharge Summary",
    "Medication Reconciliation",
]

# Lower-case fragments that mark an answer as "nothing to report".
missing_markers = ("not found", "not mention", "not include")

for section in sections:
    prompt = f'''### Instruction:
Below is a transcribed medical report of a patient. Write a response to answer the question. Answer only from the given abstract.

### Report:
{transcript}

### Question:
Find the part of this transcript that talks about {section} and summarize that part in two or less lines. If it's not mentioned anywhere, say Not Found.

### Response:
'''
    input_ids = tokenizer(prompt, return_tensors="pt")["input_ids"].cuda()

    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(temperature=0.6, top_p=0.95, repetition_penalty=1.15),
        return_dict_in_generate=True,
        output_scores=False,
        max_new_tokens=100,
    )

    for sequence in generation_output.sequences:
        decoded = tokenizer.decode(sequence)
        # Keep what follows the response marker, up to the end-of-sequence token.
        answer = decoded.split("### Response:")[1].split("</s>")[0].strip()
        lowered = answer.lower()
        if any(marker in lowered for marker in missing_markers):
            continue
        display(HTML(f"<b>{section}:</b>"))
        print(answer + "\n")

Patient Age: 42
Patient Height: 5'9"
Patient Current Weight: 344 pounds



The chief complaint is not explicitly stated in the provided transcript.



The patient, ABC, is seeking consultation for laparoscopic gastric bypass due to being overweight for ten years and experiencing difficulties such as getting tired easily, not exercising, and having trouble losing weight.



The patient has a history of difficulty climbing stairs, difficulty with airline seats, tying shoes, using to public seating, difficulty walking, high cholesterol, high blood pressure, asthma, difficulty walking two blocks or going eight to ten steps, sleep apnea, snoring, joint pain, knee pain, back pain, foot and ankle pain, leg and foot swelling, hemorrhoids, and obesity.



The patient's physical examination revealed positive wheezing bilaterally, a regular heart rhythm and rate, and soft and supple neck. The abdomen was soft, and there was 1+ pitting edema.



Assessment and Plan: The patient has been recommended for laparoscopic gastric bypass surgery after undergoing a consultation. A detailed explanation of the risks and potential complications associated with the procedure was provided to the patient, and they expressed their willingness to proceed with the workup and evaluation process. They will require several tests and evaluations, including an upper endoscopy, a sleep study, and H. pylori testing. Once all necessary tests



### Treatment Plan

In [31]:
def prompt_model(prompt, max_new_tokens=500):
    """Run ``prompt`` through the model and return the cleaned answer text.

    Only the newly generated tokens are decoded, so the answer is extracted
    correctly even if the prompt lacks the ``### Response:`` marker or the
    embedded report text happens to contain it.

    Args:
        prompt: Full prompt text sent to the model.
        max_new_tokens: Upper bound on generated tokens (default 500, the
            value previously hard-coded).

    Returns:
        The first generated answer, stripped and cut at the end-of-sequence
        token, or ``None`` when every answer contains one of the
        "nothing found" phrases ("not found", "not mention", "not include").
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    # Returned sequences start with the prompt tokens; remember how many to skip.
    prompt_len = input_ids.shape[1]

    # NOTE(review): temperature/top_p presumably only take effect when sampling
    # is enabled (do_sample=True) -- confirm whether greedy decoding is intended.
    generation_config = GenerationConfig(temperature=0.6, top_p=0.95, repetition_penalty=1.15)

    generation_output = model.generate(input_ids=input_ids, generation_config=generation_config,
                                       return_dict_in_generate=True, output_scores=False,
                                       max_new_tokens=max_new_tokens)

    missing_markers = ("not found", "not mention", "not include")
    for seq in generation_output.sequences:
        generated = tokenizer.decode(seq[prompt_len:])
        # Stop at a repeated response marker (as the previous marker-based split
        # did) and at the end-of-sequence token.
        answer = generated.split("### Response:")[0].split("</s>")[0].strip()
        lowered = answer.lower()
        if not any(marker in lowered for marker in missing_markers):
            return answer
    return None

In [23]:
# Step 1: extract the symptoms from the transcript.
prompt = f'''### Instruction:
Below is a transcribed medical report of a patient. Answer the below question truthfully and only from the given abstract.

### Report:
"""{transcript}"""

### Question:
Extract all the medical symptoms from this report. If it's not mentioned anywhere, say Not Found.

### Response:
'''
symp = prompt_model(prompt)

# Step 2: extract the medical history (`hist` is reused by later cells).
prompt = f'''### Instruction:
Below is a transcribed medical report of a patient. Answer the below question truthfully and only from the given abstract.

### Report:
"""{transcript}"""

### Question:
Extract the medical history of the patient from this report. If it's not mentioned anywhere, say Not Found.

### Response:
'''
hist = prompt_model(prompt)

# Step 3: ask for a plan based on the two extracted summaries.
prompt = f'''### Instruction:
Below are symptoms and medical history for a patient. Answer the below question truthfully and only from the given abstract.

### Symptoms:
"""{symp}"""

### Medical History:
"""{hist}"""

### Question:
Create a diagnosis or treatment plan for the patient based on their medical history and symptoms.

### Response:
'''
treat = prompt_model(prompt)

# Show each result under its own bold heading.
for heading, text in (
    ("----- Extracted Symptoms -----", symp),
    ("----- Medical History -----", hist),
    ("----- Diagnosis or Treatment Plan -----", treat),
):
    display(HTML(f"<b>{heading}</b>"))
    print(text, "\n")

HISTORY OF PRESENT ILLNESS:

* Overweight for ten years (since age 33)
* Highest weight: 358 pounds
* Lowest weight: 260 pounds
* BMI: 51
* Difficulty climbing stairs, difficulty with airline seats, tying shoes, etc.
* Joint pain, knee pain, back pain, foot and ankle pain, leg and foot swelling, hemorrhoids
* High cholesterol, high blood pressure, asthma, difficulty breathing, sleep apnea, snoring
* Diabetic, on medication
* Current smoker (stopped two weeks ago), occasional alcohol consumption (10-12 drinks per week)

PAST MEDICAL HISTORY:

* Orthopedic surgery

SOCIAL H 



The patient's medical history includes difficulty climbing stairs, difficulty with airline seats, tying shoes, using to public seating, difficulty walking, high cholesterol, high blood pressure, asthma, difficulty walking two blocks or going eight to ten steps, having sleep apnea and snoring, being a diabetic, having joint pain, knee pain, back pain, foot and ankle pain, leg and foot swelling, hemorrhoids, obesity, heart disease, and diabetes in family history. 



Based on the provided information, it appears that the patient is experiencing multiple health issues related to their obesity, including joint pain, difficulty climbing stairs and walking, sleep apnea, high blood pressure, high cholesterol, and diabetes. A comprehensive treatment plan would likely involve lifestyle changes such as dietary modifications and increased physical activity, as well as management of any underlying conditions through medication and other treatments as appropriate. Additionally, addressing the patient's smoking habit and occasional alcohol consumption may also be important in improving their overall health and reducing the risk of further complications. 



### Adverse Reactions

In [24]:
# Step 1: extract the current medication from the transcript.
prompt = f'''### Instruction:
Below is a transcribed medical report of a patient. Answer the below question truthfully and only from the given abstract.

### Report:
"""{transcript}"""

### Question:
Extract the patient's current medication from this report. If it's not mentioned anywhere, say Not Found.

### Response:
'''
med = prompt_model(prompt)

# Step 2: combine it with `hist` (produced by the treatment-plan cell) and ask
# for possible interactions or adverse reactions.
prompt = f'''### Instruction:
Below are current medication and medical history for a patient. Answer the below question truthfully and only from the given abstract.

### Current Medication:
"""{med}"""

### Medical History:
"""{hist}"""

### Question:
Identify potential drug interactions or adverse reactions based on their medical history and current medications.

### Response:
'''
react = prompt_model(prompt)

# Show each result under its own bold heading.
for heading, text in (
    ("----- Current Medications -----", med),
    ("----- Medical History -----", hist),
    ("----- Possible Adverse Reactions -----", react),
):
    display(HTML(f"<b>{heading}</b>"))
    print(text, "\n")

Current Medications: Diovan (name unknown), Crestor, and Tricor 



The patient's medical history includes difficulty climbing stairs, difficulty with airline seats, tying shoes, using to public seating, difficulty walking, high cholesterol, high blood pressure, asthma, difficulty walking two blocks or going eight to ten steps, having sleep apnea and snoring, being a diabetic, having joint pain, knee pain, back pain, foot and ankle pain, leg and foot swelling, hemorrhoids, obesity, heart disease, and diabetes in family history. 



Based on the provided medical history and current medications, there are several potential drug interactions or adverse reactions that should be considered. These include:

1. Drug Interaction between Diovan and Crestor - Both of these medications are used to treat hypertension and may interact if taken together. It is important to monitor blood pressure levels closely if both drugs are prescribed.
2. Adverse Reaction to Diovan - The patient's difficulty climbing stairs and difficulty with airline seats could be related to side effects of the medication Diovan. It is important to discuss this with the patient's healthcare provider.
3. Adverse Reaction to Crestor - The patient's difficulty with airline seats could also be related to side effects of Crestor. It is important to discuss this with the patient's healthcare provider.
4. Potential Adverse Reaction to Tricor - The 



### Risk of Developing Possible Future Medical Condition

In [30]:
# Step 1: extract the patient's demographic information from the transcript.
prompt = f"""### Instruction:
Below is a transcribed medical report of a patient. Answer the below question truthfully and only from the given abstract.

### Report:
\"\"\"{transcript}\"\"\"

### Question:
Extract the patient's demographic information such as name, age, ethnicity, weight, etc from this report. If it's not mentioned anywhere, say Not Found.

### Response:
"""
demo = prompt_model(prompt)

# Step 2: combine it with `hist` (produced by the treatment-plan cell) and ask
# for a risk prediction. The first section header now names what it contains;
# it used to read "Current Medication" although it holds the demographic data.
prompt = f"""### Instruction:
Below are the demographic information and medical history for a patient. Answer the below question truthfully and only from the given abstract.

### Demographic Information:
\"\"\"{demo}\"\"\"

### Medical History:
\"\"\"{hist}\"\"\"

### Question:
Imagine you're a doctor. Predict the likelihood of the patient developing a particular medical condition based on their medical history and demographic information.

### Response:
"""
cond = prompt_model(prompt)

display(HTML("<b>----- Demographic Information -----</b>"))
print(demo, "\n")

display(HTML("<b>----- Medical History -----</b>"))
print(hist, "\n")

display(HTML("<b>----- Possible Future Medical Conditions -----</b>"))
print(cond, "\n")

Name: Shannon
Age: 42
BMI: 51 (344 pounds)
Height: 5'9" 



The patient's medical history includes difficulty climbing stairs, difficulty with airline seats, tying shoes, using to public seating, difficulty walking, high cholesterol, high blood pressure, asthma, difficulty walking two blocks or going eight to ten steps, having sleep apnea and snoring, being a diabetic, having joint pain, knee pain, back pain, foot and ankle pain, leg and foot swelling, hemorrhoids, obesity, heart disease, and diabetes in family history. 



As a doctor, I would say that this patient is at a very high risk of developing several medical conditions due to their medical history and demographic information. The patient has a BMI of 51 which puts them in the morbidly obese category, increasing their risk of developing conditions such as heart disease, stroke, type 2 diabetes, and certain types of cancer. Additionally, the patient's difficulty with physical activities such as climbing stairs, walking, and standing for long periods increases their risk of developing musculoskeletal disorders such as osteoarthritis. Furthermore, the patient's history of high cholesterol, high blood pressure, and sleep apnea also increase their risk of developing cardiovascular diseases and other chronic conditions. It is important that the patient works closely with their healthcare provider to manage these risks through lifestyle modifications and appropriate medications. 



### Issues in the report and ways to improve it

In [34]:
# Ask the model to review the transcript for errors and possible improvements.
prompt = (
    "### Instruction:\n"
    "Below is a transcribed medical report of a patient. "
    "Answer the below question truthfully and only from the given abstract.\n"
    "\n"
    "### Report:\n"
    f'"""{transcript}"""\n'
    "\n"
    "### Question:\n"
    "Analyze this report and identify any errors, inconsistencies, or other issues. "
    "Also, suggest ways to improve them\n"
    "\n"
    "### Response:\n"
)
err = prompt_model(prompt)

display(HTML("<b>----- Possible Errors and suggestions -----</b>"))
print(err)

There are several errors, inconsistencies, and issues in this report that can affect the accuracy and quality of care provided to the patient. Some of the identified issues are:

1. Missing information: The report is missing some basic information such as the date of consultation, follow-up plans, and specific treatment recommendations. These details are essential for effective communication between healthcare providers and patients.
2. Incomplete medical history: Although the report provides a detailed medical history, there are some sections that seem incomplete. For example, the social history section only mentions the patient’s current marital status and smoking habits but does not cover other important factors such as occupation, leisure activities, and family support.
3. Conflicting information: There are some conflicting statements in the report. For instance, the patient claims that he has difficulty climbing stairs but later states that he can walk up to eight blocks without r

### Automatic Medical Coding

In [37]:
# Ask the model to propose ICD-10 codes for the transcript.
prompt = (
    "### Instruction:\n"
    "Below is a transcribed medical report of a patient. "
    "Answer the below question truthfully and only from the given abstract.\n"
    "\n"
    "### Report:\n"
    f'"""{transcript}"""\n'
    "\n"
    "### Question:\n"
    "Analyze this report and create ICD10 codes for it.\n"
    "\n"
    "### Response:\n"
)
code = prompt_model(prompt)

display(HTML("<b>----- Medical Coding -----</b>"))
print(code)

The given report describes a patient seeking consultation for laparoscopic gastric bypass surgery. The patient is obese with a BMI of 51, has a history of unsuccessful weight loss attempts, and suffers from various medical conditions such as high cholesterol, high blood pressure, diabetes, joint pain, and sleep apnea. The patient reports a history of smoking and alcohol consumption, although he has recently quit smoking and reduced his alcohol intake.

Based on the information provided in the report, the following ICD-10 codes could be assigned:

* F33.2 (Overweight)
* F33.9 (Obese)
* N16.8 (Unspecified asthenia)
* T81.9 (Other specified symptoms, not elsewhere classified)
* R65.10 (Polycythemia)
* D35.2 (High blood pressure, uncomplicated)
* D40.2 (Sleep apnea syndrome)
* K65.0 (Chronic bronchitis)
* J45.9 (Chronic obstructive pulmonary disease, unspecified)
* D35.6 (Hypertensive heart disease, left ventricular dysfunction)
* D45.9 (Coronary atherosclerosis of native coronary artery w

<b>--- TODO ---</b>

In [38]:
# Ask the model to draft an invoice and claims from the transcript.
prompt = (
    "### Instruction:\n"
    "Below is a transcribed medical report of a patient. "
    "Answer the below question truthfully and only from the given abstract.\n"
    "\n"
    "### Report:\n"
    f'"""{transcript}"""\n'
    "\n"
    "### Question:\n"
    "Generate an invoice and claims based on the data available in this report.\n"
    "\n"
    "### Response:\n"
)
inv = prompt_model(prompt)

display(HTML("<b>----- Invoices and Claims generation -----</b>"))
print(inv)

I am sorry, but generating an invoice and claim requires specific information about services provided and costs associated with them, which is not available in this transcript. This report provides general information about the patient's medical history, current condition, and planned treatment options, but it does not contain enough details to generate an invoice or claim.
