<a href="https://colab.research.google.com/github/ravidu-hevaganinge/AI_Scribe/blob/MedGPT/BART%2Bmedspacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers datasets medspacy
!pip install https://huggingface.co/kormilitzin/en_core_med7_trf/resolve/main/en_core_med7_trf-any-py3-none-any.whl

import spacy
import medspacy
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load the fine-tuned BART model and tokenizer
model_name = "lidiya/bart-large-xsum-samsum"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Load medspaCy with a specific model for medical NER
nlp_med = medspacy.load("en_core_med7_trf")
nlp_med.add_pipe("medspacy_target_matcher")

# Function to generate a summary using BART
def generate_summary(text):
    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(inputs, max_length=200, min_length=50, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text

# Function to extract medical entities using medspaCy
def extract_medical_entities(text):
    doc = nlp_med(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return entities

# Sample patient-clinician interaction transcript
transcript_text = """[doctor] hi , andrew . how are you ? [patient] hey , good to see you . [doctor] i'm doing well , i'm doing well . [patient] good . [doctor] so , i know the nurse told you about dax . i'd like to tell dax a little bit about you . [patient] sure . [doctor] uh , so , andrew is a 59-year-old male with a past medical history , significant for depression , type two diabetes , and hypertension who presents today with an upper respiratory infection . so , andrew , what's going on ? [patient] yeah . we were doing a bit of work out in the yard in the last week or so and i started to feel really tired , was short of breath . um , we- we're not wearing masks as much at the end of the summer and i think i caught my first cold and i think it just got worse . [doctor] okay . all right . um , now , have you had your covid vaccines ? [patient] yeah , both . [doctor] okay . all right . and , um , do you have any history of any seasonal allergies at all ? [patient] none whatsoever . [doctor] okay . all right . and when you say you're having some shortness of breath , did you feel short of breath walking around or at rest ? [patient] uh , usually , it was lifting or carrying something . we were doing some landscaping , so i was carrying some heavy bags of soil and i , i got really winded . it really surprised me . [doctor] okay . and are you coughing up anything ? [patient] not yet , but i feel like that's next . [doctor] okay . and fevers ? [patient] uh , i felt a little warm , but i , i just thought it was because i was exerting myself . [doctor] okay . all right . and any other symptoms like muscle aches , joint pain , fatigue ? [patient] my elbows hurt quite a bit and my knees were pretty tired . l- like i said , i really felt some tension around my knees , but , uh , i think that was a lot to do with , uh , lifting the bags . [doctor] okay . all right . um , so , you know , how about , how are you doing in terms of your other medical problems , like your depression ? how are you doing with that ? i know we've , you know , talked about not putting you on medication for it because you're on medication for other things . what's going on ? [patient] i- it's been kind of a crazy year and a half . i was a little concerned about that but , for the most part , i've been , been doing well with it . my , my wife got me into barre classes , to help me relax and i think it's working . [doctor] okay . all right , great . and , and in terms of your diabetes , how are you doing watching your , your diet and your sugar intake ? [patient] uh , i've been monitoring my sugar levels while i am going to work during the week . uh , not so , uh , if its saturday or sunday i usually don't remember . uh , the diet's been pretty good for the most part , except for , you know , some house parties and things like that . but , uh , been good for the most part . [doctor] okay and have they been elevated at all since this episode of your- [patient] no . [doctor] okay . and then , how , lastly , for your high blood pressure , have you been monitoring your blood pressures at home ? did you buy the cuff like i suggested ? [patient] uh , same thing . during the while i'm going to work, i'm regular about monitoring it, but if its a saturday or sunday, not so much . but , uh , it's , it's been under control . [doctor] but you're taking your medication ? [patient] yes . [doctor] okay . all right . well , you know , i know that , you know , you've endorsed , you know , the shortness of breath and some joint pain . um , how about any other symptoms ? nausea or vomiting ? diarrhea ? [patient] no . [doctor] anything like that ? [patient] no . [doctor] okay . all right . well , i wan na go ahead and do a quick physical exam , all right ? hey , dragon , show me the vital signs . so , your vital signs here in the office look quite good . [patient] mm-hmm . [doctor] you know , everything's looking normal , you do n't have a fever , which is really good . um , i'm just gon na go ahead and listen to your heart and your lungs and , kind of , i'll let you know what i hear , okay ? [patient] sure . [doctor] okay . so , on your physical exam , you know , your heart sounds nice and strong . your lungs , you do have scattered ronchi bilaterally on your lung exam . uh , it clears with cough . um , i do notice a little bit of , um , some edema of your lower extremities and you do have some pain to palpation of your elbows bilaterally . um , so , let's go ahead , i want to look at some of your results , okay ? [patient] mm-hmm . [doctor] hey , dragon . show me the chest x-ray . [doctor] so , i reviewed the results of your chest x-ray and everything looks good . there's no airspace disease , there's no pneumonia , so that's all very , very good , okay ? [patient] good . [doctor] hey , dragon . show me the diabetic labs . [doctor] and here , looking at your diabetic labs , you know , your hemoglobin a1c is a little elevated at eight . [patient] mm-hmm . [doctor] i'd like to see that a little bit better , around six or seven , if possible . [patient] mm-hmm . [doctor] um , so let's talk a little bit about my assessment and my plan for you . [patient] mm-hmm . [doctor] so , for your first problem , this upper respiratory infection , i believe you , you have a viral syndrome , okay ? we'll go ahead and we'll send a covid test , just to make sure that you do n't have covid . [patient] mm-hmm . [doctor] uh , but overall , i think that , um , you know , this will resolve in a couple of days . i do n't think you have covid , you do n't have any exposures , that type of thing . [patient] mm-hmm . [doctor] so , i think that this will improve . i'll give you some robitussin for your cough and i would encourage you take some ibuprofen , tylenol for any fever , okay ? [patient] you got it . [doctor] for your next problem , your depression , you know , it sounds like you're doing well with that , but again , i'm happy to start on a med- , a medical regiment or ... [patient] mm-hmm . [doctor] . refer you to psychotherapy , if you think that that would be helpful . [patient] mm-hmm . [doctor] would you like that ? [patient] u- u- um , maybe not necessarily . maybe in a , uh , few months we'll check on that . [doctor] okay . all right . [doctor] for your third problem , your type two diabetes , i want to go ahead and increase your metformin to 1000 milligrams , twice daily . [patient] mm-hmm . [doctor] and i'm gon na get an- another hemoglobin a1c in four months , okay ? [patient] okay , sure . [doctor] hey , dragon . order a hemoglobin a1c . [doctor] and lastly , for your high blood pressure , it looks like you're doing a really good job managing that . i want to go ahead and continue you on the , um , lisinopril , 20 milligrams a day . [patient] mm-hmm . [doctor] and i'm gon na go ahead and order a lipid panel , okay ? [patient] sure . [doctor] do you need a refill of the lisinopril ? [patient] actually , i do . [doctor] okay . hey , dragon . order lisinopril , 20 milligrams daily . [doctor] so , the nurse will be in , she'll help you , uh , make a follow-up appointment with me . i want to see you again in about four months . [patient] okay . [doctor] let me know if your symptoms worsen and we can talk more about it , okay ? [patient] you got it . [doctor] all right . hey , dragon . finalize the note ."""

# Generate a preliminary summary
summary_text = generate_summary(transcript_text)

# Extract medical entities from both the original transcript and the summary
entities_transcript = extract_medical_entities(transcript_text)
entities_summary = extract_medical_entities(summary_text)

# Combine and organize the information for HPI generation
# This step would involve custom logic to integrate the summary context with the extracted entities
def generate_hpi(summary_text, extracted_entities):
    """
    Generate a structured HPI section using the summary from BART and entities from medspaCy.

    Args:
        summary_text (str): Summary of the patient-clinician interaction generated by BART.
        extracted_entities (list): List of tuples containing extracted entities and their labels.

    Returns:
        str: Structured HPI section.
    """
    hpi_template = {
        "Onset": "",
        "Location": "",
        "Duration": "",
        "Character": "",
        "Aggravating factors": "",
        "Relieving factors": "",
        "Timing": "",
        "Severity": "",
    }

    # Iterate through extracted entities to fill the HPI template
    for entity, label in extracted_entities:
        if label.upper() in hpi_template:
            # For simplicity, we concatenate entities of the same category; consider more complex logic as needed
            hpi_template[label.upper()] += entity + "; "

    # Generate the HPI section text
    hpi_section = "HPI: \n"
    for key, value in hpi_template.items():
        if value:  # Only add the category if there's information
            hpi_section += f"{key}: {value}\n"

    # Append the BART summary for any additional context not covered by the extracted entities
    hpi_section += "\nAdditional context from summary: \n" + summary_text

    return hpi_section

# # Example usage:
# summary_text = "Patient reports experiencing sharp pain in the lower abdomen for the past week, rating the pain as 7 out of 10 in severity. Pain worsens when sitting for long periods or lifting heavy objects."
# extracted_entities = [
#     ("sharp pain", "Character"),
#     ("lower abdomen", "Location"),
#     ("past week", "Duration"),
#     ("7 out of 10", "Severity"),
#     ("sitting for long periods", "Aggravating factors"),
#     ("lifting heavy objects", "Aggravating factors"),
#     # Add more extracted entities as needed
# ]

hpi_section = generate_hpi(summary_text, extracted_entities)
print(hpi_section)


# For demonstration, printing extracted entities
print("Extracted Entities from Transcript:", entities_transcript)
print("Extracted Entities from Summary:", entities_summary)

# Note: Implement custom logic to merge these extracted details with the summary for a comprehensive HPI


In [1]:
def generate_hpi(summary_text, extracted_entities):
    """
    Generate a structured HPI section using the summary from BART and entities from medspaCy.

    Args:
        summary_text (str): Summary of the patient-clinician interaction generated by BART.
        extracted_entities (list): List of tuples containing extracted entities and their labels.

    Returns:
        str: Structured HPI section.
    """
    hpi_template = {
        "Onset": "",
        "Location": "",
        "Duration": "",
        "Character": "",
        "Aggravating factors": "",
        "Relieving factors": "",
        "Timing": "",
        "Severity": "",
    }

    # Iterate through extracted entities to fill the HPI template
    for entity, label in extracted_entities:
        if label.upper() in hpi_template:
            # For simplicity, we concatenate entities of the same category; consider more complex logic as needed
            hpi_template[label.upper()] += entity + "; "

    # Generate the HPI section text
    hpi_section = "HPI: \n"
    for key, value in hpi_template.items():
        if value:  # Only add the category if there's information
            hpi_section += f"{key}: {value}\n"

    # Append the BART summary for any additional context not covered by the extracted entities
    hpi_section += "\nAdditional context from summary: \n" + summary_text

    return hpi_section

# Example usage:
summary_text = "Patient reports experiencing sharp pain in the lower abdomen for the past week, rating the pain as 7 out of 10 in severity. Pain worsens when sitting for long periods or lifting heavy objects."
extracted_entities = [
    ("sharp pain", "Character"),
    ("lower abdomen", "Location"),
    ("past week", "Duration"),
    ("7 out of 10", "Severity"),
    ("sitting for long periods", "Aggravating factors"),
    ("lifting heavy objects", "Aggravating factors"),
    # Add more extracted entities as needed
]

hpi_section = generate_hpi(summary_text, extracted_entities)
print(hpi_section)


HPI: 

Additional context from summary: 
Patient reports experiencing sharp pain in the lower abdomen for the past week, rating the pain as 7 out of 10 in severity. Pain worsens when sitting for long periods or lifting heavy objects.


In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Load Bio-Clinical BERT: https://huggingface.co/emilyalsentzer/Bio_ClinicalBERT
tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
model = AutoModelForTokenClassification.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

# Function to enhance summary with NER using Bio-Clinical BERT
def enhance_summary_with_ner(bart_summary):
    # Use BERT NER to identify entities in the BART summary
    ner_results = ner_pipeline(bart_summary)
    # Process NER results to enhance the summary
    enhanced_summary = bart_summary
    for entity in ner_results:
        entity_text = entity['word']
        entity_type = entity['entity_group']
        # Example of simple enhancement: appending entity type to each entity mention
        # For a more sophisticated approach, you could adjust the summary structure based on entity types
        enhanced_summary = enhanced_summary.replace(entity_text, f"{entity_text} ({entity_type})")
    return enhanced_summary

# Load medspaCy annotations from CSV
def load_medspacy_annotations(csv_file_path):
    df = pd.read_csv(csv_file_path)
    # Assuming the CSV has columns 'text' and 'label'
    annotations = df.to_dict('records')
    return annotations

# Example usage
csv_file_path = "path_to_your_medspacy_annotations.csv"
bart_summary = "Your summary from the fine-tuned BART model goes here."

# Load medspaCy annotations (optional, depending on how you want to use them)
medspacy_annotations = load_medspacy_annotations(csv_file_path)

# Enhance the BART summary with NER from Bio-Clinical BERT
enhanced_summary = enhance_summary_with_ner(bart_summary)

print("Enhanced Summary:\n", enhanced_summary)
