In [1]:
# Colab-specific setup: switch on Colab's custom widget manager before any
# widgets are created.
# NOTE(review): presumably needed so ipywidgets output renders in Colab, and
# this import is presumably unavailable outside Colab - confirm before running
# the notebook elsewhere.
from google.colab import output
output.enable_custom_widget_manager()

In [2]:
import re
import unicodedata
from datetime import datetime

import ipywidgets as widgets
import spacy
from IPython.display import display, clear_output
from transformers import pipeline

# Load spaCy's small English pipeline once at start-up; extract_entities()
# runs it over each client report and reads the named entities it produces.
nlp = spacy.load("en_core_web_sm")

# Hugging Face summarization pipeline that summarize_text() calls.
# NOTE(review): in the recorded run the "Symptoms Summary" is a verbatim
# prefix of the input rather than a condensed summary. That suggests this
# checkpoint may not be tuned for summarization - consider a
# summarization-specific checkpoint and confirm the output quality.
summarizer = pipeline("summarization", model="facebook/bart-base")



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu


In [3]:
# Lower-case stems that mark a sentence as symptom-related.
keywords = ["pain", "hurt", "injury", "swelling", "bleeding",
            "knee", "arm", "back", "eye", "heart", "headache", "dizziness"]

# Titles whose trailing period must not be mistaken for the end of a sentence.
_TITLE_ABBREVIATIONS = ("Dr", "Mr", "Mrs", "Ms", "Prof")

# A sentence boundary is whitespace that follows ., ! or ? - unless the period
# belongs to one of the title abbreviations above (e.g. "Dr. Rao").
_SENTENCE_BOUNDARY = re.compile(
    "".join(rf"(?<!\b{title}\.)" for title in _TITLE_ABBREVIATIONS)
    + r"(?<=[.!?])\s+"
)


def filter_symptom_sentences(text):
    """Return only the sentences of ``text`` that mention a symptom keyword.

    Sentences are split on whitespace following ``.``, ``!`` or ``?`` so that
    abbreviations such as "Dr." and decimals such as "38.5" stay intact.
    A sentence is kept when one of its words *starts with* an entry of the
    module-level ``keywords`` list (case-insensitive). Anchoring at the start
    of a word keeps inflections and compounds ("hurts", "painkillers",
    "backache") while rejecting accidental mid-word hits such as "arm" in
    "pharmacy" or "back" in "feedback".

    Args:
        text: Free-form client report. ``None`` or an empty string is allowed.

    Returns:
        The matching sentences, each stripped of surrounding whitespace and
        keeping its own terminal punctuation, joined by single spaces.
        An empty string when nothing matches.
    """
    if not text or not keywords:
        return ""

    # Built per call so that later edits to ``keywords`` are honoured.
    keyword_pattern = re.compile(
        r"\b(?:" + "|".join(re.escape(k) for k in keywords) + ")",
        re.IGNORECASE,
    )

    sentences = (s.strip() for s in _SENTENCE_BOUNDARY.split(text))
    return " ".join(s for s in sentences if s and keyword_pattern.search(s))


In [14]:
def summarize_text(text):
    """Summarize ``text`` with the module-level ``summarizer`` pipeline.

    The input is Unicode-normalized (NFKC) and its whitespace collapsed
    instead of being stripped to ASCII, so accented letters survive and
    non-breaking spaces become ordinary spaces rather than vanishing and
    fusing the neighbouring words together.

    Args:
        text: Text to summarize. ``None``, empty or whitespace-only input is
            accepted.

    Returns:
        The generated summary string, or ``""`` when there is nothing to
        summarize (the model is not invoked in that case).
    """
    normalized = unicodedata.normalize("NFKC", text or "")
    clean_text = " ".join(normalized.split())
    if not clean_text:
        # Nothing to summarize; skip the model call rather than let it
        # generate text from an empty prompt.
        return ""

    # truncation=True keeps over-long reports within the model's input limit
    # instead of failing inside the pipeline.
    summary = summarizer(
        clean_text,
        max_new_tokens=40,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']



In [13]:
# Tokens (lower-cased, trailing period removed) that introduce a doctor's name.
_DOCTOR_TITLES = {"dr", "doctor"}


def _has_doctor_title(doc, ent):
    """Return True when ``ent`` is introduced by "Dr", "Dr." or "Doctor"."""
    # The title may have been absorbed into the entity span itself...
    if ent.text.lower().startswith(("dr ", "dr.", "doctor ")):
        return True
    # ...or sit in the token immediately before it.
    if ent.start == 0:
        return False
    return doc[ent.start - 1].text.lower().rstrip(".") in _DOCTOR_TITLES


def extract_entities(text):
    """Pull dates, doctors, hospitals and body parts out of a client report.

    Uses the module-level spaCy pipeline ``nlp`` for named entities and
    simple pattern matching for hospital names and body parts.

    Args:
        text: The full client report.

    Returns:
        A dict with the keys ``"Date"``, ``"Doctor"``, ``"Hospital"`` and
        ``"BodyPart"``, each mapping to a list of strings (possibly empty).
    """
    doc = nlp(text)
    entities = {"Date": [], "Doctor": [], "Hospital": [], "BodyPart": []}
    lowered = text.lower()

    client_skipped = False
    for ent in doc.ents:
        if ent.label_ == "DATE":
            # Ages such as "62-year-old" are tagged DATE but are not dates.
            if "year-old" not in ent.text:
                entities["Date"].append(ent.text)
        elif ent.label_ == "PERSON":
            if _has_doctor_title(doc, ent):
                # An explicit title is decisive, wherever the name appears.
                entities["Doctor"].append(ent.text)
            elif not client_skipped:
                # The first untitled PERSON is assumed to be the client.
                client_skipped = True
            else:
                entities["Doctor"].append(ent.text)
        elif ent.label_ == "ORG":
            entities["Hospital"].append(ent.text)

    # Fallback when the text mentions a hospital but NER found no ORG:
    # look for a capitalized "<Name> Hospital" pattern.
    if "hospital" in lowered and not entities["Hospital"]:
        match = re.search(r'\b[A-Z][a-zA-Z]+\sHospital\b', text)
        entities["Hospital"].append(match.group() if match else "Unknown Hospital")

    # Detect body parts as whole words (optionally plural) so that "nose"
    # does not fire on "diagnosed" or "arm" on "pharmacy".
    body_parts = ["knee", "heart", "eye", "nose", "back", "arm", "head"]
    for bp in body_parts:
        if re.search(rf"\b{re.escape(bp)}s?\b", lowered):
            entities["BodyPart"].append(bp)

    return entities



In [6]:
def intake_assistant(client_input):
    """Turn a raw client report into a structured case record plus a risk flag.

    The symptom-related sentences are summarized, entities are extracted from
    the full report, and the combined record is passed to ``check_risk``.

    NOTE(review): ``check_risk`` is not defined in any of the visible cells;
    it must already exist in the kernel for this function to run.

    Args:
        client_input: The client's free-text report.

    Returns:
        A ``(case_data, risk_flag)`` tuple, where ``case_data`` is a dict of
        case fields and ``risk_flag`` is whatever ``check_risk`` returns.
    """
    symptoms_summary = summarize_text(filter_symptom_sentences(client_input))
    field = extract_entities(client_input).get

    case_data = {
        "Cause": "Not auto-detected yet",
        "Affected Area": field("BodyPart"),
        "Duration": "Need manual entry",
        "Symptoms Summary": symptoms_summary,
        "Doctor": field("Doctor"),
        "Hospital": field("Hospital"),
        "Date": field("Date"),
    }

    return case_data, check_risk(case_data)


In [18]:
# Prompt for the client report (read as a single line from standard input).
client_text = input("Paste the client report here and press Enter:\n")

# Run the whole intake pipeline on the pasted report.
case_data, risk_flag = intake_assistant(client_text)

# Print one "field: value" line per extracted field, then the risk verdict.
print("Extracted Case Data:")
for key, value in case_data.items():
    print(key, value, sep=": ")

print("\nRisk Flag:", risk_flag)



Paste the client report here and press Enter:
My name is Aruna and I am a 62-year-old woman from Delhi.  On 12 August 2025, I slipped in the bathroom and hurt my right knee.  Since then, I have been feeling pain and swelling.  I tried using a knee brace and painkillers, but it still hurts when I walk.  I saw Dr. Rao at City Hospital in July.  I do not remember the exact date I started feeling pain. 


Your max_length is set to 128, but your input_length is only 65. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=32)


Extracted Case Data:
Cause: Not auto-detected yet
Affected Area: ['knee']
Duration: Need manual entry
Symptoms Summary:   On 12 August 2025, I slipped in the bathroom and hurt my right knee.   Since then, I have been feeling pain and swelling. Â I tried using a knee brace
Doctor: ['Rao']
Hospital: ['City Hospital']
Date: ['12 August 2025', 'July']

Risk Flag: ✅ No immediate risk
