importing libraries

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
#import gradio as gr

Mount Google Drive & Load Dataset

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Preprocessing and Text Segmentation

In [None]:
# Load and clean the text
with open("/content/drive/MyDrive/PEDIATRIC_PULMONOLOGY.txt", "r", encoding="utf-8") as file:
  with open("/content/drive/MyDrive/PEDIATRIC_PULMONOLOGY2.txt", "r", encoding="utf-8") as file:
    raw_text = file.read()

# data cleanup
cleaned_text = re.sub(r'\n+', '\n', raw_text)  # Reduce multiple newlines
cleaned_text = re.sub(r'\s+', ' ', cleaned_text)  # Normalize whitespace

cleaned_text



In [None]:
# Segment using uppercase headers as delimiters
condition_keywords = [
    "ASTHMA", "BRONCHIOLITIS", "PNEUMONIA", "AERODIGESTIVE DISORDERS",
   "CHRONIC COUGH", "PARADOXICAL VOCAL FOLD MOVEMENT", "SUBGLOTTIC STENOSIS", "ACUTE RESPIRATORY DISTRESS SYNDROME", "HEREDITARY HEMORRHAGIC TELANGIECTASIA",
    "TRACHEOESOPHAGEAL FISTULA", "ASBESTOSIS", "ESOPHAGEAL ATRESIA", "PULMONARY ARTERIAL HYPERTENSION", "PRIMARY CILIARY DYSKINESIA", "LARYNGEAL WEB"
]

# Split by uppercase headers
sections = re.split(r"\n(?=[A-Z][A-Z ]+\n)", raw_text)

Creating Rule-Based Pediatric Pulmonology Knowledge Base

In [None]:
#Parses sections into knowledge_base using keyword matching

knowledge_base = {
    "asthma": {
        "definition": ["Asthma is a chronic condition that causes inflammation and narrowing of the airways, leading to wheezing, breathlessness, and coughing."],
        "symptoms": [
            "wheezing",
            "shortness of breath",
            "coughing, especially at night or early morning",
            "tightness in the chest"
        ],
        "red_flags": [
            "severe difficulty breathing",
            "lips turning blue",
            "child unable to speak or cry",
            "no improvement with inhaler"
        ],
        "advice": ["Use a prescribed inhaler, keep the child in an upright position, avoid triggers like dust or pollen, and seek emergency care if symptoms worsen."]
    },
    "bronchiolitis": {
        "definition": ["Bronchiolitis is a common lung infection in infants and young children, usually caused by a virus, that leads to inflammation and congestion in the small airways."],
        "symptoms": [
            "cough",
            "runny nose",
            "wheezing",
            "fast or shallow breathing",
            "poor feeding"
        ],
        "red_flags": [
            "grunting or flaring nostrils while breathing",
            "difficulty feeding or drinking",
            "chest retractions",
            "cyanosis (bluish skin)"
        ],
        "advice": ["Keep the child well hydrated, monitor for worsening symptoms, and seek medical attention if breathing becomes labored or feeding decreases."]
    },
    "pneumonia": {
        "definition": ["Pneumonia is an infection of the lungs that causes the air sacs to fill with fluid or pus, leading to cough, fever, and difficulty breathing."],
        "symptoms": [
            "fever",
            "cough with phlegm",
            "chest pain",
            "rapid breathing",
            "fatigue"
        ],
        "red_flags": [
            "very high fever",
            "confusion or lethargy",
            "labored breathing",
            "cyanosis"
        ],
        "advice": ["Ensure the child rests, drinks plenty of fluids, and consult a doctor. Severe symptoms may require antibiotics or hospitalization."]
    },
    "chronic cough": {
        "definition": ["Chronic cough is a cough that lasts more than 4 weeks in children. It can be dry or productive and may indicate an underlying condition."],
        "symptoms": [
            "persistent cough for more than 4 weeks",
            "hoarseness",
            "dry or wet cough",
            "cough worsens at night or with exercise"
        ],
        "red_flags": [
            "cough with blood",
            "weight loss",
            "difficulty breathing",
            "loss of appetite"
        ],
        "advice": ["Avoid environmental irritants, keep the child hydrated, and seek medical evaluation to determine the underlying cause."]
    },
    "paradoxical vocal fold movement": {
        "definition": ["PVFM is a condition in which the vocal folds close when they should open during breathing, often triggered by stress or irritants."],
        "symptoms": [
            "stridor",
            "sudden shortness of breath",
            "tightness in the throat",
            "difficulty inhaling"
        ],
        "red_flags": [
            "sudden and total voice loss",
            "stridor during both inhale and exhale",
            "severe anxiety with breathing difficulty"
        ],
        "advice": ["Encourage relaxed throat breathing, avoid triggers, and work with a speech-language pathologist for breathing retraining."]
    },

    "subglottic stenosis": {
        "definition": ["Subglottic stenosis is a narrowing of the airway just below the vocal cords, which can be congenital or acquired."],
        "symptoms": [
            "noisy breathing (stridor)",
            "difficulty breathing during activity",
            "voice changes or hoarseness"
        ],
        "red_flags": [
            "severe breathing difficulty",
            "cyanosis (bluish skin or lips)",
            "stridor at rest"
        ],
        "advice": ["Avoid irritants, monitor breathing, and seek evaluation by an ENT specialist."]
    },
    "acute respiratory distress syndrome": {
        "definition": ["ARDS is a severe inflammatory reaction in the lungs causing fluid accumulation and difficulty in oxygen exchange."],
        "symptoms": [
            "rapid breathing",
            "shortness of breath",
            "low oxygen levels"
        ],
        "red_flags": [
            "extreme difficulty breathing",
            "requires mechanical ventilation",
            "persistent hypoxia"
        ],
        "advice": ["Requires ICU admission and oxygen support. Early recognition and treatment are crucial."]
    },
    "hereditary hemorrhagic telangiectasia": {
        "definition": ["HHT is a genetic disorder causing abnormal blood vessel formation, leading to bleeding in organs like lungs and brain."],
        "symptoms": [
            "frequent nosebleeds",
            "shortness of breath",
            "unexplained anemia"
        ],
        "red_flags": [
            "stroke-like symptoms",
            "brain or lung hemorrhage",
            "significant hemoptysis (coughing blood)"
        ],
        "advice": ["Genetic counseling, monitor for bleeding, and treat complications promptly."]
    },
    "tracheoesophageal fistula": {
        "definition": ["A TEF is an abnormal connection between the trachea and esophagus, often congenital."],
        "symptoms": [
            "coughing or choking during feeding",
            "recurrent respiratory infections",
            "difficulty swallowing"
        ],
        "red_flags": [
            "cyanosis while feeding",
            "aspiration pneumonia",
            "failure to thrive"
        ],
        "advice": ["Requires surgical correction. Ensure safe feeding methods until repaired."]
    },
    "laryngeal web": {
        "definition": ["Laryngeal web is a congenital or acquired membrane that partially obstructs the vocal cords."],
        "symptoms": [
            "weak or hoarse cry",
            "stridor",
            "breathing difficulty during exertion"
        ],
        "red_flags": [
            "airway obstruction",
            "progressive stridor",
            "poor weight gain due to effort in breathing"
        ],
        "advice": ["ENT evaluation for surgical intervention. Avoid airway irritants."]
    },
    "primary ciliary dyskinesia": {
        "definition": ["PCD is a rare genetic disorder where cilia in the lungs do not function properly, leading to mucus build-up and infections."],
        "symptoms": [
            "chronic wet cough",
            "nasal congestion",
            "recurrent ear and sinus infections"
        ],
        "red_flags": [
            "bronchiectasis",
            "hearing loss",
            "progressive lung damage"
        ],
        "advice": ["Airway clearance therapies, regular monitoring, and genetic counseling."]
    },
    "pulmonary arterial hypertension": {
        "definition": ["PAH is increased blood pressure in the arteries of the lungs, making it harder for the heart to pump blood."],
        "symptoms": [
            "fatigue",
            "shortness of breath during exertion",
            "fainting spells"
        ],
        "red_flags": [
            "cyanosis",
            "chest pain",
            "syncope (fainting)"
        ],
        "advice": ["Specialist care with medications to reduce pressure. Avoid strenuous activity."]
    },
    "esophageal atresia": {
        "definition": ["Esophageal atresia is a birth defect where the esophagus does not connect to the stomach."],
        "symptoms": [
            "frothy saliva",
            "difficulty feeding",
            "choking or coughing when feeding"
        ],
        "red_flags": [
            "aspiration pneumonia",
            "cyanosis during feeding",
            "inability to pass a feeding tube"
        ],
        "advice": ["Requires urgent surgical correction. Supportive care until surgery."]
    },
    "asbestosis": {
        "definition": ["Asbestosis is a chronic lung disease caused by inhaling asbestos fibers, rare in children unless exposed."],
        "symptoms": [
            "persistent dry cough",
            "chest tightness",
            "shortness of breath"
        ],
        "red_flags": [
            "respiratory failure",
            "clubbing of fingers",
            "cor pulmonale"
        ],
        "advice": ["Prevent exposure, monitor lung function, and seek pulmonary care."]
    }

}


for section in sections:
    for cond in condition_keywords:
        if cond in section:
            lines = section.split('\n')
            current = cond.lower()
            # Ensure the condition exists in the knowledge base before trying to access its keys
            if current in knowledge_base:
                for line in lines:
                    line = line.strip()
                    if re.search(r'\b(definition|described as|refers to|means)\b', line, re.I):
                        # Append to the definition list
                        knowledge_base[current]['definition'].append(line)
                    elif re.search(r'\b(wheeze|cough|fever|difficulty breathing|dyspnea|tightness|stridor)\b', line, re.I):
                        # Append to the symptoms list
                        knowledge_base[current]['symptoms'].append(line)
                    elif re.search(r'\b(bluish lips|hospital|urgent|life-threatening|emergency|ICU|choking)\b', line, re.I):
                        # Append to the red_flags list
                        knowledge_base[current]['red_flags'].append(line)
                    elif re.search(r'\b(advice|recommendation|treatment|inhaler|rest|hydration|consult)\b', line, re.I):
                        # Append to the advice list
                        knowledge_base[current]['advice'].append(line)

 Symptom Classifier

In [None]:
import spacy
nlp = spacy.load("en_core_web_sm")

def classify_symptom(text):
    doc = nlp(text.lower())
    for token in doc:
        if token.text in ["wheezing", "breathless", "cough"]:
            return "asthma"
        elif token.text in ["fever", "chills", "fast", "breathing"]:
            return "pneumonia"
        # Add more rules
    return "unknown"


Text Classification Training

In [None]:
# training_data.py

training_data = [
    ("My child is wheezing and coughing", {"cats": {"asthma": 1.0, "bronchiolitis": 0.0}}),
    ("He has trouble breathing and wheezing sounds", {"cats": {"asthma": 1.0, "bronchiolitis": 0.0}}),
    ("My baby has a runny nose and noisy breathing", {"cats": {"asthma": 0.0, "bronchiolitis": 1.0}}),
    ("The child has fast breathing and cough", {"cats": {"bronchiolitis": 1.0, "asthma": 0.0}}),
    ("Persistent cough with fever", {"cats": {"pneumonia": 1.0}}),
    ("Cough with chest pain and high fever", {"cats": {"pneumonia": 1.0}}),
    ("She has stridor and tight throat", {"cats": {"pvfm": 1.0}}),
    ("Noisy breathing when running and voice changes", {"cats": {"pvfm": 1.0}}),
]







In [None]:

from spacy.training.example import Example


# Create blank English model
nlp = spacy.blank("en")

# Add text classifier pipe
text_cat = nlp.add_pipe("textcat")
labels = ["asthma", "bronchiolitis", "pneumonia", "pvfm"]
for label in labels:
    text_cat.add_label(label)

# Train the model
optimizer = nlp.begin_training()
for i in range(10):
    losses = {}
    for text, annotations in training_data:
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example], losses=losses)
    print(f"Iteration {i}, Loss: {losses['textcat']}")


def predict_condition(user_input):
    doc = nlp(user_input)
    scores = doc.cats
    predicted = max(scores, key=scores.get)
    confidence = scores[predicted]
    return predicted if confidence > 0.5 else "unknown"

Iteration 0, Loss: 1.5345534980297089
Iteration 1, Loss: 1.341783806681633
Iteration 2, Loss: 0.9367388486862183
Iteration 3, Loss: 0.429081154987216
Iteration 4, Loss: 0.11049534427002072
Iteration 5, Loss: 0.016654782579280436
Iteration 6, Loss: 0.0015858455190027598
Iteration 7, Loss: 0.0001908264803205384
Iteration 8, Loss: 4.5136206836104975e-05
Iteration 9, Loss: 1.8727453266365046e-05


Train spaCy for Intent Detection

In [None]:
# Intent Classification Training Script with spaCy (Using Enhanced Examples)

import spacy
from spacy.util import minibatch
from spacy.training.example import Example
import random

# 1. Prepare Enhanced Training Examples
intent_examples = {
   "Asthma": [
        "My child is wheezing",
        "He has a tight chest and can't breathe",
        "Child sounds breathless",
        "She's coughing and short of breath",
        "My baby has a noisy breath and wheezing",
        "Wheezing at night and tight chest",
        "He struggles to catch his breath",
        "Wheeze and dry cough every night",
        "I think it's asthma",
        "Shortness of breath when running"
    ],
     "Bronchiolitis": [
        "Baby has a stuffy nose and cough",
        "Infant wheezing with fever",
        "My baby coughs and breathes rapidly",
        "She's congested and breathing fast",
        "Small child with nasal flaring and coughing",
        "Severe chest congestion in baby",
        "He's wheezing and has a runny nose",
        "Persistent wet cough and fussiness",
        "Labored breathing in toddler",
        "Coughing, fever and vomiting"
    ],
    "Pneumonia": [
        "Child has chest pain and fever",
        "She's coughing up mucus and weak",
        "Breathing fast with chills and fatigue",
        "He’s very tired with shallow breathing",
        "Persistent fever and chest tightness",
        "Crackling sound in chest and fever",
        "Dry cough turning wet and heavy breathing",
        "Extreme fatigue and fever",
        "Fever, cough and chest pain",
        "Difficulty breathing with high fever"
    ],

    "Paradoxical Vocal Fold Movement": [
        "She has stridor when inhaling",
        "Tight throat and can't breathe in",
        "My child gasps for air and makes a whistling sound",
        "He loses voice suddenly and struggles to inhale",
        "Stridor during exercise",
        "Sounds like asthma but inhaler doesn't help",
        "Voice disappears when stressed",
        "Shortness of breath with throat tightness",
        "Breathing issue without wheeze",
        "Feels like choking but airway is clear"
    ],
    "Chronic Cough": [
        "He has been coughing for over 4 weeks",
        "Dry cough that won't stop",
        "Cough worsens at night",
        "Cough comes after running or playing",
        "She coughs without a cold",
        "Ongoing cough and hoarseness",
        "Persistent wet cough",
        "Cough doesn't respond to syrup",
        "Loud dry cough for weeks",
        "Long-term cough after infection"
    ],
    "Aerodigestive Disorders": [
        "My baby coughs while feeding",
        "He chokes on food often",
        "Recurrent chest infections with feeding",
        "Swallowing issues and breathing trouble",
        "Chronic aspiration with vomiting",
        "Breathing sounds odd after eating",
        "Vomits and gasps when feeding",
        "Feeding always leads to cough",
        "Reflux and pneumonia",
        "Gurgling sound when swallowing"
    ],
    "Pulmonary Hypertension": [
        "Shortness of breath with fatigue",
        "Swollen legs and difficulty breathing",
        "Fainting spells after walking",
        "Chest pressure when climbing stairs",
        "Heart races with little exertion",
        "Child turns blue when active",
        "Persistent fatigue and breathlessness",
        "He breathes fast with chest pain",
        "Bluish skin and trouble catching breath",
        "Swelling around the eyes and feet"
    ],
    "Tracheostomy": [
        "My child has a tracheostomy and is coughing",
        "Trouble breathing through trach tube",
        "There is mucus in the trach tube",
        "The tracheostomy area is swollen",
        "Child wheezes with trach tube in",
        "Coughing through trach",
        "My baby has noisy breathing from the trach",
        "Redness around trach site",
        "Difficulty suctioning trach",
        "Trouble speaking after tracheostomy"
    ],

    "Subglottic Stenosis": [
        "My child has a high-pitched breathing sound",
        "He struggles to breathe and has stridor",
        "Breathing is noisy and gets worse when lying down",
        "Chronic hoarseness and noisy breathing",
        "Trouble breathing after intubation",
        "She gets tired easily and breathes loudly",
        "My baby has breathing obstruction and cries hoarsely",
        "Feels like airway is blocked",
        "Repeated breathing problems after croup",
        "Breath sounds are harsh and squeaky"
    ],
    "Acute Respiratory Distress Syndrome": [
        "My baby is in the ICU with severe breathing difficulty",
        "Doctor says lungs are inflamed and not working",
        "Child needs high oxygen and still struggles",
        "She's on a ventilator due to infection",
        "He rapidly developed severe shortness of breath",
        "Oxygen saturation dropped suddenly",
        "They said it's ARDS from pneumonia",
        "Breathing support not helping much",
        "Chest x-ray shows white-out pattern",
        "Lung collapse after viral infection"
    ],
    "Hereditary Hemorrhagic Telangiectasia": [
        "My child has frequent nosebleeds",
        "She bleeds easily and bruises too",
        "Doctor said her vessels are fragile",
        "He had blood in his stool and nose",
        "The pediatrician suspects telangiectasia",
        "Chronic bleeding from mouth and nose",
        "Veins on skin look red and abnormal",
        "Sudden bleeding with no injury",
        "Nose bleeds every night",
        "Family history of HHT"
    ],
    "Laryngeal Web": [
        "My baby's voice is very weak and raspy",
        "He sounds hoarse since birth",
        "Stridor heard when breathing in and out",
        "Breathing is noisy but lungs are clear",
        "Soft cry and wheezing at the throat level",
        "Pediatric ENT mentioned a web in the airway",
        "Difficult to intubate during surgery",
        "Congenital voice issue and trouble breathing",
        "Loud snoring and stridor during sleep",
        "Speech delay and airway obstruction"
    ],
    "Esophageal Atresia": [
        "My newborn chokes when feeding",
        "Milk comes out through the nose",
        "He coughs every time I breastfeed him",
        "They said his food pipe didn't form properly",
        "Baby turns blue during feeding",
        "Swallowing is difficult and leads to vomiting",
        "Constant gagging when eating",
        "Tube feeding was required",
        "Doctor said esophagus is not connected",
        "Nasal regurgitation of milk"
    ],
    "Primary Ciliary Dyskinesia": [
        "Chronic cough since birth",
        "They said her cilia are not working",
        "Always has sinus infections and wet cough",
        "Sputum daily with constant nasal drip",
        "He has breathing problems and situs inversus",
        "Frequent ear infections and wheezing",
        "Persistent congestion not resolving",
        "Nasal blockage with bronchitis every month",
        "Chronic wet lung sounds",
        "Mucus builds up due to weak lung clearance"
    ],
    "Asbestosis": [
        "Dad used to work with asbestos, now child has cough",
        "Dry cough and chest pain after exposure",
        "CT scan showed fibrosis from inhalation",
        "Slow breathing decline with wheezing",
        "She has fine crackles and history of asbestos contact",
        "Shortness of breath and occupational exposure",
        "Progressive cough and reduced lung function",
        "Doctor suspects interstitial lung disease",
        "X-ray shows scarring in lungs",
        "Chronic breathlessness with crackles"
    ],
    "Tracheoesophageal Fistula": [
        "Food enters lungs while feeding",
        "My child coughs and chokes every time he eats",
        "Doctor says windpipe and foodpipe are connected",
        "Severe aspiration during feeding",
        "Milk comes out of nose while swallowing",
        "Baby has trouble breathing during feeding",
        "Abdominal swelling after eating",
        "Frequent pneumonia from food aspiration",
        "History of surgical correction for fistula",
        "Gastro issues with respiratory problems"
    ],
    "Pulmonary Arterial Hypertension": [
        "My child gets tired easily with chest pain",
        "He has high pressure in the lungs",
        "Breathes fast when walking",
        "Sweaty and tired with minimal effort",
        "Diagnosed with PAH due to heart defect",
        "Blue lips and dizziness",
        "Has to stop walking to catch breath",
        "Fainting spells with physical activity",
        "Enlarged heart and breathlessness",
        "Struggles with climbing stairs"
    ]


}

# 2. Convert to spaCy training format
train_data = []
labels = list(intent_examples.keys())

for intent, examples in intent_examples.items():
    for text in examples:
        cats = {label: float(label == intent) for label in labels}
        train_data.append((text, {"cats": cats}))

nlp = spacy.blank("en")

# 3. Create blank spaCy model
nlp = spacy.blank("en")

# Add the textcat component
#textcat = nlp.add_pipe("textcat", config={"model": {"exclusive_classes": True, "architecture": "simple_cnn"}})


# Create config for text categorizer
textcat_config = {
    "threshold": 0.5,
    "model": {
        "@architectures": "spacy.TextCatEnsemble.v2",
        "linear_model": {
            "@architectures": "spacy.TextCatBOW.v1",
            "exclusive_classes": True,
            "ngram_size": 1,
             "no_output_layer": False
        },
        "tok2vec": {
            "@architectures": "spacy.Tok2Vec.v2",
            "embed": {
                "@architectures": "spacy.MultiHashEmbed.v2",
                "width": 64,
                "rows": [1000, 1000, 1000],
                "attrs": ["ORTH", "LOWER", "PREFIX"]
            },
            "encode": {
                "@architectures": "spacy.MaxoutWindowEncoder.v2",
                "width": 64,
                "window_size": 1,
                "maxout_pieces": 3,
                "depth": 1
            }
        }
    }
}

# Add the textcat component
textcat = nlp.add_pipe("textcat", config=textcat_config)



for label in labels:
    textcat.add_label(label)

# 5. Train mode
nlp.initialize()
for i in range(50):  # ← Increase to 30–50 epochs
    random.shuffle(train_data)
    losses = {}
    batches = minibatch(train_data, size=4)
    for batch in batches:
        examples = [Example.from_dict(nlp.make_doc(text), ann) for text, ann in batch]
        nlp.update(examples, drop=0.3, losses=losses)
    if i % 5 == 0:
        print(f"Iteration {i + 1}: Loss = {losses['textcat']:.4f}")

unknown_examples = [
    "hello", "hi there", "how are you", "what’s up", "good morning",
    "thanks", "thank you", "bye", "okay", "alright"
]
for text in unknown_examples:
    cats = {label: 0.0 for label in labels}  # All zero = no intent match
    train_data.append((text, {"cats": cats}))


# 6. Save model
nlp.to_disk("peds_chatbot_model")

# 7. Load and test
nlp = spacy.load("peds_chatbot_model")

def predict_intent(text):
    doc = nlp(text)
    scores = doc.cats
    best = max(scores, key=scores.get)
    return best, scores[best]

# Test Predictions
examples = [
    "My child is wheezing and coughing",
    "Fever with fast breathing and crackles",
    "Baby has noisy breathing and trouble feeding"
]

for ex in examples:
    intent, confidence = predict_intent(ex)
    print(f"'{ex}' \u2192 Predicted: {intent.upper()} (Confidence: {confidence:.2f})")

Iteration 1: Loss = 2.3821
Iteration 6: Loss = 1.8729
Iteration 11: Loss = 0.7384
Iteration 16: Loss = 0.2535
Iteration 21: Loss = 0.0999
Iteration 26: Loss = 0.0324
Iteration 31: Loss = 0.0239
Iteration 36: Loss = 0.0092
Iteration 41: Loss = 0.0049
Iteration 46: Loss = 0.0086
'My child is wheezing and coughing' → Predicted: ASTHMA (Confidence: 1.00)
'Fever with fast breathing and crackles' → Predicted: PNEUMONIA (Confidence: 0.94)
'Baby has noisy breathing and trouble feeding' → Predicted: TRACHEOESOPHAGEAL FISTULA (Confidence: 0.96)


Model Evaluation & Testing

In [None]:
from sklearn.metrics import classification_report

# Create evaluation data
texts = [ex[0] for ex in train_data]
true_labels = [max(ex[1]["cats"], key=ex[1]["cats"].get) for ex in train_data]

pred_labels = []
for text in texts:
    intent, _ = predict_intent(text)
    pred_labels.append(intent)

print(classification_report(true_labels, pred_labels))


                                       precision    recall  f1-score   support

  Acute Respiratory Distress Syndrome       0.91      1.00      0.95        10
              Aerodigestive Disorders       0.91      1.00      0.95        10
                           Asbestosis       1.00      1.00      1.00        10
                               Asthma       1.00      0.55      0.71        20
                        Bronchiolitis       1.00      1.00      1.00        10
                        Chronic Cough       1.00      1.00      1.00        10
                   Esophageal Atresia       0.91      1.00      0.95        10
Hereditary Hemorrhagic Telangiectasia       1.00      1.00      1.00        10
                        Laryngeal Web       0.91      1.00      0.95        10
      Paradoxical Vocal Fold Movement       1.00      1.00      1.00        10
                            Pneumonia       0.91      1.00      0.95        10
           Primary Ciliary Dyskinesia       1.00   

Chatbot Response Generator

In [None]:
def get_chatbot_response(user_input):
    condition = classify_symptom(user_input)

    if condition in knowledge_base:
        return {
            "condition": condition,
            "symptoms": knowledge_base[condition]["symptoms"],
            "red_flags": knowledge_base[condition]["red_flags"],
            "advice": knowledge_base[condition]["advice"]
        }
    else:
        return "I'm sorry, I couldn't determine the condition. Please consult a doctor."


In [None]:
!pip install transformers
!pip install torch


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
!pip install sacremoses
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT")
model = AutoModelForCausalLM.from_pretrained("microsoft/BioGPT")

Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sacremoses
Successfully installed sacremoses-0.1.1


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


config.json:   0%|          | 0.00/595 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.56G [00:00<?, ?B/s]

In [None]:
input_ids = tokenizer("Explain asthma symptoms in children.", return_tensors="pt").input_ids
outputs = model.generate(input_ids, max_length=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Explain asthma symptoms in children.


In [None]:
def ask_llm(question):
    prompt = f"Patient (child): {question}\nDoctor:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=100, do_sample=True, top_k=50)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Remove prompt from response
    answer = response.replace(prompt, "").strip()
    return answer

def smart_chatbot_response(user_input, threshold=0.5):
    intent, confidence = predict_intent(user_input)

    if confidence < threshold or intent == "unknown":
        # Use BioGPT for fallback
        return ask_llm(user_input)

    # Use rule-based + KB if confidence is high
    kb_entry = knowledge_base.get(intent.lower())
    if kb_entry:
        return make_response_conversational(intent, kb_entry)

    # Last fallback
    return ask_llm(user_input)


Interactive Chatbot

In [None]:
def predict_intent(user_input):
    """Predict condition based on user input using spaCy text categorizer."""
    doc = nlp(user_input)
    scores = doc.cats
    best_label = max(scores, key=scores.get)
    confidence = scores[best_label]
    return best_label, confidence

# Combine rule-based and ML
def get_final_prediction(user_input, threshold=0.6):
    # This function is not defined in the current notebook state, will remove it.
    # rule_intent = rule_based_classifier(user_input)
    # if rule_intent:
    #     return rule_intent, 1.0

    doc = nlp(user_input)
    scores = doc.cats
    best_label = max(scores, key=scores.get)
    confidence = scores[best_label]

    if confidence < threshold:
        return "unknown", confidence
    return best_label, confidence

# 🔄 Conversational Response Formatter
def make_response_conversational(condition, kb_entry):
    return f"""👩‍⚕️ It sounds like your child may be showing signs of **{condition.title()}**.
Here’s what you might notice:
• Symptoms: {', '.join(kb_entry['symptoms'][:3])}
• Red flags: {', '.join(kb_entry['red_flags'][:2])}

📘 Tip: {kb_entry['advice'][0]}"""


def respond_to_user(user_input, knowledge_base, threshold=0.5):
    """Use prediction and knowledge base to generate a response."""
    condition, confidence = get_final_prediction(user_input, threshold)

    if condition == "unknown":
        return "I'm not sure I understand. Could you please describe your child's symptoms in more detail?"

    # Normalize keys before lookup
    normalized_condition = condition.lower()
    data = knowledge_base.get(normalized_condition, None)
    if not data:
        return f"I believe this might be related to **{condition.replace('_', ' ').title()}**, but I don't have more information."

    return make_response_conversational(condition, data)


def get_response(condition):
    info = knowledge_base.get(condition)
    if not info:
        return "Sorry, I couldn’t confidently determine the condition. Please describe more clearly."

    return f"""🩺 {condition.title()}:
Definition: {info['definition']}

 Red Flags: {', '.join(info['red_flags'])}
 Symptoms: {', '.join(info['symptoms'])}
 Advice: {info['advice']}"""

def ask_biogpt(query, history=None):
    prompt = query if history is None else history + "\n" + query
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=200, do_sample=True, temperature=0.7)
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return reply

# Chat loop
if __name__ == "__main__":
    while True:
        user_input = input("👩‍👧 Parent: ")
        if user_input.lower() in ["exit", "quit", "bye"]:
            print("🤖 Bot: Take care! Always consult your doctor when unsure.")
            break

        # Try to get structured medical response from your pipeline
        reply = respond_to_user(user_input, knowledge_base)

        # If bot could not provide a confident condition or it's vague
        if "I'm not sure" in reply or "I couldn't determine the condition" in reply:
            print("\n🤖 BioGPT (LLM-enhanced):")
            print(ask_biogpt(user_input))  # Fallback to BioGPT for richer output
        else:
            print(f"\n🤖 Bot:\n{reply}\n")