In [1]:
# ==========================================
# BEAST v3 — BULLETPROOF MEDICAL ASSISTANT
# MedQuAD + FULL consumer + Reproductive + Semen
# Condition Engine + Generic Medical Engine
# Bilingual + Comprehensive + No Missing Solutions
# ==========================================

!pip -q install sentence-transformers langdetect googletrans==4.0.0-rc1 ipywidgets

import pandas as pd, numpy as np, os, zipfile, requests, re, xml.etree.ElementTree as ET
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from langdetect import detect
from googletrans import Translator
import ipywidgets as widgets
from IPython.display import display, clear_output

# ======================
# LOAD MEDQUAD
# ======================
# Download the MedQuAD archive, unpack it next to the notebook, and collect
# every (Question, Answer) pair found in its XML files into `medquad_df`.
zip_url = "https://github.com/abachaa/MedQuAD/archive/refs/heads/master.zip"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being saved as the "zip".
response = requests.get(zip_url, timeout=60)
response.raise_for_status()

# Context manager guarantees the archive is flushed and closed before it is
# re-opened for extraction below.
with open("MedQuAD.zip", "wb") as zip_file:
    zip_file.write(response.content)

with zipfile.ZipFile("MedQuAD.zip", "r") as z:
    z.extractall(".")

mq_q, mq_a = [], []
for root, _, files in os.walk("MedQuAD-master"):
    for f in files:
        if not f.endswith(".xml"):
            continue
        tree = ET.parse(os.path.join(root, f))
        rxml = tree.getroot()
        for qa in rxml.findall(".//QAPair"):
            q = qa.findtext("Question")
            a = qa.findtext("Answer")
            # Skip pairs where either element is missing or empty.
            if q and a:
                mq_q.append(q)
                mq_a.append(a)

medquad_df = pd.DataFrame({"question": mq_q, "answer": mq_a})

# ======================
# FULL CONSUMER + REPRODUCTIVE + SEMEN DATA
# ======================
# Hand-written (question, answer) pairs for everyday symptom phrasing, kept
# grouped by body system. The groups are flattened in declaration order, so
# `consumer_data` is a flat list of 2-tuples.
_consumer_groups = (
    # GENERAL
    (
        ("fever", "Fever occurs when the body fights infection."),
        ("fatigue", "Fatigue occurs due to stress or illness."),
        ("body ache", "Body aches occur with viral infection."),
        ("weakness", "Weakness may occur due to dehydration or illness."),
    ),
    # HEAD
    (
        ("headache", "Headache due to tension or dehydration."),
        ("head hurts", "Head pain due to tension."),
        ("pain in head", "Headache symptom."),
    ),
    # RESP
    (
        ("cough", "Cough due to airway irritation."),
        ("cold", "Cold viral infection."),
        ("sinus pain", "Sinus inflammation."),
    ),
    # DIGESTIVE
    (
        ("gas", "Gas causes bloating."),
        ("bloating", "Gas accumulation."),
        ("acidity", "Acid reflux."),
        ("stomach pain", "Indigestion."),
    ),
    # MUSCLE
    (
        ("back pain", "Back strain."),
    ),
    # WOMEN
    (
        ("vaginal discharge", "Discharge infection or hormonal."),
        ("vaginal smell", "Odor due to imbalance."),
        ("vaginal itching", "Fungal irritation."),
        ("period cramps", "Menstrual cramps."),
    ),
    # URINARY
    (
        ("burning urination", "UTI symptom"),
    ),
    # SEMEN COLOR / CONSISTENCY
    (
        ("blood in semen", "Blood in semen inflammation."),
        ("reddish semen", "Blood mixed semen."),
        ("pink semen", "Minor bleeding semen."),
        ("brown semen", "Old blood semen."),
        ("blood in sperm", "Prostate inflammation."),
        ("rust semen", "Old blood."),
        ("semen color change", "Blood presence."),
        ("runny semen", "Low sperm."),
        ("watery semen", "Thin semen."),
        ("thin semen", "Low density sperm."),
        ("sperm watery", "Low concentration."),
    ),
)

consumer_data = [pair for group in _consumer_groups for pair in group]

# Same two-column layout as the MedQuAD frame so the two can be concatenated.
cons_df = pd.DataFrame(consumer_data, columns=["question", "answer"])

# ======================
# MERGE KB
# ======================
# Stack the MedQuAD pairs on top of the consumer pairs; ignore_index renumbers
# the rows so positional lookups line up with the rows of `embeddings`.
kb_frames = [medquad_df, cons_df]
df = pd.concat(kb_frames, ignore_index=True)

# ======================
# EMBEDDINGS
# ======================
# Encode every knowledge-base question once, up front; retrieve() scores
# incoming queries against this matrix.
model = SentenceTransformer("all-MiniLM-L6-v2")
kb_questions = df["question"].tolist()
embeddings = model.encode(kb_questions, show_progress_bar=True)

# ======================
# TRANSLATION
# ======================
# Single shared translation client used by to_en() and to_hi().
translator = Translator()

def detect_lang(q):
    """Return the language code detected for ``q``, defaulting to ``"en"``.

    Empty or whitespace-only input is reported as English without calling the
    detector. Any detection failure also falls back to ``"en"`` so the caller
    can always continue with the English code path.
    """
    if not q or not q.strip():
        return "en"
    try:
        return detect(q)
    except Exception:
        # Deliberate best-effort fallback. `Exception` (rather than a bare
        # `except`) lets KeyboardInterrupt / SystemExit propagate.
        return "en"

def to_en(q):
    """Return ``q`` in English.

    Text detected as Hindi is sent through the shared translator; anything
    else is returned unchanged.
    """
    if detect_lang(q) != "hi":
        return q
    translated = translator.translate(q, src="hi", dest="en")
    return translated.text

def to_hi(t):
    """Translate the English text ``t`` to Hindi and return the result text."""
    translated = translator.translate(t, src="en", dest="hi")
    return translated.text

# ======================
# CONDITION DETECTOR
# ======================
# Keyword patterns for detect_condition(). They are anchored on word
# boundaries so that short keywords no longer fire inside unrelated words
# ("red" in "tired", "rust" in "frustrated", "thin" in "think",
# "gas" in "gasping", "period" in "periodic", "itch" in "switch").
_SEMEN_RE = re.compile(r"\b(?:semen|sperm|ejaculate)\w*")
_BLOOD_COLOR_RE = re.compile(
    r"\b(?:red|reddish|pink|pinkish|brown|brownish|rust|rusty|blood\w*)\b"
)
_WATERY_RE = re.compile(r"\b(?:runny|watery|thin|thinner)\b")
_VAGINAL_RE = re.compile(r"\bvagina\w*")
_ODOR_RE = re.compile(r"\b(?:smell|odou?r)\w*")
_ITCH_RE = re.compile(r"\bitch\w*")
_BURN_RE = re.compile(r"\bburn\w*")
_URINE_RE = re.compile(r"\burin\w*")
_HEADACHE_PHRASES = ("headache", "head hurts", "head pain", "pain in head")
_SINUS_RE = re.compile(r"\bsinus\w*")
_GAS_RE = re.compile(r"\b(?:gas|gassy|bloating|acidity)\b")
_PERIOD_RE = re.compile(r"\bperiods?\b")
_PERIOD_PAIN_RE = re.compile(r"\b(?:cramp|pain)\w*")


def detect_condition(q):
    """Map an English free-text query to a condition label, or ``None``.

    Rules are checked in a fixed order and the first match wins, so the more
    specific reproductive rules take precedence over the general ones.

    Returns one of ``"blood_semen"``, ``"watery_semen"``,
    ``"vaginal_discharge_odor"``, ``"vaginal_itching"``, ``"uti"``,
    ``"headache"``, ``"sinusitis"``, ``"gastritis"``, ``"back_pain"``,
    ``"period_cramps"``; ``None`` when nothing matches (including empty or
    ``None`` input).
    """
    q = (q or "").lower()

    mentions_semen = _SEMEN_RE.search(q) is not None
    if mentions_semen and _BLOOD_COLOR_RE.search(q):
        return "blood_semen"

    if mentions_semen and _WATERY_RE.search(q):
        return "watery_semen"

    mentions_vaginal = _VAGINAL_RE.search(q) is not None
    if mentions_vaginal and _ODOR_RE.search(q):
        return "vaginal_discharge_odor"

    if mentions_vaginal and _ITCH_RE.search(q):
        return "vaginal_itching"

    # Covers the original phrases "burning urination" / "burn urine" as well
    # as wordings such as "burning while urinating".
    if _BURN_RE.search(q) and _URINE_RE.search(q):
        return "uti"

    # Multi-word phrases are specific enough for plain substring matching.
    if any(phrase in q for phrase in _HEADACHE_PHRASES):
        return "headache"

    if _SINUS_RE.search(q):
        return "sinusitis"

    if _GAS_RE.search(q):
        return "gastritis"

    if "back pain" in q:
        return "back_pain"

    if _PERIOD_RE.search(q) and _PERIOD_PAIN_RE.search(q):
        return "period_cramps"

    return None

# ======================
# SOLUTIONS LIBRARY
# ======================
# Curated bilingual responses, keyed by the label detect_condition() returns.
# Each entry carries a one-line summary, a list of detail bullets and a list
# of self-care bullets, each in English (*_en) and Hindi (*_hi).
#
# NOTE: detect_condition() can also return "sinusitis", "gastritis",
# "back_pain" and "period_cramps", which have no entry here; generate()
# checks `cond in solutions` and uses the retrieval path for those labels.
solutions = {
    "headache": {
        "summary_en": "Headache usually occurs due to tension, dehydration, or fatigue.",
        # Ends with the Devanagari danda, matching the other Hindi summaries.
        "summary_hi": "सिरदर्द अक्सर तनाव, पानी की कमी या थकान से होता है।",
        "details_en": ["Tension headache most common", "Dehydration trigger", "Screen strain factor"],
        "details_hi": ["टेंशन सामान्य", "पानी कमी", "स्क्रीन कारण"],
        "care_en": ["Drink water", "Rest", "Reduce screen", "Pain relief if needed"],
        "care_hi": ["पानी", "आराम", "स्क्रीन कम", "दवा"],
    },
    "blood_semen": {
        "summary_en": "Blood in semen is usually due to prostate or seminal inflammation.",
        "summary_hi": "वीर्य में खून प्रोस्टेट या ग्रंथि सूजन से होता है।",
        "details_en": ["Often benign", "May follow activity", "Resolves often"],
        "details_hi": ["अक्सर गंभीर नहीं", "गतिविधि बाद", "स्वयं ठीक"],
        "care_en": ["Hydrate", "Reduce ejaculation", "Hygiene", "Urologist if recurrent"],
        "care_hi": ["पानी", "आवृत्ति कम", "स्वच्छता", "बार-बार हो तो डॉक्टर"],
    },
    "watery_semen": {
        "summary_en": "Watery semen may indicate low sperm concentration.",
        "summary_hi": "पतला वीर्य कम शुक्राणु से हो सकता है।",
        "details_en": ["Frequent ejaculation", "Low density", "Often reversible"],
        "details_hi": ["बार-बार स्खलन", "कम घनत्व", "सुधर सकता"],
        "care_en": ["Nutrition", "Hydration", "Reduce frequency", "Test if persistent"],
        "care_hi": ["पोषण", "पानी", "आवृत्ति कम", "जांच"],
    },
    "vaginal_discharge_odor": {
        "summary_en": "Foul vaginal odor often indicates bacterial imbalance.",
        "summary_hi": "योनि की दुर्गंध बैक्टीरियल असंतुलन का संकेत है।",
        "details_en": ["BV common", "pH imbalance", "Infection possible"],
        "details_hi": ["BV सामान्य", "pH गड़बड़ी", "संक्रमण"],
        "care_en": ["Hygiene", "Cotton underwear", "Avoid scented wash", "Gynecologist"],
        "care_hi": ["स्वच्छता", "कॉटन", "सुगंधित न", "डॉक्टर"],
    },
    "vaginal_itching": {
        "summary_en": "Vaginal itching often due to fungal infection.",
        "summary_hi": "योनि खुजली फंगल संक्रमण से।",
        "details_en": ["Candida common", "Moisture factor", "Irritation"],
        "details_hi": ["कैंडिडा", "नमी", "जलन"],
        "care_en": ["Keep dry", "Cotton", "Avoid tight", "Antifungal"],
        "care_hi": ["सूखा", "कॉटन", "ढीले कपड़े", "दवा"],
    },
    "uti": {
        "summary_en": "Burning urination due to urinary infection.",
        "summary_hi": "पेशाब में जलन मूत्र संक्रमण से।",
        "details_en": ["Bacterial infection", "Common female", "Needs care"],
        "details_hi": ["बैक्टीरिया", "महिलाओं में", "उपचार"],
        "care_en": ["Water", "Do not hold", "Hygiene", "Doctor"],
        "care_hi": ["पानी", "न रोकें", "स्वच्छता", "डॉक्टर"],
    },
}

# ======================
# GENERIC MEDICAL ENGINE
# ======================
# (pattern, English advice, Hindi advice) rules applied in order by
# generic_medical_advice(). Patterns are word-anchored so that, for example,
# "ache" is not found inside "teacher" or "attached", nor "pain" in "paint".
_ADVICE_RULES = (
    (
        re.compile(r"\b(?:infections?|viral|bacterial)\b"),
        ["Hydration", "Hygiene", "Medical care if persistent"],
        ["पानी", "स्वच्छता", "समस्या रहे तो डॉक्टर"],
    ),
    (
        re.compile(
            r"\b(?:pain(?:s|ful|fully)?"
            r"|(?:head|stomach|back|tooth|ear|heart|belly|body)?aches?"
            r"|aching|achy)\b"
        ),
        ["Rest", "Pain relief if needed"],
        ["आराम", "दर्द निवारक"],
    ),
    (
        re.compile(r"\b(?:inflammation|swelling)\b"),
        ["Rest affected area", "Anti-inflammatory"],
        ["आराम", "सूजनरोधी"],
    ),
    (
        re.compile(r"\b(?:genetic\w*|syndromes?|disorders?)\b"),
        ["Specialist evaluation", "Supportive care"],
        ["विशेषज्ञ", "सहायक देखभाल"],
    ),
)


def generic_medical_advice(text, lang="en"):
    """Return generic self-care bullets suggested by keywords in ``text``.

    Args:
        text: English text to scan (typically a retrieved answer). ``None``
            or empty text yields the default advice.
        lang: ``"hi"`` selects the Hindi bullets; any other value selects
            English.

    Returns:
        A list of advice strings in rule order with duplicates removed. When
        no rule matches, a single "seek medical evaluation" bullet.
    """
    t = (text or "").lower()
    care_en = []
    care_hi = []

    for pattern, advice_en, advice_hi in _ADVICE_RULES:
        if pattern.search(t):
            care_en += advice_en
            care_hi += advice_hi

    if not care_en:
        care_en = ["Medical evaluation recommended"]
        care_hi = ["डॉक्टर से परामर्श"]

    chosen = care_hi if lang == "hi" else care_en
    # Two rules share the Hindi bullet "आराम"; dict.fromkeys drops the repeat
    # while keeping first-seen order.
    return list(dict.fromkeys(chosen))

# ======================
# RETRIEVE
# ======================
def retrieve(q):
    """Return the stored answer whose question is most similar to ``q``.

    The query is passed through to_en() first, embedded with the shared
    model, and compared by cosine similarity against the precomputed
    question embeddings; the answer of the top-scoring row is returned.
    """
    query_vec = model.encode([to_en(q)])
    scores = cosine_similarity(query_vec, embeddings)[0]
    best_row = df.iloc[np.argmax(scores)]
    return best_row["answer"]

# ======================
# GENERATE RESPONSE
# ======================
def _bullets(items):
    """Render each item on its own line, prefixed with a bullet."""
    return "\n".join(f"• {item}" for item in items)


def generate(q):
    """Build the formatted reply for a user query in Hindi or English.

    If detect_condition() yields a label that has an entry in ``solutions``,
    the curated summary, details and care bullets are returned in the query's
    language. Otherwise the first sentence of the best retrieved answer is
    shown together with generic_medical_advice() bullets.

    Args:
        q: The raw user query.

    Returns:
        A multi-line string ready to print.
    """
    L = detect_lang(q)
    hindi = L == "hi"

    # detect_condition() only knows English keywords, so a Hindi query has to
    # be matched on its English translation; matching the raw Hindi text
    # could never reach the curated Hindi responses.
    q_en = to_en(q) if hindi else q
    cond = detect_condition(q_en)

    if cond in solutions:
        data = solutions[cond]
        suffix = "hi" if hindi else "en"
        summary = data["summary_" + suffix]
        details = _bullets(data["details_" + suffix])
        care = _bullets(data["care_" + suffix])

        return f"""
🩺 {"संभावित स्थिति" if hindi else "Possible condition"}:
{summary}

📖 {"अधिक जानकारी" if hindi else "Details"}:
{details}

✅ {"क्या करें" if hindi else "What you can do"}:
{care}

⚠️ {"समस्या बनी रहे तो डॉक्टर से मिलें।" if hindi else "Consult a doctor if persistent."}
"""

    ans = retrieve(q)
    # Keep only the first sentence of the retrieved answer.
    short = re.split(r'(?<=[.!?]) +', ans)[0]
    if hindi:
        short = to_hi(short)

    # Advice keywords are English, so scan the untranslated answer.
    generic = _bullets(generic_medical_advice(ans, L))

    return f"""
🩺 {"संभावित जानकारी" if hindi else "Possible information"}:
{short}

✅ {"क्या करें" if hindi else "What you can do"}:
{generic}

⚠️ {"सही निदान हेतु डॉक्टर से मिलें।" if hindi else "Consult a doctor for diagnosis."}
"""

# ======================
# UI
# ======================
# Single-line query box plus an output area that shows the latest reply.
box = widgets.Text(
    description="Query:",
    placeholder="Describe symptoms in Hindi or English...",
    layout=widgets.Layout(width="80%"),
)
out = widgets.Output()


def submit(sender):
    """Handle a submitted query: replace the output area with the new reply.

    Blank or whitespace-only submissions are ignored.
    """
    query = sender.value
    if not query.strip():
        return
    with out:
        clear_output()
        print(generate(query))


box.on_submit(submit)
display(box, out)

print("\n✅ BEAST v3 BULLETPROOF Assistant Ready.")


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/981.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/981.5 kB[0m [31m7.7 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m972.8/981.5 kB[0m [31m18.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.1/55.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/514 [00:00<?, ?it/s]

Text(value='', description='Query:', layout=Layout(width='80%'), placeholder='Describe symptoms in Hindi or En…

Output()


✅ BEAST v3 BULLETPROOF Assistant Ready.
