# In [6]:
import json
from random import choice, randint, uniform
from pathlib import Path

# Simulated symptom catalogue in an HPO-like layout.
# Each row is (identifier, display name, alternative names); the rows are
# expanded below into dicts with the keys "id", "name" and "synonyms".
# NOTE(review): the entries are described as simulated -- confirm the
# identifiers against the real ontology before relying on them.
_SYMPTOM_ROWS = (
    ("HP_0002090", "Cough", ("Tussis", "Persistent cough")),
    ("HP_0001945", "Fever", ("Pyrexia", "Elevated temperature")),
    ("HP_0001250", "Seizure", ("Convulsion", "Epileptic seizure")),
    ("HP_0002011", "Nausea", ("Queasiness",)),
    ("HP_0001627", "Chest pain", ("Thoracic pain",)),
    ("HP_0002355", "Fatigue", ("Tiredness", "Exhaustion")),
    ("HP_0000739", "Anxiety", ("Nervousness", "Restlessness")),
    ("HP_0002591", "Weight loss", ("Decreased body weight",)),
    ("HP_0002237", "Sore throat", ("Throat pain",)),
    ("HP_0002105", "Shortness of breath", ("Dyspnea",)),
    ("HP_0002315", "Headache", ("Cephalalgia",)),
    ("HP_0001947", "Vomiting", ("Emesis",)),
    ("HP_0001873", "Bleeding", ("Hemorrhage",)),
    ("HP_0001871", "Bruising", ("Ecchymosis",)),
    ("HP_0002013", "Abdominal pain", ("Stomach ache",)),
    ("HP_0002354", "Insomnia", ("Sleeplessness",)),
    ("HP_0002326", "Dizziness", ("Lightheadedness",)),
    ("HP_0001251", "Memory loss", ("Amnesia",)),
    ("HP_0001882", "Diarrhea", ("Frequent stools",)),
    ("HP_0000736", "Depression", ("Low mood",)),
)

# Key order (id, name, synonyms) is kept so the serialized JSON is unchanged;
# every entry gets its own fresh synonyms list.
symptoms = [
    {"id": hp_id, "name": label, "synonyms": list(aliases)}
    for hp_id, label, aliases in _SYMPTOM_ROWS
]

# Simulated condition catalogue.
# Each row is (identifier, name, description, age range, gender); the rows are
# expanded below into dicts whose "demographic_relevance" entry nests the
# age range and gender.
_CONDITION_ROWS = (
    (
        "COND001",
        "Tuberculosis",
        "Infectious disease caused by Mycobacterium tuberculosis.",
        "10-60",
        "all",
    ),
    (
        "COND002",
        "Anemia",
        "Condition with decreased red blood cell count or hemoglobin.",
        "0-100",
        "all",
    ),
    (
        "COND003",
        "Generalized Anxiety Disorder",
        "Persistent and excessive worry about different things.",
        "15-80",
        "all",
    ),
    (
        "COND004",
        "COVID-19",
        "Viral respiratory infection caused by SARS-CoV-2.",
        "0-100",
        "all",
    ),
    (
        "COND005",
        "Migraine",
        "Recurrent headaches often accompanied by nausea and light sensitivity.",
        "10-70",
        "all",
    ),
)

# Key order is kept identical to the serialized layout:
# id, name, description, demographic_relevance{age_range, gender}.
conditions = [
    {
        "id": cond_id,
        "name": title,
        "description": summary,
        "demographic_relevance": {"age_range": ages, "gender": gender},
    }
    for cond_id, title, summary, ages, gender in _CONDITION_ROWS
]

# Generate symptom-condition links with random weights.
#
# Every symptom is linked to between 1 and 3 *distinct* conditions. Conditions
# are drawn without replacement, so a symptom never ends up with two links to
# the same condition carrying conflicting relationship types or weights
# (drawing with replacement allowed exactly that).
# No seed is set in this block, so the generated links differ from run to run.
links = []
for symptom in symptoms:
    # Fresh copy per symptom: popping from it removes already-used conditions
    # from the draw without touching the shared `conditions` list.
    remaining = list(conditions)
    # Never ask for more links than there are conditions left to pick from;
    # an empty `conditions` list simply yields no links.
    link_count = min(randint(1, 3), len(remaining))
    for _ in range(link_count):
        condition = remaining.pop(randint(0, len(remaining) - 1))
        links.append({
            "symptom_id": symptom["id"],
            "condition_id": condition["id"],
            "relationship_type": choice(["indicative_of", "supportive_of"]),
            # Weight is drawn from [0.4, 1.0] and rounded to two decimals.
            "weight": round(uniform(0.4, 1.0), 2)
        })

# Persist the three generated datasets as pretty-printed JSON files.
# NOTE(review): the destination is an absolute path on one specific machine;
# running this elsewhere will create that directory tree under the local
# filesystem rather than inside the project.
output_dir = Path("/Users/samadeepsengupta/Library/Mobile Documents/com~apple~CloudDocs/ACADEMIC/CSE'25/Sem 8/NLP/aod-based-semantic/semantic-symptom-search/data")
# Create the directory (and any missing parents); an existing one is fine.
output_dir.mkdir(parents=True, exist_ok=True)

# One file per dataset, each serialized with a two-space indent.
symptom_path = output_dir / "symptoms.json"
symptom_path.write_text(json.dumps(symptoms, indent=2))

condition_path = output_dir / "conditions.json"
condition_path.write_text(json.dumps(conditions, indent=2))

link_path = output_dir / "links.json"
link_path.write_text(json.dumps(links, indent=2))

# Trailing expression: shows the written file names when run as a notebook cell.
symptom_path.name, condition_path.name, link_path.name

# Output: ('symptoms.json', 'conditions.json', 'links.json')