# 🧬 Health Disparities & AI: CCDA Generator + Google Trends Tracker
Generate synthetic, CCDA-style XML patient records from a healthcare dataset, and track public-interest trends in AI in healthcare using Google Trends.

In [None]:
import pandas as pd
import random
from faker import Faker
from lxml import etree
from pytrends.request import TrendReq
from IPython.display import display, Markdown

# Shared notebook state: fake-data generator, ethnicity pool, section codes.
fake = Faker()

# Pool of ethnicity labels that record generation samples from at random.
ethnicities = [
    "Black",
    "White",
    "Hispanic",
    "Asian",
    "AIAN",
    "NHPI",
]

# Section name -> code written into that section's `code` attribute.
CCDA_CODES = dict(
    demographics="21112-8",
    social_history="29762-2",
    conditions="11450-4",
    medications="10160-0",
    procedures="47519-4",
)


In [None]:
# Read the healthcare CSV (path relative to the working directory) and
# preview its first five rows.
df = pd.read_csv("healthcare_dataset.csv")
df.head(n=5)


In [None]:
def _cell_text(value):
    """Return *value* as XML element text, mapping missing cells to ``""``.

    lxml only accepts ``str``/``bytes`` for ``Element.text``, so a cell that
    pandas parsed as a number or as NaN (an empty CSV field) would raise
    ``TypeError`` if assigned directly.
    """
    if pd.isna(value):
        return ""
    return str(value)


def create_ccda_from_row(row):
    """Build a simplified, CCDA-style XML document from one dataset row.

    The document has a ``ClinicalDocument`` root with one ``section`` element
    per entry used from ``CCDA_CODES`` (demographics, social history,
    conditions, medications, procedures).

    Args:
        row: Mapping-like record (e.g. a pandas ``Series``) providing the
            columns ``Name``, ``Age``, ``Gender``, ``Blood Type``,
            ``Insurance Provider``, ``Admission Type``, ``Billing Amount``,
            ``Medical Condition``, ``Test Results``, ``Medication`` and
            ``Date of Admission``.

    Returns:
        The pretty-printed XML document as a ``str``.

    Raises:
        KeyError: If *row* lacks one of the columns listed above.

    Note:
        The ethnicity is NOT read from *row*; it is drawn at random from
        ``ethnicities`` on every call, so output is not reproducible unless
        the ``random`` module is seeded by the caller.
    """
    root = etree.Element("ClinicalDocument")

    demo = etree.SubElement(root, "section", code=CCDA_CODES["demographics"])
    # Coerce before .title() so a missing name yields "" instead of raising.
    etree.SubElement(demo, "name").text = _cell_text(row["Name"]).title()
    etree.SubElement(demo, "age").text = _cell_text(row["Age"])
    etree.SubElement(demo, "gender").text = _cell_text(row["Gender"])
    etree.SubElement(demo, "blood_type").text = _cell_text(row["Blood Type"])
    etree.SubElement(demo, "ethnicity").text = random.choice(ethnicities)

    social = etree.SubElement(root, "section", code=CCDA_CODES["social_history"])
    etree.SubElement(social, "insurance").text = _cell_text(row["Insurance Provider"])
    etree.SubElement(social, "admission_type").text = _cell_text(row["Admission Type"])
    billing_amount = row["Billing Amount"]
    # A missing amount becomes empty text rather than the literal "nan".
    etree.SubElement(social, "billing_amount").text = (
        "" if pd.isna(billing_amount) else f"{billing_amount:.2f}"
    )

    conditions = etree.SubElement(root, "section", code=CCDA_CODES["conditions"])
    etree.SubElement(conditions, "condition").text = _cell_text(row["Medical Condition"])
    etree.SubElement(conditions, "test_result").text = _cell_text(row["Test Results"])

    meds = etree.SubElement(root, "section", code=CCDA_CODES["medications"])
    etree.SubElement(meds, "medication").text = _cell_text(row["Medication"])

    procs = etree.SubElement(root, "section", code=CCDA_CODES["procedures"])
    etree.SubElement(procs, "procedure").text = f"Admitted on {row['Date of Admission']} for {row['Admission Type']}"

    return etree.tostring(root, pretty_print=True).decode("utf-8")


In [None]:
# Render the first dataset row as XML and print it for a visual check.
sample_xml = create_ccda_from_row(row=df.iloc[0])
print(sample_xml)


In [None]:
def track_public_interest(keywords, timeframe='today 12-m'):
    """Fetch Google Trends interest-over-time data for *keywords*.

    Args:
        keywords: List of search terms passed to pytrends as the keyword
            list. NOTE(review): Google Trends is understood to compare at
            most five terms per request -- confirm against pytrends docs.
        timeframe: pytrends timeframe string. Defaults to ``'today 12-m'``,
            the value previously hard-coded here.

    Returns:
        Whatever ``TrendReq.interest_over_time()`` returns for the payload
        (presumably a pandas ``DataFrame`` indexed by date -- the caller
        uses ``.head()`` on it).
    """
    # hl is the interface language; tz is presumably the timezone offset in
    # minutes (360 ~ US Central) -- verify against the pytrends README.
    pytrends = TrendReq(hl='en-US', tz=360)
    pytrends.build_payload(keywords, cat=0, timeframe=timeframe)
    return pytrends.interest_over_time()

# Search terms whose Google Trends interest is fetched and previewed.
keywords = [
    "AI in healthcare",
    "health disparities",
    "artificial intelligence bias",
    "predictive health",
]
trends_df = track_public_interest(keywords=keywords)
trends_df.head(n=5)
