
# GeneX: Milestone → Therapy Interactive Interview (MVP)

This notebook lets you:
- Load the GeneX milestone→therapy CSV (fixed schema)
- Ask parent-facing questions to estimate a child's **functional level** per therapy category
- Print a summary for **gross motor**, **fine motor**, **speech-language**, **social-communication**, and **self-help**
- Show three actionable **activities with URLs** for each category

> **Notes**  
> - This is an MVP using a binary-search style flow: if a milestone is met at an age, we search *up*; otherwise we search *down*.  
> - Outputs are **educational**; consult your pediatrician/therapist for personalized guidance.



## 1. Setup

- Update the CSV path below if needed.
- The file should be the **fixed** version with three activities and split duration/frequency columns.


In [5]:

# Location of the milestone→therapy CSV that the load cell passes to `load_data`.
# The file must follow the fixed schema (every column listed in REQUIRED_COLS).
# This default is an absolute, machine-specific Windows path — edit it for your setup.
# NOTE(review): the recorded output of the load cell in this notebook is a
# "CSV is missing required columns" ValueError, so this `_cleaned` file may not be
# the fixed-schema export — confirm the path before running.
CSV_PATH = r"C:\Users\T490\Downloads\Genetics-Dashboard\src\mvp\genex_milestone_therapy_cleaned.csv"

# If running on this workspace, you can also use the sandbox path:
# CSV_PATH = "/mnt/data/genex_milestone_therapy_100_fixed.csv"



## 2. Imports and constants


In [6]:

import pandas as pd
from IPython.display import display


# Therapy categories covered by the interview, in the order they are asked
# and reported.
CATEGORIES = [
    "gross_motor",
    "fine_motor",
    "speech_language",
    "social_communication",
    "self_help",
]

# Columns the CSV must contain: milestone metadata first, then the
# name/url/duration/frequency quadruple for each of the three activities,
# then the trailing reference columns.
REQUIRED_COLS = [
    "age_range_months",
    "max_age_months",
    "milestone_category",
    "milestone",
    "observed_issue",
    "recommended_therapy",
    *(
        template.format(slot)
        for slot in (1, 2, 3)
        for template in ("activity_{}_name", "activity_{}_url", "duration_{}", "frequency_{}")
    ),
    "red_flags",
    "source_urls",
]



## 3. Load and preview the data

This cell:
- Loads the CSV
- Validates required columns
- Normalizes `max_age_months` to integer
- Shows a quick preview


In [7]:


def load_data(csv_path: str) -> pd.DataFrame:
    """Load the milestone→therapy CSV and normalise it for the interview.

    Steps:
      1. Read the CSV and check that every column in ``REQUIRED_COLS`` exists.
      2. Coerce ``max_age_months`` to a number, drop rows where that fails,
         and cast the column to ``int``.
      3. Turn the text columns into stripped strings, with missing cells
         becoming the empty string.
      4. Sort by ``milestone_category`` then ``max_age_months`` (ascending).

    Args:
        csv_path: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        The cleaned, sorted DataFrame.

    Raises:
        ValueError: If one or more required columns are missing.
    """
    df = pd.read_csv(csv_path)

    # Validate schema
    missing = [c for c in REQUIRED_COLS if c not in df.columns]
    if missing:
        raise ValueError(f"CSV is missing required columns: {missing}")

    # Normalize ages: unparsable values become NaN and those rows are dropped.
    df["max_age_months"] = pd.to_numeric(df["max_age_months"], errors="coerce")
    df = df.dropna(subset=["max_age_months"]).copy()
    df["max_age_months"] = df["max_age_months"].astype(int)

    # Strip selected text fields
    text_cols = [
        "milestone_category", "milestone", "observed_issue", "recommended_therapy",
        "activity_1_name", "activity_1_url", "duration_1", "frequency_1",
        "activity_2_name", "activity_2_url", "duration_2", "frequency_2",
        "activity_3_name", "activity_3_url", "duration_3", "frequency_3",
        "source_urls",
    ]
    for col in text_cols:
        # Fill missing cells *before* the string cast: astype(str) alone turns
        # NaN into the literal text "nan", which is truthy and therefore slips
        # past the `if not name` checks used when activities are printed.
        df[col] = df[col].fillna("").astype(str).str.strip()

    return df.sort_values(by=["milestone_category", "max_age_months"], ascending=[True, True])

# Read the CSV once; later cells use the module-level `df`.
df = load_data(CSV_PATH)
print("Loaded", len(df), "rows from:", CSV_PATH)
# Preview, capped at the first 100 rows.
display(df.iloc[:100])


ValueError: CSV is missing required columns: ['age_range_months', 'observed_issue', 'activity_1_name', 'activity_1_url', 'duration_1', 'frequency_1', 'activity_2_name', 'activity_2_url', 'duration_2', 'frequency_2', 'activity_3_name', 'activity_3_url', 'duration_3', 'frequency_3', 'red_flags', 'source_urls']


## 4. Interview utilities

We will:
- Build a representative table per category (one row per `max_age_months`)
- Ask **yes/no** milestone questions and adjust search bounds accordingly
- Return the highest age at which the milestone is met (or the category's lowest age if none of the asked milestones are met)


In [5]:

def pick_representative_rows(df: pd.DataFrame, category: str) -> pd.DataFrame:
    """Return one representative row per ``max_age_months`` for *category*.

    Rows of the category are ordered by age and then by milestone text; for
    each age the first row in that order is kept. Only the milestone, therapy,
    activity and source columns are returned, with a fresh 0..n-1 index.
    """
    wanted = ["max_age_months", "milestone", "recommended_therapy"]
    for slot in (1, 2, 3):
        wanted += [
            f"activity_{slot}_name",
            f"activity_{slot}_url",
            f"duration_{slot}",
            f"frequency_{slot}",
        ]
    wanted.append("source_urls")

    in_category = df.loc[df["milestone_category"] == category]
    ordered = in_category.sort_values(by=["max_age_months", "milestone"])
    first_per_age = ordered.drop_duplicates(subset=["max_age_months"], keep="first")
    return first_per_age.loc[:, wanted].reset_index(drop=True)

def ask_yes_no(prompt: str) -> bool:
    """Ask *prompt* on stdin until the reply is a recognisable yes or no.

    The reply is stripped and lower-cased; "y"/"yes" give True and
    "n"/"no" give False. Anything else prints a reminder and asks again.
    """
    verdicts = {"y": True, "yes": True, "n": False, "no": False}
    while True:
        reply = input(prompt + " (y/n): ").strip().lower()
        if reply in verdicts:
            return verdicts[reply]
        print("Please answer 'y' or 'n'.")

def find_functional_level_for_category(rep_df: pd.DataFrame, starting_age: int, *, ask=None) -> int:
    """Binary-search the category's ages for the highest milestone the child meets.

    The first question is asked at the age closest to *starting_age* (the
    lower age wins a tie). A "yes" moves the search to older ages, a "no" to
    younger ones, and each age is asked at most once.

    Args:
        rep_df: Frame with ``max_age_months`` and ``milestone`` columns. When
            an age occurs more than once, the first row's milestone is used.
        starting_age: Age in months at which to start asking.
        ask: Optional callable ``ask(question: str) -> bool`` that supplies
            the answers. Defaults to the interactive ``ask_yes_no``; passing
            a callable lets the search run without stdin.

    Returns:
        The highest age whose milestone was answered "yes". If every asked
        milestone was answered "no", the lowest available age is returned as
        a floor, even though that milestone was not met.

    Raises:
        ValueError: If *rep_df* contains no ages.
    """
    if ask is None:
        ask = ask_yes_no

    ages = sorted(set(rep_df["max_age_months"].tolist()))
    if not ages:
        raise ValueError("No ages available for this category.")

    # Milestone text per age; setdefault keeps the first row seen for an age.
    milestone_for = {}
    for age, text in zip(rep_df["max_age_months"].tolist(), rep_df["milestone"].tolist()):
        milestone_for.setdefault(age, text)

    lo, hi = 0, len(ages) - 1
    # start near the provided age
    idx = min(range(len(ages)), key=lambda i: abs(ages[i] - starting_age))
    best_met_idx = -1

    # Each answer removes `idx` from [lo, hi] and the next probe is taken
    # from inside the remaining range, so no age is asked twice and the loop
    # ends once the range is empty.
    while lo <= hi:
        age = ages[idx]
        question = (
            f'At ~{age} months, a typical milestone here is: "{milestone_for[age]}".\n'
            "Does your child meet this milestone now?"
        )
        if ask(question):
            best_met_idx = idx
            lo = idx + 1
        else:
            hi = idx - 1
        idx = (lo + hi) // 2

    return ages[best_met_idx] if best_met_idx != -1 else ages[0]

def print_category_summary(category: str, rep_df: pd.DataFrame, level_age: int):
    """Print the estimated level and the activities for one category.

    The first row of *rep_df* whose ``max_age_months`` equals *level_age*
    supplies the activities and sources. Activities with an empty name are
    skipped; duration and frequency are shown in brackets when non-blank.
    """
    row = rep_df.loc[rep_df["max_age_months"] == level_age].iloc[0]
    heading = category.replace("_", " ").title()

    print("\n" + "=" * 72)
    print(f"[{heading}] Estimated functional level: ~{level_age} months")
    print("- Suggested therapies/activities:")

    # Read all three activity slots up front, then print the named ones.
    activities = [
        (
            row[f"activity_{slot}_name"],
            row[f"activity_{slot}_url"],
            row[f"duration_{slot}"],
            row[f"frequency_{slot}"],
        )
        for slot in (1, 2, 3)
    ]
    for number, (name, url, dur, freq) in enumerate(activities, start=1):
        if not name:
            continue
        pieces = []
        for raw in (dur, freq):
            cleaned = (raw or "").strip()
            if cleaned:
                pieces.append(cleaned)
        dosage = " — ".join(pieces)
        suffix = f" [{dosage}]" if dosage else ""
        print(f"  {number}. {name} ({url})" + suffix)

    print(f"- Sources: {row['source_urls']}")



## 5. Run the interactive interview

- Enter the child's **chronological age (months)**.  
- Answer the milestone questions for each category with **y/n**.  
- The notebook will print the **estimated functional level** and **three activities** per category.


In [None]:

# Ask for chronological age once; keep asking until the reply is a whole
# number of months between 0 and 120 inclusive.
while True:
    age_input = input("How old is your baby (in months)? ").strip()
    try:
        candidate_age = int(age_input)
    except ValueError:
        candidate_age = -1  # not a number: make the range check below reject it
    if 0 <= candidate_age <= 120:
        START_AGE = candidate_age
        break
    print("Please enter a valid age in whole months (e.g., 6, 12, 24).")

# Interview each category in turn; `results` maps category -> (level, rep table).
results = {}
for cat in CATEGORIES:
    rep = pick_representative_rows(df, cat)
    if rep.empty:
        print(f"\n(No data available for category: {cat})")
        continue
    category_ages = rep["max_age_months"]
    # Clamp the child's age into the span this category has data for.
    start_for_cat = max(START_AGE, category_ages.min())
    start_for_cat = min(start_for_cat, category_ages.max())
    print("\n" + "#" * 72)
    print(f"Category: {cat.replace('_',' ').title()}")
    results[cat] = (find_functional_level_for_category(rep, start_for_cat), rep)

print("\n" + "=" * 72)
print("Summary: Estimated functional levels by category")
for cat, outcome in results.items():
    print(f" - {cat.replace('_',' ').title()}: ~{outcome[0]} months")

print("\nRecommendations by category")
for cat, outcome in results.items():
    level, rep = outcome
    print_category_summary(cat, rep, level)


Please enter a valid age in whole months (e.g., 6, 12, 24).

########################################################################
Category: Gross Motor



## 6. (Optional) Export results to JSON

If you want to persist the summary for your app, run this cell to define `export_results_to_json(...)`, then call it (see the commented example) to write the findings to a JSON file.


In [None]:

import json
from datetime import datetime

def export_results_to_json(results_dict, out_path="genex_interview_results.json"):
    """Write the interview results to a UTF-8 JSON file.

    Args:
        results_dict: Mapping of category -> ``(level, rep)``, where ``rep``
            is the category's representative table and ``level`` is one of
            its ``max_age_months`` values.
        out_path: Destination file; overwritten if it exists.

    Returns:
        ``out_path``, unchanged.

    The JSON object has one entry per category (functional level, the three
    activities, sources) plus a top-level ``"generated_at"`` UTC timestamp in
    ISO format with a trailing ``"Z"``.
    """
    # Local import: only `datetime` itself is imported at cell level.
    from datetime import timezone

    payload = {}
    for cat, (level, rep) in results_dict.items():
        row = rep[rep["max_age_months"] == level].iloc[0]
        payload[cat] = {
            "functional_level_months": int(level),
            "activities": [
                {
                    "name": row[f"activity_{slot}_name"],
                    "url": row[f"activity_{slot}_url"],
                    "duration": row[f"duration_{slot}"],
                    "frequency": row[f"frequency_{slot}"],
                }
                for slot in (1, 2, 3)
            ],
            "sources": row["source_urls"],
        }

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop the tzinfo so the text keeps its "...Z" form instead of
    # gaining a "+00:00" suffix.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    payload["generated_at"] = now_utc.isoformat() + "Z"

    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)
    return out_path

# Example usage (uncomment to save):
# out_json = export_results_to_json(results)
# print("Saved:", out_json)



## 7. Export a printable PDF report

This cell defines `export_results_to_pdf(...)`, which generates a **one- to two-page PDF** summary for parents:
- Child's chronological age and estimated **functional level** per category  
- **Three activities** (with durations/frequencies) and source links per category  
- A brief disclaimer  

> If `reportlab` is unavailable in your environment, the function will gracefully fall back to saving a `.txt` report.


In [None]:

from datetime import datetime

_REPORT_DISCLAIMER = (
    "Disclaimer: GeneX suggestions are educational and drawn from reputable child-development sources. "
    "They are not a diagnosis. Please consult your pediatrician or therapist for personalized guidance."
)


def _report_utc_stamp():
    """Return the current UTC time as 'YYYY-MM-DD HH:MM:SSZ'."""
    # Local import: only `datetime` itself is imported at cell level.
    # datetime.utcnow() is deprecated since Python 3.12, hence now(timezone.utc).
    from datetime import timezone

    return datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%SZ')


def _report_title(category):
    """Turn a category key such as 'gross_motor' into 'Gross Motor'."""
    return category.replace('_', ' ').title()


def _report_row(rep, level):
    """Return the first row of *rep* whose max_age_months equals *level*."""
    return rep[rep["max_age_months"] == level].iloc[0]


def _report_activity_entries(row):
    """Return (label, url) pairs for the activities in *row* that have a name."""
    entries = []
    for slot in (1, 2, 3):
        name = row[f"activity_{slot}_name"]
        if not name:
            continue
        dosage = " — ".join(
            part
            for part in (
                (row[f"duration_{slot}"] or "").strip(),
                (row[f"frequency_{slot}"] or "").strip(),
            )
            if part
        )
        label = f"{slot}. {name}" + (f" [{dosage}]" if dosage else "")
        entries.append((label, row[f"activity_{slot}_url"]))
    return entries


def _write_pdf_report(results_dict, child_age_months, out_path):
    """Render the report with reportlab; raises ImportError if it is missing."""
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import letter
    from reportlab.lib.units import inch
    from reportlab.pdfgen import canvas

    c = canvas.Canvas(out_path, pagesize=letter)
    width, height = letter

    margin = 0.75 * inch
    x = margin
    y = height - margin
    bottom = margin + 0.7 * inch  # lowest baseline allowed before a page break
    max_width = width - 2 * margin

    ink = colors.HexColor('#0F172A')
    slate = colors.HexColor('#334155')
    teal = colors.HexColor('#0F766E')

    def place_line(text, font, size, color, advance, reserve=0):
        """Draw one line at the cursor, breaking the page first if needed.

        *reserve* is extra room that must remain below the line; headings use
        it so they are not left alone at the bottom of a page.
        """
        nonlocal y
        if y - reserve < bottom:
            c.showPage()
            y = height - margin
        # showPage() resets the canvas graphics state, so the font and fill
        # colour are applied for every line rather than once per paragraph.
        c.setFillColor(color)
        c.setFont(font, size)
        c.drawString(x, y, text)
        y -= advance

    def draw_heading(text, size=16, color=ink):
        place_line(text, "Helvetica-Bold", size, color, 0.3 * inch, reserve=0.3 * inch)

    def draw_subheading(text, size=12, color=ink):
        place_line(text, "Helvetica-Bold", size, color, 0.22 * inch, reserve=0.22 * inch)

    def draw_paragraph(text, size=10, color=colors.black, leading=12):
        """Wrap long text to fit page width."""
        line = ""
        for word in text.split():
            candidate = f"{line} {word}".strip()
            # An empty line always takes the word, even if it is wider than
            # the page (e.g. a long URL): it cannot be split, and flushing
            # first would only emit a blank line.
            if not line or c.stringWidth(candidate, "Helvetica", size) <= max_width:
                line = candidate
            else:
                place_line(line, "Helvetica", size, color, leading)
                line = word
        if line:
            place_line(line, "Helvetica", size, color, leading)

    # Header
    draw_heading("GeneX — Milestone to Therapy Summary", size=18)
    draw_paragraph(
        f"Generated: {_report_utc_stamp()}  |  Child age (reported): {child_age_months} months",
        size=9,
        color=slate,
    )

    # Summary levels
    draw_subheading("Estimated functional levels by category", size=13)
    for cat, (level, _) in results_dict.items():
        draw_paragraph(f"• {_report_title(cat)}: ~{int(level)} months", size=10)

    # Activities per category
    for cat, (level, rep) in results_dict.items():
        row = _report_row(rep, level)
        draw_subheading(f"{_report_title(cat)} — activities", size=12, color=teal)
        for label, url in _report_activity_entries(row):
            draw_paragraph(label, size=10)
            draw_paragraph(f"   Link: {url}", size=9, color=colors.HexColor('#1D4ED8'))
        draw_paragraph(f"Sources: {row['source_urls']}", size=9, color=slate)

    # Disclaimer: the colour is passed to draw_paragraph, because a fill
    # colour set on the canvas beforehand is replaced by the paragraph's own.
    draw_paragraph(_REPORT_DISCLAIMER, size=8, color=colors.HexColor('#64748B'), leading=10)

    c.save()
    return out_path


def _write_text_report(results_dict, child_age_months, txt_path):
    """Write the plain-text version of the report and return *txt_path*."""
    lines = [
        "GeneX — Milestone to Therapy Summary",
        f"Generated: {_report_utc_stamp()}",
        f"Child age (reported): {child_age_months} months",
        "",
        "Estimated functional levels by category",
    ]
    for cat, (level, _) in results_dict.items():
        lines.append(f" - {_report_title(cat)}: ~{int(level)} months")
    lines.append("")
    for cat, (level, rep) in results_dict.items():
        row = _report_row(rep, level)
        lines.append(f"{_report_title(cat)} — activities")
        for label, url in _report_activity_entries(row):
            lines.append(f"  {label}")
            lines.append(f"     Link: {url}")
        lines.append(f"Sources: {row['source_urls']}")
        lines.append("")
    lines.append(_REPORT_DISCLAIMER)

    with open(txt_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    return txt_path


def export_results_to_pdf(results_dict, child_age_months, out_path="genex_interview_report.pdf"):
    """Create a concise printable report from interview results.

    Args:
        results_dict: Mapping of category -> ``(level, rep)``, where ``rep``
            is the category's representative table and ``level`` is one of
            its ``max_age_months`` values.
        child_age_months: The reported chronological age, printed as given.
        out_path: Destination of the PDF.

    Returns:
        The path actually written: ``out_path`` for a PDF, or the same path
        with its extension replaced by ``.txt`` when the fallback was used.

    If reportlab is not available, or PDF generation fails for any other
    reason, a plain-text report is written instead. Errors raised while
    writing that text report propagate to the caller.
    """
    try:
        return _write_pdf_report(results_dict, child_age_months, out_path)
    except Exception:
        # Deliberate best-effort fallback: any PDF failure yields a .txt report.
        import os

        # splitext only strips a real file extension; splitting on the last
        # "." would cut at a dot in a directory name ("v1.2/report").
        txt_path = os.path.splitext(out_path)[0] + ".txt"
        return _write_text_report(results_dict, child_age_months, txt_path)

# Example usage (uncomment after running the interview above):
# out_file = export_results_to_pdf(results, child_age_months=START_AGE, out_path="genex_interview_report.pdf")
# print("Saved report to:", out_file)
