In [16]:
import pandas as pd
import os

In [17]:
# Fixed location of the onboarding CSV used by this notebook.
# The path is relative, so it resolves against the kernel's working directory.
OUT_PATH = "data/Onboarding_Q&A.csv"

In [18]:
# Persona name for this run. Intended usage: set a name here to pre-fill the
# "Persona" column, or leave it as "" to be asked interactively.
# NOTE(review): confirm that the answer-collection cell actually reads this variable.
PERSONA_NAME = "test subject"  # e.g., "Sarah Ahmed"

In [19]:
# --- Column headers, read from the header row of the existing CSV ---
# nrows=0 parses only the header line, so no data rows are loaded here.
# The CSV at OUT_PATH must already exist: read_csv raises if the file is missing.
HEADERS = pd.read_csv(OUT_PATH, nrows=0).columns.tolist()

In [20]:
# --- Friendly prompts to collect answers (MCQs shown in brackets for guidance) ---
# Maps a CSV column header to the question shown to the user. Headers without an
# entry here fall back to a generic "<header>:" prompt in the collection cell.
PROMPTS = {
    "Persona": "Persona name (e.g., 'Sarah Ahmed'): ",
    "Preferred name or nickname": "What is your preferred name or nickname?",
    "Age": "What is your age?",
    "First language(s)": "What is your first language? Any other languages you speak?",
    # Legacy key: the en dash was mis-decoded (UTF-8 bytes read as cp1252). Kept so a
    # CSV whose header really contains this text still gets the friendly prompt.
    "Personality (3â€“5 words)": "Describe your personality in 3–5 words (e.g., calm, funny, thoughtful, direct):",
    # Correctly encoded spelling of the same header, so a CSV decoded as UTF-8 matches too.
    "Personality (3–5 words)": "Describe your personality in 3–5 words (e.g., calm, funny, thoughtful, direct):",
    "Preferred communication tone(s)": "How do you prefer to communicate? [Formal; Casual; Empathetic; Witty; Sarcastic; To-the-point; Other]",
    "Tones/styles to avoid": "What tones/styles should be avoided? (e.g., condescending, overly emotional)",
    "Response length preference": "Do you prefer short or detailed responses? [Short; Detailed; Depends]",
    "Use of emojis/exclamations": "Should the assistant use emojis/exclamations? [Yes; No; I don’t mind]",
    "Words/expressions to avoid": "Any words/expressions/tones that frustrate or bother you?",
    "Explanation style (clear vs. simple)": "Explanation style? [Explained clearly; Kept simple; Depends]",
    "Relationship status": "What is your relationship status? [Single; Married; Divorced; Widowed; Other]",
    "Children (count)": "Do you have children? If yes, how many? (put a number; 0 if none)",
    "Children details (genders/ages)": "Children details (genders/ages) [Boy; Girl; Other] (optional):",
    "Living situation (alone/with)": "Do you live alone or with someone? [Alone; With family; With caregiver; Other]",
    "Who do you live with?": "Who do you live with? [Spouse; Parent; Adult children; Sibling; Flatmate; Carer; Other]",
    "Events per week": "How often do you attend events per week? [None; 1–2; 3–4; 5 or more]",
    "Event types usually attended": "What kinds of events do you attend? [Religious; Medical; Family visits; Club meetings; Online calls; Other]",
    "Hospital/clinic visits per month": "Hospital/clinic visits per month? [0; 1; 2–3; More than 3; It varies]",
    "Who you frequently talk to": "Who do you frequently talk to? [Family; Friends; Carers; Colleagues; Other]",
    "Where conversations usually happen": "Where are you usually during conversations? [Home; Clinic; Work; Social Events; Online; Other]",
    "Common conversation types": "Common conversation types? [Small talk; Emotional support; Making requests; Jokes/humour; Giving instructions; Other]",
    "People you'd like help communicating with": "People you'd like help communicating with? (e.g., doctor, sibling, manager)",
    "Situations to sound more (confident/polite/warm/firm/funny/professional)": "Situations you'd like help sounding more… [Confident; Polite; Warm; Firm; Funny; Professional; Other]",
    "Conversations that cause stress/fatigue": "Conversations that cause stress or fatigue?",
    "Misunderstood tone? (Yes/No/Sometimes)": "Do people sometimes misunderstand your tone/intent? [Yes; No; Sometimes]",
    "Hobbies/interests": "What are your hobbies or interests?",
    "Topics enjoyed most": "What topics do you enjoy most?",
    "Topics to avoid": "What topics would you prefer to avoid?",
    "Moments/topics that bring joy/comfort": "What moments/topics bring you joy or comfort?",
    "Stories/memories/songs you return to": "Stories, memories, or songs you often return to?",
    "How you express when low/frustrated": "How do you usually express yourself when feeling low or frustrated?",
    "Something you'd like to say more often": "Something you'd like to say more often but find hard to express?",
    "Traits/values that matter most": "Traits/values that matter most? [Honesty; Kindness; Humour; Clarity; Privacy; Other]",
    "Offensive jokes? (Y/N) + details": "Are there offensive jokes/topics? [Y/N] If yes, please specify:",
    "Biggest source of emotional strength": "Biggest source of emotional strength? [Faith; Family; Friends; Music; Personal values; Other]",
    "Favourite phrases/sayings": "Favourite phrases or sayings?",
    "Reflect religion/culture/values? (Y/N/Specify)": "Reflect your religion/culture/values in responses? [Yes; No; Please specify]",
    "Remember past conversations? (Y/N/Ask)": "Remember past conversations and bring them up? [Yes; No; Ask me each time]",
    "Default emotional tone (optimistic/realistic/etc.)": "Default emotional tone? [Optimistic; Realistic; Grateful; Calm; Other]",
    "Humour style (light/dry/puns/silly/none/other)": "Humour style? [Light; Dry; Puns; Silly; None; Other]",
    "Effort to select/type a response": "Effort to select/type a response? [Very little; Moderate effort; High effort]",
    "Assistant behaviour (suggest/wait/mix)": "Assistant behaviour? [Suggest replies; Wait for me; Mix of both]",
    "Fatigue/attention notes": "Fatigue/attention notes we should consider?",
    "How quickly to get to the point": "How quickly to ‘get to the point’? [Very quickly; Naturally; With warmth and buildup]",
    "Assistive tools used": "Assistive communication tools? [Eye-tracking; Head switches; Speech-to-text; AAC device; Other]",
    "Preferred device": "Preferred device? [Tablet; Desktop; Mobile phone; Smartwatch; Other]",
    "Typical conversation length": "How long can you typically engage before rest? [<5 min; 5–10 min; 10–20 min; >20 min]",
}

In [21]:
# --- Load existing rows (to ensure we stick to these columns) ---
# Every cell is read back as literal text:
#   * dtype=str stops numeric inference, so an answer like "34" is never rewritten
#     as "34.0" when another row in the same column is blank.
#   * keep_default_na=False stops pandas from turning typed answers that match its
#     NA markers (for example "NA" or "N/A") into NaN, which would be saved back
#     as empty cells on the next run.
if os.path.exists(OUT_PATH):
    try:
        existing = pd.read_csv(
            OUT_PATH, encoding="utf-8-sig", dtype=str, keep_default_na=False
        )
    except UnicodeDecodeError:
        # Fallback: retry with pandas' default encoding if the BOM-aware decode fails.
        existing = pd.read_csv(OUT_PATH, dtype=str, keep_default_na=False)
    # Ensure the file's columns match the expected HEADERS
    missing_in_file = [c for c in HEADERS if c not in existing.columns]
    extra_in_file = [c for c in existing.columns if c not in HEADERS]
    if missing_in_file:
        raise ValueError(f"Existing CSV is missing expected columns: {missing_in_file}")
    if extra_in_file:
        # Keep only the expected columns, in the order of HEADERS. Safe to index
        # directly: the check above guarantees every header exists in the file.
        existing = existing[HEADERS]
else:
    # Create an empty DataFrame with the exact headers
    existing = pd.DataFrame(columns=HEADERS)


In [22]:
# --- Collect answers (one persona per run) ---
# Start with every column blank so skipped questions are saved as empty strings.
row = {col: "" for col in HEADERS}

# Honour the pre-filled persona from the config cell: the persona question is only
# asked when PERSONA_NAME is blank (previously the variable was never consulted).
if "Persona" in row:
    row["Persona"] = (PERSONA_NAME or "").strip()

print("Please answer the onboarding questions below. Press Enter to skip any field.\n")
for col in HEADERS:
    # Columns without a friendly prompt fall back to "<column name>:".
    prompt = PROMPTS.get(col, f"{col}:")
    if col == "Persona":
        # Ask only when no name was pre-filled.
        if row[col] == "":
            row[col] = input(prompt + "\n> ").strip()
        continue
    row[col] = input(f"\n{prompt}\n> ").strip()

Please answer the onboarding questions below. Press Enter to skip any field.



In [23]:
# --- Add this run's answers beneath the existing rows and rewrite the CSV ---
# Both frames are laid out in HEADERS order before stacking, so the saved file
# keeps the column order it started with.
new_row_df = pd.DataFrame([row], columns=HEADERS)
existing_in_order = existing.loc[:, HEADERS]
out_df = pd.concat((existing_in_order, new_row_df), ignore_index=True)

# utf-8-sig writes a BOM at the start of the file; index=False omits the row index.
out_df.to_csv(OUT_PATH, encoding="utf-8-sig", index=False)

print(f"✅ Saved/updated: {OUT_PATH} (columns preserved exactly)")

✅ Saved/updated: data/Onboarding_Q&A.csv (columns preserved exactly)
