# 🧠 Questionnaire & Conversation Rating Pipeline

This notebook extracts and rates responses for:
- **PHQ-9** (depression)
- **GAD-7** (anxiety)
- **ASRM** (activation/mania)
- **Casual conversations** (free talk between persona and friend)

Each section:
1. Parses the JSONs from `Conversations/`
2. Extracts choice-based scores for structured questionnaires
3. Uses an LLM to infer scores from **casual dialogues**
4. Saves results in `Analysis/` as CSV files for further correlation studies

In [6]:
import os, json, csv, re
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI

# Load environment (for OPENAI_API_KEY)
# NOTE(review): load_dotenv() is called before OpenAI() is constructed so the
# key is already in the environment — presumably OpenAI() picks up
# OPENAI_API_KEY from there; confirm against the openai client docs.
load_dotenv()
# Shared client; rate_convo() uses it for the casual-conversation scoring calls.
client = OpenAI()

# Utility
def ensure_dir(path):
    """Create the parent directory of *path* if it does not exist yet.

    Args:
        path: Path of a file that is about to be written. Only its directory
            part is created; the file itself is not touched.

    Returns:
        None.
    """
    parent = os.path.dirname(path)
    # dirname() is "" for a bare filename (file in the current directory);
    # os.makedirs("") raises FileNotFoundError, and there is nothing to create.
    if parent:
        os.makedirs(parent, exist_ok=True)


## 📘 PHQ-9 Questionnaire Ratings
Extracts numeric scores from `Conversations/PHQ9/Question based Conversation/*.json`.  
Each answer includes a `Choice:` line such as “Not at all”, “Several days”, etc.

In [7]:
# =================== PHQ-9 RATING SCRIPT ===================

# Where the PHQ-9 transcripts are read from and where the summary CSV is written.
OUTPUT_CSV = "Analysis/PHQ9/Questionnaire_summary.csv"
INPUT_DIR = "Conversations/PHQ9/Question based Conversation"
ensure_dir(OUTPUT_CSV)

# Answer label -> item score. The labels are listed in ascending order, so each
# label's position in the tuple is its score (0-3).
CHOICE_MAP = {
    label: score
    for score, label in enumerate(
        (
            "not at all",
            "several days",
            "more than half the days",
            "nearly every day",
        )
    )
}

def extract_choice(answer: str):
    """Translate one PHQ-9 answer into its numeric score.

    The text after a ``Choice:`` marker is inspected when the marker is
    present; otherwise the whole (lower-cased) answer is searched.

    Args:
        answer: Raw answer text; may be empty or None.

    Returns:
        The score of the first CHOICE_MAP label found in the inspected text,
        or None when the answer is empty or no label occurs in it.
    """
    if not answer:
        return None

    lowered = answer.lower()
    found = re.search(r"choice\s*:\s*(.+)", lowered)
    candidate = lowered if found is None else found.group(1).strip()

    # First label (in CHOICE_MAP order) contained in the candidate text wins.
    return next(
        (score for label, score in CHOICE_MAP.items() if label in candidate),
        None,
    )

def get_character_name(data, filename):
    """Work out which persona a questionnaire file belongs to.

    Resolution order:
      1. a non-empty ``"character"`` entry in *data*;
      2. the first key of the first ``"Common Questions"`` item that is not
         ``"Consultant"``;
      3. *filename* with its extension removed.

    Args:
        data: Parsed JSON document (a dict).
        filename: Name of the file *data* was loaded from.

    Returns:
        The resolved persona name.
    """
    explicit = data.get("character")
    if explicit:
        return explicit

    questions = data.get("Common Questions", [])
    if questions:
        speakers = [key for key in questions[0].keys() if key != "Consultant"]
        if speakers:
            return speakers[0]

    stem, _ext = os.path.splitext(filename)
    return stem

# Build one CSV row per persona file, then write the PHQ-9 summary.
rows = []
# Filter before numbering so stray non-JSON entries in INPUT_DIR do not leave
# gaps in the "No." column.
json_files = sorted(entry for entry in os.listdir(INPUT_DIR) if entry.endswith(".json"))
for idx, fname in enumerate(json_files, start=1):
    path = os.path.join(INPUT_DIR, fname)
    # Context manager so the handle is closed as soon as the file is parsed.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)
    name = get_character_name(data, fname)
    items = data.get("Common Questions", [])
    # Only the first nine items are scored; unparseable answers become None.
    scores = [extract_choice(q.get(name, "")) for q in items][:9]
    # A total is only reported when all nine items were answered and parsed.
    # Without the length check an empty or truncated item list would still
    # produce a total (all() of an empty list is True).
    complete = len(scores) == 9 and all(s is not None for s in scores)
    total = sum(scores) if complete else None
    row = {"No.": idx, "Name": name, **{f"Q{i}": s for i, s in enumerate(scores, 1)}, "Total": total}
    rows.append(row)

with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["No.","Name"]+[f"Q{i}" for i in range(1,10)]+["Total"])
    writer.writeheader()
    writer.writerows(rows)

print(f"‚úÖ PHQ-9 summary saved ‚Üí {OUTPUT_CSV}")


‚úÖ PHQ-9 summary saved ‚Üí Analysis/PHQ9/Questionnaire_summary.csv


## 😟 GAD-7 Questionnaire Ratings
Extracts numeric scores from `Conversations/GAD7/Question based Conversation/*.json`.


In [8]:
# =================== GAD-7 RATING SCRIPT ===================

# Where the GAD-7 transcripts are read from and where the summary CSV is written.
OUTPUT_CSV = "Analysis/GAD7/Questionnaire_summary.csv"
INPUT_DIR = "Conversations/GAD7/Question based Conversation"
ensure_dir(OUTPUT_CSV)

# Answer label -> item score. The labels are listed in ascending order, so each
# label's position in the tuple is its score (0-3).
CHOICE_MAP = {
    label: score
    for score, label in enumerate(
        (
            "not at all",
            "several days",
            "more than half the days",
            "nearly every day",
        )
    )
}

def extract_choice(answer: str):
    """Translate one GAD-7 answer into its numeric score.

    When the answer carries a ``Choice:`` marker only the text after it is
    searched; otherwise the whole lower-cased answer is searched.

    Args:
        answer: Raw answer text; may be empty or None.

    Returns:
        The score of the first CHOICE_MAP label found, or None when the answer
        is empty or contains none of the labels.
    """
    if not answer:
        return None

    normalized = answer.lower()
    tagged = re.search(r"choice\s*:\s*(.+)", normalized)
    if tagged:
        normalized = tagged.group(1).strip()

    # Labels are tried in CHOICE_MAP order; the first hit decides the score.
    hits = [score for label, score in CHOICE_MAP.items() if label in normalized]
    return hits[0] if hits else None

# Build one CSV row per persona file, then write the GAD-7 summary.
rows = []
# Filter before numbering so stray non-JSON entries in INPUT_DIR do not leave
# gaps in the "No." column.
json_files = sorted(entry for entry in os.listdir(INPUT_DIR) if entry.endswith(".json"))
for idx, fname in enumerate(json_files, start=1):
    # Context manager so the handle is closed as soon as the file is parsed.
    with open(os.path.join(INPUT_DIR, fname), "r", encoding="utf-8") as fh:
        data = json.load(fh)
    # Persona name: explicit "character" entry, else the file name without extension.
    name = data.get("character") or os.path.splitext(fname)[0]
    items = data.get("Common Questions", [])
    # Only the first seven items are scored; unparseable answers become None.
    scores = [extract_choice(q.get(name, "")) for q in items][:7]
    # A total is only reported when all seven items were answered and parsed.
    # Without the length check an empty or truncated item list would still
    # produce a total (all() of an empty list is True).
    complete = len(scores) == 7 and all(s is not None for s in scores)
    total = sum(scores) if complete else None
    rows.append({"No.": idx, "Name": name, **{f"Q{i}": s for i, s in enumerate(scores, 1)}, "Total": total})

with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["No.","Name"]+[f"Q{i}" for i in range(1,8)]+["Total"])
    writer.writeheader()
    writer.writerows(rows)

print(f"‚úÖ GAD-7 summary saved ‚Üí {OUTPUT_CSV}")


‚úÖ GAD-7 summary saved ‚Üí Analysis/GAD7/Questionnaire_summary.csv


## ⚡ ASRM Questionnaire Ratings
Extracts numeric scores from `Conversations/ASRM/Question based Conversation/*.json`.


In [9]:
# =================== ASRM RATING SCRIPT ===================

# Where the ASRM transcripts are read from and where the summary CSV is written.
OUTPUT_CSV = "Analysis/ASRM/Questionnaire_summary.csv"
INPUT_DIR = "Conversations/ASRM/Question based Conversation"
ensure_dir(OUTPUT_CSV)

# Answer label -> item score. The labels are listed in ascending order, so each
# label's position in the tuple is its score (0-4).
CHOICE_MAP = {
    label: score
    for score, label in enumerate(
        ("never", "rarely", "sometimes", "often", "very often")
    )
}

def extract_choice(answer: str):
    """Translate one ASRM answer into its numeric score.

    When the answer carries a ``Choice:`` marker only the text after it is
    searched; otherwise the whole lower-cased answer is searched.

    Args:
        answer: Raw answer text; may be empty or None.

    Returns:
        4 when the text contains "very often"; otherwise the score of the
        first CHOICE_MAP label found; None when the answer is empty or no
        label occurs in it.
    """
    if not answer:
        return None

    lowered = answer.lower()
    tagged = re.search(r"choice\s*:\s*(.+)", lowered)
    selection = tagged.group(1).strip() if tagged else lowered

    # "very often" contains "often", so it has to be recognised before the
    # generic substring scan below would report the lower score.
    if "very often" in selection:
        return 4

    return next(
        (score for label, score in CHOICE_MAP.items() if label in selection),
        None,
    )

# Build one CSV row per persona file, then write the ASRM summary.
rows = []
# Filter before numbering so stray non-JSON entries in INPUT_DIR do not leave
# gaps in the "No." column.
json_files = sorted(entry for entry in os.listdir(INPUT_DIR) if entry.endswith(".json"))
for idx, fname in enumerate(json_files, start=1):
    # Context manager so the handle is closed as soon as the file is parsed.
    with open(os.path.join(INPUT_DIR, fname), "r", encoding="utf-8") as fh:
        data = json.load(fh)
    # Persona name: explicit "character" entry, else the file name without extension.
    name = data.get("character") or os.path.splitext(fname)[0]
    items = data.get("Common Questions", [])
    # Only the first five items are scored; unparseable answers become None.
    scores = [extract_choice(q.get(name, "")) for q in items][:5]
    # A total is only reported when all five items were answered and parsed.
    # Without the length check an empty or truncated item list would still
    # produce a total (all() of an empty list is True).
    complete = len(scores) == 5 and all(s is not None for s in scores)
    total = sum(scores) if complete else None
    rows.append({"No.": idx, "Name": name, **{f"Q{i}": s for i, s in enumerate(scores, 1)}, "Total": total})

with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["No.","Name"]+[f"Q{i}" for i in range(1,6)]+["Total"])
    writer.writeheader()
    writer.writerows(rows)

print(f"‚úÖ ASRM summary saved ‚Üí {OUTPUT_CSV}")


‚úÖ ASRM summary saved ‚Üí Analysis/ASRM/Questionnaire_summary.csv


## ✨ Casual Conversation Ratings
Uses an LLM to infer PHQ-9, GAD-7, and ASRM scores from the persona's lines in `Conversations/Casual/*.json`.


In [10]:
# =================== CASUAL CONVERSATION RATER ===================

# Model settings for the scoring requests.
TEMPERATURE = 0.0
SCORING_MODEL = "gpt-4.1-mini"  # or "gpt-4.1"

# Input transcripts, output summary, and the folder for per-persona error logs.
INPUT_DIR = "Conversations/Casual"
OUTPUT_CSV = "Analysis/Casual/Casual_summary.csv"
LOG_DIR = "Analysis/Casual/_logs"

# Make sure both output locations exist before anything is written.
os.makedirs(LOG_DIR, exist_ok=True)
ensure_dir(OUTPUT_CSV)

# Rating instructions for the model. rate_convo() sends this text followed by
# "\n\nConversation:\n" and the persona's lines as the user message, and
# expects a JSON object with "PHQ9", "GAD7" and "ASRM" sections in reply.
PROMPT = """
You are an AI rater. Based ONLY on the person's statements in this casual conversation,
estimate likely scores on:
- PHQ-9 (9 items, 0‚Äì3)
- GAD-7 (7 items, 0‚Äì3)
- ASRM (5 items, 0‚Äì4)

Infer from mood, energy, anxiety, sleep, speech, confidence, risk, etc.
If not evident, score the lowest. Output JSON ONLY with this structure:

{
  "PHQ9": {"Q1":0-3,...,"Q9":0-3,"Total":0-27},
  "GAD7": {"Q1":0-3,...,"Q7":0-3,"Total":0-21},
  "ASRM": {"Q1":0-4,...,"Q5":0-4,"Total":0-20}
}
"""

def read_person_lines(path):
    """Load a casual-conversation file and collect the persona's utterances.

    Args:
        path: Path to a conversation JSON file holding an optional
            ``"character"`` entry and a ``"turns"`` list whose items carry
            ``"speaker"`` and ``"text"``.

    Returns:
        A ``(name, text)`` tuple. *name* is the ``"character"`` entry, or the
        file name without extension when that entry is missing or empty.
        *text* is every turn not spoken by "friend" (case-insensitive),
        joined with newlines.

    Raises:
        KeyError: If an included turn has no ``"text"`` entry.
    """
    # Context manager so the handle is closed as soon as the file is parsed.
    with open(path, "r", encoding="utf-8") as fh:
        data = json.load(fh)

    # `or` (not a .get default) so a null/empty "character" also falls back.
    name = data.get("character") or Path(path).stem

    lines = [
        turn["text"]
        for turn in data.get("turns", [])
        # A null speaker is treated like a missing one instead of crashing.
        if (turn.get("speaker") or "").lower() != "friend"
    ]
    return name, "\n".join(lines)

def parse_json_response(txt):
    """Extract the first JSON object embedded in a model reply.

    The reply may wrap the object in prose or code fences. Decoding is tried
    at each ``{`` in turn, so braces in the surrounding text (before or after
    the object) do not prevent the object from being found.

    Args:
        txt: Reply text; anything that is not a string yields None.

    Returns:
        The decoded dict, or None when no JSON object can be decoded.
    """
    if not isinstance(txt, str):
        return None

    decoder = json.JSONDecoder()
    start = txt.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete JSON value,
            # so trailing text after the object is ignored.
            candidate, _end = decoder.raw_decode(txt[start:])
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = txt.find("{", start + 1)
    return None

def rate_convo(name, convo):
    """Ask the scoring model to rate one conversation, retrying up to 3 times.

    Args:
        name: Persona name; used only to name the error log file.
        convo: The persona's lines, appended to PROMPT as the user message.

    Returns:
        The parsed JSON object from the first attempt that yields a non-empty
        result, or None when all three attempts fail or return no usable JSON.
    """
    # The name comes from file contents. Restrict the log filename to safe
    # characters so a path separator or similar cannot make the error handler
    # itself raise and abort the whole run.
    safe_name = re.sub(r"[^\w.\- ]", "_", str(name))
    log_path = os.path.join(LOG_DIR, f"{safe_name}_error.txt")

    for _attempt in range(3):
        try:
            resp = client.chat.completions.create(
                model=SCORING_MODEL,
                temperature=TEMPERATURE,
                messages=[
                    {"role": "system", "content": "You are a precise psychological scoring assistant."},
                    {"role": "user", "content": PROMPT + "\n\nConversation:\n" + convo}
                ],
            )
            result = parse_json_response(resp.choices[0].message.content)
            if result:
                return result
        except Exception as e:
            # Best-effort: record the failure and try again. The file is
            # rewritten each time, so it holds the most recent error only.
            # Explicit UTF-8 so the message can always be written regardless
            # of the platform's default encoding.
            with open(log_path, "w", encoding="utf-8") as f:
                f.write(str(e))
    return None

def safe_get(d, *path, default=None):
    """Walk nested dicts along *path* without raising on missing keys.

    Args:
        d: Starting object, normally a dict.
        *path: Keys to follow, outermost first.
        default: Value returned when the walk cannot be completed.

    Returns:
        The value found at the end of *path* (which is *d* itself for an empty
        path), or *default* as soon as a step meets a non-dict or a missing key.
    """
    current = d
    for key in path:
        if not isinstance(current, dict) or key not in current:
            return default
        current = current[key]
    return current

# Rate every casual conversation and collect one flat CSV row per persona.
rows = []
# Filter before numbering so stray non-JSON entries in INPUT_DIR do not shift
# the "No." column. A persona whose rating fails is skipped further down, so
# its number is simply absent from the CSV.
json_files = sorted(entry for entry in os.listdir(INPUT_DIR) if entry.endswith(".json"))
for i, fname in enumerate(json_files, start=1):
    name, convo = read_person_lines(os.path.join(INPUT_DIR, fname))
    result = rate_convo(name, convo)
    if not result:
        print(f"‚ö†Ô∏è {name}: no valid JSON")
        continue
    phq, gad, asrm = result.get("PHQ9", {}), result.get("GAD7", {}), result.get("ASRM", {})
    # safe_get returns None for any item or total the model left out, so every
    # row has the same columns.
    row = {
        "No.": i, "Name": name,
        **{f"PHQ9_Q{j}": safe_get(phq, f"Q{j}") for j in range(1,10)},
        "PHQ9_Total": safe_get(phq, "Total"),
        **{f"GAD7_Q{j}": safe_get(gad, f"Q{j}") for j in range(1,8)},
        "GAD7_Total": safe_get(gad, "Total"),
        **{f"ASRM_Q{j}": safe_get(asrm, f"Q{j}") for j in range(1,6)},
        "ASRM_Total": safe_get(asrm, "Total"),
    }
    rows.append(row)
    print(f"‚úì {name}: PHQ9={row['PHQ9_Total']}  GAD7={row['GAD7_Total']}  ASRM={row['ASRM_Total']}")

if rows:
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        # All rows share the same keys, so the first row defines the header.
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)
    print(f"\n‚úÖ Casual ratings saved ‚Üí {OUTPUT_CSV}")
else:
    print("No valid ratings generated.")


‚úì Abigail Hall: PHQ9=12  GAD7=18  ASRM=2
‚úì Abigail Johnson: PHQ9=8  GAD7=6  ASRM=6
‚úì Adriana Lopez: PHQ9=8  GAD7=12  ASRM=8
‚úì Aiden Scott: PHQ9=5  GAD7=6  ASRM=7
‚úì Aiden Smith: PHQ9=13  GAD7=17  ASRM=7
‚úì Aiden White: PHQ9=14  GAD7=15  ASRM=11
‚úì Alice Thompson: PHQ9=5  GAD7=9  ASRM=5
‚úì Alyssa Taylor: PHQ9=8  GAD7=10  ASRM=3
‚úì Alyssa Turner: PHQ9=6  GAD7=10  ASRM=7
‚úì Amelia Lopez: PHQ9=9  GAD7=11  ASRM=8
‚úì Amy Wilson: PHQ9=21  GAD7=19  ASRM=4
‚úì Angela Martinez: PHQ9=8  GAD7=10  ASRM=8
‚úì Anthony Robinson: PHQ9=14  GAD7=17  ASRM=10
‚úì Anthony White: PHQ9=10  GAD7=12  ASRM=7
‚úì Anthony Young: PHQ9=14  GAD7=18  ASRM=4
‚úì Anya Kim: PHQ9=14  GAD7=16  ASRM=4
‚úì Aria Watson: PHQ9=13  GAD7=17  ASRM=4
‚úì Ariana Wright: PHQ9=13  GAD7=15  ASRM=4
‚úì Ava Green: PHQ9=15  GAD7=17  ASRM=4
‚úì Ava Johnson: PHQ9=14  GAD7=16  ASRM=4
‚úì Ava Kim: PHQ9=16  GAD7=16  ASRM=4
‚úì Ava Nguyen: PHQ9=15  GAD7=16  ASRM=4
‚úì Ava Roberts: PHQ9=13  GAD7=16  ASRM=3
‚úì Avery Johnson: PHQ9=