## PHQ-9 Conversations Analysis

PHQ-9 Questions Analyzer

In [2]:
import os
import json
import csv
import re

# ---------- CONFIG ----------
INPUT_FOLDER = "Conversations/PHQ9/Question based Conversation/"        # folder containing all patient JSON files
OUTPUT_CSV = "Analysis/PHQ9/Questionnaire_summary.csv"     # output summary file
# -----------------------------

def extract_patient_name(file_path):
    """Return the patient's name for one questionnaire JSON file.

    The name is the first key of the first "Common Questions" entry that is
    not "consultant" (compared case-insensitively), with surrounding
    whitespace stripped. If the list is missing or empty, or its first entry
    has no such key, the file name without its extension is returned.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    stem = os.path.splitext(os.path.basename(file_path))[0]
    if "Common Questions" not in payload:
        return stem

    questions = payload["Common Questions"]
    if len(questions) == 0:
        return stem

    # Every key other than the consultant's is taken to be the patient.
    speaker = next(
        (key.strip() for key in questions[0].keys() if key.lower() != "consultant"),
        None,
    )
    if speaker is not None:
        return speaker
    return stem

def extract_ratings(file_path):
    """Extract the nine PHQ-9 item ratings (0-3) from a questionnaire JSON file.

    Each entry of "Common Questions" is expected to be a dict holding the
    consultant's question and the patient's answer. For every non-consultant
    string answer one rating is recorded:

    1. a digit 0-3 that follows a rating keyword ("Rating: 2", "rated it 3"),
    2. otherwise the first standalone digit 0-3 in the answer ("2—More ..."),
    3. otherwise None.

    The keyword form is tried first so that unrelated numbers earlier in the
    answer (e.g. "over the past 2 weeks ... Rating: 3") are not mistaken for
    the rating.

    Returns:
        A list of exactly nine items (int or None); shorter results are padded
        with None and longer ones are truncated.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    keyword_rating = re.compile(
        r"\b(?:rating|rated?|scored?)\b[^0-9]{0,20}?\b([0-3])\b", re.IGNORECASE
    )
    bare_rating = re.compile(r"\b([0-3])\b")

    ratings = []
    if "Common Questions" in data:
        for item in data["Common Questions"]:
            # Entries that are not question/answer dicts carry no rating.
            if not isinstance(item, dict):
                continue
            # each question is a dict with 'Consultant' and '<Name>'
            for k, v in item.items():
                if k.lower() != "consultant" and isinstance(v, str):
                    match = keyword_rating.search(v) or bare_rating.search(v)
                    ratings.append(int(match.group(1)) if match else None)
    # Ensure exactly 9 ratings
    ratings = (ratings + [None] * 9)[:9]
    return ratings

def main():
    """Build the PHQ-9 questionnaire summary CSV.

    For every ``.json`` file in INPUT_FOLDER this extracts the patient name and
    the nine item ratings, sums the ratings that were found (missing ratings
    are ignored in the total) and writes one row per patient to OUTPUT_CSV with
    the columns No., Name, Q1..Q9, Total. Progress is printed per file.
    """
    # os.listdir returns entries in arbitrary order; sort so the "No." column
    # and the row order are reproducible between runs and machines.
    files = sorted(f for f in os.listdir(INPUT_FOLDER) if f.lower().endswith(".json"))
    all_rows = []

    for idx, file_name in enumerate(files, start=1):
        file_path = os.path.join(INPUT_FOLDER, file_name)
        name = extract_patient_name(file_path)
        ratings = extract_ratings(file_path)
        total = sum(r for r in ratings if isinstance(r, int))
        row = {"No.": idx, "Name": name}
        for i, r in enumerate(ratings, start=1):
            row[f"Q{i}"] = r
        row["Total"] = total
        all_rows.append(row)
        print(f"Processed {name} → Total Score: {total}")

    # Create the output folder if needed so open() below cannot fail with
    # FileNotFoundError on a fresh checkout.
    out_dir = os.path.dirname(OUTPUT_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Write CSV
    fieldnames = ["No.", "Name"] + [f"Q{i}" for i in range(1, 10)] + ["Total"]
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_rows)

    print(f"\n✅ Saved summary to {OUTPUT_CSV}")

if __name__ == "__main__":
    main()


Processed Ahmed Noor → Total Score: 20
Processed Aiden Brooks → Total Score: 15
Processed Aiden Harris → Total Score: 17
Processed Aiden Scott → Total Score: 17
Processed Aisha Rashid → Total Score: 9
Processed Alice Johnson → Total Score: 3
Processed Alice Williams → Total Score: 18
Processed Alyssa Hall → Total Score: 18
Processed Alyssa Scott → Total Score: 19
Processed Amber Reed → Total Score: 17
Processed Amelia Green → Total Score: 17
Processed Amelia Harris → Total Score: 18
Processed Amira El-Sayed → Total Score: 15
Processed Anabelle King → Total Score: 16
Processed Andrew Martinez → Total Score: 15
Processed Anika Patel → Total Score: 5
Processed Anna Morgan → Total Score: 15
Processed Ava Brooks → Total Score: 17
Processed Ava Brown → Total Score: 18
Processed Ava Jensen → Total Score: 15
Processed Ava Martin → Total Score: 17
Processed Ava Martinez → Total Score: 15
Processed Ava Wilson → Total Score: 18
Processed Avery Morgan → Total Score: 16
Processed Avery Rivera → Tot

Casual Conversation Analyzer

In [None]:
import os, json, csv, re
from openai import OpenAI

# -----------------------
# CONFIG
# -----------------------
INPUT_FOLDER = "Conversations/PHQ9/Normal Conversation/"
OUTPUT_CSV = "Analysis/PHQ9/Conversation_summary.csv"
MODEL = "gpt-4o-mini"

client = OpenAI()

PROMPT = """
You are an AI trained to assess depression severity from natural conversations,
NOT by using the PHQ-9, but by analyzing real emotional, cognitive, and behavioral patterns.

Evaluate the PERSON's depression based ONLY on their spoken content in the transcript.

Assess these clinical dimensions, each 0-3:
- Mood & emotional tone (sadness, emptiness, hopelessness)
- Anhedonia (loss of interest/joy)
- Energy/fatigue
- Motivation / initiative
- Sleep disturbance
- Appetite or weight changes
- Cognitive difficulty (focus, decision-making, fogginess)
- Psychomotor change (slowed, restless)
- Self-worth / guilt / self-criticism
- Suicidal ideation / morbid thoughts (if expressed)

Scoring key per item:
0 = none
1 = mild / occasional
2 = moderate or frequent
3 = severe / nearly constant / disabling

Return JSON ONLY in this format:

{
  "Mood": <0-3>,
  "Anhedonia": <0-3>,
  "Energy": <0-3>,
  "Motivation": <0-3>,
  "Sleep": <0-3>,
  "Appetite": <0-3>,
  "Cognition": <0-3>,
  "Psychomotor": <0-3>,
  "SelfWorth": <0-3>,
  "Suicidality": <0-3>,
  "Total": <0-27>,
  "Confidence": "<low/medium/high>"
}
"""

def extract_text(path):
    """Return the conversation stored in `path` as "speaker: text" lines.

    The JSON file is expected to hold a "turns" list whose items each have a
    "speaker" and a "text" key; a file without "turns" yields an empty string.

    Raises:
        KeyError: if a turn lacks "speaker" or "text".
    """
    # Use a context manager so the file handle is closed right away instead of
    # being left for the garbage collector.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return "\n".join(f"{t['speaker']}: {t['text']}" for t in data.get("turns", []))

def parse_json(txt):
    """Extract and parse the JSON object embedded in a model reply.

    The span from the first "{" to the last "}" in `txt` is parsed, so any
    text before or after the object is ignored.

    Returns:
        The parsed object, or None when `txt` is empty/None, contains no
        brace-delimited span, or that span is not valid JSON.
    """
    # A reply with no content would make re.search raise TypeError.
    if not txt:
        return None
    m = re.search(r"\{.*\}", txt, re.DOTALL)
    if not m:
        return None
    try:
        return json.loads(m.group(0))
    except ValueError:
        # json.JSONDecodeError is a ValueError; anything else is a real bug
        # and should not be silently swallowed.
        return None

def score_file(path):
    """Score one conversation file with a single chat-completion call.

    The transcript produced by extract_text is appended to PROMPT and sent to
    MODEL at temperature 0; the reply text is handed to parse_json and its
    result (parsed scores, or None if parsing failed) is returned.
    """
    transcript = extract_text(path)

    system_msg = {"role": "system", "content": "You are a clinical conversational inference model."}
    user_msg = {"role": "user", "content": PROMPT + "\n\nConversation:\n" + transcript}

    completion = client.chat.completions.create(
        model=MODEL,
        messages=[system_msg, user_msg],
        temperature=0,
    )
    reply = completion.choices[0].message.content

    return parse_json(reply)

def main():
    """Score every conversation in INPUT_FOLDER and write OUTPUT_CSV.

    Each ``.json`` file is scored with score_file. Files whose reply could not
    be parsed, or whose scoring raised, are reported and skipped. The CSV has a
    "Name" column followed by every key returned by the model; keys missing
    from a row are left blank. Nothing is written when no file was scored.
    """
    os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)

    results = []
    # Sorted so the row order is reproducible (os.listdir order is arbitrary).
    files = sorted(f for f in os.listdir(INPUT_FOLDER) if f.endswith(".json"))

    for f in files:
        name = os.path.splitext(f)[0]
        try:
            s = score_file(os.path.join(INPUT_FOLDER, f))
        except Exception as e:
            print(f"❌ {name}: {e}")
            continue
        if not s:
            print(f"⚠️ {name}: failed — retry manually")
            continue
        results.append({"Name": name, **s})
        # .get(): a reply missing Total/Confidence is still a stored row, so it
        # must not be reported as a failure after it was appended.
        print(f"✓ {name}: CDI={s.get('Total')} ({s.get('Confidence')})")

    # results[0] would raise IndexError when every file failed.
    if not results:
        print("\n⚠️ No conversations were scored; nothing written.")
        return

    # Header = union of keys in first-seen order, so a row with an extra key
    # does not make DictWriter raise ValueError.
    fieldnames = []
    for row in results:
        for key in row:
            if key not in fieldnames:
                fieldnames.append(key)

    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as out:
        writer = csv.DictWriter(out, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

    print(f"\n✅ Saved → {OUTPUT_CSV}")

if __name__ == "__main__":
    main()

In [1]:
# conversation_depression_inference_v2.py
import os, re, json, csv, time, random
from pathlib import Path
from typing import List, Dict, Optional, Tuple

from dotenv import load_dotenv
from openai import OpenAI

# =========================
# CONFIG
# =========================
INPUT_FOLDER = "Conversations/PHQ9/Normal Conversation"
OUT_DIR      = "Analysis/ConversationOnly"
OUT_SUMMARY  = os.path.join(OUT_DIR, "CDI_scores.csv")
OUT_DETAIL   = os.path.join(OUT_DIR, "CDI_evidence.csv")
LOG_DIR      = os.path.join(OUT_DIR, "_logs")

# Use a stronger model for structured inference
SCORING_MODEL = "gpt-4.1-mini"   # or "gpt-4.1" for best reliability
TEMPERATURE   = 0.0

MAX_CHARS_PER_CHUNK = 9000
MAX_RETRIES         = 3

os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
load_dotenv()
client = OpenAI()  # expects OPENAI_API_KEY in env

# =========================
# DOMAINS (no PHQ-9 words)
# =========================
DOMAINS = [
    ("Mood",        "sadness, emptiness, hopelessness"),
    ("Anhedonia",   "loss of interest or joy"),
    ("Energy",      "low energy, fatigue, drained"),
    ("Motivation",  "reduced drive, difficulty initiating tasks"),
    ("Sleep",       "trouble falling or staying asleep, oversleeping"),
    ("Appetite",    "eating less or more than usual, weight change"),
    ("Cognition",   "poor concentration, mind racing, indecisive"),
    ("Psychomotor", "slowed movement/speech or restlessness/fidgeting"),
    ("SelfWorth",   "self-criticism, guilt, worthlessness"),
    ("Suicidality", "thoughts of death or self-harm (frequency only)"),
]

DOMAIN_KEYS = [d[0] for d in DOMAINS]

# =========================
# PROMPTS
# =========================
SYSTEM_SUMMARY = (
    "You are a careful clinical text rater. You do not diagnose or give advice. "
    "You only extract evidence signals from a person's natural conversation."
)

USER_SUMMARY_TEMPLATE = """
Read the person's statements (friend prompts removed). Extract concise evidence for each domain.
For each domain, return:
- "present": true/false
- "frequency": one of ["none","occasional","often","nearly daily"]
- "intensity": one of ["none","mild","moderate","severe"]
- "quotes": list of 1-3 short paraphrases or brief quotes from the text

Domains (9 + suicidality):
{domain_bullets}

Rules:
- Use only the PERSON's lines (the conversation below already excludes the friend).
- If no clear evidence, set present=false, frequency="none", intensity="none", quotes=[].
- If suicidality is mentioned indirectly, record frequency by how often it appears or is implied.
- Output JSON only, no extra text.

Conversation (PERSON only):
\"\"\"
{person_text}
\"\"\"
"""

SYSTEM_SCORE = (
    "You are a strict JSON scoring function. You convert evidence into 0-3 scores. "
    "Do not provide advice or narrative; return JSON only."
)

USER_SCORE_TEMPLATE = """
Convert the following domain evidence into numeric scores (0–3) using frequency+intensity:

0 = none
1 = mild / occasional
2 = moderate / often
3 = severe / nearly daily

Domains and evidence (JSON):
{evidence_json}

Return JSON ONLY:
{{
  "Mood": <0-3>,
  "Anhedonia": <0-3>,
  "Energy": <0-3>,
  "Motivation": <0-3>,
  "Sleep": <0-3>,
  "Appetite": <0-3>,
  "Cognition": <0-3>,
  "Psychomotor": <0-3>,
  "SelfWorth": <0-3>,
  "Suicidality": <0-3>,
  "Total": <0-27>,
  "Confidence": "<low|medium|high>"
}}
"""

JSON_RE = re.compile(r"\{.*\}", re.DOTALL)

# =========================
# HELPERS
# =========================
def read_person_only_text(path: str) -> Tuple[str, str]:
    """Return (person_only_text, name) for one conversation JSON file.

    `name` is the file's "character" value, or the file stem when that key is
    absent. The text is every non-empty turn whose speaker is not "friend"
    (case-insensitive, surrounding whitespace ignored), stripped and joined
    with blank lines. Turns with a missing or null speaker count as the person.
    """
    # Context manager so the handle is closed promptly rather than leaked.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    name = data.get("character", Path(path).stem)
    turns = data.get("turns", [])
    # keep only the person's lines
    person_lines = []
    for t in turns:
        # `or ""` covers an explicit null speaker, which .get's default does not.
        sp = (t.get("speaker") or "").strip()
        tx = (t.get("text") or "").strip()
        if not tx:
            continue
        if sp.lower() != "friend":   # everything not 'Friend' is the person
            person_lines.append(tx)
    text = "\n\n".join(person_lines)
    return text, name

def soft_chunks(text: str, max_chars=9000) -> List[str]:
    """Split `text` on blank lines into chunks of roughly `max_chars` characters.

    Text that already fits is returned as a single chunk. Otherwise paragraphs
    are packed greedily; each paragraph is budgeted as its length plus two
    characters for the separator. The limit is soft: a single paragraph longer
    than `max_chars` is kept whole as its own chunk.
    """
    separator = "\n\n"
    if len(text) <= max_chars:
        return [text]

    chunks: List[str] = []
    pending: List[str] = []
    pending_size = 0
    for paragraph in text.split(separator):
        cost = len(paragraph) + 2
        # Flush before adding, but never emit an empty chunk.
        if pending and pending_size + cost > max_chars:
            chunks.append(separator.join(pending))
            pending = []
            pending_size = 0
        pending.append(paragraph)
        pending_size += cost

    if pending:
        chunks.append(separator.join(pending))
    return chunks

def json_from_text(txt: str) -> Optional[dict]:
    """Parse the JSON object contained in a model reply.

    Leading/trailing whitespace is stripped and, if the reply starts with a
    Markdown code fence, the opening fence (optionally tagged "json") and a
    trailing fence are removed. The span matched by JSON_RE is then parsed.

    Returns None for empty input, when JSON_RE finds nothing, or when the
    matched span cannot be parsed.
    """
    if not txt:
        return None

    cleaned = txt.strip()
    if cleaned.startswith("```"):
        without_opening = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.IGNORECASE)
        cleaned = re.sub(r"\s*```$", "", without_opening)

    found = JSON_RE.search(cleaned)
    if found is None:
        return None

    try:
        parsed = json.loads(found.group(0))
    except Exception:
        return None
    return parsed

def retry_chat(messages, tag: str) -> Optional[dict]:
    """Call the scoring model and return its reply parsed as JSON.

    Up to MAX_RETRIES attempts are made. A reply that cannot be parsed is
    written to LOG_DIR as "<tag>_attempt<N>.txt"; an exception raised during an
    attempt is written as "<tag>_ERROR_attempt<N>.txt". Between attempts the
    function sleeps for 1 second plus a random amount that grows with the
    attempt number.

    Args:
        messages: chat messages passed straight to the completions endpoint.
        tag: prefix for the log file names of this call.

    Returns:
        The parsed JSON object, or None if every attempt failed.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            resp = client.chat.completions.create(
                model=SCORING_MODEL,
                temperature=TEMPERATURE,
                messages=messages
            )
            txt = resp.choices[0].message.content
            obj = json_from_text(txt)
            if obj is not None:
                return obj
            # log bad payload
            with open(os.path.join(LOG_DIR, f"{tag}_attempt{attempt}.txt"), "w", encoding="utf-8") as f:
                f.write(txt or "[EMPTY]")
        except Exception as e:
            with open(os.path.join(LOG_DIR, f"{tag}_ERROR_attempt{attempt}.txt"), "w", encoding="utf-8") as f:
                f.write(f"{type(e).__name__}: {e}")
        # Back off only when another attempt follows; sleeping after the last
        # failure merely delays the caller.
        if attempt < MAX_RETRIES:
            time.sleep(1.0 + random.random() * attempt)
    return None

def frequency_to_num(freq: str) -> int:
    """Map an evidence frequency label to a 0-3 score (fallback scoring).

    Matching is by substring on the stripped, lower-cased label, checked from
    the highest score down: "nearly" -> 3, "often" -> 2, "occas" -> 1.
    Empty/None input and any other label give 0.
    """
    if not freq:
        return 0

    label = freq.strip().lower()
    # Order matters: the first marker found decides the score.
    for marker, score in (("nearly", 3), ("often", 2), ("occas", 1)):
        if marker in label:
            return score
    return 0

def combine_scores(chunk_scores: List[dict], confidences: List[str]) -> Tuple[dict, int, str]:
    """Merge per-chunk domain scores into one score set.

    Each chunk is weighted by its confidence label (low 0.5, medium 1.0,
    high 1.5, anything else 0.8). For every key in DOMAIN_KEYS the numeric
    chunk values are averaged with those weights and rounded to an int; a
    domain with no numeric value in any chunk is None.

    Returns:
        (final, total, overall) where `final` maps each domain to its merged
        score or None, `total` is the sum of the non-None domain scores (None
        if there are none), and `overall` is "low" / "medium" / "high" derived
        from the mean chunk weight (below 0.8, below 1.2, otherwise).
    """
    label_weight = {"low": 0.5, "medium": 1.0, "high": 1.5}
    weights = [label_weight.get((label or "").lower(), 0.8) for label in confidences]

    final = {}
    for key in DOMAIN_KEYS:
        # (value, weight) pairs from chunks that have a numeric score for this domain
        pairs = [
            (float(scores.get(key, None)), weight)
            for scores, weight in zip(chunk_scores, weights)
            if scores and isinstance(scores.get(key, None), (int, float))
        ]
        if not pairs:
            final[key] = None
            continue
        weighted_sum = sum(value * weight for value, weight in pairs)
        weight_sum = sum(weight for _, weight in pairs)
        final[key] = int(round(weighted_sum / max(weight_sum, 1e-8)))

    # Total is recomputed from the merged domain scores, skipping unscored ones.
    scored = [final[key] for key in DOMAIN_KEYS if final[key] is not None]
    total = int(sum(scored)) if scored else None

    # overall confidence: mean of weights → label
    mean_weight = sum(weights) / len(weights) if weights else 0.8
    if mean_weight < 0.8:
        overall = "low"
    elif mean_weight < 1.2:
        overall = "medium"
    else:
        overall = "high"

    return final, total, overall

# =========================
# CORE PIPELINE (per file)
# =========================
def score_file(path: str) -> Tuple[dict, List[dict]]:
    """
    Score one conversation file with the two-stage (evidence -> score) pipeline.

    The person's text is split into chunks. For each chunk the model first
    extracts per-domain evidence, then converts that evidence into 0-3 scores.
    If evidence extraction fails an empty "no evidence" shell is used; if the
    scoring call fails, scores are derived from the evidence frequency labels
    and the chunk confidence is set to "low". Chunk scores are merged with
    combine_scores.

    Returns (summary_row, detail_rows)
    summary_row: {Name, Mood..Suicidality, Total, Confidence}
    detail_rows: list with one dict per (chunk, domain):
                 {Name, Chunk, Domain, Present, Frequency, Intensity, Quotes, Score}
    """
    def domain_evidence(evidence: dict, key: str) -> dict:
        # Model output is not schema-validated: treat a missing or non-dict
        # entry as "no evidence" instead of failing on .get().
        entry = evidence.get(key)
        return entry if isinstance(entry, dict) else {}

    person_text, name = read_person_only_text(path)
    chunks = soft_chunks(person_text, MAX_CHARS_PER_CHUNK)

    domain_bullets = "\n".join([f"- {k}: {desc}" for k,desc in DOMAINS])

    chunk_scores = []
    chunk_conf   = []
    detail_rows  = []

    for idx, ch in enumerate(chunks, start=1):
        # Stage 1: extract evidence
        ev_obj = retry_chat(
            [
                {"role":"system", "content": SYSTEM_SUMMARY},
                {"role":"user",   "content": USER_SUMMARY_TEMPLATE.format(domain_bullets=domain_bullets, person_text=ch)}
            ],
            tag=f"{Path(path).stem}_chunk{idx}_evidence"
        )

        if ev_obj is None:
            # make a minimal empty evidence shell
            ev_obj = {k: {"present": False, "frequency": "none", "intensity": "none", "quotes": []} for k in DOMAIN_KEYS}

        # Stage 2: convert evidence -> numeric scores
        sc_obj = retry_chat(
            [
                {"role":"system", "content": SYSTEM_SCORE},
                {"role":"user",   "content": USER_SCORE_TEMPLATE.format(evidence_json=json.dumps(ev_obj, ensure_ascii=False))}
            ],
            tag=f"{Path(path).stem}_chunk{idx}_score"
        )

        # Fallback if stage 2 failed: compute quick scores from frequency only
        if sc_obj is None:
            sc_obj = {}
            for k in DOMAIN_KEYS:
                freq = domain_evidence(ev_obj, k).get("frequency", "none")
                # frequency_to_num calls str methods; a non-string label counts as none.
                sc_obj[k] = frequency_to_num(freq if isinstance(freq, str) else "")
            sc_obj["Total"] = int(sum(sc_obj[k] for k in DOMAIN_KEYS))
            sc_obj["Confidence"] = "low"

        # keep per-chunk for aggregation
        chunk_scores.append({k: sc_obj.get(k) for k in DOMAIN_KEYS})
        chunk_conf.append(sc_obj.get("Confidence","medium"))

        # detail row (evidence preview)
        for k in DOMAIN_KEYS:
            ev = domain_evidence(ev_obj, k)
            quotes = ev.get("quotes") or []
            # A lone string would otherwise be sliced into characters, and
            # non-string items would make str.join raise TypeError.
            if not isinstance(quotes, list):
                quotes = [quotes]
            detail_rows.append({
                "Name": name, "Chunk": idx, "Domain": k,
                "Present": ev.get("present"),
                "Frequency": ev.get("frequency"),
                "Intensity": ev.get("intensity"),
                "Quotes": " | ".join(str(q) for q in quotes[:3]),
                "Score": sc_obj.get(k)
            })

    # Aggregate across chunks
    final_items, final_total, final_conf = combine_scores(chunk_scores, chunk_conf)

    summary = {"Name": name, **final_items, "Total": final_total, "Confidence": final_conf}
    return summary, detail_rows

# =========================
# MAIN
# =========================
def main():
    """Run the two-stage scoring over every conversation and write both CSVs.

    All ``.json`` files in INPUT_FOLDER are processed in sorted order. A file
    whose scoring raises is reported and skipped. The per-person summaries go
    to OUT_SUMMARY and the per-chunk evidence rows to OUT_DETAIL; each file is
    written only if there is at least one row for it.
    """
    json_names = sorted(
        entry for entry in os.listdir(INPUT_FOLDER) if entry.lower().endswith(".json")
    )

    summaries, all_details = [], []
    for fname in json_names:
        try:
            summary, details = score_file(os.path.join(INPUT_FOLDER, fname))
            summaries.append(summary)
            all_details.extend(details)
            print(f"✓ {summary['Name']}: CDI={summary['Total']} ({summary['Confidence']})")
        except Exception as e:
            print(f"⚠️ {fname}: {type(e).__name__}: {e}")

    def dump_rows(target, columns, rows):
        # Shared CSV writer for the summary and the evidence file.
        with open(target, "w", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=columns)
            w.writeheader()
            w.writerows(rows)

    if summaries:
        # write summary
        dump_rows(OUT_SUMMARY, ["Name"] + DOMAIN_KEYS + ["Total","Confidence"], summaries)
        print(f"✅ Saved summary → {OUT_SUMMARY}")

    if all_details:
        # write evidence detail
        dump_rows(
            OUT_DETAIL,
            ["Name","Chunk","Domain","Present","Frequency","Intensity","Quotes","Score"],
            all_details,
        )
        print(f"✅ Saved evidence → {OUT_DETAIL}")

if __name__ == "__main__":
    main()


✓ Ahmed Noor: CDI=18 (high)
✓ Aiden Brooks: CDI=9 (high)
✓ Aiden Harris: CDI=9 (high)
✓ Aiden Scott: CDI=12 (high)
✓ Aisha Rashid: CDI=9 (high)
✓ Alice Johnson: CDI=0 (high)
✓ Alice Williams: CDI=9 (high)
✓ Alyssa Hall: CDI=13 (high)
✓ Alyssa Scott: CDI=14 (high)
✓ Amber Reed: CDI=14 (high)
✓ Amelia Green: CDI=15 (high)
✓ Amelia Harris: CDI=15 (high)
✓ Amira El-Sayed: CDI=13 (high)
✓ Anabelle King: CDI=10 (high)
✓ Andrew Martinez: CDI=9 (high)
✓ Anika Patel: CDI=7 (high)
✓ Anna Morgan: CDI=10 (high)
✓ Ava Brooks: CDI=14 (high)
✓ Ava Brown: CDI=13 (high)
✓ Ava Jensen: CDI=15 (high)
✓ Ava Martin: CDI=9 (high)
✓ Ava Martinez: CDI=13 (high)
✓ Ava Wilson: CDI=17 (high)
✓ Avery Morgan: CDI=7 (high)
✓ Avery Rivera: CDI=14 (high)
✓ Ben Cooper: CDI=14 (high)
✓ Benjamin Hall: CDI=15 (high)
✓ Brian Lee: CDI=2 (high)
✓ Caleb Brooks: CDI=16 (high)
✓ Caleb Thompson: CDI=15 (high)
✓ Carlos Rodriguez: CDI=6 (high)
✓ Carmen Rodriguez: CDI=12 (high)
✓ Carter James: CDI=12 (high)
✓ Carter Scott: CDI=15 (