## PHQ-9 Conversations Analyzer

PHQ-9 Questions Analyzer

In [None]:
import os
import json
import csv
import re

# ---------- CONFIG ----------
# Both paths are relative, so they resolve against the current working
# directory. Only files ending in ".json" inside INPUT_FOLDER are processed.
INPUT_FOLDER = "Conversations/PHQ9/Question based Conversation/"        # folder containing all patient JSON files
OUTPUT_CSV = "Analysis/PHQ9/Questionnaire_summary.csv"     # output summary file (one row per patient)
# -----------------------------

def extract_patient_name(file_path):
    """Return the patient's name for a question-based conversation file.

    The name is read from the first entry of the ``"Common Questions"`` list:
    it is the first key of that entry which is not ``"Consultant"`` (compared
    case-insensitively, ignoring surrounding whitespace). Whenever the JSON
    does not have that shape, the file name without its extension is used.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        The patient name as a string.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Guard every level of the expected shape so an unusual file falls back
    # to the filename instead of raising part-way through a batch run.
    questions = data.get("Common Questions") if isinstance(data, dict) else None
    if isinstance(questions, list) and questions and isinstance(questions[0], dict):
        for key in questions[0]:
            name = key.strip()
            if name and name.lower() != "consultant":
                return name

    # fallback to filename
    return os.path.splitext(os.path.basename(file_path))[0]

# A digit 0-3 that closely follows "rate" / "rated" / "rating",
# e.g. "Rating: 2" or "rated it 3".
_LABELLED_RATING_RE = re.compile(
    r"\brat(?:e|ed|ing)\b[^0-9]{0,20}?\b([0-3])\b", re.IGNORECASE
)
# Any standalone digit 0-3, e.g. "2—More than half the days".
_BARE_RATING_RE = re.compile(r"\b([0-3])\b")


def _rating_from_answer(answer):
    """Return the 0-3 rating found in one patient answer, or None."""
    if isinstance(answer, bool):
        return None
    if isinstance(answer, int):
        return answer if 0 <= answer <= 3 else None
    if not isinstance(answer, str):
        return None
    # Prefer an explicitly labelled rating so that an unrelated number earlier
    # in the sentence ("over the past 2 weeks ...") is not mistaken for it.
    match = _LABELLED_RATING_RE.search(answer) or _BARE_RATING_RE.search(answer)
    return int(match.group(1)) if match else None


def extract_ratings(file_path):
    """Extract the nine PHQ-9 item ratings from a question-based conversation.

    Each entry of the ``"Common Questions"`` list is treated as one question:
    a dict holding the consultant's text under ``"Consultant"`` and the
    patient's answer under another key. Exactly one rating (or ``None``) is
    recorded per entry, so position ``i`` of the result always corresponds to
    entry ``i`` of the list even when an answer is missing or unparseable.

    Args:
        file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        A list of exactly nine elements, each an int in 0-3 or ``None``.
        Shorter inputs are padded with ``None``; extra entries are dropped.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    questions = data.get("Common Questions") if isinstance(data, dict) else None
    if not isinstance(questions, list):
        questions = []

    ratings = []
    for item in questions:
        rating = None
        if isinstance(item, dict):
            # each question is a dict with 'Consultant' and '<Name>'
            for speaker, answer in item.items():
                if speaker.lower() == "consultant":
                    continue
                rating = _rating_from_answer(answer)
                if rating is not None:
                    break
        # Always append, so an unparseable answer cannot shift later items
        # into the wrong question column.
        ratings.append(rating)

    # Ensure exactly 9 ratings
    return (ratings + [None] * 9)[:9]

def main():
    """Summarise every questionnaire conversation in INPUT_FOLDER as a CSV.

    For each ``.json`` file the patient name and the nine item ratings are
    extracted, a total is computed, and one row is written to OUTPUT_CSV with
    the columns ``No.``, ``Name``, ``Q1``..``Q9`` and ``Total``.

    Raises:
        OSError: If INPUT_FOLDER cannot be listed or a file cannot be
            read or written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # os.listdir returns entries in arbitrary order; sort so that the "No."
    # column and the row order are reproducible between runs.
    files = sorted(f for f in os.listdir(INPUT_FOLDER) if f.endswith(".json"))
    all_rows = []

    for idx, file_name in enumerate(files, start=1):
        file_path = os.path.join(INPUT_FOLDER, file_name)
        name = extract_patient_name(file_path)
        ratings = extract_ratings(file_path)
        # Missing ratings (None) are left out of the sum, i.e. count as 0.
        total = sum(r for r in ratings if isinstance(r, int))
        row = {"No.": idx, "Name": name}
        for i, r in enumerate(ratings, start=1):
            row[f"Q{i}"] = r
        row["Total"] = total
        all_rows.append(row)
        print(f"Processed {name} → Total Score: {total}")

    # Create the output folder if needed so the write cannot fail after all
    # files have already been processed.
    output_dir = os.path.dirname(OUTPUT_CSV)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Write CSV
    fieldnames = ["No.", "Name"] + [f"Q{i}" for i in range(1, 10)] + ["Total"]
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(all_rows)

    print(f"\n✅ Saved summary to {OUTPUT_CSV}")

# Run the analysis when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()


Casual Conversation Analyzer

In [None]:
import os
import json
import csv
import re
from openai import OpenAI

# ---------- CONFIG ----------
# Relative paths resolve against the current working directory. Only files
# ending in ".json" inside INPUT_FOLDER are processed.
INPUT_FOLDER = "Conversations/PHQ9/Normal Conversation/"
# NOTE(review): the questionnaire analyzer writes under "Analysis/PHQ9/"
# (capital A); confirm that the lowercase "analysis/" folder is intended.
OUTPUT_CSV = "analysis/Normal_estimates.csv"
MODEL_NAME = "gpt-4o-mini"   # model name passed to client.responses.create
# -----------------------------

# Shared API client used by estimate_phq9.
# NOTE(review): presumably picks up credentials from the environment — confirm.
client = OpenAI()

# Instructions sent ahead of every transcript: the model is asked to reply
# with a single JSON object holding Q1..Q9 (each 0-3) and Total, nothing else.
PHQ9_PROMPT = """
You are a clinician scoring the PHQ-9 depression questionnaire.
Based only on this conversation, estimate the patient's scores for each PHQ-9 item (Q1–Q9),
where each item is rated 0–3 (0=Not at all, 1=Several days, 2=More than half the days, 3=Nearly every day).
Provide your answer in *strict JSON only* using this exact structure:
{
  "Q1": <int>, "Q2": <int>, "Q3": <int>, "Q4": <int>, "Q5": <int>,
  "Q6": <int>, "Q7": <int>, "Q8": <int>, "Q9": <int>, "Total": <int>
}
Do not include any explanation or extra text.
"""

def extract_patient_name(file_path):
    """Return the patient's name for a casual-conversation file.

    The value stored under the top-level ``"character"`` key is returned when
    the JSON root is a dict containing that key; otherwise the file name
    without its extension is used.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    has_character = isinstance(payload, dict) and "character" in payload
    if not has_character:
        stem, _extension = os.path.splitext(os.path.basename(file_path))
        return stem
    return payload["character"]

def get_conversation_text(file_path):
    """Render a conversation file as plain text for the model.

    When the JSON has a ``"turns"`` entry, each turn becomes one
    ``"<speaker>: <text>"`` line. Otherwise the whole document is returned
    as a JSON string with non-ASCII characters kept as-is.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        payload = json.load(handle)

    if "turns" not in payload:
        return json.dumps(payload, ensure_ascii=False)

    lines = []
    for turn in payload["turns"]:
        lines.append(f"{turn['speaker']}: {turn['text']}")
    return "\n".join(lines)

def safe_parse_json(text):
    """Extract the first JSON object embedded in possibly messy model output.

    The text is scanned for ``{`` characters and a JSON object is decoded
    starting at each one in turn; the first that decodes successfully is
    returned. Text before the object (prose, code fences) and after it,
    including stray braces, is ignored.

    Args:
        text: Raw model output; may be empty or ``None``.

    Returns:
        The decoded object (a dict), or ``None`` when no JSON object is found.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the object, so trailing text
            # (even text containing braces) cannot invalidate the parse.
            parsed, _end = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
            continue
        return parsed
    return None

def _coerce_score(value, upper):
    """Return *value* as an int within ``0..upper``, or None if it is not one."""
    if isinstance(value, bool):
        return None
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    if isinstance(value, int) and 0 <= value <= upper:
        return value
    return None


def _normalize_scores(scores):
    """Reduce parsed model output to exactly the keys Q1..Q9 and Total."""
    items = {f"Q{i}": _coerce_score(scores.get(f"Q{i}"), 3) for i in range(1, 10)}
    if all(value is not None for value in items.values()):
        # The PHQ-9 total is the sum of its items; recompute it rather than
        # trusting the model's arithmetic.
        total = sum(items.values())
    else:
        total = _coerce_score(scores.get("Total"), 27)
    return items | {"Total": total}


def estimate_phq9(file_path):
    """Send the conversation to the model and return validated PHQ-9 scores.

    Args:
        file_path: Path to the conversation JSON file.

    Returns:
        A dict with exactly the keys ``Q1``..``Q9`` and ``Total``. Item values
        are ints in 0-3, or ``None`` when the model's value was missing or
        invalid. ``Total`` is the item sum when all nine items are valid;
        otherwise it is the model's own total if that is an int in 0-27, else
        ``None``. If the request fails or no JSON object can be parsed, every
        value is ``None``.

    Raises:
        OSError, json.JSONDecodeError: If the conversation file itself cannot
            be read (this happens before the request is made).
    """
    conversation = get_conversation_text(file_path)

    try:
        response = client.responses.create(
            model=MODEL_NAME,
            input=[
                {"role": "system", "content": "You are a clinical assessment assistant."},
                {"role": "user", "content": PHQ9_PROMPT + "\n\nConversation:\n" + conversation}
            ],
            temperature=0
        )

        # Concatenate every text part of the response. Output items without
        # content (or with content set to None) are skipped instead of
        # aborting the whole extraction.
        output_text = ""
        for output_item in response.output:
            for content_part in getattr(output_item, "content", None) or []:
                if getattr(content_part, "type", None) == "output_text":
                    output_text += content_part.text

        scores = safe_parse_json(output_text)
        if not isinstance(scores, dict) or not scores:
            raise ValueError("Model returned no valid JSON.")
        # Keep only the expected keys so that downstream CSV writing never
        # sees unexpected columns or out-of-range values.
        return _normalize_scores(scores)

    except Exception as e:
        # Best-effort batch job: report the failure and emit an empty row so
        # the remaining files are still processed.
        print(f"⚠️ Error parsing response for {file_path}: {e}")
        return {f"Q{i}": None for i in range(1, 10)} | {"Total": None}

def main():
    """Estimate PHQ-9 scores for every conversation in INPUT_FOLDER.

    Each ``.json`` file is sent to the model via ``estimate_phq9`` and one row
    per patient is written to OUTPUT_CSV with the columns ``No.``, ``Name``,
    ``Q1``..``Q9`` and ``Total``.

    Raises:
        OSError: If INPUT_FOLDER cannot be listed or a file cannot be
            read or written.
        json.JSONDecodeError: If an input file is not valid JSON.
    """
    # os.listdir returns entries in arbitrary order; sort so that the "No."
    # column and the row order are reproducible between runs.
    files = sorted(f for f in os.listdir(INPUT_FOLDER) if f.endswith(".json"))
    all_results = []

    for idx, file_name in enumerate(files, start=1):
        file_path = os.path.join(INPUT_FOLDER, file_name)
        patient_name = extract_patient_name(file_path)
        scores = estimate_phq9(file_path)
        row = {"No.": idx, "Name": patient_name, **scores}
        all_results.append(row)
        print(f"Processed {patient_name} → Total Score: {scores.get('Total')}")

    # Create the output folder if needed so the results of the (slow, paid)
    # model calls are not lost to a missing directory.
    output_dir = os.path.dirname(OUTPUT_CSV)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Write results
    fieldnames = ["No.", "Name"] + [f"Q{i}" for i in range(1, 10)] + ["Total"]
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csvfile:
        # The scores come from model output and may carry unexpected keys;
        # ignore them rather than letting DictWriter raise ValueError.
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(all_results)

    print(f"\n✅ Results saved to {OUTPUT_CSV}")

# Run the analysis when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()
