# Imports + Config

In [None]:
import json
import requests
import pandas as pd
!pip install --upgrade typing_extensions
!pip install pydantic
from pydantic import BaseModel
from typing import List

# Ollama Config
# Host name of the node running the Ollama server.
OLLAMA_NODE = "arctrdgn001"
# Chat endpoint on that node (port 11434); passed as the URL to the requests.post calls below.
BASE_URL = f"http://{OLLAMA_NODE}:11434/api/chat"

# Model tag sent in the "model" field of each request body.
model = "gemma3-optimized:27b" # qwq:latest



#  Grabbing DAIC-WOZ data (Data Loader)

In [None]:
# Load one DAIC-WOZ interview transcript (participant 305, tab-separated) and
# the table of per-participant PHQ-8 scores.
test_transcript = pd.read_csv(r"/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/305_P/305_TRANSCRIPT.csv", sep="\t")
# NOTE(review): this cell used to read
#   /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/train_split_Depression_AVEC2017.csv
# first and then immediately overwrite the result with the file below. Only the
# read that actually took effect is kept; confirm this is the intended file.
train_phq8_dataset = pd.read_csv(r"/data/datasets/train_split_Depression.csv")


# Path noted alongside the data (PHQ8.pdf in the dataset's documents folder):
#/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/documents/PHQ8.pdf

# Reformatting transcript data to be a string with speaker name + text.
# Rows with a missing speaker or value are dropped first: pandas represents
# them as NaN (a float), and str.join raises TypeError on a non-string item.
spoken_rows = test_transcript.dropna(subset=['speaker', 'value'])
utterance_lines = spoken_rows['speaker'].astype(str) + ': ' + spoken_rows['value'].astype(str)
patient_transcript = '\n'.join(utterance_lines)

# Preview: the first 677 characters of the transcript and the third row of the PHQ-8 table.
print(patient_transcript[:677] + "\n")
print(train_phq8_dataset.iloc[2])



Ellie: hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment think of me as a friend i don't judge i can't i'm a computer  
Ellie: i'm here to learn about people and would love to learn about you i'll ask a few questions to get us started and please feel free to tell me anything your answers are totally confidential 
Ellie: how are you doing today
Participant: i'm doing alright
Ellie: that's good
Ellie: where are you from originally
Participant: uh originally i'm from california
Participant: uh born in glendale
Ellie: really
Ellie: nice
Ellie: how do you like your living situation
Participant: i'm not too happy with it

Participant_ID        305.0
PHQ8_Binary             0.0
PHQ8_Score              7.0
Gender                  1.0
PHQ8_NoInterest         0.0
PHQ8_Depressed          1.0
PHQ8_Sleep              1.0
PHQ8_Tired              2.0
PHQ8_Appetite           2.0
PHQ8_Failure            1.0
PHQ8_Concentrating      0.0
PHQ8_Moving  

## Qualitative Assessment

In [5]:
# Prompt for the free-text (qualitative) assessment. It asks the model for five
# tagged sections (<overall>, <social>, <biological>, <notes>, <risks>) and
# interpolates the whole `patient_transcript` string at the end.
qualitative_prompt = f"""Please analyze the following interview transcript and assess the patient's mental health status, focusing on signs of depression, from social and biological aspects. 

Please provide your assessment in a structured format, including the following sections:
1. Overall Impression in <overall> tags: A brief summary of the patient's mental health status.
2. Social Aspects in <social> tags: Observations related to the patient's interpersonal relationships, family situation, and any relevant social interactions that may influence their mental health.
3. Biological Aspects in <biological> tags: Observations related to the patient's physical health, including sleep quality, appetite, physical activity, stress level, and any other biological factors that may influence their mental health.
4. Additional Notes in <notes> tags: Any other relevant observations or comments that do not fit into the above categories.
5. Potential Risk Factors in <risks> tags: Any identified risk factors for depression or other mental health issues.

Here is the interview transcript:
{patient_transcript}
"""
# Send the qualitative prompt to the Ollama chat endpoint as a single
# non-streaming request and pretty-print the JSON reply.
# Temp, top_k, and top_p set to more deterministic values
response = requests.post(
  BASE_URL,
  json = {
    "model": model,
    "messages": [{"role": "user", "content": qualitative_prompt}],
    "stream": False,
    "options": {
      "temperature": 0,
      "top_k": 20,
      "top_p": 0.9
    }
  },
  # (connect, read) timeout in seconds. Without one, requests waits
  # indefinitely on an unresponsive server and the cell can only be stopped by
  # interrupting the kernel.
  timeout = (10, 600),
)
# Turn an HTTP 4xx/5xx status into an exception here instead of printing
# whatever body came back as if it were a model reply.
response.raise_for_status()

print(json.dumps(response.json(), indent=2))



KeyboardInterrupt: 

## **TESTING ||** Grabbing PHQ-8 related sections from transcript (Using ground truth directly)

In [None]:
class PHQ8Evidence(BaseModel):
    """Schema for the evidence-extraction reply: one list of transcript quotes per PHQ-8 item.

    The field names are the keys requested in the JSON structure at the end of
    evidence_extraction_prompt.
    NOTE(review): this model is not instantiated anywhere in the visible part
    of the notebook; the reply is parsed with json.loads only.
    """
    nointerest_evidence: List[str]  # little interest or pleasure in activities
    depressed_evidence: List[str]  # feeling down, depressed, or hopeless
    sleep_evidence: List[str]  # sleep problems
    tired_evidence: List[str]  # feeling tired or having little energy
    appetite_evidence: List[str]  # appetite changes
    failure_evidence: List[str]  # negative self-perception or feeling like a failure
    concentrating_evidence: List[str]  # trouble concentrating on tasks
    moving_evidence: List[str]  # psychomotor changes

# Prompt for ground-truth-guided evidence extraction. It interpolates the row
# train_phq8_dataset.iloc[2] and the whole transcript, and asks for a JSON
# object holding one list of quotes per PHQ-8 item. The doubled braces {{ }}
# are literal braces inside the f-string.
# NOTE(review): the entire row is formatted into the prompt, so every column of
# that row is included, not only the eight item scores.
evidence_extraction_prompt = f"""Analyze the following therapy transcript and extract specific text chunks that provide evidence for each PHQ-8 score. For each PHQ-8 domain, find 1-3 relevant quotes from the transcript that best support the given score.

PHQ-8 scoring reference:
- 0 = not at all (0-1 days)
- 1 = several days (2-6 days) 
- 2 = more than half the days (7-11 days)
- 3 = nearly every day (12-14 days)

Given PHQ-8 scores: {train_phq8_dataset.iloc[2]}

For each domain below, extract direct quotes from the transcript that justify the given score. If the score is 0, find evidence showing the absence or minimal presence of that symptom. If the score is higher, find evidence showing the frequency/severity indicated by that score.

PHQ-8 domains:
- nointerest: little interest or pleasure in activities
- depressed: feeling down, depressed, or hopeless
- sleep: sleep problems (trouble falling/staying asleep or sleeping too much)
- tired: feeling tired or having little energy
- appetite: appetite changes (poor appetite or overeating)
- failure: negative self-perception or feeling like a failure
- concentrating: trouble concentrating on tasks
- moving: psychomotor changes (moving/speaking slowly or restlessness)

Return a JSON object with arrays of relevant transcript quotes for each domain. Each quote should be a direct excerpt from the transcript that supports the given score.

Therapy transcript:
{patient_transcript}

Respond with valid JSON matching this structure:
{{
    "nointerest_evidence": ["quote1", "quote2"],
    "depressed_evidence": ["quote1", "quote2"],
    "sleep_evidence": ["quote1", "quote2"],
    "tired_evidence": ["quote1", "quote2"],
    "appetite_evidence": ["quote1", "quote2"],
    "failure_evidence": ["quote1", "quote2"],
    "concentrating_evidence": ["quote1", "quote2"],
    "moving_evidence": ["quote1", "quote2"]
}}

Important: Extract UNIQUE quotes only - do not repeat the same quote multiple times. Each quote should be different and provide distinct evidence.
"""

# Send the evidence-extraction prompt to the Ollama chat endpoint as a single
# non-streaming request with near-deterministic sampling options.
response = requests.post(
    BASE_URL,
    json={
        "model": model,
        "messages": [{"role": "user", "content": evidence_extraction_prompt}],
        "stream": False,
        "options": {
            "temperature": 0.1,
            "top_k": 10,
            "top_p": 0.8
        }
    },
    # (connect, read) timeout in seconds. Without one, requests waits
    # indefinitely on an unresponsive server and the cell can only be stopped
    # by interrupting the kernel.
    timeout=(10, 600),
)
# Turn an HTTP 4xx/5xx status into an exception here, before the parsing code
# tries to read response_data['message'] from an error body.
response.raise_for_status()

# Parse the reply: take the assistant text out of the response envelope.
response_data = response.json()
content = response_data['message']['content']

# Remove an optional Markdown code fence around the JSON.
# This used to be content.strip('```json\n').strip('\n```'), but str.strip
# takes a *set of characters*, not a prefix/suffix. That only worked while the
# payload started with '{', ended with '}', and nothing (not even a space)
# followed the closing fence.
content = content.strip()
for opening_fence in ('```json', '```'):
    if content.startswith(opening_fence):
        content = content[len(opening_fence):]
        break
if content.endswith('```'):
    content = content[:-len('```')]
content = content.strip()

evidence_dict = json.loads(content)

# Add scores to the output: output key -> column of the ground-truth row.
scores = train_phq8_dataset.iloc[2]
score_columns = {
    'nointerest': 'PHQ8_NoInterest',
    'depressed': 'PHQ8_Depressed',
    'sleep': 'PHQ8_Sleep',
    'tired': 'PHQ8_Tired',
    'appetite': 'PHQ8_Appetite',
    'failure': 'PHQ8_Failure',
    'concentrating': 'PHQ8_Concentrating',
    'moving': 'PHQ8_Moving'
}
evidence_dict['scores'] = {
    item: int(scores[column]) for item, column in score_columns.items()
}

# Remove duplicate quotes in each evidence list, keeping first-seen order.
# Non-list values (the 'scores' dict added above) are left untouched.
for key, quotes in evidence_dict.items():
    if isinstance(quotes, list):
        evidence_dict[key] = list(dict.fromkeys(quotes))

formatted_output = json.dumps(evidence_dict, indent=4)
print(formatted_output)
print(type(formatted_output))

{
    "nointerest_evidence": [
        "I don't like people too much, so being outdoors and uh I've always enjoyed being outdoors that's how I like being a truck driver I've traveled the world.",
        "There's not a lot that I like about l_a but um I like hollywood I like the the movie industry um"
    ],
    "depressed_evidence": [
        "uh I've been diagnosed with uh bipolarism",
        "I just get a lot of racing thoughts so or I get depressed you know 'cause I or I worry about things that you know that really I have no control over"
    ],
    "sleep_evidence": [
        "It's been hard lately it's been probably hard for the last uh going on a year um you know I uh depending on my thought process at the time I like I said I've been looking for work quite a bit um so just wondering why you know I don't get called I I just have my I have racing thoughts",
        "lately I have been probably about the last few months"
    ],
    "tired_evidence": [
        "lately I've been sl

## Quantitative Assessment

In [None]:
class PHQ8ScoreWithExplanation(BaseModel):
    """Model output for a single PHQ-8 item: quoted evidence, reasoning, and the score."""
    evidence: str  # Direct quotes or references from interview
    reason: str  # Brief explanation for the score
    score: int  # 0-3 per the prompt; the range is not enforced here, any int validates

class PHQ8ScoresWithExplanations(BaseModel):
    """Full quantitative reply: one PHQ8ScoreWithExplanation per PHQ-8 item.

    The field names are the JSON keys that quantitative_prompt asks the model
    to return inside its <answer> tags.
    """
    nointerest: PHQ8ScoreWithExplanation  # Little interest or pleasure in doing things
    depressed: PHQ8ScoreWithExplanation   # Feeling down, depressed, or hopeless
    sleep: PHQ8ScoreWithExplanation       # Trouble falling/staying asleep, or sleeping too much
    tired: PHQ8ScoreWithExplanation       # Feeling tired or having little energy
    appetite: PHQ8ScoreWithExplanation    # Poor appetite or overeating
    failure: PHQ8ScoreWithExplanation     # Feeling bad about self or like a failure
    concentrating: PHQ8ScoreWithExplanation # Trouble concentrating
    moving: PHQ8ScoreWithExplanation      # Moving/speaking slowly or being fidgety/restless

# Prompt for PHQ-8 score prediction. It embeds the transcript in <transcript>
# tags and asks the model to reason in <thinking> tags, then return a JSON
# object in <answer> tags keyed by the eight PHQ-8 items, each with "evidence",
# "reason" and "score". The doubled braces {{ }} are literal braces inside the
# f-string.
quantitative_prompt = f"""Analyze the following interview transcript and predict PHQ-8 scores. The PHQ-8 is a depression screening tool consisting of eight of the nine DSM-IV criteria for depressive disorders, assessing symptoms over the past 2 weeks. Each item is scored: 0 = not at all (0-1 days), 1 = several days (2-6 days), 2 = more than half the days (7-11 days), 3 = nearly every day (12-14 days).

Interview transcript:
<transcript>
{patient_transcript}
</transcript>

Analyze each symptom using the following approach in <thinking> tags:
1. Identify direct quotes from the transcript that relate to each symptom
2. Consider the patient's speaking patterns and behaviors that indicate frequency/severity
3. Evaluate whether the evidence suggests the symptom occurs: not at all (0-1 days), several days (2-6 days), more than half the days (7-11 days), or nearly every day (12-14 days)
4. Determine if the evidence is significant enough to warrant a particular score
5. Consider if absence of mention means absence of symptom or just not discussed

After your analysis, provide your final assessment in <answer> tags as a JSON object.

For each symptom, provide:
1. "evidence": exact quotes from the transcript (use "No direct evidence in transcript" if symptom not mentioned)
2. "reason": explanation connecting the evidence to the frequency-based score
3. "score": integer value 0-3 based on your reasoning

Return ONLY a JSON object in <answer> tags with these exact keys:
- "nointerest": {{evidence, reason, score}} for little interest or pleasure in doing things (anhedonia)
- "depressed": {{evidence, reason, score}} for feeling down, depressed, or hopeless (depressed mood)
- "sleep": {{evidence, reason, score}} for trouble falling or staying asleep, or sleeping too much (sleep problems)
- "tired": {{evidence, reason, score}} for feeling tired or having little energy (fatigue)
- "appetite": {{evidence, reason, score}} for poor appetite or overeating (appetite/weight changes)
- "failure": {{evidence, reason, score}} for feeling bad about yourself or that you are a failure (negative self-perception)
- "concentrating": {{evidence, reason, score}} for trouble concentrating on things like reading or watching TV (concentration problems)
- "moving": {{evidence, reason, score}} for moving or speaking slowly, or being fidgety/restless (psychomotor changes)"""

# Send the quantitative prompt to the Ollama chat endpoint as a single
# non-streaming request.
# Most deterministic temp, top_k, and top_p
response = requests.post(
    BASE_URL,
    json={
        "model": model,
        "messages": [{"role": "user", "content": quantitative_prompt}],
        "stream": False,
        "options": {
            "temperature": 0,
            "top_k": 1,
            "top_p": 1.0
        }
    },
    # (connect, read) timeout in seconds. Without one, requests waits
    # indefinitely on an unresponsive server and the cell can only be stopped
    # by interrupting the kernel.
    timeout=(10, 600),
)
# Turn an HTTP 4xx/5xx status into an exception here, before the parsing code
# tries to read a model reply out of an error body.
response.raise_for_status()

# Parse and validate the response, then compare the predicted PHQ-8 item
# scores with the ground-truth row and print the model's reasoning.

# JSON key of each PHQ-8 item -> heading used in the detailed report.
# The insertion order of this dict is the item order used for every listing below.
symptom_names = {
    'nointerest': 'Little Interest/Pleasure',
    'depressed': 'Feeling Depressed',
    'sleep': 'Sleep Problems',
    'tired': 'Fatigue',
    'appetite': 'Appetite Changes',
    'failure': 'Negative Self-Perception',
    'concentrating': 'Concentration Problems',
    'moving': 'Psychomotor Changes'
}

# Ground-truth column for each item, in the same order as symptom_names.
metrics = ['PHQ8_NoInterest', 'PHQ8_Depressed', 'PHQ8_Sleep', 'PHQ8_Tired',
           'PHQ8_Appetite', 'PHQ8_Failure', 'PHQ8_Concentrating', 'PHQ8_Moving']

try:
    response_data = response.json()
    content = response_data['message']['content']

    # Extract content from <answer> tags if present
    if '<answer>' in content and '</answer>' in content:
        content = content.split('<answer>')[1].split('</answer>')[0].strip()

    # Remove markdown code blocks if present
    if content.startswith('```json'):
        content = content.split('```json')[1].split('```')[0].strip()
    elif content.startswith('```'):
        content = content.split('```')[1].split('```')[0].strip()

    # Parse the JSON response and validate with Pydantic
    scores_dict = json.loads(content)
    if not isinstance(scores_dict, dict):
        # Unpacking a non-mapping with ** raises TypeError, which the handler
        # below does not catch; report it as a parsing problem instead.
        raise ValueError(f"Expected a JSON object, got {type(scores_dict).__name__}")
    phq8_scores = PHQ8ScoresWithExplanations(**scores_dict)

    # Extract the 8 PHQ-8 score values (one attribute lookup per item key
    # instead of two hand-written lists of the same eight attributes).
    scores_list = [getattr(phq8_scores, key).score for key in symptom_names]

    # The prompt defines item scores as integers 0-3, but the Pydantic model
    # accepts any int; reject out-of-range values so they cannot distort the
    # error figures printed below.
    for key, item_score in zip(symptom_names, scores_list):
        if not 0 <= item_score <= 3:
            raise ValueError(f"Score for '{key}' is {item_score}; expected an integer from 0 to 3")

    # Get ground truth values
    ground_truth = train_phq8_dataset.iloc[2]

    print("Comparison of Predicted vs Ground Truth:")
    print("Metric\t\t\tPredicted\tGround Truth\tDifference")
    print("-" * 65)

    differences = []
    predicted_values = list(scores_list)

    for metric, pred_val in zip(metrics, predicted_values):
        gt_val = int(ground_truth[metric])
        diff = abs(pred_val - gt_val)
        differences.append(diff)
        print(f"{metric:<23} {pred_val:<12} {gt_val:<15} {diff}")

    avg_difference = sum(differences) / len(differences)
    print("-" * 65)
    print(f"Average Absolute Difference: {avg_difference:.2f}")

    # Print reasoning and evidence section
    print("\n\nDetailed Reasoning for Each Score:")
    print("=" * 80)

    for key, symptom_name in symptom_names.items():
        score_data = getattr(phq8_scores, key)
        print(f"\n{symptom_name} (Score: {score_data.score})")
        print("-" * 40)
        print(f"Evidence: {score_data.evidence}")
        print(f"Reason: {score_data.reason}")

except (json.JSONDecodeError, KeyError, ValueError) as e:
    print(f"Error parsing response: {e}")
    # Print the raw body text rather than calling response.json() again: when
    # the body itself is what failed to decode, a second .json() call would
    # raise from inside this handler and hide the original error.
    print("Raw response:", response.text)

In [None]:
"""Available models:
- gemma3-optimized:27b
- gemma3_optimized:8bit
- gemma3:27b-it-qat
- gemma3:27b
- phi4-optimized:latest
- phi4:latest
- phi4-reasoning:plus
- qwen3_optimized:latest
- qwen3:32b
- llama3.3:latest
- llama4:scout
- mistral3.1-optimized:latest
- mistral-small3.1:latest
- qwq:latest

Notes:
1. The models marked as "optimized" are configured to use multiple GPUs for faster inference.
2. The llama4:maverick model is not available due to its size.
"""

# Settings for the standalone connectivity check in this cell.
# OLLAMA_NODE, BASE_URL and model are reassigned here to the same values they
# are given in the configuration cell at the top of the notebook.
OLLAMA_NODE = "arctrdgn001" # TODO: Change this variable to the node where Ollama is running
BASE_URL = f"http://{OLLAMA_NODE}:11434/api/chat"

model = "gemma3-optimized:27b" # TODO: Change this variable to the model you want to use
message = "What is the capital of France?" # TODO: Change this variable to the message you want to ask the model

# Send `message` to the Ollama chat endpoint as a single non-streaming request
# (default sampling options) and pretty-print the JSON reply.
response = requests.post(
  BASE_URL,
  json = {
    "model": model,
    "messages": [{"role": "user", "content": message}],
    "stream": False
  },
  # (connect, read) timeout in seconds. Without one, requests waits
  # indefinitely on an unresponsive server and the cell can only be stopped by
  # interrupting the kernel.
  timeout = (10, 600),
)
# Turn an HTTP 4xx/5xx status into an exception instead of printing the error
# body as if it were a model reply.
response.raise_for_status()

print(json.dumps(response.json(), indent=2))

{
  "model": "gemma3-optimized:27b",
  "created_at": "2025-07-08T17:40:17.231880462Z",
  "message": {
    "role": "assistant",
    "content": "The capital of France is **Paris**. \n\nIt's also the country's most populous city!\n\n\n\n"
  },
  "done_reason": "stop",
  "done": true,
  "total_duration": 34293797307,
  "load_duration": 31815583539,
  "prompt_eval_count": 16,
  "prompt_eval_duration": 578633882,
  "eval_count": 24,
  "eval_duration": 1898829459
}
