In [39]:
import json
import requests
import pandas as pd
import os
!pip install requests



In [76]:
OLLAMA_NODE = "arctrddgxa004" # TODO: Change this variable to the node where Ollama is running
BASE_URL = f"http://{OLLAMA_NODE}:11434/api/chat"
model = "gemma3-optimized:27b" # TODO: Change this variable to the model you want to use

In [77]:

train_path = pd.read_csv("/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/train_split_Depression_AVEC2017.csv")
dev_path = pd.read_csv("/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/dev_split_Depression_AVEC2017.csv")

id_train = train_path.iloc[:, 0].tolist()
id_dev = dev_path.iloc[:, 0].tolist()

# DEBUG: Check if data loaded correctly
print(f"Number of train subjects: {len(id_train)}")
print(f"Number of dev subjects: {len(id_dev)}")
print("First 3 train subjects:", id_train[:3] if len(id_train) >= 3 else id_train)
print("First 3 dev subjects:", id_dev[:3] if len(id_dev) >= 3 else id_dev)

all_subjects = [(subj, 'train') for subj in id_train] + [(subj, 'dev') for subj in id_dev]
print(f"Total subjects to process: {len(all_subjects)}")

results = []
processed_count = 0
skipped_count = 0

for i, (participant_id, dataset_type) in enumerate(all_subjects):
    print(f"\n--- Processing {i+1}/{len(all_subjects)}: {participant_id} ({dataset_type} dataset) ---")
    
    id_transcript = os.path.join("/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/", f"{participant_id}_P", f"{participant_id}_TRANSCRIPT.csv")
    
    print(f"Looking for transcript at: {id_transcript}")
    
    if not os.path.exists(id_transcript):
        print(f"Transcript not found for {participant_id}")
        skipped_count += 1
        continue

    print(f"Transcript found, loading data...")
    
    try:
        transcript_data = pd.read_csv(id_transcript, sep='\t')
        dialogue_txt = transcript_data.dropna(subset=['speaker', 'value'])
        dialogue_txt['dialogue'] = dialogue_txt['speaker'] + " : " + dialogue_txt['value']
        full_dialogue = "\n".join(dialogue_txt['dialogue'])
        
        print(f"Dialogue length: {len(full_dialogue)} characters")
        print(f"First 200 chars: {full_dialogue[:200]}...")
        
        # Your existing prompt setup
        system_prompt = "You are a psychiatrist. Your job is to assess and diagnose patients for depression. Be as objective, concise, and accurate as possible."
        qual_assessment = f"""The following is an interview transcript from a psychiatric assessment of a subject who is being screened for depression. Please note that 'Ellie' is the interviewer, and 'Participant' is the subject being assessed.
        {full_dialogue}
      Please:
      1. Provide an overall qualitative assessment of the subject's mental health. 
      2. Summarize social aspects that may influence the subject's mental health. (ex. familial relationships, frienship dynamics, work environment, etc. that are relevant to the subjects mental health)
      3. Summarize biological aspects that may influence the subject's mental health. (ex. famillial history of mental health issues, previous or pre-existing mental health issues, stress levels, etc. that are relevant to the subjects mental health)
      4. Identify potential risk factors the subject may be experiencing.
      5. Use exact quotes from the transcript to support your assessment for each tag.

      Output should be formatted as bullet points with headings for each section using stars. Example: **Tiredness** <explanation of tiredness>. Do not include any additional text outside the bullet points
      Please answer in this XML format with each tag on a new line, properly indented. Use straight quotes instead of curly quotes, and do not include any additional text outside the XML tags:

      <assessment>
        <!-- Summary of participant's overall mental health -->
       <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
      </assessment>

      <social_factors>
        <!-- Summary of social influences on patient's health -->
        <exact_quotes>
      </social_factors>

      <biological_factors>
        <!-- Summary of biological influences on patient's health -->
       <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
      </biological_factors>

      <risk_factors>
        <!-- Summary of potential risk factors -->
         <exact_quotes>
       <!-- Quotes from the transcript that support the assessment -->
       </exact_quotes>
      </risk_factors>
    """
        
        print(f"Sending API request...")
        
        response = requests.post(
          BASE_URL,
          json = {
            "model": model,
            "messages": [{"role": "system", "content": system_prompt},
                         {"role": "user", "content": qual_assessment}],
            "stream": False,
            "options": {
              "temperature": 0,
              "top_k": 20,
              "top_p": 0.9
            }
          }
        )
        
        if response.status_code == 200:
            qual_content = response.json()['message']['content']
            print(f"API response received (length: {len(qual_content)} chars)")
            print(f"Response preview: {qual_content[:100]}...")
            
            results.append({
                "participant_id": participant_id,
                "dataset_type": dataset_type,
                "qualitative_assessment": qual_content
            })
            
            processed_count += 1
            print(f"Completed participant {participant_id} ({processed_count} total completed)")
            
        else:
            print(f"API request failed with status code: {response.status_code}")
            print(f"Response: {response.text}")
            
    except Exception as e:
        print(f"Error processing {participant_id}: {str(e)}")
        import traceback
        traceback.print_exc()
        continue

print(f"\n=== FINAL SUMMARY ===")
print(f"Total subjects: {len(all_subjects)}")
print(f"Successfully processed: {processed_count}")
print(f"Skipped (no transcript): {skipped_count}")
print(f"Results collected: {len(results)}")

# Save results
if results:
    resultsdf = pd.DataFrame(results)
    output_file = "/home/users/nblair7/ai-psychiatrist/nblair7_results5.csv"
    resultsdf.to_csv(output_file, index=False)
    print(f"Saved results to: {output_file}")
else:
    print("No results to save!")

Number of train subjects: 107
Number of dev subjects: 35
First 3 train subjects: [303, 304, 305]
First 3 dev subjects: [302, 307, 331]
Total subjects to process: 142

--- Processing 1/142: 303 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/303_P/303_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 14183 characters
First 200 chars: Ellie : hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment think of me as a friend i don't judge i can't i'm a computer i'm here to learn about pe...
Sending API request...
API response received (length: 3945 chars)
Response preview: ```xml
<assessment>
  <!-- Summary of participant's overall mental health -->
  The participant pres...
Completed participant 303 (1 total completed)

--- Processing 2/142: 304 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/304_P/304_TRANSCRIPT

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dialogue_txt['dialogue'] = dialogue_txt['speaker'] + " : " + dialogue_txt['value']


API response received (length: 4099 chars)
Response preview: ```xml
<assessment>
  <!-- Summary of participant's overall mental health -->
  The participant pres...
Completed participant 313 (6 total completed)

--- Processing 7/142: 315 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/315_P/315_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 11854 characters
First 200 chars: Ellie : hi i'm ellie thanks for coming in today
Participant : alright
Ellie : i was created to talk to people in a safe and secure environment
Ellie : i'm not a therapist but i'm here to learn about p...
Sending API request...
API response received (length: 4710 chars)
Response preview: ```xml
<assessment>
  <!-- Summary of participant's overall mental health -->
  The participant pres...
Completed participant 315 (7 total completed)

--- Processing 8/142: 316 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/d

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 565, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/http/client.py", line 1395, in getresponse
    response.begin()
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/http/client.py", line 325, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/http/client.

Transcript found, loading data...
Dialogue length: 11327 characters
First 200 chars: Ellie : hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment 
Participant : hi
Ellie : i'm not a therapist but i'm here to learn about people and wo...
Sending API request...
Error processing 345: HTTPConnectionPool(host='arctrddgxa004', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x73a24ca920d0>: Failed to establish a new connection: [Errno 111] Connection refused'))

--- Processing 30/142: 347 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/347_P/347_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 6476 characters
First 200 chars: Ellie : hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment 
Ellie : i'm not a therapist but i'm here to learn

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 493, in _make_request
   

Transcript found, loading data...
Dialogue length: 23754 characters
First 200 chars: Participant : <synch>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn ab...
Sending API request...
Error processing 368: HTTPConnectionPool(host='arctrddgxa004', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x73a24c840e90>: Failed to establish a new connection: [Errno 111] Connection refused'))

--- Processing 46/142: 369 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/369_P/369_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 17596 characters
First 200 chars: Participant : <sync>
Participant : <ellie starts>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe a

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 493, in _make_request
   

Dialogue length: 11542 characters
First 200 chars: Participant : <sync>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn abo...
Sending API request...
Error processing 402: HTTPConnectionPool(host='arctrddgxa004', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x73a24ca92d50>: Failed to establish a new connection: [Errno 111] Connection refused'))

--- Processing 65/142: 409 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/409_P/409_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 18705 characters
First 200 chars: Participant : <sync>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dialogue_txt['dialogue'] = dialogue_txt['speaker'] + " : " + dialogue_txt['value']
Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Tr

Dialogue length: 8966 characters
First 200 chars: Participant : <synch>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn ab...
Sending API request...
Error processing 443: HTTPConnectionPool(host='arctrddgxa004', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x73a24c874090>: Failed to establish a new connection: [Errno 111] Connection refused'))

--- Processing 83/142: 444 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/444_P/444_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 22138 characters
First 200 chars: Participant : <sync>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn 

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 493, in _make_request
   


--- Processing 102/142: 479 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/479_P/479_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 10605 characters
First 200 chars: Participant : <synch>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn ab...
Sending API request...
Error processing 479: HTTPConnectionPool(host='arctrddgxa004', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x73a24cb219d0>: Failed to establish a new connection: [Errno 111] Connection refused'))

--- Processing 103/142: 485 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/485_P/485_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 9446 characters
First 200 chars: Pa

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 493, in _make_request
   

Transcript found, loading data...
Dialogue length: 14895 characters
First 200 chars: Participant : <sync>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn abo...
Sending API request...
Error processing 390: HTTPConnectionPool(host='arctrddgxa004', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x73a24ca73010>: Failed to establish a new connection: [Errno 111] Connection refused'))

--- Processing 120/142: 395 (dev dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/395_P/395_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 12484 characters
First 200 chars: Participant : <synch>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 493, in _make_request
   

Transcript found, loading data...
Dialogue length: 8879 characters
First 200 chars: Participant : <synch>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a therapist but i'm here to learn ab...
Sending API request...
Error processing 489: HTTPConnectionPool(host='arctrddgxa004', port=11434): Max retries exceeded with url: /api/chat (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x73a24ca95a10>: Failed to establish a new connection: [Errno 111] Connection refused'))

--- Processing 141/142: 490 (dev dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/490_P/490_TRANSCRIPT.csv
Transcript found, loading data...
Dialogue length: 9041 characters
First 200 chars: Participant : <synch>
Ellie : IntroV4Confirmation (hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment i'm not a

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connection.py", line 198, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 85, in create_connection
    raise err
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/util/connection.py", line 73, in create_connection
    sock.connect(sa)
ConnectionRefusedError: [Errno 111] Connection refused

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
               ^^^^^^^^^^^^^^^^^^^
  File "/data/users2/nblair7/envs/aipsy/lib/python3.11/site-packages/urllib3/connectionpool.py", line 493, in _make_request
   

In [70]:
# Add debugging at the beginning
train_path = pd.read_csv("/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/train_split_Depression_AVEC2017.csv")
dev_path = pd.read_csv("/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/dev_split_Depression_AVEC2017.csv")

id_train = train_path.iloc[:, 0].tolist()
id_dev = dev_path.iloc[:, 0].tolist()

# DEBUG: Check if data loaded correctly
print(f"Number of train subjects: {len(id_train)}")
print(f"Number of dev subjects: {len(id_dev)}")
print("First 3 train subjects:", id_train[:3] if len(id_train) >= 3 else id_train)
print("First 3 dev subjects:", id_dev[:3] if len(id_dev) >= 3 else id_dev)

all_subjects = [(subj, 'train') for subj in id_train] + [(subj, 'dev') for subj in id_dev]
print(f"Total subjects to process: {len(all_subjects)}")

results = []
processed_count = 0
skipped_count = 0

for i, (participant_id, dataset_type) in enumerate(all_subjects):
    print(f"\n--- Processing {i+1}/{len(all_subjects)}: {participant_id} ({dataset_type} dataset) ---")
    
    id_transcript = os.path.join("/data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/", f"{participant_id}_P", f"{participant_id}_TRANSCRIPT.csv")
    
    print(f"Looking for transcript at: {id_transcript}")
    
    if not os.path.exists(id_transcript):
        print(f"  ❌ Transcript not found for {participant_id}")
        skipped_count += 1
        continue

    print(f"  ✅ Transcript found, loading data...")
    
    try:
        transcript_data = pd.read_csv(id_transcript, sep='\t')
        dialogue_txt = transcript_data.dropna(subset=['speaker', 'value'])
        dialogue_txt['dialogue'] = dialogue_txt['speaker'] + " : " + dialogue_txt['value']
        full_dialogue = "\n".join(dialogue_txt['dialogue'])
        
        print(f"  📝 Dialogue length: {len(full_dialogue)} characters")
        print(f"  📝 First 200 chars: {full_dialogue[:200]}...")
        
        # Your existing prompt setup
        system_prompt = "You are a psychiatrist. Your job is to assess and diagnose patients for depression. Be as objective, concise, and accurate as possible."
        qual_assessment = f"""The following is an interview transcript from a psychiatric assessment of a subject who is being screened for depression. Please note that 'Ellie' is the interviewer, and 'Participant' is the subject being assessed.
        {full_dialogue}
      Please:
      1. Provide an overall qualitative assessment of the subject's mental health. 
      2. Summarize social aspects that may influence the subject's mental health. (ex. familial relationships, frienship dynamics, work environment, etc. that are relevant to the subjects mental health)
      3. Summarize biological aspects that may influence the subject's mental health. (ex. famillial history of mental health issues, previous or pre-existing mental health issues, stress levels, etc. that are relevant to the subjects mental health)
      4. Identify potential risk factors the subject may be experiencing.
      5. Use exact quotes from the transcript to support your assessment for each tag.

      Output should be formatted as bullet points with headings for each section using stars. Example: **Tiredness** <explanation of tiredness>. Do not include any additional text outside the bullet points
      Please answer in this XML format with each tag on a new line, properly indented. Use straight quotes instead of curly quotes, and do not include any additional text outside the XML tags:

      <assessment>
        <!-- Summary of participant's overall mental health -->
       <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
      </assessment>

      <social_factors>
        <!-- Summary of social influences on patient's health -->
        <exact_quotes>
      </social_factors>

      <biological_factors>
        <!-- Summary of biological influences on patient's health -->
       <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
      </biological_factors>

      <risk_factors>
        <!-- Summary of potential risk factors -->
         <exact_quotes>
       <!-- Quotes from the transcript that support the assessment -->
       </exact_quotes>
      </risk_factors>
    """
        
        print(f"  🚀 Sending API request...")
        
        response = requests.post(
          BASE_URL,
          json = {
            "model": model,
            "messages": [{"role": "system", "content": system_prompt},
                         {"role": "user", "content": qual_assessment}],
            "stream": False,
            "options": {
              "temperature": 0,
              "top_k": 20,
              "top_p": 0.9
            }
          }
        )
        
        if response.status_code == 200:
            qual_content = response.json()['message']['content']
            print(f"  ✅ API response received (length: {len(qual_content)} chars)")
            print(f"  📄 Response preview: {qual_content[:100]}...")
            
            results.append({
                "participant_id": participant_id,
                "dataset_type": dataset_type,
                "qualitative_assessment": qual_content
            })
            
            processed_count += 1
            print(f"  ✅ Completed participant {participant_id} ({processed_count} total completed)")
            
        else:
            print(f"  ❌ API request failed with status code: {response.status_code}")
            print(f"  ❌ Response: {response.text}")
            
    except Exception as e:
        print(f"  ❌ Error processing {participant_id}: {str(e)}")
        import traceback
        traceback.print_exc()
        continue

print(f"\n=== FINAL SUMMARY ===")
print(f"Total subjects: {len(all_subjects)}")
print(f"Successfully processed: {processed_count}")
print(f"Skipped (no transcript): {skipped_count}")
print(f"Results collected: {len(results)}")

# Save results
if results:
    resultsdf = pd.DataFrame(results)
    output_file = "/home/users/nblair7/ai-psychiatrist/nblair7_results5.csv"
    resultsdf.to_csv(output_file, index=False)
    print(f"📁 Saved results to: {output_file}")
else:
    print("⚠️  No results to save!")

Number of train subjects: 107
Number of dev subjects: 35
First 3 train subjects: [303, 304, 305]
First 3 dev subjects: [302, 307, 331]
Total subjects to process: 142

--- Processing 1/142: 303 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_dataset/303_P/303_TRANSCRIPT.csv
  ✅ Transcript found, loading data...
  📝 Dialogue length: 14183 characters
  📝 First 200 chars: Ellie : hi i'm ellie thanks for coming in today i was created to talk to people in a safe and secure environment think of me as a friend i don't judge i can't i'm a computer i'm here to learn about pe...
  🚀 Sending API request...
  ✅ API response received (length: 3945 chars)
  📄 Response preview: ```xml
<assessment>
  <!-- Summary of participant's overall mental health -->
  The participant pres...
  ✅ Completed participant 303 (1 total completed)

--- Processing 2/142: 304 (train dataset) ---
Looking for transcript at: /data/users4/xli/ai-psychiatrist/datasets/daic_woz_

KeyboardInterrupt: 

In [42]:
system_prompt = "You are a psychiatrist. Your job is to assess and diagnose patients for depression. Be as objective, concise, and accurate as possible."
qual_assessment = f"""The following is an interview transcript from a psychiatric assessment of a subject who is being screened for depression. Please note that 'Ellie' is the interviewer, and 'Participant' is the subject being assessed.
{full_dialogue}


Please:
1. Provide an overall qualitative assessment of the subject’s mental health. 
2. Summarize social aspects that may influence the subject’s mental health. (ex. familial relationships, frienship dynamics, work environment, etc. that are relevant to the subjects mental health)
3. Summarize biological aspects that may influence the subject’s mental health. (ex. famillial history of mental health issues, previous or pre-existing mental health issues, stress levels, etc. that are relevant to the subjects mental health)
4. Identify potential risk factors the subject may be experiencing.
5. Use exact quotes from the transcript to support your assessment for each tag.

Output should be formatted as bullet points with headings for each section using stars. Example: **Tiredness** <explanation of tiredness>. Do not include any additional text outside the bullet points
Please answer in this XML format with each tag on a new line, properly indented. Use straight quotes instead of curly quotes, and do not include any additional text outside the XML tags:

<assessment>
<!-- Summary of participant’s overall mental health -->
  <exact_quotes>
  <!-- Quotes from the transcript that support the assessment -->
  </exact_quotes>
</assessment>


<social_factors>
<!-- Summary of social influences on patient's health -->
  <exact_quotes>
  <!-- Quotes from the transcript that support the assessment -->
  </exact_quotes>
</social_factors>


<biological_factors>
<!-- Summary of biological influences on patient's health -->
  <exact_quotes>
  <!-- Quotes from the transcript that support the assessment -->
  </exact_quotes>
</biological_factors>


<risk_factors>
<!-- Summary of potential risk factors -->
  <exact_quotes>
  <!-- Quotes from the transcript that support the assessment -->
  </exact_quotes>
</risk_factors>
"""

response = requests.post(
  BASE_URL,
  json = {
    "model": model,
    "messages": [{"role": "system", "content": system_prompt},
                 {"role": "user", "content": qual_assessment}],
    "stream": False,
    "options": {
      "temperature": 0,
      "top_k": 20,
      "top_p": 0.9
    }
  }
)

qual_content = response.json()['message']['content']
print(qual_content)



```xml
<assessment>
    <a>
        <value>0</value>
        <explanation>The patient does not express a lack of interest or pleasure in activities. They mention going to the movies, which suggests some engagement.</explanation>
        <exact_quotes>
        </exact_quotes>
    </a>
    <b>
        <value>0</value>
        <explanation>The patient consistently presents as neutral and does not express feelings of sadness, hopelessness, or depression. They state they are feeling "normal."</explanation>
        <exact_quotes>
            "feel_lately (how have you been feeling lately) Participant : normal"
        </exact_quotes>
    </b>
    <c>
        <value>0</value>
        <explanation>The patient states they "sleep too well," indicating they are likely getting sufficient sleep and not experiencing insomnia or excessive sleepiness.</explanation>
        <exact_quotes>
            "easy_sleep (how easy is it for you to get a good night's sleep) Participant : that's i get i sleep too

In [43]:
system_prompt = "You are a psychiatrist. Your job is to assess and diagnose patients for depression. Be as objective, concise, and accurate as possible."
quant_assessment = f"""The following is an interview transcript from a psychiatric assessment of a subject who is being screened for depression. Please note that 'Ellie' is the interviewer, and 'Participant' is the subject being assessed.
{full_dialogue}

Please:
1. Predict each score based on the patient's responses in the interview transcript.Provide a score for each of the following scale. Score from 0-3 (0- not at all 1- several days 2- more than half the days 3- nearly every day):

    a) little interest or pleasure in doing things 
    b) Feeling down, depressed, or hopeless
    c) Trouble falling or staying asleep, or sleeping too much
    d) Feeling tired or having little energy
    e) Poor appetite or overeating
    f) Feeling bad about yourself - or that you are a failure or have let yourself or your family down
    g) Trouble concentrating on things, such as reading the newspaper or watching television
    h) Moving or speaking so slowly that other people could have noticed. Or the opposite - being so fidgety or restless that you have been moving around a lot more than usual

2. Provide a brief explanation for each score.
3. Provide a summary of the participant's mental health based on the scores.
4. Provide a total score based on the sum of the individual scores, along with the category the total score falls into:
    0 - 4: no significant depressive symptoms
    5 - 9: mild depressive symptoms
    10 - 14: moderate depressive symptoms
    15 - 19: moderately severe depressive symptoms
    20 - 24: severe depressive symptoms
5. Use exact quotes from the transcript to support your assessment.
    
Output should be formatted as bullet points. Do not include any additional text outside the bullet points
Please answer in this XML format with each tag on a new line, properly indented. Use straight quotes instead of curly quotes, and do not include any additional text outside the XML tags.:

<assessment>
    <a>
        <!--little interest or pleasure in doing things-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </a>
    <b>
        <!--Feeling down, depressed, or hopeless-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </b>
    <c>
        <!--Trouble falling or staying asleep, or sleeping too much-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </c>
    <d>
        <!--Feeling tired or having little energy-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </d>
    <e>
        <!--Poor appetite or overeating-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </e>
    <f>
        <!--Feeling bad about yourself - or that you are a failure or have let yourself or your family down-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </f>
    <g>
        <!--Trouble concentrating on things, such as reading the newspaper or watching television-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </g>
    <h>
        <!--Moving or speaking so slowly that other people could have noticed. Or the opposite - being so fidgety or restless that you have been moving around a lot more than usual-->
        <value>
        </value>
        <explanation>
        </explanation>
        <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
        </exact_quotes>
    </h>
    
    <!-- Total score -->
    <total_score>
    </total_score>
    <category>
    </category>
    <exact_quotes>
    <!-- Quotes from the transcript that support the assessment -->
    </exact_quotes>
    
    <!-- Summary of participant's mental health based on scores --> 
</assessment>
"""

response = requests.post(
  BASE_URL,
  json = {
    "model": model,
    "messages": [{"role": "system", "content": system_prompt},
                 {"role": "user", "content": quant_assessment}],
    "stream": False,
    "options": {
      "temperature": 0,
      "top_k": 20,
      "top_p": 0.9
    }
  }
)

quant_content = response.json()['message']['content']
print(qual_content)

```xml
<assessment>
    <a>
        <value>0</value>
        <explanation>The patient does not express a lack of interest or pleasure in activities. They mention going to the movies, which suggests some engagement.</explanation>
        <exact_quotes>
        </exact_quotes>
    </a>
    <b>
        <value>0</value>
        <explanation>The patient consistently presents as neutral and does not express feelings of sadness, hopelessness, or depression. They state they are feeling "normal."</explanation>
        <exact_quotes>
            "feel_lately (how have you been feeling lately) Participant : normal"
        </exact_quotes>
    </b>
    <c>
        <value>0</value>
        <explanation>The patient states they "sleep too well," indicating they are likely getting sufficient sleep and not experiencing insomnia or excessive sleepiness.</explanation>
        <exact_quotes>
            "easy_sleep (how easy is it for you to get a good night's sleep) Participant : that's i get i sleep too

In [44]:
system_prompt = "You are a psychiatrist. Your job is to assess and diagnose patients for depression. Be as objective, concise, and accurate as possible."
overall_eval = f"""You are an AI psychiatrist. Based on the qualitative and quantitative assessments below, provide an overall evaluation of the patient's mental health. Use output from the qualitative and quantitative assessments to inform your evaluation.

Qualitative Assessment:
{qual_assessment}

Quantitative Assessment:
{quant_assessment}


Please :

1. Provide a summary of the patient's mental health.
2. Provide a brief explanation of the patient's mental health status based on the qualitative and quantitative assessments.
3. Provide the total score from the PHQ-8 assessment, the sum of individual scores
4. Provide the category as one of: None, Mild, Moderate, Moderately Severe, Severe. If no answer is provided, output 'null' for the category.
5. Use exact quotes from the transcript to support your assessment for each tag.

Please provide your evaluation in the following XML format, with each tag on a new line, properly indented. Use straight quotes instead of curly quotes, and do not include any additional text outside the XML tags:
Output should be formatted as bullet points with headings for each section using stars. Example: **Tiredness** <explanation of tiredness>. Do not include any additional text outside the bullet points
<evaluation>
    <summary>
        <!-- Summary of the patient's mental health -->
    </summary>
    <explanation>
        <!-- Brief explanation of the patient's mental health status -->
    </explanation>
    <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
    </exact_quotes>
</evaluation>

<PHQ8_evaluation>
    <total_score>
        <!-- Total score from the PHQ-8 assessment -->
    </total_score>
    <category>
        <!-- Category of the total score -->
        </category>
    <explanation>
        <!-- Brief explanation of the total score and category -->
    </explanation>
    <exact_quotes>
        <!-- Quotes from the transcript that support the assessment -->
    </exact_quotes>
</PHQ8_evaluation>
"""

response = requests.post(
  BASE_URL,
  json = {
    "model": model,
    "messages": [{"role": "system", "content": system_prompt},
                 {"role": "user", "content": overall_eval}],
    "stream": False,
    "options": {
      "temperature": 0,
      "top_k": 20,
      "top_p": 0.9
    }
  }
)

overall_content = response.json()['message']['content']
print(overall_content)

```xml
<assessment>
    <a>
        <value>0</value>
        <explanation>The patient does not express a lack of interest or pleasure in activities. They mention going to the movies, which suggests some engagement.</explanation>
        <exact_quotes>
        </exact_quotes>
    </a>
    <b>
        <value>0</value>
        <explanation>The patient consistently presents as neutral and does not express feelings of sadness, hopelessness, or depression. They state they are feeling "normal."</explanation>
        <exact_quotes>
            "feel_lately (how have you been feeling lately) Participant : normal"
        </exact_quotes>
    </b>
    <c>
        <value>0</value>
        <explanation>The patient states they "sleep too well," indicating they are likely getting sufficient sleep and not experiencing insomnia or excessive sleepiness.</explanation>
        <exact_quotes>
            "easy_sleep (how easy is it for you to get a good night's sleep) Participant : that's i get i sleep too

In [45]:
results.append({
    "participant_id": participant_id,
    "dataset_type": dataset_type,
    "qualitative_assessment": qual_content,
    "quantitative_assessment": quant_content,
    "overall_evaluation": overall_content
})  
# Save results to a JSON file
resultsdf = pd.DataFrame(results)
resultsdf.to_csv("/home/users/nblair7/ai-psychiatrist/nblair7_results.csv", index=False)
