In [1]:
import pandas as pd
import json
from tqdm import tqdm
from openai import OpenAI
from sklearn.metrics import accuracy_score

In [2]:
# Read the labelled dataset into a DataFrame; the path is kept in a named
# variable so it is easy to spot and change.
dataset_path = r'/content/FINAL_DATASET_IR (1).csv'
df = pd.read_csv(dataset_path)

In [3]:
# Distinct labels found in the `status` column, in order of first appearance.
# Missing labels are dropped so NaN is never offered to the model as a class,
# and the result is a plain list so it renders with commas when interpolated
# into the prompt (a numpy array prints without separators and line-wraps).
categories = df['status'].dropna().unique().tolist()

### GPT - 4

In [4]:
import os

import openai

# Build the OpenAI client from the OPENAI_API_KEY environment variable so the
# secret never lives in the notebook (or its saved outputs / version control).
# A missing variable raises KeyError here, before any request is attempted.
client = openai.OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def classify(statement, categories=None, model="gpt-4", temperature=0):
    """Ask a chat model to classify one statement into a mental-health category.

    Args:
        statement: Text to classify; sent verbatim as the user message.
        categories: Iterable of allowed labels. When ``None`` a built-in list
            of seven labels is used.
        model: Chat model name passed to the API (defaults to ``"gpt-4"``).
        temperature: Sampling temperature. Defaults to 0 so that repeated runs
            over the same dataset give labels that are as repeatable as the
            API allows; pass 1 to restore the previous sampling behaviour.

    Returns:
        The chat completion object returned by
        ``client.chat.completions.create``. The prompt asks for a JSON object
        with keys ``prediction`` and ``reason``; callers read it from
        ``.choices[0].message.content`` and must be prepared for non-JSON text.
    """
    # Default categories if none are provided
    if categories is None:
        categories = [
            "Normal",
            "Suicidal",
            "Bi-Polar",
            "Depression",
            "Anxiety",
            "Stress",
            "Personality Disorder"
        ]

    # Render labels as a comma-separated line. Interpolating the container
    # directly would embed a Python/numpy repr (numpy arrays print without
    # commas and wrap across lines), which is ambiguous for multi-word labels.
    category_list = ", ".join(str(label) for label in categories)

    system_prompt = f"""You are a mental health classification expert.
Classify the user's statement based on the following strict criteria:

||| CLASSIFICATION RULES |||
1. **Priority Order** (high to low):
   Suicidal > Bi-Polar > Depression > Anxiety > Stress > Personality Disorder > Normal

2. **Symptom Thresholds**:
   - Suicidal: Explicit OR implicit self-harm references (e.g., "Everyone would be better without me")
   - Bi-Polar: Mentions of extreme mood swings + energy level changes
   - Depression: ≥3 of: hopelessness, anhedonia, sleep issues, appetite changes, guilt
   - Anxiety: Persistent worry + physical symptoms (e.g., "can't stop thinking", racing heart)
   - Stress: Situational tension without chronic mental health symptoms
   - Personality Disorder: Long-term interpersonal conflicts + identity issues
   - Normal: No significant symptoms or indicators of mental health distress

3. **Linguistic Red Flags**:
   - Metaphors about darkness/drowning/burden = Depression
   - Repeated safety checks ("are you there?") = Anxiety
   - Grandiose claims alternating with despair = Bi-Polar
   - First-person singular focus = Higher pathology risk

4. **Validation Checks**:
   - Require ≥2 indicators for non-Normal classifications
   - Time references: "Always/never" > "Sometimes" in severity
   - Intensity modifiers: "Utterly devastated" > "A bit sad"

5. **Boundary Cases**:
   - School/work stress WITHOUT bodily symptoms → Stress
   - Relationship conflicts WITH self-blame → Depression
   - Anger outbursts WITH planning → Personality Disorder
   - No concerning behavior or signs → Normal

Return your output in JSON format with keys: prediction and reason. Be concise.
Categories: {category_list}
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": statement},
    ]

    # Call OpenAI's chat completion API and hand the raw response back.
    return client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=256
    )


In [5]:
# Smoke-test the classifier on the first row before running the full loop.
first_statement = df['processed_statement'].iloc[0]
resp = classify(first_statement, categories)

In [7]:
# Display the raw text of the first completion choice.
first_choice = resp.choices[0]
first_choice.message.content

'{\n  "prediction": "Normal",\n  "reason": "This statement provides insufficient details to classify a mental health state."\n}'

In [8]:
# Classify every row and collect the predicted label plus the model's reason.
# `preds` and `reason` always grow together, one entry per row.
preds = []
reason = []

for statement in tqdm(df['processed_statement']):
    resp = classify(statement, categories)
    content = resp.choices[0].message.content
    try:
        resp_dict = json.loads(content)
        # Read both keys before appending so a partially valid reply can never
        # leave the two lists with different lengths.
        predicted, explanation = resp_dict['prediction'], resp_dict['reason']
    except (json.JSONDecodeError, KeyError, TypeError):
        # Not JSON, JSON without the expected keys / not an object, or no
        # content at all: record an empty prediction and keep the raw reply
        # for inspection instead of aborting the whole (paid) run.
        predicted, explanation = '', content
    preds.append(predicted)
    reason.append(explanation)

100%|██████████| 180/180 [09:30<00:00,  3.17s/it]


In [9]:
# Attach the GPT-4 outputs to the frame, then write the whole frame to disk.
for column_name, column_values in (
    ('predictions_p2_b', preds),
    ('reason_p2_b', reason),
):
    df[column_name] = column_values

df.to_csv('phase2_predictions_b')

In [10]:
# Preview the first five rows, including the newly added prediction columns.
df.head(n=5)

Unnamed: 0,processed_statement,status,predictions_p2_b,reason_p2_b
0,oh my gosh,Anxiety,Normal,Statement is too brief and non-specific to det...
1,trouble sleeping confused mind restless heart ...,Anxiety,Anxiety,Mentions of trouble sleeping and feelings of u...
2,all wrong back off dear forward doubt stay in ...,Anxiety,Anxiety,Observance of restlessness indicates persisten...
3,ive shifted my focus to something else but im ...,Anxiety,Stress,The user is expressing situational tension and...
4,im restless and restless its been a month now ...,Anxiety,Anxiety,"Recurrent mention of 'restless', a physical sy..."


### GPT - 4 Turbo

In [None]:
import os

import openai

# Build the OpenAI client from the OPENAI_API_KEY environment variable so the
# secret never lives in the notebook (or its saved outputs / version control).
# A missing variable raises KeyError here, before any request is attempted.
client = openai.OpenAI(
    api_key=os.environ["OPENAI_API_KEY"]
)

def classify(statement, categories=None, model="gpt-4-turbo", temperature=0):
    """Ask a chat model to classify one statement into a mental-health category.

    Args:
        statement: Text to classify; sent verbatim as the user message.
        categories: Iterable of allowed labels. When ``None`` a built-in list
            of seven labels is used.
        model: Chat model name passed to the API (defaults to
            ``"gpt-4-turbo"``). The request enables JSON mode, so the model
            must support ``response_format={"type": "json_object"}``.
        temperature: Sampling temperature. Defaults to 0 so that repeated runs
            over the same dataset give labels that are as repeatable as the
            API allows; pass 1 to restore the previous sampling behaviour.

    Returns:
        The chat completion object returned by
        ``client.chat.completions.create``. The prompt asks for a JSON object
        with keys ``prediction`` and ``reason``; callers read it from
        ``.choices[0].message.content``.
    """
    # Set default categories if not provided
    if categories is None:
        categories = [
            "Normal",
            "Suicidal",
            "Bi-Polar",
            "Depression",
            "Anxiety",
            "Stress",
            "Personality Disorder"
        ]

    # Render labels as a comma-separated line. Interpolating the container
    # directly would embed a Python/numpy repr (numpy arrays print without
    # commas and wrap across lines), which is ambiguous for multi-word labels.
    category_list = ", ".join(str(label) for label in categories)

    system_prompt = f"""**Mental Health Classification Expert**
Analyze user statements using these strict criteria:

||| CLASSIFICATION RULES |||
1. **Priority Order** (high to low):
   Suicidal > Bi-Polar > Depression > Anxiety > Stress > Personality Disorder > Normal

2. **Symptom Thresholds**:
   - Suicidal: Explicit OR implicit self-harm references (e.g., "Everyone would be better without me")
   - Bi-Polar: Mentions of extreme mood swings + energy level changes
   - Depression: ≥3 of: hopelessness, anhedonia, sleep issues, appetite changes, guilt
   - Anxiety: Persistent worry + physical symptoms (e.g., "can't stop thinking", racing heart)
   - Stress: Situational tension without chronic mental health symptoms
   - Personality Disorder: Long-term interpersonal conflicts + identity issues
   - Normal: Absence of any significant symptoms, distress, or behavioral red flags

3. **Linguistic Red Flags**:
   - Metaphors about darkness/drowning/burden = Depression
   - Repeated safety checks ("are you there?") = Anxiety
   - Grandiose claims alternating with despair = Bi-Polar
   - First-person singular focus = Higher pathology risk

4. **Validation Checks**:
   - Require ≥2 indicators for non-Normal classifications
   - Time references: "Always/never" > "Sometimes" in severity
   - Intensity modifiers: "Utterly devastated" > "A bit sad"

5. **Boundary Cases**:
   - School/work stress WITHOUT bodily symptoms → Stress
   - Relationship conflicts WITH self-blame → Depression
   - Anger outbursts WITH planning → Personality Disorder
   - Neutral or minor concerns with no mental health distress → Normal

Return JSON format with keys: prediction and reason. Keep the explanation concise.
Categories: {category_list}
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": statement},
    ]

    # Call OpenAI's chat completion API. JSON mode constrains the reply to a
    # single JSON object (no markdown fences or surrounding prose), which is
    # what the caller feeds to json.loads; the system prompt already contains
    # the word "JSON", as JSON mode requires.
    return client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=256,
        response_format={"type": "json_object"}
    )


In [None]:
# Smoke-test the classifier on the first row before running the full loop.
first_statement = df['processed_statement'].iloc[0]
resp = classify(first_statement, categories)

In [None]:
# Display the raw text of the first completion choice.
first_choice = resp.choices[0]
first_choice.message.content

In [None]:
# Classify every row and collect the predicted label plus the model's reason.
# `preds` and `reason` always grow together, one entry per row.
preds = []
reason = []

for statement in tqdm(df['processed_statement']):
    resp = classify(statement, categories)
    content = resp.choices[0].message.content
    try:
        resp_dict = json.loads(content)
        # Read both keys before appending so a partially valid reply can never
        # leave the two lists with different lengths.
        predicted, explanation = resp_dict['prediction'], resp_dict['reason']
    except (json.JSONDecodeError, KeyError, TypeError):
        # Not JSON, JSON without the expected keys / not an object, or no
        # content at all: record an empty prediction and keep the raw reply
        # for inspection instead of aborting the whole (paid) run.
        predicted, explanation = '', content
    preds.append(predicted)
    reason.append(explanation)

In [None]:
# Attach the GPT-4 Turbo outputs to the frame, then write the whole frame to disk.
for column_name, column_values in (
    ('predictions_p2_a', preds),
    ('reason_p2_a', reason),
):
    df[column_name] = column_values

df.to_csv('phase2_predictions_a')

In [None]:
# Preview the first five rows, including the newly added prediction columns.
df.head(n=5)

### GPT - 3.5 Turbo

In [None]:
import os

import openai

# Build the OpenAI client from the OPENAI_API_KEY environment variable so the
# secret never lives in the notebook (or its saved outputs / version control).
# A missing variable raises KeyError here, before any request is attempted.
client = openai.OpenAI(
    api_key=os.environ["OPENAI_API_KEY"]
)

def classify(statement, categories=None, model="gpt-3.5-turbo", temperature=0):
    """Ask a chat model to classify one statement into a mental-health category.

    Args:
        statement: Text to classify; sent verbatim as the user message.
        categories: Iterable of allowed labels. When ``None`` a built-in list
            of seven labels is used.
        model: Chat model name passed to the API (defaults to
            ``"gpt-3.5-turbo"``). The request enables JSON mode, so the model
            must support ``response_format={"type": "json_object"}``.
        temperature: Sampling temperature. Defaults to 0 so that repeated runs
            over the same dataset give labels that are as repeatable as the
            API allows; pass 1 to restore the previous sampling behaviour.

    Returns:
        The chat completion object returned by
        ``client.chat.completions.create``. The prompt asks for a JSON object
        with keys ``prediction`` and ``reason``; callers read it from
        ``.choices[0].message.content``.
    """
    # Set default categories if not provided
    if categories is None:
        categories = [
            "Normal",
            "Suicidal",
            "Bi-Polar",
            "Depression",
            "Anxiety",
            "Stress",
            "Personality Disorder"
        ]

    # Render labels as a comma-separated line. Interpolating the container
    # directly would embed a Python/numpy repr (numpy arrays print without
    # commas and wrap across lines), which is ambiguous for multi-word labels.
    category_list = ", ".join(str(label) for label in categories)

    system_prompt = f"""**Mental Health Classification Expert**
Analyze user statements using these strict criteria:

||| CLASSIFICATION RULES |||
1. **Priority Order** (high to low):
   Suicidal > Bi-Polar > Depression > Anxiety > Stress > Personality Disorder > Normal

2. **Symptom Thresholds**:
   - Suicidal: Explicit OR implicit self-harm references (e.g., "Everyone would be better without me")
   - Bi-Polar: Mentions of extreme mood swings + energy level changes
   - Depression: ≥3 of: hopelessness, anhedonia, sleep issues, appetite changes, guilt
   - Anxiety: Persistent worry + physical symptoms (e.g., "can't stop thinking", racing heart)
   - Stress: Situational tension without chronic mental health symptoms
   - Personality Disorder: Long-term interpersonal conflicts + identity issues
   - Normal: Absence of significant distress, concerning symptoms, or behavioral red flags

3. **Linguistic Red Flags**:
   - Metaphors about darkness/drowning/burden = Depression
   - Repeated safety checks ("are you there?") = Anxiety
   - Grandiose claims alternating with despair = Bi-Polar
   - First-person singular focus = Higher pathology risk

4. **Validation Checks**:
   - Require ≥2 indicators for non-Normal classifications
   - Time references: "Always/never" > "Sometimes" in severity
   - Intensity modifiers: "Utterly devastated" > "A bit sad"

5. **Boundary Cases**:
   - School/work stress WITHOUT bodily symptoms → Stress
   - Relationship conflicts WITH self-blame → Depression
   - Anger outbursts WITH planning → Personality Disorder
   - Neutral or minor concerns with no mental health distress → Normal

Return your response in JSON format with the following keys: prediction and reason. Keep the explanation concise.
Categories: {category_list}
"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": statement},
    ]

    # Call OpenAI's chat completion API. JSON mode constrains the reply to a
    # single JSON object (no markdown fences or surrounding prose), which is
    # what the caller feeds to json.loads; the system prompt already contains
    # the word "JSON", as JSON mode requires.
    return client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=256,
        response_format={"type": "json_object"}
    )


In [None]:
# Smoke-test the classifier on the first row before running the full loop.
first_statement = df['processed_statement'].iloc[0]
resp = classify(first_statement, categories)

In [None]:
# Display the raw text of the first completion choice.
first_choice = resp.choices[0]
first_choice.message.content

In [None]:
# Classify every row and collect the predicted label plus the model's reason.
# `preds` and `reason` always grow together, one entry per row.
preds = []
reason = []

for statement in tqdm(df['processed_statement']):
    resp = classify(statement, categories)
    content = resp.choices[0].message.content
    try:
        resp_dict = json.loads(content)
        # Read both keys before appending so a partially valid reply can never
        # leave the two lists with different lengths.
        predicted, explanation = resp_dict['prediction'], resp_dict['reason']
    except (json.JSONDecodeError, KeyError, TypeError):
        # Not JSON, JSON without the expected keys / not an object, or no
        # content at all: record an empty prediction and keep the raw reply
        # for inspection instead of aborting the whole (paid) run.
        predicted, explanation = '', content
    preds.append(predicted)
    reason.append(explanation)

In [None]:
# Attach the GPT-3.5 Turbo outputs to the frame, then write the whole frame to disk.
for column_name, column_values in (
    ('predictions_p2_c', preds),
    ('reason_p2_c', reason),
):
    df[column_name] = column_values

df.to_csv('phase2_predictions_c')

In [None]:
# Preview the first five rows, including the newly added prediction columns.
df.head(n=5)