In [1]:
'''{
  "text" : "I am sad."
  "emotion": ["sadness"],
  "cognitive_patterns": ["hopelessness", "anhedonia"],
  "behavioral_cues": ["social withdrawal"]
}
'''

'{\n  "text" : "I am sad."\n  "emotion": ["sadness"],\n  "cognitive_patterns": ["hopelessness", "anhedonia"],\n  "behavioral_cues": ["social withdrawal"]\n}\n'

In [6]:
COGNITIVE_PATTERNS = {
    "anhedonia": [
        "can't enjoy", "cannot enjoy", "nothing feels good",
        "no pleasure", "don't enjoy", "lost interest"
    ],
    "hopelessness": [
        "nothing will change", "no point", "never get better",
        "what's the use", "pointless"
    ],
    "catastrophizing": [
        "everything is ruined", "always goes wrong",
        "worst possible", "total disaster"
    ],
    "overgeneralization": [
        "always", "never", "everyone", "everything"
    ],
    "self_blame": [
        "my fault", "i blame myself", "i ruined",
        "i mess everything up"
    ],
    "worthlessness": [
        "i'm useless", "i'm worthless", "good for nothing"
    ],
    "rumination": [
        "can't stop thinking", "keep thinking",
        "replaying in my head"
    ],
    "helplessness": [
        "nothing i can do", "out of my control",
        "powerless"
    ]
}

BEHAVIORAL_CUES = {
    "withdrawal": [
        "avoid people", "stay alone", "isolated myself",
        "stopped talking", "cut myself off"
    ],
    "avoidance": [
        "avoid", "can't face", "put off",
        "ignoring"
    ],
    "inactivity": [
        "in bed all day", "can't get out of bed",
        "do nothing", "no energy to do"
    ],
    "reassurance_seeking": [
        "need reassurance", "keep asking",
        "need someone to tell me"
    ],
    "rumination_behavior": [
        "thinking about it all day",
        "obsessing", "looping thoughts"
    ],
    "sleep_disruption": [
        "can't sleep", "insomnia", "sleep all day",
        "awake all night"
    ]
}


In [1]:
import numpy as np
import pandas as pd
import spacy
import re

In [None]:
nlp = spacy.load("en_core_web_sm")

def normalize(text):
    text =text.lower()
    text =re.sub(r"\s+"," ", text)
    return text.strip()

In [None]:
def match_patterns(text,pattern_dict):
    text =normalize(text)
    found =[]
    for label, patterns in pattern_dict.items():
        for p in patterns:
            if p in text:
                found.append(label)
                break
    return list(set(found))

In [7]:
def annotate_text(text):
    cognitive = match_patterns(text, COGNITIVE_PATTERNS)
    behavioral = match_patterns(text, BEHAVIORAL_CUES)

    return {
        "text": text,
        "cognitive_patterns": cognitive if cognitive else ["none"],
        "behavioral_cues": behavioral if behavioral else ["none"]
    }


In [2]:
df = pd.read_csv('ml_data/processed/goemotions_reduced.csv')
df.head()

Unnamed: 0,text,emotion
0,That game hurt.,['sadness']
1,"You do right, if you don't care then fuck 'em!",['neutral']
2,Man I love reddit.,['love']
3,"[NAME] was nowhere near them, he was by the Fa...",['neutral']
4,Right? Considering it’s such an important docu...,['gratitude']


In [8]:
from tqdm import tqdm

cognitive_patterns = []
behavioral_cues = []

for text in tqdm(df["text"]):
    annotation = annotate_text(text)
    cognitive_patterns.append(annotation["cognitive_patterns"])
    behavioral_cues.append(annotation["behavioral_cues"])

# 3. Create final DataFrame with exactly 4 columns
final_df = pd.DataFrame({
    "text": df["text"],
    "emotion": df["emotion"],
    "cognitive_patterns": cognitive_patterns,
    "behavioral_cues": behavioral_cues
})

# 4. Save (choose one format)
final_df.to_csv("final_labeled_data.csv", index=False)
# or
final_df.to_json("final_labeled_data.json", orient="records", indent=2)


100%|██████████| 68871/68871 [00:03<00:00, 20311.59it/s]


In [9]:
final_df.head()

Unnamed: 0,text,emotion,cognitive_patterns,behavioral_cues
0,That game hurt.,['sadness'],[none],[none]
1,"You do right, if you don't care then fuck 'em!",['neutral'],[none],[none]
2,Man I love reddit.,['love'],[none],[none]
3,"[NAME] was nowhere near them, he was by the Fa...",['neutral'],[none],[none]
4,Right? Considering it’s such an important docu...,['gratitude'],[none],[none]


In [14]:
none_cognitive = final_df["cognitive_patterns"].apply(
    lambda x: x == ["none"]
).sum()

total = len(final_df)

print(f"Cognitive patterns = none: {none_cognitive}/{total} ({none_cognitive/total:.2%})")


Cognitive patterns = none: 65522/68871 (95.14%)


In [11]:
none_behavioral = final_df["behavioral_cues"].apply(
    lambda x: x == ["none"]
).sum()

print(f"Behavioral cues = none: {none_behavioral}/{total} ({none_behavioral/total:.2%})")


Behavioral cues = none: 68691/68871 (99.74%)
