In [None]:
from openai import OpenAI
from dotenv import load_dotenv
import os
import pandas as pd
import json
import tqdm

# Read the local .env file into the process environment before the client is
# built, so the client is instantiated with the settings stored there.
load_dotenv()

# Module-level OpenAI client shared by the labelling functions.
client = OpenAI()

# Directory holding the SCS training data files.
s_data_path = "../data/SCS-training-data"

# Names of all entries found in the training-data directory.
files = os.listdir(s_data_path)

# Maps a data-set key to its list of parsed JSONL records; filled in later.
s_data = dict()


def read_jsonl(file_path):
    """Parse a JSON Lines file into a list of Python objects.

    :param file_path: path of the ``.jsonl`` file to read
    :return: list with one decoded JSON value per non-blank line, in file order
    :raises json.JSONDecodeError: if a non-blank line is not valid JSON
    """
    # Iterate over the file object rather than ``str.splitlines()``:
    # splitlines also breaks on characters such as U+2028 or form feed, which
    # may appear unescaped inside a JSON string and would cut a record in two.
    with open(file_path, encoding="utf-8") as f:
        # Blank lines carry no record; skip them instead of letting
        # json.loads fail on an empty string.
        return [json.loads(line) for line in f if line.strip()]


# Load every ".jsonl" entry of `files` into s_data. The dictionary key is the
# file name with a trailing "_train.jsonl" removed; the value is the list of
# records returned by read_jsonl for that file.
for file in files:
    if not file.endswith(".jsonl"):
        continue
    file_path = f"{s_data_path}/{file}"
    category_key = file.removesuffix("_train.jsonl")
    s_data[category_key] = read_jsonl(file_path)
print(s_data.keys())

# The word is assembled from character codes rather than written as a literal
# (presumably to keep it out of the notebook source -- confirm with author).
the_word = "".join(map(chr, (115, 117, 105, 99, 105, 100, 101)))

In [None]:
from forbidden_imports import *

# Rubric category name -> free-text definition of that category.
# The definition is substituted verbatim into the model prompts via
# scs_rubric[rubric_category], so any edit to these strings changes what the
# model is asked.
scs_rubric = {
    "Entrapment": "Feeling like there is no exit, feeling hopeless, feeling like there is no way out, feeling afraid that things will never be normal again, feeling helpless to change, feeling trapped, feeling doomed, feeling or thinking that things will never change, feeling like there is no escape, feeling like there are no good solutions to problems.",
    "Affective Disturbance": "Emotional Pain: Having a sense of inner pain that is too much to bear, feeling relentless agonizing emotional pain, feeling a sense of inner pain that has to be stopped, feeling like the emotional pain is unbearable. Self-disgust and Self-loathing: Feeling intense deep and/or negative feelings directed toward the self, describing self with expletives. Rapid Spikes of Negative Emotion: Feeling any intense, deep, and/or quickly changing negative feelings or mood swings directed toward the self or someone else. Extreme Anxiety: Feeling nervousness or shakiness, feeling blood rushing through veins, feeling unusual physical sensations, dissociation and somatic symptoms. Acute Anhedonia: Feeling dissatisfied or bored with everything, feeling a lost interest in other people, not finding pleasure in hobbies and pastimes, not enjoying being with family or close friends.",
    "Loss of Cognitive Control": "Rumination: Focusing on one topic repeatedly, having racing or excessive thoughts about one or few topics, feeling like ideas are turning over and over in the mind and aren’t able to go away. Cognitive Rigidity: Rarely changing mind, stuck on one idea; inflexible, feeling like mind cannot be easily changed over things that are bothersome, feeling like mind can never change once a decision/conclusion has been made. Ruminative Flooding: Feeling like head could explode from too many thoughts, having a decreased ability to think, concentrate or make decisions, feeling pressure in head from thinking too much, feeling like a headache is developing from too many thoughts. Failed Thought Suppression: Wanting troubling thoughts to go away but unable to make them stop, feeling powerless to stop upsetting thoughts, feeling like it is hard to stop worrying.",
    "Hyperarousal": "Agitation: Feeling tensed or keyed up, feeling so restless that there is an inability to sit still, feeling a desire to crawl out of one’s skin, feeling so stirred up inside like wanting to scream, feeling a lot of emotional turmoil in gut. Hypervigilance: Feeling that staying alert and watchful is needed to prevent something bad from happening, constantly watching for signs of trouble, feeling that most people cannot be trusted. Irritability: Having uncontrollable temper outbursts, getting into frequent arguments, feeling easily annoyed or irritated, having a short fuse. Insomnia: Having trouble falling asleep because of uncontrollable thoughts, difficulties falling asleep, staying asleep or waking up too early, waking up from sleep tired and not refreshed, not sleeping enough hours.",
    "Social Withdrawal": "Interacting less with people who care, feeling unable to open up to family members or friends, feeling isolated from others, evading communications with people who care, pushing away caring people.NOTE: This is distinguished from perceived burdensomeness/thwarted belongingness through behavioral actions of social withdrawal, as opposed to simply having thoughts related to disconnection/burdensomeness. It should be ACUTE rather than more subacute/chronic.",
}

# Rubric category name -> list of individual criteria for that category.
# Only "Entrapment" is itemised here, and this dictionary is not referenced
# by any of the code visible in this notebook.
scs_rubric_discrete = {
    "Entrapment": [
        "feeling like there is no exit",
        "feeling hopeless",
        "feeling like there is no way out",
        "feeling afraid that things will never be normal again",
        "feeling helpless to change",
        "feeling trapped",
        "feeling doomed",
        "feeling or thinking that things will never change",
        "feeling like there is no escape",
        "feeling like there are no good solutions to problems",
    ]
}

# Rubric category name -> key of the matching data set in s_data.
# s_data keys are data file names with the "_train.jsonl" suffix removed, and
# the labelling loops look records up with s_data[rtfmap[category]].
rtfmap = {  # rubric name to file name map
    "Entrapment": "entrapment",
    "Affective Disturbance": "disturbance",
    "Loss of Cognitive Control": "control",
    "Hyperarousal": "hyperarousal",
    "Social Withdrawal": "withdrawal",
}

In [None]:
def f1_score(results):
    """Compute the F1 score for the "accept" class.

    Rows whose "label" / "predicted_label" pair is not one of
    accept/accept, accept/reject or reject/accept do not affect the score.

    :param results: iterable of dicts with "label" and "predicted_label" keys
    :return: the F1 score, or 0 (after printing a notice) when precision,
        recall or their sum has a zero denominator
    """
    tp = 0
    fp = 0
    fn = 0
    for result in results:
        if result["label"] == "accept" and result["predicted_label"] == "accept":
            tp += 1
        elif result["label"] == "accept" and result["predicted_label"] == "reject":
            fn += 1
        elif result["label"] == "reject" and result["predicted_label"] == "accept":
            fp += 1
    try:
        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * (precision * recall) / (precision + recall)
        return f1
    # Only a zero denominator is expected here; a bare ``except`` would also
    # hide unrelated errors (including KeyboardInterrupt).
    except ZeroDivisionError:
        print("F1 cant be computed becuase of division by zero")
        return 0


def accuracy_score(results):
    """Return the fraction of rows whose prediction equals the gold label.

    :param results: sized collection of dicts with "label" and
        "predicted_label" keys
    :return: correct / total as a float; 0.0 when ``results`` is empty
    """
    total = len(results)
    # An empty result set has no defined accuracy; report 0.0 instead of
    # raising ZeroDivisionError, mirroring how f1_score degrades to 0.
    if total == 0:
        return 0.0
    correct = 0
    for result in results:
        if result["label"] == result["predicted_label"]:
            correct += 1
    return correct / total


def precision_score(results):
    """Compute precision for the "accept" class: tp / (tp + fp).

    :param results: iterable of dicts with "label" and "predicted_label" keys
    :return: precision as a float; 0.0 when no row counts as a true or false
        positive
    """
    tp = 0
    fp = 0
    for result in results:
        if result["label"] == "accept" and result["predicted_label"] == "accept":
            tp += 1
        elif result["label"] == "reject" and result["predicted_label"] == "accept":
            fp += 1
    predicted_positive = tp + fp
    # With no counted "accept" predictions the ratio is undefined; return 0.0
    # instead of raising, mirroring how f1_score degrades to 0.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive


def recall_score(results):
    """Compute recall for the "accept" class: tp / (tp + fn).

    :param results: iterable of dicts with "label" and "predicted_label" keys
    :return: recall as a float; 0.0 when no row counts as a true positive or
        false negative
    """
    tp = 0
    fn = 0
    for result in results:
        if result["label"] == "accept" and result["predicted_label"] == "accept":
            tp += 1
        elif result["label"] == "accept" and result["predicted_label"] == "reject":
            fn += 1
    actual_positive = tp + fn
    # With no counted "accept" labels the ratio is undefined; return 0.0
    # instead of raising, mirroring how f1_score degrades to 0.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive


def count_false_positives(results):
    """Count rows labelled "reject" that were predicted as "accept".

    :param results: iterable of dicts with "label" and "predicted_label" keys
    :return: number of false positives
    """
    return sum(
        1
        for row in results
        if row["label"] == "reject" and row["predicted_label"] == "accept"
    )


def count_false_negatives(results):
    """Count rows labelled "accept" that were predicted as "reject".

    :param results: iterable of dicts with "label" and "predicted_label" keys
    :return: number of false negatives
    """
    missed = [
        row
        for row in results
        if row["label"] == "accept" and row["predicted_label"] == "reject"
    ]
    return len(missed)


def count_true_positives(results):
    """Count rows labelled "accept" that were also predicted as "accept".

    :param results: iterable of dicts with "label" and "predicted_label" keys
    :return: number of true positives
    """
    return sum(
        1
        for row in results
        if row["label"] == "accept" and row["predicted_label"] == "accept"
    )


def count_true_negatives(results):
    """Count rows labelled "reject" that were also predicted as "reject".

    :param results: iterable of dicts with "label" and "predicted_label" keys
    :return: number of true negatives
    """
    correctly_rejected = [
        row
        for row in results
        if row["label"] == "reject" and row["predicted_label"] == "reject"
    ]
    return len(correctly_rejected)

In [None]:
def get_answer_just_GPT(string, rubric_category, reasoning_length=7):
    """
    Ask the chat model, in JSON mode, to label one text for one rubric category.

    The prompt templates ``system_message_just_gpt`` and ``user_prompt_just_gpt``
    are not defined in the visible code (presumably supplied by the star import
    -- confirm).

    :param string: the text to be labeled
    :param rubric_category: key of ``scs_rubric`` naming the category to test
    :param reasoning_length: value passed to the user prompt template
    :return: dict with "text", "predicted_label" and "reasoning"; the last two
        fall back to placeholder strings when missing from the model's JSON
    """
    system_content = system_message_just_gpt.format(
        the_word=the_word,
        rubric_category=rubric_category,
        rubric=scs_rubric[rubric_category],
    )
    user_content = user_prompt_just_gpt.format(
        string=string,
        rubric_category=rubric_category,
        reasoning_length=reasoning_length,
    )
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ],
    )
    # The reply body is a JSON document; decode it and pick out the two fields.
    parsed = json.loads(completion.choices[0].message.content)
    return {
        "text": string,
        "predicted_label": parsed.get("label", "Not provided"),
        "reasoning": parsed.get("reasoning", "No reasoning provided"),
    }


# Label every record of every rubric category with the plain GPT prompt,
# print the per-category F1 and store the labelled rows as JSON.
for i in rtfmap:
    results = []
    for text in tqdm.tqdm(s_data[rtfmap[i]]):
        responseDict = get_answer_just_GPT(text["text"], i)
        results.append(
            {
                "text": text["text"],
                "label": text["answer"],
                "predicted_label": responseDict["predicted_label"],
                "reasoning": responseDict["reasoning"],
            }
        )
    print(f"F1 for {i}: {f1_score(results)}")
    # Write into ../data/s_data/just_gpt/, the directory the reporting cell for
    # this run loads from (previously the files landed in the working
    # directory and were never found by that cell).
    os.makedirs("../data/s_data/just_gpt", exist_ok=True)
    # The context manager guarantees the file is flushed and closed.
    with open(f"../data/s_data/just_gpt/{i}_results.json", "w") as results_file:
        json.dump(results, results_file)

In [None]:
# Single ad-hoc call: label one hand-written sentence for "Entrapment".
get_answer_just_GPT(string="I feel stuck", rubric_category="Entrapment")

In [None]:
# Print the metrics of the stored plain-GPT results for each rubric category.
print("F1 scores for JUST GPT")
for i in rtfmap:
    # Load through a context manager so the file handle is closed right away.
    with open(f"../data/s_data/just_gpt/{i}_results.json") as results_file:
        results = json.load(results_file)

    print(f"F1 for {i}: {f1_score(results)}")
    print(f"Accuracy for {i}: {accuracy_score(results)}")
    print(f"Precision for {i}: {precision_score(results)}")
    print(f"Recall for {i}: {recall_score(results)}")
    print(f"False Positives for {i}: {count_false_positives(results)}")
    print(f"False Negatives for {i}: {count_false_negatives(results)}")
    print()

In [None]:
def get_answer_just_GPT_strict(string, rubric_category, reasoning_length=7):
    """
    Ask the chat model, in JSON mode, to label one text using the "strict" prompts.

    The prompt templates ``system_message_just_gpt_strict`` and
    ``user_prompt_just_gpt_strict`` are not defined in the visible code
    (presumably supplied by the star import -- confirm).

    :param string: the text to be labeled
    :param rubric_category: key of ``scs_rubric`` naming the category to test
    :param reasoning_length: value passed to the user prompt template
    :return: dict with "text", "predicted_label" and "reasoning"; the last two
        fall back to placeholder strings when missing from the model's JSON
    """
    system_content = system_message_just_gpt_strict.format(
        the_word=the_word,
        rubric_category=rubric_category,
        rubric=scs_rubric[rubric_category],
    )
    user_content = user_prompt_just_gpt_strict.format(
        string=string,
        rubric_category=rubric_category,
        reasoning_length=reasoning_length,
    )
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ],
    )
    # The reply body is a JSON document; decode it and pick out the two fields.
    parsed = json.loads(completion.choices[0].message.content)
    return {
        "text": string,
        "predicted_label": parsed.get("label", "Not provided"),
        "reasoning": parsed.get("reasoning", "No reasoning provided"),
    }


# Label every record of every rubric category with the strict GPT prompt and
# store the labelled rows as JSON, one file per category.
for i in rtfmap:
    results = []

    for text in tqdm.tqdm(s_data[rtfmap[i]]):
        responseDict = get_answer_just_GPT_strict(text["text"], i)
        results.append(
            {
                "text": text["text"],
                "label": text["answer"],
                "predicted_label": responseDict["predicted_label"],
                "reasoning": responseDict["reasoning"],
            }
        )

    # The context manager guarantees the file is flushed and closed.
    with open(f"../data/s_data/just_gpt_strict/{i}_results.json", "w") as results_file:
        json.dump(results, results_file)

In [None]:
# Print the metrics of the stored strict-GPT results for each rubric category.
print("F1 scores for JUST GPT STRICT")
for i in rtfmap:
    # Load through a context manager so the file handle is closed right away.
    with open(f"../data/s_data/just_gpt_strict/{i}_results.json") as results_file:
        results = json.load(results_file)

    print(f"F1 for {i}: {f1_score(results)}")
    print(f"Accuracy for {i}: {accuracy_score(results)}")
    # The label previously carried a stray "t" after the category name.
    print(f"Precision for {i}: {precision_score(results)}")
    print(f"Recall for {i}: {recall_score(results)}")
    print(f"False Positives for {i}: {count_false_positives(results)}")
    print(f"False Negatives for {i}: {count_false_negatives(results)}")
    print()

In [None]:
def get_answer_CoT(string, rubric_category, reasoning_length=7):
    """
    Label one text with a two-example chain-of-thought prompt.

    The few-shot examples (``sample1_cot1``, ``answer1_cot1``, ``sample2_cot1``,
    ``answer2_cot1``) are not defined in the visible code (presumably supplied
    by the star import -- confirm).

    :param string: the text to be labeled
    :param rubric_category: key of ``scs_rubric`` naming the category to test
    :param reasoning_length: accepted for signature compatibility; not used
    :return: dict with "text", "predicted_label" (whatever follows the last
        '#FINAL ANSWER# ' marker, stripped; not validated against
        accept/reject) and "reasoning" (the text before the first marker)
    """
    # This must be an f-string: as a plain string the {the_word},
    # {rubric_category} and rubric placeholders were sent to the model as
    # literal text, so it never saw the category name or its definition.
    system_message = f"""You are a useful lab assistant and the world's leading {the_word} researcher. Your job is to read a string of text from social media and help categorize it as {rubric_category} or not {rubric_category}. {rubric_category} is formally defined as: {scs_rubric[rubric_category]}. Now that you know what {rubric_category} is, when given a question asking whether or not a social media post is {rubric_category} you will respond with an ouput that starts with 'Answer: ', followed by point by point explanation that goes through the definiton of {rubric_category} and assesses whether the given post meets that point of the criteria. Finally, the end of your answer should start with the string '#FINAL ANSWER# ' followed by either 'accept' if the post is {rubric_category} or 'reject' if the post is not {rubric_category}. LISTEN CAREFULLY, this part is very important. Be VERY very literally in your interpretation of the rubric. It is safer to reject than to accept."""

    # Two worked question/answer pairs followed by the real question.
    cot_prompt = f"Question: is the following social media post [Social media post: {sample1_cot1}] an instance of {rubric_category}? Let's think this step by step \nAnswer: {answer1_cot1}\nQuestion: is the following social media post [Social media post: {sample2_cot1}] an instance of {rubric_category}? Let's think this step by step \nAnswer: {answer2_cot1}\n"
    user_prompt = f"Question: is the following social media post [Social media post: {string}] an instance of {rubric_category}? Let's think this step by step \nAnswer:"
    user_prompt = cot_prompt + user_prompt

    # No JSON response_format here: the reply is free text ending in a marker.
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": user_prompt},
        ],
    )
    content = response.choices[0].message.content
    # If the marker is absent, both pieces are derived from the whole reply.
    label = content.split("#FINAL ANSWER# ")[-1].strip()
    reasoning = content.split("#FINAL ANSWER# ")[0].removeprefix("Answer: ").strip()
    return {
        "text": string,
        "predicted_label": label,
        "reasoning": reasoning,
    }


# Run the first chain-of-thought prompt and store the labelled rows as JSON.
for i in rtfmap:
    # Only the "Hyperarousal" category is processed by this cell; every other
    # category is skipped.
    if i.lower() != "hyperarousal":
        continue
    results = []

    for text in tqdm.tqdm(s_data[rtfmap[i]]):
        responseDict = get_answer_CoT(text["text"], i)
        results.append(
            {
                "text": text["text"],
                "label": text["answer"],
                "predicted_label": responseDict["predicted_label"],
                "reasoning": responseDict["reasoning"],
            }
        )
    # The context manager guarantees the file is flushed and closed.
    with open(f"../data/s_data/cot/{i}_results.json", "w") as results_file:
        json.dump(results, results_file)

In [None]:
# Print the metrics of the stored first-CoT results for each rubric category.
print("F1 scores for 1st COT")
for i in rtfmap:
    # Load through a context manager so the file handle is closed right away.
    with open(f"../data/s_data/cot/{i}_results.json") as results_file:
        results = json.load(results_file)
    print(f"Accuracy for {i}: {accuracy_score(results)}")
    print(f"True Positives for {i}: {count_true_positives(results)}")
    print(f"True Negatives for {i}: {count_true_negatives(results)}")
    print(f"False Positives for {i}: {count_false_positives(results)}")
    print(f"False Negatives for {i}: {count_false_negatives(results)}")
    print(f"Precision for {i}: {precision_score(results)}")
    print(f"Recall for {i}: {recall_score(results)}")

    print(f"F1 for {i}: {f1_score(results)}")
    print()

In [None]:
def get_answer_CoT2(string, rubric_category):
    """
    Label one text with the second chain-of-thought prompt variant.

    The few-shot examples (``sample1_cot2``, ``answer1_cot2``, ``sample2_cot2``,
    ``answer2_cot2``) are not defined in the visible code (presumably supplied
    by the star import -- confirm).

    :param string: the text to be labeled
    :param rubric_category: key of ``scs_rubric`` naming the category to test
    :return: dict with "text", "predicted_label" (whatever follows the last
        '#FINAL ANSWER# ' marker, stripped) and "reasoning" (the text before
        the first marker, without a leading 'Answer: ')
    """
    system_message = f"""
    You are a useful lab assistant and the world's leading {the_word} researcher. Your job is to read a string of text from social media and help categorize it as {rubric_category} or not {rubric_category}. {rubric_category} is formally defined as: {scs_rubric[rubric_category]}. Now that you know what {rubric_category} is, when given a question asking whether or not a social media post is {rubric_category} you will respond with an ouput that starts with 'Answer: ', followed by a lengthy and thorough point by point explanation that goes through the definiton of {rubric_category} and assesses whether the given post meets each point of the criteria. Finally, the end of your answer should start with the string '#FINAL ANSWER# ' followed by either 'accept' if the post is {rubric_category} or 'reject' if the post is not {rubric_category}. LISTEN CAREFULLY, this part is very important. Be VERY very literal in your interpretation of the rubric and the post. DO NOT read into the post beyond the words you are given (for example, using concepts such as 'tone' and 'mood' to decide the final answer). It is safer to reject than to accept."""

    # Two worked question/answer pairs, then the real question appended below.
    cot_prompt = f"Question: is the following social media post [Social media post: {sample1_cot2}] an instance of {rubric_category}? Let's think this step by step \nAnswer: {answer1_cot2}\nQuestion: is the following social media post [Social media post: {sample2_cot2}] an instance of {rubric_category}? Let's think this step by step \nAnswer: {answer2_cot2}\n"
    question = f"Question: is the following social media post [Social media post: {string}] an instance of {rubric_category}? Let's think this step by step \nAnswer:"
    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": cot_prompt + question},
    ]

    # Free-text reply (no JSON response_format); the verdict follows a marker.
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=chat_messages,
        max_tokens=4096,
    )
    segments = completion.choices[0].message.content.split("#FINAL ANSWER# ")
    return {
        "text": string,
        "predicted_label": segments[-1].strip(),
        "reasoning": segments[0].removeprefix("Answer: ").strip(),
    }


# Run the second chain-of-thought prompt and store the labelled rows as JSON.
for i in rtfmap:
    # Only the "Hyperarousal" category is processed by this cell; every other
    # category is skipped.
    if i.lower() != "hyperarousal":
        continue
    results = []

    for text in tqdm.tqdm(s_data[rtfmap[i]]):
        responseDict = get_answer_CoT2(text["text"], i)
        results.append(
            {
                "text": text["text"],
                "label": text["answer"],
                "predicted_label": responseDict["predicted_label"],
                "reasoning": responseDict["reasoning"],
            }
        )
    # The context manager guarantees the file is flushed and closed.
    with open(f"../data/s_data/cot/{i}2_results.json", "w") as results_file:
        json.dump(results, results_file)

In [None]:
# Print the metrics of the stored second-CoT results for each rubric category.
print("F1 scores for 2nd COT")
for i in rtfmap:
    # Load through a context manager so the file handle is closed right away.
    with open(f"../data/s_data/cot/{i}2_results.json") as results_file:
        results = json.load(results_file)
    print(f"Accuracy for {i}: {accuracy_score(results)}")
    print(f"True Positives for {i}: {count_true_positives(results)}")
    print(f"True Negatives for {i}: {count_true_negatives(results)}")
    print(f"False Positives for {i}: {count_false_positives(results)}")
    print(f"False Negatives for {i}: {count_false_negatives(results)}")
    print(f"Precision for {i}: {precision_score(results)}")
    print(f"Recall for {i}: {recall_score(results)}")

    print(f"F1 for {i}: {f1_score(results)}")
    print()

In [None]:
def get_answer_CoT3(string, rubric_category):
    """
    Label one text with the third chain-of-thought prompt variant.

    The few-shot examples (``sample1_cot3``, ``answer1_cot3``, ``sample2_cot3``,
    ``answer2_cot3``) are not defined in the visible code (presumably supplied
    by the star import -- confirm).

    :param string: the text to be labeled
    :param rubric_category: key of ``scs_rubric`` naming the category to test
    :return: dict with "text", "predicted_label" (whatever follows the last
        '#FINAL ANSWER# ' marker, stripped) and "reasoning" (the text before
        the first marker, without a leading 'Answer: ')
    """
    system_message = f"""
    You are a useful lab assistant and the world's leading {the_word} researcher. Your job is to read a string of text from social media and help categorize it as {rubric_category} or not {rubric_category}. {rubric_category} is formally defined as: {scs_rubric[rubric_category]}. Now that you know what {rubric_category} is, when given a question asking whether or not a social media post is {rubric_category} you will respond with an ouput that starts with 'Answer: ', followed by a lengthy and thorough point by point explanation that goes through the definiton of {rubric_category} and assesses whether the given post meets each point of the criteria. Finally, the end of your answer should start with the string '#FINAL ANSWER# ' followed by either 'accept' if the post is {rubric_category} or 'reject' if the post is not {rubric_category}. LISTEN CAREFULLY, this part is very important. Be VERY very literal in your interpretation of the rubric and the post. DO NOT read into the post beyond the words you are given (for example, using concepts such as 'tone' and 'mood' to decide the final answer). It is safer to reject than to accept. Your final decision must be based on the rubric. IMPORTANT: If atleast one point is thouroughly met, then the post is {rubric_category}. If no points are met or are met very weakly, then the post is not {rubric_category}."""

    # Two worked question/answer pairs, then the real question appended below.
    cot_prompt = f"Question: is the following social media post [Social media post: {sample1_cot3}] an instance of {rubric_category}? Let's think this step by step \nAnswer: {answer1_cot3}\nQuestion: is the following social media post [Social media post: {sample2_cot3}] an instance of {rubric_category}? Let's think this step by step \nAnswer: {answer2_cot3}\n"
    question = f"Question: is the following social media post [Social media post: {string}] an instance of {rubric_category}? Let's think this step by step \nAnswer:"
    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": cot_prompt + question},
    ]

    # Free-text reply (no JSON response_format); the verdict follows a marker.
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=chat_messages,
        max_tokens=4096,
    )
    segments = completion.choices[0].message.content.split("#FINAL ANSWER# ")
    return {
        "text": string,
        "predicted_label": segments[-1].strip(),
        "reasoning": segments[0].removeprefix("Answer: ").strip(),
    }


# Run the third chain-of-thought prompt on every rubric category and store the
# labelled rows as JSON, one file per category.
for i in rtfmap:
    results = []

    for text in tqdm.tqdm(s_data[rtfmap[i]]):
        responseDict = get_answer_CoT3(text["text"], i)
        results.append(
            {
                "text": text["text"],
                "label": text["answer"],
                "predicted_label": responseDict["predicted_label"],
                "reasoning": responseDict["reasoning"],
            }
        )
    # The context manager guarantees the file is flushed and closed.
    with open(f"../data/s_data/cot/{i}3_results.json", "w") as results_file:
        json.dump(results, results_file)

In [None]:
# Print the metrics of the stored third-CoT results for each rubric category.
print("F1 scores for 3rd COT")
for i in rtfmap:
    # Load through a context manager so the file handle is closed right away.
    with open(f"../data/s_data/cot/{i}3_results.json") as results_file:
        results = json.load(results_file)

    print(f"Accuracy for {i}: {accuracy_score(results)}")
    print(f"True Positives for {i}: {count_true_positives(results)}")
    print(f"True Negatives for {i}: {count_true_negatives(results)}")
    print(f"False Positives for {i}: {count_false_positives(results)}")
    print(f"False Negatives for {i}: {count_false_negatives(results)}")
    print(f"Precision for {i}: {precision_score(results)}")
    print(f"Recall for {i}: {recall_score(results)}")

    print(f"F1 for {i}: {f1_score(results)}")
    print()