In [1]:
# experiment with binary classification with three models: gpt3.5, gpt4, and mistral
# data read from archive for 7 drug subreddits with ./read_archive_dat.ipynb

import pandas as pd
import numpy as np
import pickle
from openai import OpenAI
import time
import os
# 2.7 million rows
all_data = pickle.load(open('../data/all_data_filtered.pkl', 'rb'))

In [2]:
all_data.columns

Index(['author', 'created_utc', 'subreddit', 'title', 'selftext',
       'num_comments', 'score', 'id', 'text'],
      dtype='object')

In [6]:
all_data['created_utc2'] = pd.to_datetime(all_data['created_utc'], unit='s')
for subreddit in all_data['subreddit'].unique():
    print(subreddit)
    # print date range
    print(all_data[all_data['subreddit'] == subreddit]['created_utc2'].min())
    print(all_data[all_data['subreddit'] == subreddit]['created_utc2'].max())

benzodiazepines
2010-09-28 20:57:57
2022-12-31 23:51:00
Drugs
2008-02-19 12:24:23
2022-12-31 23:56:21
LSD
2008-05-27 00:43:44
2022-12-31 23:59:01
opiates
2010-04-19 18:30:57
2022-12-31 23:48:40
cocaine
2009-03-05 00:47:41
2022-12-31 23:52:09
kratom
2010-09-08 07:37:33
2022-12-31 23:51:54
trees
2009-10-15 19:11:17
2022-12-31 23:55:40


In [2]:
# get random sample from all_data weighted by subreddit
seed = 2025
np.random.seed(seed)
n_samples = 1000
# weighted random sample
weights = all_data['subreddit'].value_counts(normalize=True)
sample1 = all_data.sample(n=n_samples, weights=all_data['subreddit'].map(weights), random_state=seed)


In [6]:
sample2 = sample1.copy()
sample2.to_csv('../data/is_stigma/sample2.csv', index=False)

In [3]:
# another reference: https://medium.com/@amitbasuri123/using-prompt-engineering-for-classification-with-openai-api-8e6b2c0233ee
api_key = "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"
client = OpenAI(api_key = api_key)

In [4]:
# label function from elham
def label_is_stigma(sentence, sys_prompt, retries=2, model="gpt-3.5-turbo-0125"):
    example1 = "i'm really struggling to find a job. every time i get to the point in the interview where they ask about my gap in employment and i'm honest about my time in rehab, i can see the change in their demeanor. it's like suddenly i'm not a qualified candidate anymore. i know i've made mistakes, but i've worked hard to overcome them. it's so frustrating that this part of my past keeps holding me back."
    answer1 = "stigma"
    example2 = "Got caught Today, while cleaning out my room my mom found less than a G of some weed in a pill bottle. She asked me how many times I've done it and I said 2 even though it was a lie.  She just looked at me with utter disappointment and almost started crying.  What do I do? "
    answer2 = "no stigma"
    example3 = "I mean there’s a time in my life where I’d be paranoid about sitting around other people’s possessions you know ‘cause if anything went missing generally nine out of ten people in the room would be dismissed and I’d get the blame ... there’s a lot of discomfort within yourself after coming out of that lifestyle or existence really. "
    answer3 = "stigma"
    while retries > 0:
        try:
            response = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": sys_prompt,
                },
                {
                    "role": "user",
                    "content": example1,
                },
                {
                    "role": "assistant",
                    "content": answer1,
                },
                {
                    "role": "user",
                    "content": example2,
                },
                {
                    "role": "assistant",
                    "content": answer2,
                },
                {
                    "role": "user",
                    "content": example3,
                },
                {
                    "role": "assistant",
                    "content": answer3,
                },
                {
                    "role": "user",
                    "content": sentence,
                }
            ],
            model=model,
            temperature=0
        )
            label = response.choices[0].message.content.lower().strip()
            return label
        except Exception as e:
            if e:
                print(e)
                retries -= 1
                time.sleep(5)
            else:
                raise e
    print("Retrying...")
    return "skipped"
        

In [7]:
sys_prompt = f"""
You will be provided with a Reddit post.\
The post will be from a subreddit about a specific drug.\
Your task is to determine if a post contains a reference to a stigmatizing experience around the author's drug use or \
if the post contains stigmatizing language. Stigma can be implicit or explicit.\
There are four main types of stigma, including public stigma, self-stigma, associated stigma, and structural stigma.\
Sometimes, stigma can be anticipated if the author is describing a negative feeling that may be associated with drug use.\
If you believe the post contains stigmatizing language or a stigmatizing experience, please label it as "stigma".\
If you believe the post does not contain stigmatizing language or a stigmatizing experience, please label it as "no stigma".\
Please provide your answer in the form of a single word: "stigma" or "no stigma".\
"""



In [11]:
# get model response - gpt3.5
sample1['gpt3.5'] = sample1['text'].apply(lambda x: label_is_stigma(x, sys_prompt, model="gpt-3.5-turbo-0125"))

In [16]:
print(sample1['gpt3.5'].value_counts())

no stigma    635
stigma       365
Name: gpt3.5, dtype: int64


In [12]:
# filter on stigma for evaluation ~ 365 posts were labeled as stigma - seems a lot
gpt3_5_stigma = sample1[sample1['gpt3.5'] == 'stigma']
gpt3_5_stigma.to_csv('../data/gpt3_5_stigma.csv')

In [11]:
# modifying prompt to include definition of stigma
sys_prompt2 = f"""
You are an expert annotator. Read the input text and complete the task.\
Your task is to determine if a post contains a reference to a stigmatizing experience around the author's drug use or \
if the post contains stigmatizing language. Stigma can be implicit or explicit.\
There are four main types of stigma, including public stigma, self-stigma, associated stigma, and structural stigma.\
Self-stigma is the internalization of negative stereotypes about oneself. it can include expressions of shame or self-blame for their drug use or Feeling like they are a failure or have let others down. \
Public stigma, or social stigma, is the negative attitudes and beliefs that the general public holds about people who use drugs. This can also be 'enacted stigma' when a person acts as a result of social stigma. can manifest as discrimination, prejudice, and misinformation. \
Associated stigma is the stigma that is attached to the people who are associated with the person who uses drugs. where friends and family members of drug users are also judged or ostracized.\
Structural stigma is the societal-level conditions, cultural norms, and institutional practices that constrain the opportunities, resources, and well-being of the stigmatized.\
Sometimes, stigma can be anticipated if the author is describing a negative feeling that may be associated with drug use as a result of internalized stigma and public stigma.\
If you believe the post contains stigmatizing language or a stigmatizing experience, please label it as "stigma".\
If you believe the post does not contain stigmatizing language or a stigmatizing experience, please label it as "no stigma".\
Please provide your answer in the form of a single word: "stigma" or "no stigma".\
"""

In [14]:
sample1['gpt3.5_prompt2'] = sample1['text'].apply(lambda x: label_is_stigma(x, sys_prompt2, model="gpt-3.5-turbo-0125"))

In [15]:
print(sample1['gpt3.5_prompt2'].value_counts())

no stigma    595
stigma       405
Name: gpt3.5_prompt2, dtype: int64


In [17]:
# gpt4
sample1['gpt4'] = sample1['text'].apply(lambda x: label_is_stigma(x, sys_prompt, model="gpt-4-0125-preview"))

print(sample1['gpt4'].value_counts())   

no stigma    942
stigma        58
Name: gpt4, dtype: int64


In [14]:
sample1['gpt4_prompt2'] = sample1['text'].apply(lambda x: label_is_stigma(x, sys_prompt2, model="gpt-4-0125-preview"))

print(sample1['gpt4_prompt2'].value_counts())

no stigma                                                                                                        953
stigma                                                                                                            43
i'm sorry, but i can't provide assistance with that request.                                                       1
i'm sorry, but i can't provide an analysis without more context or content from the post you're referring to.      1
i'm sorry, but i can't fulfill this request.                                                                       1
i'm sorry, but i can't assist with that request.                                                                   1
Name: gpt4_prompt2, dtype: int64


In [15]:
sample1.to_csv('../data/is_stigma/sample1.csv')

In [None]:
# a function to run is_stigma on a sample of 1k posts until there are at least 1k posts labeled as stigma
total_stigma = 0
while total_stigma <= 1000:
    # make sure to get a new sample each time, no replacement
    sample = all_data.sample(n=n_samples, weights=all_data['subreddit'].map(weights), replace=False)
    sample['gpt4'] = sample['text'].apply(lambda x: label_is_stigma(x, sys_prompt, model="gpt-4-0125-preview"))
    total_stigma = sample['gp4'].value_counts()['stigma']
    print(total_stigma)
    print(sample1['gpt4'].value_counts())

In [16]:
# try with a new prompt
prompt3 = f"""
You are an expert annotator. Read the input text and complete the task.\
Your task is to determine if a post contains language that is stigmatizing towards drug use or users \
or if the post is related to a stigmatizing experience around the author's drug use.\
Stigma can be implicit or explicit.\
According to Link and Phelan (2001), stigma is a social process that involves labeling, stereotyping, separation, status loss, and discrimination.\
More specficially, "stigma exists when the following interrelated components converge. In the first component, people distinguish and label human differences.\
In the second, dominant cultural beliefs link labeled persons to undesirable characteristics—to negative stereotypes. \
In the third, labeled persons are placed in distinct categories so as to accomplish some degree of separation of “us” from “them.” \
In the fourth, labeled persons experience status loss and discrimination that lead to unequal outcomes.\
Finally, stigmatization is entirely contingent on access to social, economic, and political power that allows the identification of differentness, the construction of stereotypes, the separation of labeled persons into distinct categories, and the full execution of disapproval, rejection, exclusion, and discrimination."\
Based on this attribution, if you believe the post contains stigmatizing language or a stigmatizing experience, please label it as "stigma".\
If you believe the post does not contain stigmatizing language or a stigmatizing experience, please label it as "no stigma".\
Please provide your answer in the form of a single word: "stigma" or "no stigma".\
"""

In [28]:
prompt3a = f"""
You are an expert annotator. Read the input text carefully and complete the task.
**Task:** Determine if a post contains language that stigmatizes drug use or users, or if the post describes a stigmatizing experience related to the author's drug use. Consider both direct statements and subtle implications.
**What is stigma?**
* Stigma is a negative mark of disgrace associated with a particular circumstance, quality, or person.
* Stigma in the context of drug use often involves:
    * **Labeling:** Using derogatory terms for people who use drugs (e.g., "junkie," "addict").
    * **Stereotyping:** Assuming all people who use drugs are dangerous, irresponsible, etc.
    * **Separation:** Creating an "us vs. them" mentality, ostracizing people who use drugs.
    * **Status loss:**  Viewing people who use drugs as less valuable members of society.
    * **Discrimination:** Treating people who use drugs unfairly in areas like housing, employment, or healthcare.
**Examples:**
* **Stigma:** "Drug addicts are a scourge on our community. They need to be locked up."
* **Stigma:** "I realize my medication regiment is high however, I believe I am an adult and I have the right to choose what goes in to my bod, especially with a supportive psychiatrist on my side.Lately, the pharmacy has been extremely disrespectful to me due to the medications I take. Lately, I tried Ambien CR, which worked miserably, and I wanted to switch back to my sonata. The pharmacist would not let me until the Ambien script was up due to the fact that they were in the same class of drugs. I felt very discriminated against and I thought it was quite disrespectful.My parents also consistently questioning the medications I take and in a given week probably ask me‚ÄúDo you really need to take all of those medications?‚Äù least three or four times.I am having a bit of a psychological dilemma."
* **Stigmatizing Experience:**  "I lost my job after they found out I was in rehab. It felt like everyone at work was judging me."
**Instructions:**
* If you believe the post contains stigmatizing language or a stigmatizing experience, label it as "stigma".
* If you believe the post does not contain these elements, label it as "no stigma".
* Provide your answer as a single word: "stigma" or "no stigma".
"""

In [5]:
prompt3b = f"""
You are an expert annotator. Read the input text carefully and complete the task.

**Task:** Determine if a post contains language that stigmatizes drug use or users, or if the post describes a stigmatizing experience related to the author's drug use. Consider both direct statements and subtle implications.

**What is stigma?**

* Stigma is a negative mark of disgrace associated with a particular circumstance, quality, or person.
* Stigma in the context of drug use often involves:
    * **Labeling:** Using derogatory terms for people who use drugs (e.g., "junkie," "addict").
    * **Stereotyping:** Assuming all people who use drugs are dangerous, irresponsible, etc. 
    * **Separation:** Creating an "us vs. them" mentality, ostracizing people who use drugs.
    * **Status loss:**  Viewing people who use drugs as less valuable members of society.
    * **Discrimination:** Treating people who use drugs unfairly in areas like housing, employment, or healthcare.

**Important Notes:**

* **Focus on Intent:** Stigmatizing language aims to degrade, shame, or perpetuate negative stereotypes about people who use drugs. 
* **Humor vs. Stigma:** Jokes or memes, even if they mention drugs, do not automatically qualify as stigma. Look for language that goes beyond humor and targets individuals or groups with harmful labels or assumptions.
* **Context Matters:** Consider the overall tone and purpose of the post. Is it intended to inform, share an experience, or genuinely promote negativity towards people who use drugs?

**Examples:**

* **Stigma:** "Drug addicts are a scourge on our community. They need to be locked up."
* **Stigmatizing Experience:** "I realize my medication regiment is high however, I believe I am an adult and I have the right to choose what goes in to my bod, especially with a supportive psychiatrist on my side.Lately, the pharmacy has been extremely disrespectful to me due to the medications I take. Lately, I tried Ambien CR, which worked miserably, and I wanted to switch back to my sonata. The pharmacist would not let me until the Ambien script was up due to the fact that they were in the same class of drugs. I felt very discriminated against and I thought it was quite disrespectful.My parents also consistently questioning the medications I take and in a given week probably ask me‚ÄúDo you really need to take all of those medications?‚Äù least three or four times.I am having a bit of a psychological dilemma." 
* **Stigmatizing Experience:**  "I lost my job after they found out I was in rehab. It felt like everyone at work was judging me."
* **No Stigma:** "Txt him needing an 1/8th says he's got u 2day over 48hrs l8r only rcv'd "Yeah" in reply - Scumbag Steve | Meme Generator "

**Instructions:**

* If you believe the post contains stigmatizing language or a stigmatizing experience, label it as "stigma".
* If you believe the post does not contain these elements, label it as "no stigma".
* Provide your answer as a single word: "stigma" or "no stigma". 
"""


In [19]:
sample1['gpt4_prompt3'] = sample1['text'].apply(lambda x: label_is_stigma(x, prompt3, model="gpt-4-0125-preview"))

In [20]:
print(sample1['gpt4_prompt3'].value_counts())

no stigma                                                                                                                                                                                             969
stigma                                                                                                                                                                                                 28
i'm sorry, but it seems like your message was cut off before you could provide the full context or details of your situation. could you please provide more information or clarify your statement?      1
i'm sorry, but i can't fulfill this request.                                                                                                                                                            1
i'm sorry, but it seems like your message got cut off before you could finish your thought. could you provide more details or clarify your message?                                             

In [21]:
sample1.to_csv('../data/is_stigma/sample1.csv')

In [None]:
sample1['gpt4_prompt3a'] = sample1['text'].apply(lambda x: label_is_stigma(x, prompt3a, model="gpt-4-0125-preview"))
print(sample1['gpt4_prompt3a'].value_counts())

no stigma    981
stigma        19
Name: gpt4_prompt3a, dtype: int64


In [None]:
sample1.to_csv('../data/is_stigma/sample1.csv')

In [None]:
sample1['gpt4_prompt3b'] = sample1['text'].apply(lambda x: label_is_stigma(x, prompt3b, model="gpt-4-0125-preview"))
print(sample1['gpt4_prompt3b'].value_counts())

no stigma    991
stigma         9
Name: gpt4_prompt3b, dtype: int64


In [None]:
sample1.to_csv('../data/is_stigma/sample1.csv')

In [7]:
# experiment with prompt3b on a new sample of 1k
sample2["gpt4_prompt3b"] = sample2["text"].apply(lambda x: label_is_stigma(x, prompt3b, model="gpt-4-0125-preview"))
print(sample2["gpt4_prompt3b"].value_counts())

no stigma                                                                                                                                                                                        994
stigma                                                                                                                                                                                             5
insufficient information provided to determine if the post contains stigmatizing language or describes a stigmatizing experience related to drug use. please provide more context or details.      1
Name: gpt4_prompt3b, dtype: int64


In [8]:
sample2.to_csv('../data/is_stigma/sample2.csv')

In [10]:
prompt3c = f"""
You are an expert sociologist studying addiction and stigma. Read the input text carefully and complete the task.

**Task:** Determine if the post describes a stigmatizing experience related to the author's drug use. Consider both direct statements and subtle implications.

**What is stigma?**

* Stigma is a negative mark of disgrace associated with a particular circumstance, quality, or person.
* Stigma in the context of drug use can often be characterized by:
    * **Labeling:** Using derogatory terms for people who use drugs (e.g., "junkie," "addict").
    * **Stereotyping:** Assuming all people who use drugs are dangerous, irresponsible, etc. 
    * **Separation:** Creating an "us vs. them" mentality, ostracizing people who use drugs.
    * **Status loss:**  Viewing people who use drugs as less valuable members of society.
    * **Discrimination:** Treating people who use drugs unfairly in areas like housing, employment, or healthcare.
* Stigma can often be the internalization of negative stereotypes (public stigma) about oneself, leading to feelings of shame, self-blame, or failure. Affected individuals will then exclude themselves from public life, for example, by failing to apply for work or by removing themselves from public sight; or they will cease to see themselves as responsible citizens; or they will begin to see themselves as legitimate objects of the treatment meted out to them. Above all, they will be motivated to continue to consume in order to forget, set aside, or reduce the negative feelings arising from their shame (Looping effect).
* Look for signs of the Shame Condition in an individual's narrative: part of what he calls a twin normative failure model of addiction. In addition to the normative failure of effective agency and loss of control (common to most accounts), the affected person, in so far as she recognizes her repeated failures," ... cannot pass her own survey"

**Important Notes:**

* **Focus on Intent:** Stigmatizing language aims to degrade, shame, or perpetuate negative stereotypes about people who use drugs. 
* **Humor vs. Stigma:** Jokes or memes, even if they mention drugs, do not automatically qualify as stigma. Look for language that goes beyond humor and targets individuals or groups with harmful labels or assumptions.
* **Context Matters:** Consider the overall tone and purpose of the post. Is it intended to inform, share an experience, or genuinely promote negativity towards people who use drugs?

**Examples:**

* **Stigmatizing Experience:** "I realize my medication regiment is high however, I believe I am an adult and I have the right to choose what goes in to my bod, especially with a supportive psychiatrist on my side.Lately, the pharmacy has been extremely disrespectful to me due to the medications I take. Lately, I tried Ambien CR, which worked miserably, and I wanted to switch back to my sonata. The pharmacist would not let me until the Ambien script was up due to the fact that they were in the same class of drugs. I felt very discriminated against and I thought it was quite disrespectful.My parents also consistently questioning the medications I take and in a given week probably ask me‚ÄúDo you really need to take all of those medications?‚Äù least three or four times.I am having a bit of a psychological dilemma." 
* **Stigmatizing Experience:**  "I lost my job after they found out I was in rehab. It felt like everyone at work was judging me."
* **No Stigma:** "Txt him needing an 1/8th says he's got u 2day over 48hrs l8r only rcv'd "Yeah" in reply - Scumbag Steve | Meme Generator "

**Instructions:**

* If you believe the post contains stigmatizing language or a stigmatizing experience, label it as "stigma".
* If you believe the post does not contain these elements, label it as "no stigma".
* Provide your answer as a single word: "stigma" or "no stigma".
"""

In [11]:
sample2["gpt4_prompt3c"] = sample2["text"].apply(lambda x: label_is_stigma(x, prompt3c, model="gpt-4-0125-preview"))
print(sample2["gpt4_prompt3c"].value_counts())


no stigma                                                       994
stigma                                                            4
i'm sorry, but i can't provide assistance with that request.      1
insufficient information                                          1
Name: gpt4_prompt3c, dtype: int64


In [12]:
sample2.to_csv('../data/is_stigma/sample2.csv')

In [31]:
prompt4 = f"""
You are an expert annotator. Read the input text carefully and complete the task.

**Task:** Determine if a post contains language that stigmatizes drug use or users, or if the post describes a stigmatizing experience related to the author's drug use. Consider both direct statements and subtle implications.

**What is stigma?**

* Stigma is a negative mark of disgrace associated with a particular circumstance, quality, or person.
* Stigma in the context of drug use often involves:
    * **Labeling:** Using derogatory terms (e.g., "junkie," "addict").
    * **Stereotyping:** Assuming all people who use drugs are dangerous, irresponsible, etc. 
    * **Separation:** Creating an "us vs. them" mentality, ostracizing people who use drugs.
    * **Status loss:**  Viewing people who use drugs as less valuable members of society.
    * **Discrimination:** Treating people who use drugs unfairly in areas like housing, employment, or healthcare.

**Types of Stigma: **

We are interested in the different ways stigma is expressed and experienced.

* **Experiential Stigma:** Focused on how people with a stigmatized attribute perceive and experience negativity.
    * **Perceived:**  Belief that prejudice and discrimination exist.
    * **Anticipated:** Fear of being devalued or discriminated against.
    * **Received:**  Direct experience of prejudice and discrimination.
* **Action Stigma:** Focused on who is inflicting the stigma.
    * **Self-stigma:**  Internalizing negative beliefs about oneself.
    * **Public Stigma:**  Stigma perpetuated by society at large.
    * **Structural Stigma:** Stigma embedded in institutions, laws, and policies. 

**Instructions:**

* If you believe the post contains stigmatizing language or a stigmatizing experience, label it as "stigma".
* If you believe the post does not contain these elements, label it as "no stigma".
*  **Optional:** In the future, we may ask follow-up questions to delve deeper into the specific types of stigma identified.

**Examples:**

* **Stigma:** "Drug addicts are a scourge on our community. They need to be locked up."
* **Stigmatizing Experience:** "I realize my medication regiment is high however, I believe I am an adult and I have the right to choose what goes in to my bod, especially with a supportive psychiatrist on my side.Lately, the pharmacy has been extremely disrespectful to me due to the medications I take. Lately, I tried Ambien CR, which worked miserably, and I wanted to switch back to my sonata. The pharmacist would not let me until the Ambien script was up due to the fact that they were in the same class of drugs. I felt very discriminated against and I thought it was quite disrespectful.My parents also consistently questioning the medications I take and in a given week probably ask me‚ÄúDo you really need to take all of those medications?‚Äù least three or four times.I am having a bit of a psychological dilemma." 
* **Stigmatizing Experience:**  "I lost my job after they found out I was in rehab. It felt like everyone at work was judging me."
* **No Stigma:** "Txt him needing an 1/8th says he's got u 2day over 48hrs l8r only rcv'd "Yeah" in reply - Scumbag Steve | Meme Generator "

**Follow-up Questions (if labeled 'stigma')**

* **Primary Type:** Which stigma type is MOST prominent? Choose one: Perceived, Anticipated, Received, Self, Public, Structural 
* **Brief Explanation:** Provide a short reason for your choice.

**Additional Notes:**

* Humor or memes referencing drugs don't automatically qualify as stigma. Look for language targeting individuals or groups with harmful stereotypes or assumptions.
* The overall tone and purpose of the post matters. 
"""

In [32]:
sample1['gpt4_prompt4'] = sample1['text'].apply(lambda x: label_is_stigma(x, prompt4, model="gpt-4-0125-preview"))
print(sample1['gpt4_prompt4'].value_counts())

no stigma                                                                                                                                              986
stigma                                                                                                                                                  13
i'm sorry, but it seems there might have been a misunderstanding regarding the task. could you please provide more details or clarify your request?      1
Name: gpt4_prompt4, dtype: int64


In [33]:
sample1.to_csv('../data/is_stigma/sample1.csv')

In [36]:
prompt5 = f"""
You are an expert annotator. Read the input text carefully and complete the task.

**Study Purpose:**

We aim to identify posts on Reddit that contain stigma related to drug use. This understanding will help us develop interventions to reduce stigma and its harmful effects.

**Task:** Determine if a post contains language that stigmatizes drug use or users, or if the post describes a stigmatizing experience related to the author's drug use. Consider both direct statements and subtle implications.

**What is stigma? Definitions**
* **Experiential Stigma:** Focused on how people with a stigmatized attribute perceive and experience negativity.
    * **Perceived:**  Belief that prejudice and discrimination exist.
    * **Endorsed:**  Agreement with existing stereotypes about the group. 
    * **Anticipated:**  Fear of being devalued and discriminated against.
    * **Received:**  Direct experience of prejudice and discrimination.
    * **Enacted:**  The act of discriminating against someone due to the stigmatized attribute.  
* **Action Stigma:** Focused on who is inflicting the stigma.
    * **Self-stigma:**  Internalizing negative beliefs about oneself.
    * **Public Stigma:**  Stigma perpetuated by society at large.
    * **Structural Stigma:** Stigma embedded in institutions, laws, and policies. 

**Examples:**

* **Stigma:** "Drug addicts are a scourge on our community. They need to be locked up."
* **Stigmatizing Experience:** "I realize my medication regiment is high however, I believe I am an adult and I have the right to choose what goes in to my bod, especially with a supportive psychiatrist on my side.Lately, the pharmacy has been extremely disrespectful to me due to the medications I take. Lately, I tried Ambien CR, which worked miserably, and I wanted to switch back to my sonata. The pharmacist would not let me until the Ambien script was up due to the fact that they were in the same class of drugs. I felt very discriminated against and I thought it was quite disrespectful.My parents also consistently questioning the medications I take and in a given week probably ask me‚ÄúDo you really need to take all of those medications?‚Äù least three or four times.I am having a bit of a psychological dilemma." 
* **Stigmatizing Experience:**  "I lost my job after they found out I was in rehab. It felt like everyone at work was judging me."
* **No Stigma:** "Txt him needing an 1/8th says he's got u 2day over 48hrs l8r only rcv'd "Yeah" in reply - Scumbag Steve | Meme Generator "

**Instructions:**

* If you believe the post contains stigmatizing language or a stigmatizing experience, label it as "stigma".
* If you believe the post does not contain these elements, label it as "no stigma".
* Provide your answer as a single word: "stigma" or "no stigma". 

**Additional Notes:**

* **Focus on Intent:** Stigmatizing language aims to degrade, shame, or perpetuate negative stereotypes about people who use drugs. 
* **Humor vs. Stigma:** Jokes or memes, even if they mention drugs, do not automatically qualify as stigma. Look for language that goes beyond humor and targets individuals or groups with harmful labels or assumptions.
* **Context Matters:** Consider the overall tone and purpose of the post. Is it intended to inform, share an experience, or genuinely promote negativity towards people who use drugs?
"""

In [37]:
sample1['gpt4_prompt5'] = sample1['text'].apply(lambda x: label_is_stigma(x, prompt5, model="gpt-4-0125-preview"))
print(sample1['gpt4_prompt5'].value_counts())

no stigma    990
stigma        10
Name: gpt4_prompt5, dtype: int64


In [38]:
sample1.to_csv('../data/is_stigma/sample1.csv')

In [40]:
prompt6 = """
You are an expert sociologist. Read the input text carefully and complete the task.
Objective: Classify each post from specified drug-related subreddits as either 'stigma' or 'no stigma'. A post is labeled 'stigma' if it contains any form of stigmatizing language or narratives that depict stigmatizing experiences, directly or indirectly, towards individuals who use drugs. A 'no stigma' post lacks any utterances of stigma around drug use.

Stigma Attributes (Based on Link and Phelan, 2001):
1. Labeling: The use of derogatory or pejorative terms to describe individuals who use drugs (e.g., "junkie", "addict").
2. Stereotyping: Assigning generalized negative traits to drug users, such as being dangerous or irresponsible.
3. Separation: Promoting an 'us versus them' mentality, segregating drug users from the rest of society.
4. Status Loss: Depicting drug users as lesser or degraded members of society.
5. Discrimination: Describing or endorsing unfair treatment of drug users in various societal aspects such as employment, healthcare, or legal matters.

Contextual Awareness: Ensure that the classification remains sensitive to context, particularly in cases where humor is used. Posts meant as humor should be analyzed carefully to discern whether they perpetuate stigma subtly.

Examples for Training:
- Stigmatizing Example: "Drug addicts are a scourge on our community. They need to be locked up."
- Stigmatizing Experience: "Lately, the pharmacy has been extremely disrespectful to me due to the medications I take. I felt very discriminated against."
- Non-Stigmatizing Example: "Txt him needing an 1/8th says he's got u 2day over 48hrs l8r only rcv'd 'Yeah' in reply - Scumbag Steve | Meme Generator."

Classification Rationale: This binary classification aims to initially identify posts containing stigmatizing content. Identified posts will undergo further analysis to explore and understand the nature and context of stigma, aiding in developing more nuanced treatment and intervention strategies.
"""

In [41]:
sample1['gpt4_prompt6'] = sample1['text'].apply(lambda x: label_is_stigma(x, prompt6, model="gpt-4-0125-preview"))
print(sample1['gpt4_prompt6'].value_counts())

no stigma                                                                                                                                                                                                                                                                                                                    981
stigma                                                                                                                                                                                                                                                                                                                        11
insufficient information provided for classification.                                                                                                                                                                                                                                                                          1
i'm sorry, but i can't provide assist

In [42]:
sample1.to_csv('../data/is_stigma/sample1.csv')

In [43]:
prompt7 = """
Analyze the following Reddit post to determine if it contains any language indicative of stigma related to drug use or other personal issues. 
Label the response as 'no stigma' if no stigma-related language is present. If stigma is detected, specify the type(s) of stigma: perceived, endorsed, 
anticipated, received, and enacted. Provide a brief explanation for each type identified, citing specific parts of the text as evidence.

Definitions:
- Perceived Stigma: Awareness of societal negative stereotypes.
- Endorsed Stigma: Agreement with societal negative stereotypes.
- Anticipated Stigma: Expectation of being judged based on stereotypes.
- Received Stigma: Personal experiences of discrimination.
- Enacted Stigma: Actions reflecting stigmatizing beliefs.

Classify the text accordingly and explain your reasoning.
"""

In [44]:
sample1['gpt4_prompt7'] = sample1['text'].apply(lambda x: label_is_stigma(x, prompt7, model="gpt-4-0125-preview"))

In [45]:
sample1.to_csv('../data/is_stigma/sample1.csv')

---

In [2]:
# mistral
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda"

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")


Loading checkpoint shards: 100%|██████████| 3/3 [00:33<00:00, 11.32s/it]


In [32]:
# apply message template for stigma
def label_is_stigma_mistral(sentence, sys_prompt, model, tokenizer, retries=2):
    example1 = "i'm really struggling to find a job. every time i get to the point in the interview where they ask about my gap in employment and i'm honest about my time in rehab, i can see the change in their demeanor. it's like suddenly i'm not a qualified candidate anymore. i know i've made mistakes, but i've worked hard to overcome them. it's so frustrating that this part of my past keeps holding me back."
    answer1 = "stigma"
    example2 = "it was day 8 and i couldn’t sleep from withdrawals, even popped a bunch of flu nighttime medication."
    answer2 = "no stigma"
    messages = [
        {"role": "user", "content": sys_prompt + "For example:" + example1},
        {"role": "assistant", "content": answer1},
        {"role": "user", "content": example2},
        {"role": "assistant", "content": answer2},
        {"role": "user", "content": sentence}
    ]
    while retries > 0:
        try:
            # inputs = tokenizer.encode(sys_prompt + example1 + answer1 + example2 + answer2 + sentence, return_tensors="pt", max_length=512, truncation=True, padding=True)
            inputs = tokenizer.apply_chat_template(messages, return_tensors="pt")
            model_inputs = inputs.to(device)
            model.to(device)
            outputs = model.generate(model_inputs, max_new_tokens=10, do_sample = True)
            label = tokenizer.decode(outputs, skip_special_tokens=True)
            print(label[0])
            return label[0]
        except Exception as e:
            if e:
                print(e)
                retries -= 1
                time.sleep(5)
            else:
                raise e
    print("Retrying...")
    return "skipped"

In [33]:
test = sample1['text'].iloc[0]
print(test)
print(label_is_stigma_mistral(test, sys_prompt, model, tokenizer))

All nighter followed up with an 10+ hour shift Who's with me??
Torch not compiled with CUDA enabled
Torch not compiled with CUDA enabled
Retrying...
skipped


In [3]:
# experiment with elham code for mistral and phi
import requests

API_TOKEN = "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" #your HF API Token here
headers = {"Authorization": f"Bearer {API_TOKEN}"}
# model = "microsoft/Phi-3-mini-4k-instruct"
model = "mistralai/Mistral-7B-Instruct-v0.2"
API_URL = f"https://api-inference.huggingface.co/models/{model}"

def query(payload):
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()

sys_prompt = "You are a helpful AI assistant."

example1 = "i'm really struggling to find a job. every time i get to the point in the interview where they ask about my gap in employment and i'm honest about my time in rehab, i can see the change in their demeanor. it's like suddenly i'm not a qualified candidate anymore. i know i've made mistakes, but i've worked hard to overcome them. it's so frustrating that this part of my past keeps holding me back."
answer1 = "stigma"

# prompt = "Your prompt here"

data = query(
    {
        "inputs": f"""<|system|>
                  {sys_prompt}<|end|>
                  <|user|>
                  {example1}<|end|>
                  <|assistant|>
                  {answer1}<|end|>
                  <|user|>
                  {sys_prompt2}<|end|>
                  <|assistant|>"""
    }
)

print(data[0]['generated_text'].split('<|assistant|>')[-1].strip())

KeyError: 0