In [2]:
import pandas as pd
import pickle
import numpy as np
import os
# Load the pickled posts. The context manager closes the file handle promptly
# (the bare open(...) previously left it to the garbage collector).
# NOTE: pickle.load can execute arbitrary code — only load files you produced yourself.
with open("data/euphoria_posts_drugs.pkl", "rb") as pkl_file:
    euphoria_drug = pickle.load(pkl_file)

In [None]:
# Few-shot classification prompt: three example posts with their is_personal label,
# followed by an instruction to answer only 0 or 1. label_post appends the post to
# classify after this text before sending it to the model.
experience_prompt = """
Label the Reddit post either 0 or 1 if the post author describes a personal experience with drugs.
Example post 1:
'A friend turned me on to this show while I was tripping on ketamine and mushrooms. So long, reality.  I just texted her a pic of Rue saying "You fucking did this to me." Lmfao. Bye, universe.'
is_personal = 1
Example post 2:
'Just started watching the show and I’m loving it so much.  I can relate to Rue the most bc I’m an addict.  Is this an actually what it is like to be in high school now?  I don’t mean the sex, drugs, and partying outside of school but all the other craziness that seems to be happening under all the teachers/principles noses?  Can’t wait to keep watching because it hooked almost immediately.  As soon as it started I loved the show'
is_personal = 1
Example post 3:
'whats the song that plays at the ending of episode 04 season 2? you know that ending scene where we see all the main characters and their current situation in symbolically cinematographic pictures, cassie with the flowers, rue in the church, kat in the car trapped with ethan... couldn't find that song on spotify or via google music. thank you for your helpfull answers!'
is_personal = 0

constrain your answer to 0 or 1.
"""

In [None]:
import time
from openai import OpenAI

# Read the key from the environment so a real secret is never saved in the notebook.
# Falls back to an empty string, which is what the previous placeholder supplied.
api_key = os.environ.get("OPENAI_API_KEY", "")
client = OpenAI(api_key=api_key)

def label_post(post, prompt, retries=2, model="gpt-4-0125-preview", retry_delay=5):
    """Send ``prompt`` followed by ``post`` to the chat model and return its reply.

    Uses the module-level ``client`` (an OpenAI client) created in the setup cell.

    Parameters
    ----------
    post : str
        Text to be labelled; appended to ``prompt`` on its own line.
    prompt : str
        Instruction text placed before the post.
    retries : int, default 2
        Maximum number of API attempts. A value <= 0 makes no call at all.
    model : str
        Chat model name passed to the API.
    retry_delay : float, default 5
        Seconds to wait between a failed attempt and the next one.

    Returns
    -------
    str
        The model's reply, lower-cased and stripped. This is free text: nothing here
        restricts it to the labels requested in the prompt, so callers must validate it.
        Returns the sentinel ``"x"`` when every attempt raised.
    """
    full_prompt = prompt + "\n" + post + "\n"
    attempts_left = retries
    while attempts_left > 0:
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {
                        "role": "user",
                        "content": full_prompt,
                    },
                ],
                temperature=0,
            )
            label = response.choices[0].message.content
            return label.lower().strip()
        except Exception as e:
            # Best-effort labelling: any failure (not only timeouts) is reported and
            # retried, so a single bad call cannot abort a long .apply() run.
            attempts_left -= 1
            print(e)
            if attempts_left > 0:
                print('API call failed, retrying...')
                # Only wait when another attempt will actually follow.
                time.sleep(retry_delay)
    print('API is not responding, moving on...')
    return "x"

In [15]:
# List the columns of the loaded posts frame.
euphoria_drug.columns

Index(['author', 'created_utc', 'subreddit', 'title', 'selftext',
       'num_comments', 'score', 'id'],
      dtype='object')

In [16]:
# Combine title and body into one text field. Missing values are treated as empty
# strings: otherwise a NaN title or selftext would make all_text NaN, and that float
# would break the string concatenation inside label_post.
euphoria_drug['all_text'] = (
    euphoria_drug['title'].fillna("") + " " + euphoria_drug['selftext'].fillna("")
)

In [None]:
def _label_is_personal(post_text):
    """Label one post with the personal-experience prompt."""
    return label_post(post_text, experience_prompt)


# One API call per post; the raw reply string is stored as the label.
euphoria_drug['is_personal'] = euphoria_drug['all_text'].apply(_label_is_personal)

In [None]:
# Frequency of each label returned by the model. Labels are the model's reply strings
# (label_post returns lower-cased text), so unexpected free-text replies show up here too.
euphoria_drug['is_personal'].value_counts()

0                         1639
1                          106
yes, you got it right.       1
Name: is_personal, dtype: int64

In [None]:
# Keep only the posts labelled '1' (labels are strings, not ints). Take an explicit
# copy so that adding columns to this subset later does not write into a slice of
# euphoria_drug (which triggers pandas' SettingWithCopyWarning).
euphoria_drug_personal = euphoria_drug[euphoria_drug['is_personal'] == '1'].copy()

In [None]:
# Preview the text of the first five posts labelled as personal experience.
euphoria_drug_personal['all_text'].values[:5]

array(['I watched the first episode high as fuck. I think I fell in love with this show when she was talking about how when doing drugs the world gets quiet and fades away and your breath goes away, and then you breathe and it all comes back. \n\nReally awesome. Can’t wait for episode 2',
       'Does anyone else think Angus Cloud looks like Mac Miller? Just saw episode one. Loved it. Loved the accuracy. I’m an addict and this is the most relatable show I’ve seen in a while. But one thing that bugged me is how much fezzz or whatever his name is (the chill drug dealer guy) looks like Mac Miller. \n\nI literally thought it was Mac and then had to remember that he’s dead. Not something I wanted to deal with again. But also happy to remember. Anyway this post is pointless I just wonder if any of you had the same thought as me or if I smoked too much weed before I watched that episode.',
       'Wow this show really hits... I [21M] have really been enjoying this, I was a little skeptical at

In [None]:
# Persist the personal-experience subset. The context manager guarantees the file is
# flushed and closed (the bare open(...) previously left the handle dangling).
with open("data/euphoria_posts_drugs_labeled.pkl", "wb") as pkl_file:
    pickle.dump(euphoria_drug_personal, pkl_file)

In [None]:
# Also export the personal-experience subset as CSV.
euphoria_drug_personal.to_csv("excel/euphoria_posts_drugs_personal.csv")

In [None]:
# Summary statistics for the personal-experience posts.
unique_authors = euphoria_drug_personal['author'].nunique()
mean_post_length = euphoria_drug_personal['all_text'].str.len().mean()
print("num unique users", unique_authors)
print('average post length:', mean_post_length)

num unique users 100
average post length: 1253.4150943396226


In [17]:
# Posts whose id does not appear in the personal-experience subset.
in_personal_subset = euphoria_drug['id'].isin(euphoria_drug_personal['id'])
e_d_no_personal = euphoria_drug[~in_personal_subset]
# print("num unique users", e_d_no_personal.author.nunique())
print('average post length:', e_d_no_personal['all_text'].str.len().mean())

average post length: 247.2219512195122


----
TOPIC MODELING

In [None]:
import nltk
nltk.download('stopwords')
import re
from nltk.corpus import stopwords
from bertopic import BERTopic
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from bertopic.representation import KeyBERTInspired

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
# del vectorizer_model
# del representation_model
# del topic_model

In [None]:
# Term extractor for the topic representations: English stop words removed,
# min_df=2, unigrams and bigrams.
vectorizer_model = CountVectorizer(
    ngram_range=(1, 2),
    min_df=2,
    stop_words="english",
)
vectorizer_model.fit(euphoria_drug_personal['all_text'])

# KeyBERT-inspired representation for the topic keywords.
representation_model = KeyBERTInspired()

# Assemble the topic model and fit it on the personal-experience posts.
topic_model = BERTopic(
    language="english",
    embedding_model="all-MiniLM-L6-v2",
    vectorizer_model=vectorizer_model,
    representation_model=representation_model,
    verbose=True,
)
topics, probs = topic_model.fit_transform(euphoria_drug_personal['all_text'])

In [None]:
# Workaround for a small corpus: when there are fewer than 300 posts,
# every post is included twice.
bertopic_docs = euphoria_drug_personal['all_text'].to_list()
if len(bertopic_docs) < 300:
    bertopic_docs = bertopic_docs * 2

In [None]:
# Refit the topic model on the (possibly duplicated) list of posts; this replaces
# the topics/probs obtained from any earlier fit.
topics, probs = topic_model.fit_transform(bertopic_docs)

In [None]:
# Show BERTopic's topic visualisation for the fitted model.
topic_model.visualize_topics()

In [None]:
# Mapping of topic id -> list of (term, score) pairs; topic -1 is listed as well.
topic_model.get_topics()

{-1: [('drug addiction', 0.57721627),
  ('addiction', 0.51417613),
  ('drug use', 0.4495204),
  ('fentanyl', 0.42969245),
  ('drug', 0.42462358),
  ('drugs', 0.3901175),
  ('withdrawals', 0.34326455),
  ('pills', 0.3052917),
  ('anxiety', 0.24402478),
  ('smoking', 0.21890503)],
 0: [('kat', 0.4350651),
  ('nate', 0.39373675),
  ('maddy', 0.3355539),
  ('cassie', 0.31707317),
  ('sexuality', 0.2791794),
  ('jules', 0.26947588),
  ('reality', 0.26111853),
  ('character', 0.2513585),
  ('insecure', 0.24875697),
  ('abused', 0.24384607)],
 1: [('euphoria', 0.47745833),
  ('euphoria just', 0.4684793),
  ('obsessed', 0.35252178),
  ('drug addiction', 0.32564384),
  ('addiction', 0.30190563),
  ('season finale', 0.27918628),
  ('addict', 0.27196783),
  ('addicts', 0.21726263),
  ('song', 0.21612436),
  ('relate', 0.21333043)],
 2: [('relate rue', 0.54271686),
  ('rues character', 0.5164143),
  ('rue', 0.43574324),
  ('rues', 0.38854903),
  ('depressed', 0.27834377),
  ('episode', 0.26583776)

In [None]:
# Show BERTopic's heatmap visualisation for the fitted topics.
topic_model.visualize_heatmap()

In [None]:
# Per-topic summary table: count, name, representation terms and representative docs.
topic_model.get_topic_info()

Unnamed: 0,Topic,Count,Name,Representation,Representative_Docs
0,-1,28,-1_drug addiction_addiction_drug use_fentanyl,"[drug addiction, addiction, drug use, fentanyl...",[Blown away by how realistic the depictions of...
1,0,31,0_kat_nate_maddy_cassie,"[kat, nate, maddy, cassie, sexuality, jules, r...",[nate is not gay i think people who believe na...
2,1,22,1_euphoria_euphoria just_obsessed_drug addiction,"[euphoria, euphoria just, obsessed, drug addic...",[Who else got triggered by euphoria and now is...
3,2,22,2_relate rue_rues character_rue_rues,"[relate rue, rues character, rue, rues, depres...","[I relate to rue character so much, it kinda h..."
4,3,20,3_heroin_opiate_opiates_drug,"[heroin, opiate, opiates, drug, rue, drugs, dr...",[Error/inconsistency in the Laurie episode (se...
5,4,18,4_jules_euphoria_addiction_relationship,"[jules, euphoria, addiction, relationship, rel...","[""It's everything we dreamed of."" Just caught ..."
6,5,17,5_opiate addict_drug addict_opiate_addict,"[opiate addict, drug addict, opiate, addict, d...",[i am a 20 year old recovering heroin and oxy ...
7,6,16,6_molly_watched episode_trying sober_rue episode,"[molly, watched episode, trying sober, rue epi...",[Never seen the show but the Soundtrack just s...
8,7,14,7_drugs addiction_taking xanax_addiction_watch...,"[drugs addiction, taking xanax, addiction, wat...",[Any post addicts wanna share their stories?? ...
9,8,12,8_euphoria_portray_portraying_character,"[euphoria, portray, portraying, character, cha...",[Some thoughts and Questions about Euphoria fo...


In [None]:
# Approximate topic distributions for the selected posts; calculate_tokens=True also
# returns token-level distributions, computed here with window=5.
# NOTE(review): the 'is_personal_clean' column is not created in any cell shown in this
# notebook, and it is compared with the integer 1 whereas 'is_personal' holds strings.
# Confirm where it comes from — as written this cell depends on hidden kernel state.
topic_distr, topic_token_distr = topic_model.approximate_distribution(
    euphoria_drug[euphoria_drug['is_personal_clean']==1]['all_text'].values,
    calculate_tokens=True,
    window = 5
)

100%|██████████| 1/1 [00:00<00:00,  1.86it/s]


In [None]:
# visualize_distribution plots the topic probabilities of ONE document, so it needs a
# single 1-D row of topic_distr rather than the whole (documents x topics) matrix.
doc_index = 0  # position of the post to inspect; change to look at another post
topic_model.visualize_distribution(
    topic_distr[doc_index],
    min_probability=0.05,
)

---
Use GPT-4 to extract topic clusters

In [None]:
# Prompt asking the model for a broad, few-word theme describing how a post relates to
# the show's portrayal of drug use. label_post appends the post text after it.
topic_prompt = """
I am giving you a Reddit post from r/euphoria that includes a personal experience with drugs. I want to know what topics about the show the viewers relate to, how do they feel about its representation of people who do drugs. Give me a broad
theme like ‘Withdrawal Accuracy’ or ‘Appreciation to Current Addiction’ or related high level topics in a few words.
"""

In [None]:
# Ask the model for a free-text theme per post (one API call each). assign() builds a
# new frame instead of writing a column into what may be a filtered slice of
# euphoria_drug, which would raise pandas' SettingWithCopyWarning.
euphoria_drug_personal = euphoria_drug_personal.assign(
    GPT_theme=euphoria_drug_personal['all_text'].apply(lambda x: label_post(x, topic_prompt))
)

In [None]:
# Count identical theme replies; the replies are free text returned by label_post.
euphoria_drug_personal['GPT_theme'].value_counts()

based on the provided reddit post, the high-level themes related to viewers' experiences and perceptions of the show "euphoria" in the context of drug use could be summarized as follows:\n\n1. **drug experience realism**: the viewer appreciates the show's portrayal of the sensory and emotional experiences associated with drug use, highlighting the show's ability to capture the profound and often personal effects of substance use.\n\n2. **connection to character**: the viewer feels a strong connection to the protagonist (presumably rue, played by zendaya) and her experiences with drugs, indicating that the show successfully creates relatable characters for those who have had similar experiences.\n\n3. **escapism through substance use**: the mention of the world getting quiet and fading away reflects a theme of using drugs as a form of escapism, a significant aspect of the show that resonates with viewers who have used drugs to escape their reality.\n\n4. **anticipation for more**: the v

In [None]:
# For every post, collect each phrase the model wrapped in ** ** in its theme reply.
import re
bold_phrase = re.compile(r'\*\*(.*?)\*\*')
sub_themes = [
    bold_phrase.findall(theme_text)
    for theme_text in euphoria_drug_personal['GPT_theme']
]

In [None]:
# Show the extracted phrases: one list per post (empty when nothing was wrapped in ** **).
sub_themes

[['drug experience realism',
  'connection to character',
  'escapism through substance use',
  'anticipation for more',
  'appreciation of cinematic representation'],
 ['character relatability',
  'accuracy of addiction representation',
  'emotional impact',
  'visual and character aesthetics',
  'viewer engagement and community discussion'],
 ['authentic representation of addiction',
  'personal connection to addiction storylines',
  'appreciation of drug narrative accuracy',
  'engagement and anticipation for future episodes',
  'impact of addiction representation on viewers'],
 ['appreciation of cinematic execution',
  'connection to drug experiences',
  'representation of drug use',
  'emotional resonance',
  'engagement while intoxicated'],
 [],
 ['drug portrayal accuracy',
  'desire for relatable representation',
  'emotional and physical responses to drugs',
  'viewer expectations vs. show depiction',
  'discussion on drug education and awareness'],
 ['triggering content',
  'r

In [None]:
# Number of extracted phrases per post, and the total across all posts.
num_themes = list(map(len, sub_themes))
total_sub_themes = sum(num_themes)
print("number of subthemes: ", total_sub_themes)

number of subthemes:  638


In [None]:
# Export the themed posts and the extracted sub-themes.
# The context manager makes sure the pickle file is flushed and closed
# (the bare open(...) previously left the handle dangling).
with open("data/euphoria_drug_personal.pkl", "wb") as pkl_file:
    pickle.dump(euphoria_drug_personal, pkl_file)

# NOTE: this rebinds sub_themes from a list of lists to a DataFrame (one row per post,
# one column per phrase position), so cells that iterate the list with len(x) must not
# be re-run after this one.
sub_themes = pd.DataFrame(sub_themes)
sub_themes.to_csv("excel/euphoria/sub_themes.csv")