**Getting Reddit Metadata and Posts on Euphoria**
07/19/2022

In [59]:
# libraries
import pandas as pd
from praw import Reddit
from praw.models import MoreComments
from praw.models import Comment
from praw.models import Submission


In [60]:
# source api credentials from file
file = 'api-creds.py'
exec(open(file).read())

**Metadata**

In [None]:
# pull metadata from euphoria subreddit - details about the subreddit


Get posts headlines. Options: hot, new, top, or rising  

- `score`: the number of votes
- `created_etc`: date post of created
- `upvote ratio`: ratio of post upvotes to commennts
- `num_comments`: number of comments the post received

In [61]:
# pull and store headline attributes from api
def get_headlines(subreddit):
    """
    Get headlines from a subreddit
    """
    headlines = []
    for submission in reddit.subreddit(subreddit).hot(limit = None):
        headlines.append({'title': submission.title, 'selftext': submission.selftext, 'url': submission.url, 'id': submission.id, 'author': submission.author, 'score': submission.score, 'created_utc': submission.created_utc, 'upvote_ratio': submission.upvote_ratio, 'num_comments': submission.num_comments})
    return headlines

In [62]:
# store headlines in a dataframe
df = pd.DataFrame(get_headlines('euphoria'))
print(str(len(df)) + ' headlines pulled')

629 headlines pulled


In [19]:
# save to csv
df.to_csv('../dat/headlines.csv')

**Getting top posts**

In [63]:
def get_top_headlines(subreddit):
    """
    Get headlines from a subreddit
    """
    headlines = []
    for submission in reddit.subreddit(subreddit).top(limit = None):
        headlines.append({'title': submission.title, 'selftext': submission.selftext, 'url': submission.url, 'id': submission.id, 'author': submission.author, 'score': submission.score, 'created_utc': submission.created_utc, 'upvote_ratio': submission.upvote_ratio, 'num_comments': submission.num_comments})
    return headlines

In [64]:
top = pd.DataFrame(get_top_headlines('euphoria'))
print(str(len(top)) + ' headlines pulled')

994 headlines pulled


In [35]:
top.to_csv('../dat/top_headlines.csv')

Filter posts for drug references:

In [68]:
# search headline titles for a list of keywords and store in a dataframe
def get_keyword_headlines(subreddit, keywords):
    """
    Get headlines from a subreddit
    """
    headlines = []
    # for submission in reddit.subreddit(subreddit).hot(limit = None):
    for submission in reddit.subreddit(subreddit).top(limit = None):
        if any(keyword in submission.title for keyword in keywords):
            headlines.append({'title': submission.title, 'selftext': submission.selftext, 'url': submission.url, 'id': submission.id, 'author': submission.author, 'score': submission.score, 'created_utc': submission.created_utc, 'upvote_ratio': submission.upvote_ratio, 'num_comments': submission.num_comments})
    return headlines

In [67]:
keywords = ['dope', 'drug', 'opioiate', 'weed', 'heroin', 'xanax', 'opioid']
drug_post = pd.DataFrame(get_keyword_headlines('euphoria', keywords))

In [42]:
print(str(len(drug_post)) + ' headlines pulled')
drug_post.to_csv('../dat/hot_drug_headlines.csv')

9 headlines pulled


In [69]:
drug_post_top = pd.DataFrame(get_keyword_headlines('euphoria', keywords))

In [45]:
print(str(len(drug_post_top)) + ' headlines pulled')
drug_post_top.to_csv('../dat/top_drug_headlines.csv')

7 headlines pulled


Get comments from the selected headlines  
headlines were selected based on relevace of post and number of comments


In [70]:
# using the id of a post, get the comments and store in a dataframe

def get_comments(submission_id):
    """
    Get comments from a submission
    """
    comments = []
    submission = reddit.submission(id = submission_id)
    submission.comments.replace_more(limit = None)
    for comment in submission.comments.list():
        comments.append({'body': comment.body, 'author': comment.author, 'score': comment.score, 'created_utc': comment.created_utc, 'id': comment.id})
    return comments

**Pulling comments on post: sqhl33**  
Content: *Question: Does euphoria make you less likely to try drugs? Or are you more curious than you were before?*

In [71]:
comments_post1 = pd.DataFrame(get_comments('sqhl33'))
comments_post1['post'] = 'sqhl33'

In [50]:
print(str(len(comments_post1)) + ' comments pulled')
comments_post1.to_csv('../dat/comments_post1.csv')

971 comments pulled


**Pulling comments on post: sn2vpk**  
Content: *Not enough people are talking about Elliot's response to Rue telling him about her plan to get "free" drugs from Laurie*

In [73]:
comments_post2 = pd.DataFrame(get_comments('sn2vpk'))
comments_post2['post'] = 'sn2vpk'

In [52]:
print(str(len(comments_post2)) + ' comments pulled')
comments_post1.to_csv('../dat/comments_post2.csv')

523 comments pulled


**Pulling comments on post: smur2x**  
Content: *As an ex-opioid addict, Zendaya's withdrawal scenes are the most realistic portrayal I've ever seen before. her acting is phenomenal.*

In [74]:
comments_post3 = pd.DataFrame(get_comments('smur2x'))
comments_post3['post'] = 'smur2x'

In [55]:
print(str(len(comments_post3)) + ' comments pulled')
comments_post1.to_csv('../dat/comments_post3.csv')

215 comments pulled


In [75]:
# join dataframes to make analysis dataframe
analysis_df = pd.concat([comments_post1, comments_post2, comments_post3])

In [76]:
analysis_df.to_csv('../dat/all_comments.csv')