In [4]:
import os
import praw
import time
from CONFIG import SUBMISSION_DATA_FIELDS
import pandas as pd
from datetime import datetime
from dotenv import find_dotenv, load_dotenv

In [5]:
load_dotenv(find_dotenv())

CLIENT_ID=os.getenv("CLIENT_ID")
SECRET_TOKEN=os.getenv("SECRET_TOKEN")
USERNAME=os.getenv("USERNAME")
PASSWORD=os.getenv("PASSWORD")

reddit = praw.Reddit(client_id=CLIENT_ID,
                     client_secret=SECRET_TOKEN,
                     password=PASSWORD,
                     username=USERNAME,
                     user_agent='Tutorial')

print(reddit.user.me())

andreaLolli


In [6]:

# Function to search for a query and list subreddits
def search_and_list_subreddits(query, limit=10):
    # Search Reddit for the query
    search_results = reddit.subreddit('all').search(query, limit=limit)
    
    # Collect the subreddits from the search results
    subreddits = [result.subreddit.display_name for result in search_results]

    # Print the subreddits
    print(f"Top {limit} subreddits for the query '{query}':")
    return subreddits

In [10]:
subreddits_names = search_and_list_subreddits(query="Drake OR Kendrick Lamar")

Top 10 subreddits for the query 'Drake OR Kendrick Lamar':


In [11]:
def fetch_submission_data(subreddit_name, keywords, start_timestamp, fields=SUBMISSION_DATA_FIELDS, limit=100):
    """
    Fetches submission data from a given subreddit based on search keywords.

    Args:
        subreddit_name (str): The name of the subreddit to search in.
        keywords (str): The search keywords.
        limit (int): The maximum number of search results to fetch.

    Returns:
        pd.DataFrame: A DataFrame containing submission data.
    """
    submissions_data = []

    for submission in reddit.subreddit(subreddit_name).search(keywords, limit=limit):
        submission_data = {}
        if submission.created_utc > start_timestamp:
            for field in fields:
                try:
                    submission_data[field] = getattr(submission, field, '')
                except AttributeError:
                    submission_data[field] = ''

            submissions_data.append(submission_data)    

    return submissions_data

In [17]:
submissions_data_raw = []

start_date = "2024-03-22"
time_struct = time.strptime(start_date, "%Y-%m-%d")
start_timestamp = int(time.mktime(time_struct))

for subreddit_name in subreddits_names:
    data_raw = fetch_submission_data(
        subreddit_name=subreddits_names[0],
        start_timestamp=start_timestamp,
        keywords="Kendrick Lamar AND Drake",
        limit=10000
        ) # limit 10 --> 35s for 1 subreddit page

    submissions_data_raw.extend(data_raw)

df = pd.DataFrame(submissions_data_raw)
df.head()

In [14]:
df.shape

(180, 27)

In [16]:
df.to_csv("data/rap_beef.csv")

In [9]:
# Copy the raw DataFrame
df = submissions_data_raw.copy()

# Convert 'created_utc' from timestamp to datetime
df["created_utc"] = df["created_utc"].apply(lambda x: datetime.fromtimestamp(x) if x else None)

print("Converted 'created_utc' values:")
print(df["created_utc"].head())

# Sort the DataFrame by 'created_utc' and reset the index
df = df.sort_values(by=["created_utc"]).reset_index(drop=True)

# Display the sorted DataFrame
print("Sorted DataFrame:")
df.head()

Converted 'created_utc' values:
0   2024-05-04 05:06:47
1   2024-05-08 02:05:51
2   2024-04-26 18:34:16
3   2024-05-14 01:30:17
4   2019-02-07 20:06:46
Name: created_utc, dtype: datetime64[ns]
Sorted DataFrame:


Unnamed: 0,author,author_flair_text,clicked,comments,created_utc,distinguished,edited,id,is_original_content,is_self,...,poll_data,saved,score,selftext,spoiler,stickied,subreddit,title,upvote_ratio,url
0,jammasterajay,,False,"(efyqywi, efypuiq, efyqte9, efz0yr3, efyriqs, ...",2019-02-07 20:06:46,,False,ao7881,False,False,...,,False,14567,,False,False,hiphopheads,"Kendrick Lamar, Drake & Childish Gambino all d...",0.99,https://www.nytimes.com/2019/02/07/arts/music/...
1,FlyGloomy,,False,"(l1dkran, l1e5m9h, l1du44t, l1dsiv5, l1dzrvy, ...",2024-04-26 18:34:16,,False,1cdpxr5,False,False,...,,False,3740,,False,False,hiphopheads,Drake Takes Down Kendrick Lamar Diss After Leg...,0.98,https://pitchfork.com/news/drake-takes-down-ke...
2,Potential_Meat_5103,,False,"(l2i3xba, l2htxo2, l2htwk2, l2hr2ye, l2hrsyb, ...",2024-05-04 05:06:47,,1714792098.0,1cjqi2e,False,True,...,,False,11146,https://www.instagram.com/reel/C6h9xrguXBD/?ig...,False,False,hiphopheads,[SHOTS FIRED] Drake - Family Matters (Kendrick...,0.85,https://www.reddit.com/r/hiphopheads/comments/...
3,flowerhoney10,,False,"(l3283of, l326ezd, l32878p, l32lz42, l32bsze, ...",2024-05-08 02:05:51,,False,1cmqv0z,False,False,...,,False,4710,,False,False,hiphopheads,Can Drake Recover After His Battle With Kendri...,0.89,https://www.billboard.com/music/rb-hip-hop/dra...
4,KingChipotle,,False,"(l3xggua, l3xep1z, l3xlhm6, l3xl27p, l3xpftu, ...",2024-05-14 01:30:17,,False,1crdt3t,False,False,...,,False,3413,,False,False,hiphopheads,The Drake-Kendrick Lamar social media firestor...,0.96,https://brooklyneagle.com/articles/2024/05/13/...


In [10]:
def extract_ids(comment_list):
    l= []
    for comment in comment_list:
      l.append(str(comment))
    return l

In [26]:
df["comments_str"] = df["comments"].apply(extract_ids)
df["count_comments"] = df["comments_str"].apply(lambda x: len(x))

In [30]:
post = df.iloc[0]
post["title"]

'Kendrick Lamar, Drake & Childish Gambino all declined offers to perform at the 2019 Grammys'

In [39]:
post["comments"]

<praw.models.comment_forest.CommentForest at 0x7f7b91bd9d50>

In [35]:
replies = {}

for c in post["comments"]:
    try:
        replies[c] = c.replies
    except:
        print(c)
        print(c.comments)

<MoreComments count=100, children=['efzh66g', 'efzd2mg', 'efza7t4', '...']>
<bound method MoreComments.comments of <MoreComments count=100, children=['efzh66g', 'efzd2mg', 'efza7t4', '...']>>


In [36]:
count = {}
for c in list(replies.keys()):
    rep = replies[c]
    i = 0
    for r in rep:
        i+=1
    count[c]=i 

In [37]:
sorted(count.items(), key=lambda x: x[1], reverse=True)[0]

(Comment(id='efyqywi'), 13)

In [44]:
for c in post["comments"]:
  if c == 'efyqywi':
    print('Text of the comment:\n',"="*30)
    print(c.body)
    print('\nOne replie to the comment:\n', "="*30)
    print([x for x in c.replies][0].body)

Text of the comment:
Grammys need rappers more than rappers need Grammys. In the words of the most forthright of MCs “who gives a fuck about a gotdamn Grammy” 

One replie to the comment:
They still have no respect for rap even though it’s the largest genre rn. Even if they’re nominated, they shouldn’t even go. 
