## Getting API Keys

In [1]:
from dotenv import load_dotenv
import os

# Load variables from a local .env file into the process environment so the
# Reddit credentials are never hard-coded in the notebook.
load_dotenv()

# Each value is None when the corresponding variable is not set.
REDDIT_ID = os.environ.get("REDDIT_CLIENT_ID")
REDDIT_SECRET = os.environ.get("REDDIT_CLIENT_SECRET")

## 📦 Import required libraries

In [2]:
# 📚 Imports
import praw
import pandas as pd
import re
import random
import time
from datetime import datetime, timedelta
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
import nltk
# Fetch the NLTK "punkt" data package (the log reports it as up-to-date when
# it is already present locally).
# NOTE(review): presumably needed for word_tokenize, which is imported above
# but not called in the cells visible here — confirm this is still required.
nltk.download("punkt")

[nltk_data] Downloading package punkt to /home/codespace/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## 🔑 Reddit API Setup (Read-only mode)

In [3]:
# Build the PRAW client from the credentials read from the environment.
# No username/password is passed; per the section heading this instance is
# intended for read-only use.
reddit = praw.Reddit(
    client_id=REDDIT_ID,
    client_secret=REDDIT_SECRET,
    user_agent="youtube_to_reddit_sentiment"
)

## 📥 Load video titles


In [4]:
# Load the YouTube dataset; its "video_title" column supplies the Reddit
# search queries used by the collector below.
youtube_df = pd.read_csv("../data/youtube_data.csv")

# Time window for the Reddit search. PRAW only accepts fixed ranges
# (e.g. "week" or "month"), not arbitrary dates.
time_filter = "month"

## 🧹 Clean text

In [5]:
def clean_text(text):
    """Normalise a piece of text for sentiment scoring.

    URLs (anything starting with ``http`` up to the next whitespace) are
    removed first, then every character that is not an ASCII letter, a digit
    or whitespace is dropped, and the result is lower-cased. Whitespace is
    left as-is, so gaps left by removed tokens are not collapsed.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned, lower-cased string.
    """
    without_links = re.sub(r"http\S+", "", text)
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", without_links)
    return alnum_only.lower()

## 📊 Sentiment analysis


In [6]:
# A single shared analyzer instance, created once and reused for every call.
analyzer = SentimentIntensityAnalyzer()


def analyze_sentiment(text):
    """Return the "compound" entry of the analyzer's polarity scores for ``text``."""
    scores = analyzer.polarity_scores(text)
    return scores["compound"]

In [7]:
# 😃 Convert score to emoji
def sentiment_emoji(score, threshold=0.5):
    """Map a sentiment score to an emoji label.

    Args:
        score: Numeric sentiment score, e.g. the compound value returned by
            ``analyze_sentiment``.
        threshold: Non-negative cut-off. Scores at or above ``threshold`` are
            labelled positive and scores at or below ``-threshold`` negative.
            Defaults to 0.5, the cut-off this notebook uses.
            NOTE(review): VADER's documentation suggests +/-0.05 as the usual
            cut-off for the compound score; 0.5 is much stricter — confirm
            that this is intentional.

    Returns:
        "😃" for positive, "😠" for negative, "😐" for everything in between.

    Raises:
        ValueError: If ``threshold`` is negative.
    """
    if threshold < 0:
        raise ValueError("threshold must be non-negative")
    if score >= threshold:
        return "😃"
    if score <= -threshold:
        return "😠"
    # Anything strictly inside (-threshold, threshold) counts as neutral.
    return "😐"

## 🧠 Reddit Post + Comment collector


In [8]:
%%time
# Search settings, named so they can be tuned in one place.
SEARCH_SUBREDDIT = "all"
POSTS_PER_TOPIC = 5
COMMENTS_PER_POST = 3

# One dict per (Reddit post, comment) pair; turned into a DataFrame afterwards.
all_data = []

# Skip missing titles: a NaN is not a usable search query.
for topic in youtube_df["video_title"].dropna().unique():
    posts = reddit.subreddit(SEARCH_SUBREDDIT).search(
        query=topic,
        limit=POSTS_PER_TOPIC,
        time_filter=time_filter,
    )

    for post in posts:
        # Score the title once and reuse it for both the number and the emoji.
        post_sentiment = analyze_sentiment(clean_text(post.title))
        post_data = {
            "youtube_title": topic,
            "reddit_post_title": post.title,
            "post_score": post.score,
            "post_url": post.url,
            "post_created": pd.to_datetime(post.created_utc, unit="s"),
            "post_sentiment": post_sentiment,
            "post_sentiment_emoji": sentiment_emoji(post_sentiment),
        }

        # ⛓️ Add the first few comments per post.
        # replace_more(limit=0): per the PRAW docs this removes the unexpanded
        # "more comments" placeholders, so every item below has a body.
        post.comments.replace_more(limit=0)
        # Posts without comments contribute no rows at all.
        for comment in post.comments[:COMMENTS_PER_POST]:
            cleaned = clean_text(comment.body)
            # Same here: one scoring call feeds both output columns.
            comment_sentiment = analyze_sentiment(cleaned)
            all_data.append({
                **post_data,
                "comment": comment.body,
                "cleaned_comment": cleaned,
                "comment_sentiment": comment_sentiment,
                "comment_sentiment_emoji": sentiment_emoji(comment_sentiment),
                "comment_author": str(comment.author),
                "comment_score": comment.score,
            })

CPU times: user 2.23 s, sys: 178 ms, total: 2.4 s
Wall time: 1min 56s


## Analyzing the DataFrame

In [9]:
# Each dict appended to all_data (one per Reddit post/comment pair) becomes a row.
df = pd.DataFrame(all_data)

In [10]:
# Display the collected rows (long frames are shown as a truncated preview).
df

Unnamed: 0,youtube_title,reddit_post_title,post_score,post_url,post_created,post_sentiment,post_sentiment_emoji,comment,cleaned_comment,comment_sentiment,comment_sentiment_emoji,comment_author,comment_score
0,FULL SEGMENT: Randy Orton ROCKS John Cena with...,Post WWE Raw 4/21/2025 Show Discussion Thread ...,190,https://www.reddit.com/r/SquaredCircle/comment...,2025-04-22 02:53:50,0.0000,😐,Corey Graves watching Pat get choked out\n\nht...,corey graves watching pat get choked out\n\n,-0.6486,😠,breakourbones,365
1,FULL SEGMENT: Randy Orton ROCKS John Cena with...,Post WWE Raw 4/21/2025 Show Discussion Thread ...,190,https://www.reddit.com/r/SquaredCircle/comment...,2025-04-22 02:53:50,0.0000,😐,“The role of Brock Lesner will now be played b...,the role of brock lesner will now be played by...,0.3400,😐,ShippingNotIncluded,515
2,FULL SEGMENT: Randy Orton ROCKS John Cena with...,Post WWE Raw 4/21/2025 Show Discussion Thread ...,190,https://www.reddit.com/r/SquaredCircle/comment...,2025-04-22 02:53:50,0.0000,😐,"Seth, Bron and Heyman as a faction is going to...",seth bron and heyman as a faction is going to ...,-0.4576,😐,lunaticarchitect,231
3,FULL SEGMENT: Randy Orton ROCKS John Cena with...,Post WWE SmackDown Discussion Thread - April 1...,74,https://www.reddit.com/r/SquaredCircle/comment...,2025-04-19 02:59:55,0.0000,😐,Cena selling a single Cross Rhodes like death ...,cena selling a single cross rhodes like death ...,0.2006,😐,PepsiPlunge19,196
4,FULL SEGMENT: Randy Orton ROCKS John Cena with...,Post WWE SmackDown Discussion Thread - April 1...,74,https://www.reddit.com/r/SquaredCircle/comment...,2025-04-19 02:59:55,0.0000,😐,Cena never once had the upper hand all build. ...,cena never once had the upper hand all build h...,0.8519,😃,BathtubBobby,220
...,...,...,...,...,...,...,...,...,...,...,...,...,...
191,"The Last Of Us - Well, They Really Did It",[New Update]: My husband left our 5 year old a...,6362,https://www.reddit.com/r/BestofRedditorUpdates...,2025-04-05 04:00:10,-0.2500,😐,“He’s hiding alcohol around the house and chug...,hes hiding alcohol around the house and chuggi...,0.6808,😃,Azrael2082,5631
192,"The Last Of Us - Well, They Really Did It",[New Update]: My husband left our 5 year old a...,6362,https://www.reddit.com/r/BestofRedditorUpdates...,2025-04-05 04:00:10,-0.2500,😐,Oh my God I've been dying for this update.\n\n...,oh my god ive been dying for this update\n\nbu...,-0.4854,😐,0hn035,5131
193,"The Last Of Us - Well, They Really Did It",NEW UPDATE: My(33F) Husband(36M) may have chea...,8127,https://www.reddit.com/r/BestofRedditorUpdates...,2025-04-01 04:18:01,-0.0516,😐,#Do not comment on the original posts\n\nPleas...,do not comment on the original posts\n\nplease...,0.3182,😐,AutoModerator,1
194,"The Last Of Us - Well, They Really Did It",NEW UPDATE: My(33F) Husband(36M) may have chea...,8127,https://www.reddit.com/r/BestofRedditorUpdates...,2025-04-01 04:18:01,-0.0516,😐,When I read that the paternity test revealed t...,when i read that the paternity test revealed t...,0.5423,😃,randomndude01,9166


## Checking Downvoted Comments

In [11]:
# Filter the downvoted comments once, then print each one in full
# (the DataFrame display truncates long comment bodies).
downvoted = df.loc[df["comment_score"] < 0, ["comment", "comment_score"]]

for comment, comment_score in downvoted.itertuples(index=False, name=None):
    print(f"Comment:\n{comment}", f"Comment Score:\n{comment_score}", sep="\n\n")

Comment:
I really want Cletus to do well but not knowing where the start/finish line just gives a feeling like he doesn't care enough about what he's doing to learn. But hearing his "plan" is to make Xfinity seems like it's much more attainable now that he and Jr have a friendship

Comment Score:
-3
Comment:
stop spamming this rubbish video ffs

Comment Score:
-4


## 💾 Save to CSV

In [12]:
# Persist the collected rows; index=False keeps the pandas row index out of the file.
df.to_csv("../data/reddit_data.csv", index=False)