In [21]:
import praw
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import os
import datetime
import pandas as pd

In [22]:
def _reddit_env(name):
    """Return the value of the environment variable ``name``.

    The lookup first tries the key with a literal leading ``$`` (the spelling
    this cell originally used, e.g. ``"$REDDIT_API_CLIENT_ID"``) so that any
    environment which worked before keeps working. ``$`` is shell expansion
    syntax rather than part of a variable's name, so a variable exported the
    usual way (``export REDDIT_API_CLIENT_ID=...``) is stored under the plain
    name; that plain name is tried second.

    Args:
        name: Variable name without any ``$`` prefix.

    Returns:
        The variable's string value.

    Raises:
        KeyError: If neither spelling is present in ``os.environ``.
    """
    for key in ("$" + name, name):
        if key in os.environ:
            return os.environ[key]
    raise KeyError(
        f"Environment variable {name!r} is not set; export it before starting the kernel"
    )


# Credentials are read from the environment so no secret is stored in the notebook.
REDDIT_API_SECRET = _reddit_env("REDDIT_API_CLIENT_SECRET")
REDDIT_API_CLIENT_ID = _reddit_env("REDDIT_API_CLIENT_ID")
REDDIT_API_USERNAME = _reddit_env("REDDIT_API_USERNAME")
REDDIT_API_PASSWORD = _reddit_env("REDDIT_API_PASSWORD")

In [23]:
# Step 1: Authenticate with the Reddit API
# Every credential is one of the REDDIT_API_* values loaded from the
# environment, so nothing in this cell needs to be edited by hand.
reddit_credentials = {
    "client_id": REDDIT_API_CLIENT_ID,
    "client_secret": REDDIT_API_SECRET,
    "user_agent": 'VIX Sentiment',  # free-text description sent with the credentials
    "username": REDDIT_API_USERNAME,
    "password": REDDIT_API_PASSWORD,
}
reddit = praw.Reddit(**reddit_credentials)

In [24]:
# Define the date range: from the first day of the month roughly
# LOOKBACK_YEARS years ago, up to and including today.
LOOKBACK_YEARS = 10
DAYS_PER_YEAR = 365.24  # approximate mean year length, absorbs leap days

# Take a single snapshot of "today" so both bounds are derived from the same
# calendar day even if the cell happens to run across midnight.
today = datetime.date.today()

# date - timedelta only uses the whole-day part of the timedelta, so the
# fractional day produced by the multiplication is ignored.
lookback = datetime.timedelta(days=LOOKBACK_YEARS * DAYS_PER_YEAR)
start_date = (today - lookback).replace(day=1)  # snap to the start of that month
end_date = today
print(start_date, end_date)

2014-09-01 2024-09-29


In [70]:
SUBREDDIT_NAME = 'stocks'  # Change this to the subreddit of your choice
TOP_TIME_FILTER = 'all'    # ranking window for the "top" listing; the date range is applied when scoring
TOP_POST_LIMIT = None      # None asks PRAW for as many posts as the listing will return

subreddit = reddit.subreddit(SUBREDDIT_NAME)

# The arguments are spelled out instead of relying on PRAW's defaults: a bare
# top() call falls back to the default item limit, which cannot yield the
# ~1000 rows this analysis works with.
# The listing is materialised into a list because the object returned by
# top() can only be iterated once; with a list, the scoring cell can be
# re-run on its own without silently producing an empty frame.
top_posts = list(subreddit.top(time_filter=TOP_TIME_FILTER, limit=TOP_POST_LIMIT))

In [71]:
# Step 4: Perform sentiment analysis and store post details along with date
analyzer = SentimentIntensityAnalyzer()
POST_COLUMNS = ['title', 'sentiment', 'date']


def _utc_date(created_utc):
    """Return the UTC calendar date of a Unix timestamp given in seconds.

    ``datetime.date.fromtimestamp`` interprets the timestamp in the machine's
    local time zone, so the same post could land on different days (and
    therefore different months) depending on where the notebook runs. The
    attribute being converted is named ``created_utc``, so the conversion is
    pinned to UTC to keep the result reproducible.
    """
    return datetime.datetime.fromtimestamp(created_utc, tz=datetime.timezone.utc).date()


post_data = []
for post in top_posts:
    post_date = _utc_date(post.created_utc)

    # Only include posts within the specified date range (both ends inclusive).
    # NOTE(review): end_date is the local "today", so a post created within the
    # last few hours may compare as a day ahead in UTC and be skipped.
    if not (start_date <= post_date <= end_date):
        continue

    title = post.title
    post_data.append({
        'title': title,
        # VADER's 'compound' entry is the single summary score kept per title.
        'sentiment': analyzer.polarity_scores(title)['compound'],
        'date': post_date,
    })

# Step 5: Convert post data to a DataFrame for easier handling.
# Passing the column names explicitly keeps the frame's schema intact even when
# no post matched, so later cells that read df['date'] do not raise KeyError.
df = pd.DataFrame(post_data, columns=POST_COLUMNS)

In [72]:
# Show the scored posts: one row per post with its title, sentiment score and date.
df

Unnamed: 0,title,sentiment,date
0,"It's fucking awful seeing the ""Silver"" misinfo...",-0.6801,2021-02-01
1,Companies try to prevent people from trading G...,0.0258,2021-01-28
2,Today is a dark day for traders,0.0000,2021-01-28
3,BREAKING: Dow falls 240 points as Trump calls ...,0.0000,2020-10-06
4,GME Dedicated Thread - Breaking: CNBC engages ...,0.0000,2021-01-27
...,...,...,...
977,Biden to ban imports of Russian oil over Ukrai...,-0.5574,2022-03-08
978,Apple tells employees it's increasing its annu...,0.0000,2022-05-25
979,Tesla’s stock will be added to the S&P 500 in ...,0.0000,2020-11-30
980,Labor Shortage is the biggest concern for tech...,-0.2500,2021-10-30


In [73]:
# Step 6: Add a 'Date' column holding the first day of each post's month,
# so that every post from the same month shares one value
post_dates = pd.to_datetime(df['date'])
month_periods = post_dates.dt.to_period('M')
df['Date'] = month_periods.dt.to_timestamp()  # e.g. 2022-05-25 -> 2022-05-01

In [74]:
# Show the frame again to check the month-start 'Date' column next to the original 'date'.
df

Unnamed: 0,title,sentiment,date,Date
0,"It's fucking awful seeing the ""Silver"" misinfo...",-0.6801,2021-02-01,2021-02-01
1,Companies try to prevent people from trading G...,0.0258,2021-01-28,2021-01-01
2,Today is a dark day for traders,0.0000,2021-01-28,2021-01-01
3,BREAKING: Dow falls 240 points as Trump calls ...,0.0000,2020-10-06,2020-10-01
4,GME Dedicated Thread - Breaking: CNBC engages ...,0.0000,2021-01-27,2021-01-01
...,...,...,...,...
977,Biden to ban imports of Russian oil over Ukrai...,-0.5574,2022-03-08,2022-03-01
978,Apple tells employees it's increasing its annu...,0.0000,2022-05-25,2022-05-01
979,Tesla’s stock will be added to the S&P 500 in ...,0.0000,2020-11-30,2020-11-01
980,Labor Shortage is the biggest concern for tech...,-0.2500,2021-10-30,2021-10-01


In [75]:
# Step 7: Average the sentiment scores of all posts that fall in the same month
sentiment_by_post = df['sentiment']
monthly_sentiment = sentiment_by_post.groupby(df['Date']).mean()

# Step 8: Print the resulting month-by-month series
print(monthly_sentiment)

Date
2018-10-01    0.177900
2020-02-01    0.042900
2020-03-01   -0.163260
2020-04-01   -0.142160
2020-05-01   -0.017238
2020-06-01    0.075214
2020-07-01   -0.000047
2020-08-01    0.138700
2020-09-01    0.017105
2020-10-01    0.016244
2020-11-01    0.228314
2020-12-01    0.136277
2021-01-01    0.075338
2021-02-01   -0.044445
2021-03-01    0.030500
2021-04-01    0.042041
2021-05-01    0.104120
2021-06-01    0.010756
2021-07-01    0.025217
2021-08-01    0.150259
2021-09-01    0.035139
2021-10-01   -0.084852
2021-11-01   -0.013243
2021-12-01   -0.066500
2022-01-01   -0.052686
2022-02-01   -0.014743
2022-03-01   -0.016165
2022-04-01    0.102932
2022-05-01   -0.024131
2022-06-01    0.051614
2022-07-01   -0.105674
2022-08-01   -0.010311
2022-09-01   -0.160538
2022-10-01   -0.069515
2022-11-01    0.078155
2022-12-01   -0.173900
2023-01-01   -0.020045
2023-02-01   -0.094991
2023-03-01   -0.048173
2023-04-01   -0.154482
2023-05-01   -0.042725
2023-06-01   -0.049100
2023-07-01   -0.471825
2023-0