In [4]:
# Uncomment to install the notebook's dependencies into the active kernel's environment:
# %pip install praw yfinance pandas nltk matplotlib seaborn transformers

In [None]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import nltk
import pandas as pd
import praw
import seaborn as sns
import yfinance as yf
from nltk.sentiment import SentimentIntensityAnalyzer
from transformers import pipeline

# Initialize the Reddit API client.
# Credentials are read from environment variables (REDDIT_CLIENT_ID,
# REDDIT_CLIENT_SECRET, REDDIT_USER_AGENT) so real secrets never need to be
# saved inside the notebook. The placeholder strings are kept as fallbacks,
# so replacing them in place still works as before.
reddit = praw.Reddit(
    client_id=os.environ.get('REDDIT_CLIENT_ID', 'YOUR_CLIENT_ID'),
    client_secret=os.environ.get('REDDIT_CLIENT_SECRET', 'YOUR_CLIENT_SECRET'),
    user_agent=os.environ.get('REDDIT_USER_AGENT', 'YOUR_USER_AGENT'),
)

In [None]:
# Fetch Reddit Posts
def get_reddit_posts(subreddit, limit=100):
    """Collect the "hot" listing of a subreddit into a DataFrame.

    Uses the module-level ``reddit`` client.

    Parameters
    ----------
    subreddit : str
        Subreddit name passed to ``reddit.subreddit``.
    limit : int, default 100
        Forwarded to ``.hot(limit=...)``.

    Returns
    -------
    pandas.DataFrame
        Columns ``title``, ``selftext`` and ``created``. ``created`` holds
        timezone-naive timestamps expressed in UTC. The three columns (and a
        datetime dtype for ``created``) are present even when the listing is
        empty, so downstream ``set_index('created')`` calls do not fail with
        a KeyError.
    """
    columns = ['title', 'selftext', 'created']
    posts = [
        {
            'title': post.title,
            'selftext': post.selftext,
            # created_utc is epoch seconds; unit='s' yields a naive UTC
            # timestamp without the deprecated datetime.utcfromtimestamp.
            'created': pd.to_datetime(post.created_utc, unit='s'),
        }
        for post in reddit.subreddit(subreddit).hot(limit=limit)
    ]
    frame = pd.DataFrame(posts, columns=columns)
    # Guarantees a datetime64 column even for an empty frame (otherwise object).
    frame['created'] = pd.to_datetime(frame['created'])
    return frame

In [None]:
# Fetch Stock Prices
def get_stock_data(ticker, period='7d', interval='1d'):
    """Fetch price history for ``ticker`` through yfinance.

    ``period`` and ``interval`` are forwarded unchanged to
    ``yfinance.Ticker.history``; its return value is passed back as-is.
    """
    return yf.Ticker(ticker).history(period=period, interval=interval)

In [None]:
# Sentiment Analysis using VADER
# Request the 'vader_lexicon' resource through NLTK's downloader before the
# analyzer is constructed (presumably the analyzer needs it — keep this order).
nltk.download('vader_lexicon')
# Module-level analyzer instance; analyze_sentiment_vader() reads it as `sia`.
sia = SentimentIntensityAnalyzer()

In [None]:
def analyze_sentiment_vader(text):
    """Return the ``'compound'`` polarity score that VADER assigns to ``text``.

    Relies on the module-level ``sia`` analyzer.
    """
    polarity = sia.polarity_scores(text)
    return polarity['compound']

In [None]:
# Sentiment Analysis using FinBERT
# Module-level Hugging Face pipeline, built once here and reused by
# analyze_sentiment_finbert() for every call.
finbert = pipeline(task="sentiment-analysis", model="ProsusAI/finbert")

def analyze_sentiment_finbert(text):
    """Score ``text`` with the module-level ``finbert`` pipeline.

    Parameters
    ----------
    text : str
        Text to classify. Non-string values (e.g. ``None`` or NaN from a
        DataFrame column) and blank strings are treated as neutral.

    Returns
    -------
    int
        ``1`` for a ``'positive'`` label, ``0`` for ``'neutral'`` (or
        empty/non-string input) and ``-1`` for ``'negative'``.
    """
    if not isinstance(text, str) or not text.strip():
        return 0  # Neutral if there is nothing to classify

    # Truncate in the tokenizer rather than slicing characters: a 512-character
    # slice is not a 512-token limit and can still overflow the model's maximum
    # sequence length once special tokens are added.
    result = finbert(text, truncation=True, max_length=512)
    sentiment_score = {'positive': 1, 'neutral': 0, 'negative': -1}
    return sentiment_score[result[0]['label']]

In [None]:
# Correlation Analysis
def analyze_correlation(reddit_data, stock_data):
    """Correlate daily mean sentiment with the daily closing price.

    Parameters
    ----------
    reddit_data : pandas.DataFrame
        Must contain a datetime column ``created`` and a numeric column
        ``sentiment``. Other columns (e.g. text) are ignored.
    stock_data : pandas.DataFrame
        Must contain a ``Close`` column and be indexed by datetime; the index
        may be timezone-aware.

    Returns
    -------
    pandas.DataFrame
        The 2x2 correlation matrix of ``sentiment`` and ``Close`` over the
        calendar days present in both inputs.
    """
    # Work on a timezone-naive index so it can be aligned with the price index.
    post_times = pd.DatetimeIndex(reddit_data['created'])
    if post_times.tz is not None:
        post_times = post_times.tz_convert('UTC').tz_localize(None)

    # Average only the numeric sentiment column; taking the mean of the whole
    # frame would also hit the text columns, which raises on pandas >= 2.0.
    sentiment = reddit_data[['sentiment']].copy()
    sentiment.index = post_times
    daily_sentiment = sentiment.resample('D').mean()

    # Reduce the price index to naive calendar days (dropping the timezone
    # keeps the local wall-clock date of each bar).
    prices = stock_data[['Close']].copy()
    price_days = pd.DatetimeIndex(prices.index)
    if price_days.tz is not None:
        price_days = price_days.tz_localize(None)
    prices.index = price_days.normalize()
    # With intraday intervals several bars share a day: keep the last close.
    daily_close = prices.groupby(level=0).last()

    merged = daily_sentiment.merge(
        daily_close, left_index=True, right_index=True, how='inner'
    )
    return merged.corr()

In [None]:
# Visualization
def plot_sentiment_vs_stock(reddit_data, stock_data):
    """Plot the closing price and the daily mean sentiment over time.

    Parameters
    ----------
    reddit_data : pandas.DataFrame
        Must contain a datetime column ``created`` and a numeric column
        ``sentiment``.
    stock_data : pandas.DataFrame
        Must contain a ``Close`` column and be indexed by datetime.

    The two series are drawn on separate y-axes that share the date axis:
    prices and sentiment scores live on very different scales, so a single
    axis would flatten the sentiment line. The figure is shown with
    ``plt.show()``; nothing is returned.
    """
    # Average only the numeric sentiment column; taking the mean of the whole
    # frame would also hit the text columns, which raises on pandas >= 2.0.
    daily_sentiment = (
        reddit_data.set_index('created')[['sentiment']].resample('D').mean()
    )

    fig, price_ax = plt.subplots(figsize=(12, 6))
    sentiment_ax = price_ax.twinx()  # second y-axis sharing the date axis

    sns.lineplot(
        data=stock_data, x=stock_data.index, y='Close',
        label='Stock Price', color='blue', ax=price_ax, legend=False,
    )
    sns.lineplot(
        data=daily_sentiment, x=daily_sentiment.index, y='sentiment',
        label='Sentiment Score', color='red', ax=sentiment_ax, legend=False,
    )

    price_ax.set_xlabel('Date')
    price_ax.set_ylabel('Stock Price')
    sentiment_ax.set_ylabel('Sentiment Score')
    price_ax.set_title('Stock Price vs Sentiment Score')

    # Each axis keeps its own handles; merge them into a single legend.
    price_handles, price_labels = price_ax.get_legend_handles_labels()
    sent_handles, sent_labels = sentiment_ax.get_legend_handles_labels()
    price_ax.legend(price_handles + sent_handles, price_labels + sent_labels)

    fig.tight_layout()
    plt.show()

In [None]:
# Example Usage
# End-to-end run: fetch posts and prices, score each post title, then report
# the correlation and draw the comparison plot.
if __name__ == "__main__":
    subreddit = 'StockMarket'
    stock_ticker = 'AAPL'
    
    # Both fetchers run with their defaults: limit=100 hot posts, and
    # period='7d' / interval='1d' for the price history.
    reddit_data = get_reddit_posts(subreddit)
    stock_data = get_stock_data(stock_ticker)
    
    # Apply Sentiment Analysis (Choose VADER or FinBERT)
    # Only the post title is scored; 'selftext' is fetched but not used here.
    # Swap in analyze_sentiment_vader to use VADER instead.
    reddit_data['sentiment'] = reddit_data['title'].apply(analyze_sentiment_finbert)
    
    # Analyze correlation
    correlation = analyze_correlation(reddit_data, stock_data)
    print("Correlation between Sentiment and Stock Price:\n", correlation)
    
    # Plot Sentiment vs Stock Price
    plot_sentiment_vs_stock(reddit_data, stock_data)