# Reddit Cryptocurrency Sentiment Analysis

This notebook performs sentiment analysis on cryptocurrency-related posts from Reddit. It uses:
- PRAW for Reddit API access
- NLTK (VADER lexicon) for sentiment analysis
- Pandas for data handling
- Matplotlib and Seaborn for visualization

In [None]:
# Install required packages
!pip install praw pandas nltk python-dotenv matplotlib seaborn

In [None]:
import praw
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
import os
from dotenv import load_dotenv

# Download the VADER lexicon that the SentimentIntensityAnalyzer imported above
# relies on. quiet=True keeps the downloader from printing progress output.
nltk.download('vader_lexicon', quiet=True)

## Setup Reddit API Credentials

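You'll need to set up your Reddit API credentials. The analyzer reads `REDDIT_CLIENT_ID`, `REDDIT_CLIENT_SECRET`, and `REDDIT_USER_AGENT` from environment variables. You can either:
1. Define them in a `.env` file (recommended)
2. Set the credentials directly in this notebook (do not commit real credentials)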

In [None]:
# Option 1: Load from .env file
# load_dotenv() loads the variables defined in a .env file into the process
# environment, where they can be read with os.getenv.
load_dotenv()

# Option 2: Set credentials directly
# Uncomment the lines below and fill in your own values. These are the three
# variables RedditSentimentAnalyzer reads via os.getenv.
# Do not commit real credentials to version control.
# os.environ['REDDIT_CLIENT_ID'] = 'your_client_id'
# os.environ['REDDIT_CLIENT_SECRET'] = 'your_client_secret'
# os.environ['REDDIT_USER_AGENT'] = 'your_user_agent'

In [None]:
class RedditSentimentAnalyzer:
    """Reddit sentiment analyzer for cryptocurrency discussions.

    Fetches submissions from a subreddit, scores their title and body with the
    analyzer stored in ``self.sia`` and summarises the title sentiment.
    """

    # Subreddit listing methods scrape_posts can dispatch to; anything else
    # falls back to 'new'.
    _SORT_METHODS = ('new', 'hot', 'top', 'rising')

    # Compound-score cut-offs used to label a post Positive / Negative.
    _POSITIVE_THRESHOLD = 0.05
    _NEGATIVE_THRESHOLD = -0.05

    # Keys read from each polarity_scores() result.
    _SENTIMENT_PARTS = ('compound', 'pos', 'neg', 'neu')

    # Fixed output schema so the DataFrame has the same columns whether or not
    # any submission has a body, and even when nothing was retrieved.
    _COLUMNS = (
        'title',
        'text',
        'score',
        'num_comments',
        'title_sentiment_compound',
        'title_sentiment_pos',
        'title_sentiment_neg',
        'title_sentiment_neu',
        'text_sentiment_compound',
        'text_sentiment_pos',
        'text_sentiment_neg',
        'text_sentiment_neu',
    )

    def __init__(self):
        """Initialize Reddit API connection and sentiment analyzer.

        Credentials are read from the REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET
        and REDDIT_USER_AGENT environment variables.
        """
        self.reddit = praw.Reddit(
            client_id=os.getenv('REDDIT_CLIENT_ID'),
            client_secret=os.getenv('REDDIT_CLIENT_SECRET'),
            user_agent=os.getenv('REDDIT_USER_AGENT'),
            read_only=True,
            check_for_async=False
        )
        self.sia = SentimentIntensityAnalyzer()

    def scrape_posts(self, query='bitcoin', limit=100, subreddit='CryptoCurrency', sort='new'):
        """Scrape and analyze Reddit posts.

        Args:
            query: Text a submission must contain (case-insensitive, in its
                title or body) to be kept. Pass None or '' to keep every
                submission.
            limit: Maximum number of submissions requested from the listing.
                This is applied before the query filter, so fewer rows may be
                returned.
            subreddit: Name of the subreddit to read from.
            sort: One of 'new', 'hot', 'top' or 'rising'. Any other value
                falls back to 'new'.

        Returns:
            pandas.DataFrame with one row per kept submission and the columns
            listed in ``_COLUMNS``. The ``text_sentiment_*`` columns are NaN
            for submissions without a body. The frame is empty (but still has
            all columns) when nothing matched.
        """
        subreddit_instance = self.reddit.subreddit(subreddit)

        # Pick the listing method by name, defaulting to 'new' for unknown sorts.
        listing_name = sort if sort in self._SORT_METHODS else 'new'
        submissions = getattr(subreddit_instance, listing_name)(limit=limit)

        needle = str(query).casefold() if query else None
        missing = float('nan')

        posts_data = []
        for submission in submissions:
            title = submission.title
            body = submission.selftext or ''

            # Skip submissions that do not mention the query at all.
            if needle and needle not in f'{title}\n{body}'.casefold():
                continue

            title_sentiment = self.sia.polarity_scores(title)
            text_sentiment = self.sia.polarity_scores(body) if body else None

            post_data = {
                'title': title,
                'text': body,
                'score': submission.score,
                'num_comments': submission.num_comments,
            }
            for part in self._SENTIMENT_PARTS:
                post_data[f'title_sentiment_{part}'] = title_sentiment[part]
                # Always emit the key so every row has the same schema.
                post_data[f'text_sentiment_{part}'] = (
                    text_sentiment[part] if text_sentiment else missing
                )

            posts_data.append(post_data)

        return pd.DataFrame(posts_data, columns=list(self._COLUMNS))

    @classmethod
    def _categorize_sentiment(cls, score):
        """Map a compound score to 'Positive', 'Negative' or 'Neutral'."""
        if score > cls._POSITIVE_THRESHOLD:
            return 'Positive'
        if score < cls._NEGATIVE_THRESHOLD:
            return 'Negative'
        return 'Neutral'

    def analyze_sentiment(self, df):
        """Analyze overall sentiment of posts.

        Args:
            df: DataFrame with a 'title_sentiment_compound' column, as
                produced by scrape_posts.

        Returns:
            dict with 'total_posts', 'sentiment_distribution' (label -> count)
            and 'average_sentiment' (mean title compound score), or
            ``{'error': ...}`` when there is no data.
        """
        if df is None or df.empty:
            return {'error': 'No data available for analysis'}

        compound = df['title_sentiment_compound']
        sentiment_counts = compound.apply(self._categorize_sentiment).value_counts()

        return {
            'total_posts': len(df),
            'sentiment_distribution': sentiment_counts.to_dict(),
            'average_sentiment': float(compound.mean())
        }

## Example Usage

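Let's analyze sentiment for cryptocurrency-related posts. The cell below tries each sort order (`new`, `hot`, `top`, `rising`) in turn, stops at the first one that returns posts, prints a sentiment summary, and saves the results to a CSV file.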

In [None]:
# Build the analyzer once; it is reused for every attempt below
analyzer = RedditSentimentAnalyzer()

# Walk through the sort orders and stop at the first one that yields posts
for sort_method in ('new', 'hot', 'top', 'rising'):
    print(f"\nTrying {sort_method} posts...")
    posts_df = analyzer.scrape_posts(sort=sort_method, limit=50)

    # Nothing came back for this sort order: move on to the next one
    if posts_df.empty:
        continue

    print(f"Successfully retrieved posts using {sort_method} sorting")
    sentiment_summary = analyzer.analyze_sentiment(posts_df)
    print("\nSentiment Analysis Results:")
    print(sentiment_summary)

    # Persist the scored posts, one CSV per sort order
    output_file = f'reddit_sentiment_{sort_method}.csv'
    posts_df.to_csv(output_file, index=False)
    print(f"\nResults saved to {output_file}")
    break
else:
    # Reached only when no sort order produced any posts
    print("\nFailed to retrieve posts with any sorting method")

## Visualize the Results

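Let's create some visualizations of our sentiment analysis results: a pie chart of the sentiment distribution and a scatter plot of post score against title sentiment.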

In [None]:
try:
    import matplotlib.pyplot as plt
    import seaborn as sns

    # posts_df is expected to come from the example-usage cell
    if 'posts_df' not in locals() or posts_df.empty:
        print("No data available for visualization")
    else:
        def _sentiment_label(compound):
            """Bucket a compound score into Positive / Negative / Neutral."""
            if compound > 0.05:
                return 'Positive'
            if compound < -0.05:
                return 'Negative'
            return 'Neutral'

        # Two panels side by side
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

        # Left panel: share of posts per sentiment label
        sentiment_categories = (
            posts_df['title_sentiment_compound']
            .apply(_sentiment_label)
            .value_counts()
        )
        sentiment_categories.plot(kind='pie', autopct='%1.1f%%', ax=ax1)
        ax1.set_title('Sentiment Distribution')

        # Right panel: post score against title sentiment
        sns.scatterplot(data=posts_df, x='title_sentiment_compound', y='score', ax=ax2)
        ax2.set_title('Post Score vs. Sentiment')
        ax2.set_xlabel('Sentiment Score')
        ax2.set_ylabel('Post Score')

        plt.tight_layout()
        plt.show()
except ImportError:
    # Plotting libraries are optional; tell the user how to get them
    print("Please install matplotlib and seaborn for visualizations:")
    print("!pip install matplotlib seaborn")
except Exception as e:
    print(f"Error creating visualizations: {str(e)}")