# Reddit Scraper

In [16]:
import praw
import pandas as pd
import os
import time
import datetime

Personal credentials for the Reddit API have been removed for privacy reasons.

In [None]:
# Shared PRAW client used by every search in this notebook. Only the app
# credentials and a user agent are supplied (no username/password), so the
# session is presumably read-only -- confirm against the PRAW documentation.
# The placeholder strings must be replaced with real values before running.
reddit = praw.Reddit(
    client_id="_____________",      # Reddit app client ID (redacted)
    client_secret="______________",  # Reddit app client secret (redacted)
    user_agent="_______________",  # user agent string sent with requests (redacted)
)

## Main Code

In [None]:
# Per-coin scrape configuration, keyed by ticker symbol.
#   'search_terms': query fragment naming the coin; scrape_for_coin joins it
#                   with TOPIC_KEYWORDS using AND to build the search query.
#   'subreddits':   subreddit names to search, visited in the listed order.
# Some subreddits appear under several coins; de-duplication of posts happens
# only within a single coin's scrape, not across coins.
TARGET_COINS = {
    'BTC': {
        'search_terms': '(bitcoin OR btc)',
        'subreddits': ['CryptoCurrency', 'Bitcoin', 'BitcoinMarkets', 'btc', 'CryptoMarkets']
    },
    'ETH': {
        'search_terms': '(ethereum OR eth)',
        'subreddits': ['CryptoCurrency', 'ethereum', 'ethtrader', 'CryptoMarkets']
    },
    'XRP': {
        'search_terms': '(xrp OR ripple)',
        'subreddits': ['CryptoCurrency', 'Ripple', 'XRP', 'CryptoMarkets']
    }
}
# Topic filter applied to every coin: a post must match the coin's search
# terms AND at least one of these OR-ed keywords.
TOPIC_KEYWORDS = '(price OR sentiment OR bull OR bear OR prediction OR analysis OR value OR news OR bullish OR bearish OR volume OR engagement OR liquidation OR liquidity OR Trump)'

In [None]:
# Maximum number of search results requested per subreddit (passed as `limit`
# to subreddit.search in scrape_for_coin).
# NOTE(review): the recorded run output shows at most ~250 posts per
# subreddit, so this limit does not appear to be reached in practice.
POST_LIMIT = 1000
# Maximum number of comments stored for each post.
COMMENT_LIMIT = 15

# Directory the CSV files are written to unless the caller overrides it.
_DEFAULT_OUTPUT_DIR = r'C:\Users\madha\Desktop\Dissertation\Data\Reddit API'


def _utc_date(timestamp):
    """Format a Unix timestamp as a 'YYYY-MM-DD' date string in UTC."""
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
    # timezone-aware conversion produces the same calendar date.
    moment = datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc)
    return moment.strftime('%Y-%m-%d')


def _make_row(coin_ticker, item_type, subreddit_name, created_utc, title, body, score):
    """Build one output record; the keys become the CSV column names."""
    return {
        'coin': coin_ticker,
        'date': _utc_date(created_utc),
        'type': item_type,
        'subreddit': subreddit_name,
        'title': title,
        'body': body,
        'score': score,
    }


def scrape_for_coin(coin_ticker, config, output_dir=_DEFAULT_OUTPUT_DIR):
    """Scrape posts and their comments for one coin and save them as CSV.

    Every subreddit in ``config['subreddits']`` is searched with
    ``config['search_terms']`` AND-ed with ``TOPIC_KEYWORDS`` (relevance
    sort, past year, at most ``POST_LIMIT`` results). For each post not
    already seen in this call, one 'post' row is recorded followed by up to
    ``COMMENT_LIMIT`` 'comment' rows. Rows with identical body and title are
    dropped before writing ``reddit_data_<coin_ticker>_24_25.csv``.

    Args:
        coin_ticker: Ticker symbol, used in the 'coin' column and file name.
        config: Mapping with 'search_terms' (query fragment) and
            'subreddits' (iterable of subreddit names).
        output_dir: Directory for the CSV file; created if it is missing.
            Defaults to the original hard-coded dissertation data folder.

    Returns:
        None. Progress is reported via ``print``; nothing is written when no
        rows were collected.

    Raises:
        OSError: If ``output_dir`` cannot be created or the CSV cannot be
            written. Errors raised while scraping a subreddit are printed
            and that subreddit's remaining results are skipped.
    """
    print(f"Starting scrape for {coin_ticker}")

    # Create the target directory before scraping so an unusable path fails
    # immediately rather than after all the data has been fetched.
    os.makedirs(output_dir, exist_ok=True)

    scraped_data = []
    # Subreddit searches can return the same post (e.g. crossposts are not
    # excluded), so remember post IDs across subreddits within this call.
    processed_post_ids = set()

    # Constructing search query
    search_query = f"{config['search_terms']} AND {TOPIC_KEYWORDS}"

    # Looping through sub-reddits
    for subreddit_name in config['subreddits']:
        # Best-effort boundary: results are fetched lazily while iterating,
        # so the whole loop stays inside the try. A failure abandons the rest
        # of this subreddit but keeps the rows gathered so far.
        try:
            subreddit = reddit.subreddit(subreddit_name)
            print(f"\nSearching in subreddit: r/{subreddit_name}")

            search_results = subreddit.search(
                search_query, sort='relevance', time_filter='year', limit=POST_LIMIT
            )

            post_count = 0
            for post in search_results:
                if post.id in processed_post_ids:
                    continue

                post_count += 1
                processed_post_ids.add(post.id)

                scraped_data.append(_make_row(
                    coin_ticker, 'post', subreddit_name,
                    post.created_utc, post.title, post.selftext, post.score,
                ))

                # limit=0 drops the "load more comments" placeholders so that
                # only already-loaded comments are iterated.
                post.comments.replace_more(limit=0)
                comment_title = f"Comment on: {post.title[:50]}..."
                for comment in post.comments.list()[:COMMENT_LIMIT]:
                    scraped_data.append(_make_row(
                        coin_ticker, 'comment', subreddit_name,
                        comment.created_utc, comment_title, comment.body, comment.score,
                    ))

            print(f"Found and processed {post_count} posts in r/{subreddit_name}.")

        except Exception as e:
            print(f" An error occurred while scraping r/{subreddit_name}: {e}")

    if not scraped_data:
        print(f"\nNo data found for {coin_ticker} with the current criteria.")
        return

    df = pd.DataFrame(scraped_data)
    df = df.drop_duplicates(subset=['body', 'title'])

    file_name = f"reddit_data_{coin_ticker}_24_25.csv"
    file_path = os.path.join(output_dir, file_name)

    # utf-8-sig writes a BOM, which lets Excel detect the encoding.
    df.to_csv(file_path, index=False, encoding='utf-8-sig')
    print(f"\n Successfully saved {len(df)} unique items for {coin_ticker}.")

# Entry point: run the scraper once for every coin in TARGET_COINS. Each call
# writes its own CSV file when it collects any data.
if __name__ == "__main__":
    
    for ticker, coin_config in TARGET_COINS.items():
        scrape_for_coin(ticker, coin_config)

Starting scrape for BTC

Searching in subreddit: r/CryptoCurrency
Found and processed 249 posts in r/CryptoCurrency.

Searching in subreddit: r/Bitcoin
Found and processed 244 posts in r/Bitcoin.

Searching in subreddit: r/BitcoinMarkets
Found and processed 95 posts in r/BitcoinMarkets.

Searching in subreddit: r/btc
Found and processed 248 posts in r/btc.

Searching in subreddit: r/CryptoMarkets
Found and processed 240 posts in r/CryptoMarkets.

 Successfully saved 12759 unique items for BTC.
Starting scrape for ETH

Searching in subreddit: r/CryptoCurrency
Found and processed 247 posts in r/CryptoCurrency.

Searching in subreddit: r/ethereum
Found and processed 250 posts in r/ethereum.

Searching in subreddit: r/ethtrader
Found and processed 242 posts in r/ethtrader.

Searching in subreddit: r/CryptoMarkets
Found and processed 241 posts in r/CryptoMarkets.

 Successfully saved 13847 unique items for ETH.
Starting scrape for XRP

Searching in subreddit: r/CryptoCurrency
Found and proc