In [9]:
import requests
from bs4 import BeautifulSoup
import json

# Define the subreddit and the URL
subreddit = 'sentimentanalysis'  # Change this to any subreddit you're interested in
base_url = f'https://www.reddit.com/r/{subreddit}/top/.json?limit=10'  # Fetch top 10 posts

# Define headers to avoid blocking by Reddit
headers = {'User-Agent': 'Mozilla/5.0'}

# Function to scrape posts from Reddit
def scrape_reddit_posts(url):
    posts = []
    after = None

    # Loop to handle pagination
    for _ in range(3):  # Adjust the number of pages you want to scrape
        params = {'after': after} if after else {}
        
        # Send GET request to fetch data
        response = requests.get(url, headers=headers, params=params)
        
        # If request is successful, process the JSON data
        if response.status_code == 200:
            data = response.json()
            post_data = data['data']['children']
            
            # Extract relevant post details
            for post in post_data:
                title = post['data']['title']
                upvotes = post['data']['ups']
                comments = post['data']['num_comments']
                posts.append({'title': title, 'upvotes': upvotes, 'comments': comments})
            
            # Set the "after" parameter to fetch the next page
            after = data['data']['after']
        else:
            print(f"Failed to retrieve data: {response.status_code}")
            break
        
    return posts

# Function to display collected posts
def display_posts(posts):
    for post in posts:
        print(f"Title: {post['title']}")
        print(f"Upvotes: {post['upvotes']}, Comments: {post['comments']}")
        print("="*80)

# Main function to execute the scraping
if __name__ == "__main__":
    # Start scraping
    print("Scraping Reddit posts...")
    posts = scrape_reddit_posts(base_url)
    
    # Display the collected posts
    display_posts(posts)


Scraping Reddit posts...
Title: Discover the Economic Highs and Potential Risk Ahead: Economic and Market Review
Upvotes: 1, Comments: 0
Title: Discover the Economic Highs and Potential Risk Ahead: Economic and Market Review
Upvotes: 1, Comments: 0
Title: Discover the Economic Highs and Potential Risk Ahead: Economic and Market Review
Upvotes: 1, Comments: 0


In [15]:
import requests
import json

# Define the subreddit and the base URL
subreddit = 'sentimentanalysis'  # Change this to any subreddit you're interested in
base_url = f'https://www.reddit.com/r/{subreddit}/new/.json?limit=25'  # Fetch 25 posts, sorted by "new"

# Define headers to avoid blocking by Reddit
headers = {'User-Agent': 'Mozilla/5.0'}

# Function to scrape posts from Reddit
def scrape_reddit_posts(url):
    posts = []
    after = None
    page_count = 0  # To keep track of how many pages we have scraped
    
    # Loop to handle pagination
    while True:
        params = {'after': after} if after else {}
        
        # Send GET request to fetch data
        response = requests.get(url, headers=headers, params=params)
        
        # If request is successful, process the JSON data
        if response.status_code == 200:
            data = response.json()
            post_data = data['data']['children']
            print(f"Page {page_count + 1} fetched. Found {len(post_data)} posts.")  # Debug output
            
            # Extract relevant post details
            for post in post_data:
                title = post['data']['title']
                upvotes = post['data']['ups']
                comments = post['data']['num_comments']
                posts.append({'title': title, 'upvotes': upvotes, 'comments': comments})
            
            # Set the "after" parameter to fetch the next page
            after = data['data']['after']
            
            # Stop if there are no more pages to scrape
            if not after:
                print("No more pages to scrape.")
                break
            page_count += 1
        else:
            print(f"Failed to retrieve data: {response.status_code}")
            break
        
    return posts

# Function to display collected posts
def display_posts(posts):
    if posts:
        for post in posts:
            print(f"Title: {post['title']}")
            print(f"Upvotes: {post['upvotes']}, Comments: {post['comments']}")
            print("="*80)
    else:
        print("No posts found.")

# Main function to execute the scraping
if __name__ == "__main__":
    # Start scraping
    print("Scraping Reddit posts...")
    posts = scrape_reddit_posts(base_url)
    
    # Display the collected posts
    display_posts(posts)


Scraping Reddit posts...
Page 1 fetched. Found 25 posts.
Page 2 fetched. Found 25 posts.
Page 3 fetched. Found 25 posts.
Page 4 fetched. Found 25 posts.
Page 5 fetched. Found 5 posts.
No more pages to scrape.
Title: Discover the Economic Highs and Potential Risk Ahead: Economic and Market Review
Upvotes: 1, Comments: 0
Title: Navigating the Sentiment Analysis Jungle: Comparing Automated Tools
Upvotes: 6, Comments: 4
Title: Anyone know any firms in India or Bangalore that I could utilise for social media management?
Upvotes: 1, Comments: 2
Title: What manual sentiment Analysis looks like
Upvotes: 4, Comments: 2
Title: remote job opportunity
Upvotes: 1, Comments: 7
Title: Websites that do not consider web scraping illegal?
Upvotes: 1, Comments: 0
Title: Websites that do not consider web scraping illegal?
Upvotes: 1, Comments: 0
Title: Survey for Sentiment Analysis Project
Upvotes: 1, Comments: 0
Title: MonkeyLearn Sentiment Analyzer Sign Up issue
Upvotes: 6, Comments: 5
Title: Sentiment 

In [17]:
import requests
import csv

# Define the subreddit and the base URL
subreddit = 'sentimentanalysis'  # Change this to any subreddit you're interested in
base_url = f'https://www.reddit.com/r/{subreddit}/new/.json?limit=25'  # Fetch 25 posts, sorted by "new"

# Define headers to avoid blocking by Reddit
headers = {'User-Agent': 'Mozilla/5.0'}

# Function to scrape posts from Reddit
def scrape_reddit_posts(url):
    posts = []
    after = None
    page_count = 0  # To keep track of how many pages we have scraped
    
    # Loop to handle pagination
    while True:
        params = {'after': after} if after else {}
        
        # Send GET request to fetch data
        response = requests.get(url, headers=headers, params=params)
        
        # If request is successful, process the JSON data
        if response.status_code == 200:
            data = response.json()
            post_data = data['data']['children']
            print(f"Page {page_count + 1} fetched. Found {len(post_data)} posts.")  # Debug output
            
            # Extract relevant post details
            for post in post_data:
                title = post['data']['title']
                upvotes = post['data']['ups']
                comments = post['data']['num_comments']
                posts.append({'title': title, 'upvotes': upvotes, 'comments': comments})
            
            # Set the "after" parameter to fetch the next page
            after = data['data']['after']
            
            # Stop if there are no more pages to scrape
            if not after:
                print("No more pages to scrape.")
                break
            page_count += 1
        else:
            print(f"Failed to retrieve data: {response.status_code}")
            break
        
    return posts

# Function to save posts to CSV
def save_to_csv(posts, filename='reddit_posts.csv'):
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=['title', 'upvotes', 'comments'])
        writer.writeheader()
        writer.writerows(posts)
    print(f"Data saved to {filename}")

# Main function to execute the scraping
if __name__ == "__main__":
    # Start scraping
    print("Scraping Reddit posts...")
    posts = scrape_reddit_posts(base_url)
    
    # Save the posts to a CSV file
    save_to_csv(posts)


Scraping Reddit posts...
Page 1 fetched. Found 25 posts.
Page 2 fetched. Found 25 posts.
Page 3 fetched. Found 25 posts.
Page 4 fetched. Found 25 posts.
Page 5 fetched. Found 5 posts.
No more pages to scrape.
Data saved to reddit_posts.csv
