# Reddit Comments Data Collection - API

In [4]:
import requests
import time
import pandas as pd

def fetch_reddit_data(search_query, limit=100, after=None):
    # Define URL for Reddit API endpoint
    url = "https://www.reddit.com/search.json?q={}&limit={}".format(search_query, limit)
    
    # Add 'after' parameter if specified
    if after:
        url += "&after={}".format(after)
    
    # Make GET request to the API with User-Agent header
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    
    # Check if request was successful
    if response.status_code == 200:
        # Extract data from response JSON
        data = response.json()
        
        # Initialize list to store results
        results = []
        
        # Extract relevant information from each post
        for post in data['data']['children']:
            post_data = post['data']
            results.append({
                'title': post_data['title'],
                'text': post_data['selftext'],
                'url': post_data['url'],
                'subreddit': post_data['subreddit'],
                'date': pd.to_datetime(post_data['created_utc'], unit='s')
            })
        
        # Return results and the fullname of the last submission
        return results, data['data']['after']
    
    else:
        print("Error: Failed to fetch data from the API. Status code:", response.status_code)
        return None, None

# Define search query
search_query = "climate change"

# Initialize variables
limit = 100
after = None
all_results = []

# Fetch data iteratively until no more results are available
while True:
    # Fetch data for the current batch
    batch_results, after = fetch_reddit_data(search_query, limit, after)
    
    # If no results are returned, break the loop
    if not batch_results:
        break
    
    # Extend the list of results
    all_results.extend(batch_results)
    
    # Add a delay between requests to comply with rate limits
    time.sleep(1)  # Delay for 1 second between requests

# Convert the list of results to a DataFrame
reddit_df = pd.DataFrame(all_results)

                                               title  \
0                        Climate Change - A Timeline   
1             Still think climate change isn't real?   
2  My favorite climate change fact that will blow...   
3            Are we really doomed by climate change?   
4  NASA made an animation to clearly illustrate h...   

                                                text  \
0                                                      
1  https://www.nbcnews.com/news/weather/hot-tub-w...   
2  My favorite climate change fact. I feel like t...   
3  Reddit is always very doom and gloom, and obvi...   
4                                                      

                                                 url             subreddit  \
0                https://i.redd.it/nnxapy263q3a1.jpg          Conservative   
1                https://i.redd.it/etjeuc1ol5eb1.jpg              facepalm   
2                https://i.redd.it/323dbhex9vq91.jpg          Conservative   
3  https://www

In [5]:
reddit_df

Unnamed: 0,title,text,url,subreddit,date
0,Climate Change - A Timeline,,https://i.redd.it/nnxapy263q3a1.jpg,Conservative,2022-12-03 18:03:00
1,Still think climate change isn't real?,https://www.nbcnews.com/news/weather/hot-tub-w...,https://i.redd.it/etjeuc1ol5eb1.jpg,facepalm,2023-07-25 18:23:19
2,My favorite climate change fact that will blow...,My favorite climate change fact. I feel like t...,https://i.redd.it/323dbhex9vq91.jpg,Conservative,2022-09-29 21:35:46
3,Are we really doomed by climate change?,"Reddit is always very doom and gloom, and obvi...",https://www.reddit.com/r/AskScienceDiscussion/...,AskScienceDiscussion,2022-03-19 14:26:32
4,NASA made an animation to clearly illustrate h...,,https://v.redd.it/rwm9gvyn2vxa1,Damnthatsinteresting,2023-05-04 18:53:49
...,...,...,...,...,...
7620,Growing mushrooms alongside trees could feed m...,,https://www.pnas.org/doi/10.1073/pnas.2220079120,science,2023-03-14 13:19:45
7621,"Climate change: The rich are to blame, interna...",,https://www.bbc.com/news/business-51906530,Futurology,2020-04-14 08:01:00
7622,New data reveals climate change might be more ...,,https://wis-wander.weizmann.ac.il/environment/...,science,2022-05-26 19:51:16
7623,Obviously fighting climate change makes you a ...,,https://i.redd.it/wf3fd6bnatn31.jpg,TheRightCantMeme,2019-09-20 20:51:17


In [6]:
#reddit_df.to_csv("Reddit_data_full.csv")