In [5]:
import requests
import time
import pandas as pd
import matplotlib.pyplot as plt
import os
import sys
from typing import Dict, List, Optional  
from dotenv import load_dotenv

In [6]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Reddit OAuth settings; each entry comes from the REDDIT_<KEY> environment
# variable and is None when that variable is not set.
REDDIT_CONFIG = {
    setting: os.getenv(f'REDDIT_{setting.upper()}')
    for setting in ('client_id', 'client_secret', 'user_agent', 'redirect_uri')
}

# Report up front which settings are absent or empty.
if not all(REDDIT_CONFIG.values()):
    print("Missing environment variables. Please check your .env file!")
    missing = [name for name, value in REDDIT_CONFIG.items() if not value]
    print(f"Missing values for: {missing}")
else:
    print("Environment variables loaded successfully!")

Environment variables loaded successfully!


In [7]:
class RedditAPI:
    """Small Reddit API client using the ``client_credentials`` OAuth grant.

    Credentials and the User-Agent are read from the module-level
    ``REDDIT_CONFIG`` mapping. The access token is fetched lazily on the
    first API call and re-fetched once it is (about to be) expired.

    Args:
        timeout: Seconds to wait for each HTTP request before ``requests``
            raises a timeout error. Without a timeout a stalled connection
            would block the notebook kernel indefinitely.
    """

    TOKEN_URL = 'https://www.reddit.com/api/v1/access_token'
    API_BASE_URL = 'https://oauth.reddit.com'
    # Used only when the token response carries no usable ``expires_in``.
    DEFAULT_TOKEN_LIFETIME = 3600
    # Refresh this many seconds early so a token cannot expire mid-request.
    TOKEN_REFRESH_MARGIN = 60
    # Upper bound applied to the ``limit`` query parameter of listings.
    MAX_LISTING_LIMIT = 100

    def __init__(self, timeout: float = 10.0):
        self.client_id = REDDIT_CONFIG['client_id']
        self.client_secret = REDDIT_CONFIG['client_secret']
        self.user_agent = REDDIT_CONFIG['user_agent']
        self.auth = requests.auth.HTTPBasicAuth(self.client_id, self.client_secret)
        self.timeout = timeout
        self.token = None
        self.token_expiry = 0

    @staticmethod
    def _raise_for_unexpected_status(response) -> None:
        """Raise for any non-200 response, including non-error statuses.

        ``raise_for_status()`` only raises for 4xx/5xx. Any other non-200
        status would otherwise fall through and make the caller silently
        return ``None``.
        """
        response.raise_for_status()
        raise requests.HTTPError(
            f"Unexpected status code: {response.status_code}",
            response=response,
        )

    def _get_token(self) -> None:
        """Request a new access token and store it with its expiry time.

        Raises:
            requests.RequestException: On network failure, timeout, or a
                non-200 response from the token endpoint.
            RuntimeError: If a 200 response contains no ``access_token``.
        """
        try:
            data = {
                'grant_type': 'client_credentials'
            }
            headers = {'User-Agent': self.user_agent}

            print("Attempting to authenticate...")
            response = requests.post(
                self.TOKEN_URL,
                auth=self.auth,
                data=data,
                headers=headers,
                timeout=self.timeout
            )

            if response.status_code != 200:
                print(f"Authentication failed with status code: {response.status_code}")
                print(f"Response: {response.text}")
                self._raise_for_unexpected_status(response)

            payload = response.json()
            token = payload.get('access_token')
            if not token:
                # Fail with a readable message instead of a bare KeyError.
                raise RuntimeError(
                    "Token response did not contain an access token: "
                    f"{payload.get('error', 'unknown error')}"
                )

            # Trust the lifetime reported by the server; fall back to the
            # default only when it is missing or not numeric.
            try:
                lifetime = float(payload.get('expires_in', self.DEFAULT_TOKEN_LIFETIME))
            except (TypeError, ValueError):
                lifetime = self.DEFAULT_TOKEN_LIFETIME

            self.token = token
            self.token_expiry = time.time() + lifetime
            print("Authentication successful!")

        except Exception as e:
            print(f"Error during authentication: {str(e)}")
            raise

    def _ensure_valid_token(self) -> None:
        """Fetch a token if none is held or the current one is near expiry."""
        refresh_at = self.token_expiry - self.TOKEN_REFRESH_MARGIN
        if not self.token or time.time() >= refresh_at:
            self._get_token()

    def _auth_headers(self) -> Dict[str, str]:
        """Return request headers carrying a currently valid bearer token."""
        self._ensure_valid_token()
        return {
            'User-Agent': self.user_agent,
            'Authorization': f'Bearer {self.token}'
        }

    @staticmethod
    def _summarize_subreddit(sub_data: Dict, subscribers: int) -> Dict:
        """Reduce a raw subreddit payload to the fields this notebook uses."""
        return {
            'name': sub_data.get('display_name', ''),
            'title': sub_data.get('title', ''),
            'description': sub_data.get('public_description', ''),
            'subscribers': subscribers,
            # The API may send null here; normalise to 0.
            'active_users': sub_data.get('active_user_count', 0) or 0,
            'url': f"https://reddit.com{sub_data.get('url', '')}"
        }

    def get_subreddit_posts(self, subreddit: str, limit: int = 25) -> List[Dict]:
        """Get posts from the "hot" listing of a subreddit.

        Args:
            subreddit: Subreddit name without the ``r/`` prefix.
            limit: Number of posts to request; capped at 100.

        Returns:
            The ``data`` dict of every child in the listing response.

        Raises:
            ValueError: If ``subreddit`` is empty or only whitespace.
            requests.RequestException: On network failure, timeout, or any
                non-200 response.
        """
        subreddit = (subreddit or '').strip()
        if not subreddit:
            # An empty name would otherwise build the URL ".../r//hot".
            raise ValueError("subreddit must be a non-empty name")

        headers = self._auth_headers()
        params = {
            'limit': min(limit, self.MAX_LISTING_LIMIT)
        }

        response = requests.get(
            f'{self.API_BASE_URL}/r/{subreddit}/hot',
            headers=headers,
            params=params,
            timeout=self.timeout
        )

        if response.status_code != 200:
            print(f"Error fetching posts: {response.status_code}")
            print(f"Response: {response.text}")
            self._raise_for_unexpected_status(response)

        return [post['data'] for post in response.json()['data']['children']]

    def search_subreddits(self, query: str, min_subscribers: int = 10000) -> List[Dict]:
        """Search for subreddits based on topic.

        Args:
            query: Search text sent as the ``q`` parameter.
            min_subscribers: Results with fewer subscribers are dropped.

        Returns:
            Summary dicts with keys ``name``, ``title``, ``description``,
            ``subscribers``, ``active_users`` and ``url``, sorted by
            subscriber count in descending order.

        Raises:
            requests.RequestException: On network failure, timeout, or any
                non-200 response.
        """
        headers = self._auth_headers()

        params = {
            'q': query,
            'limit': 100,
            'sort': 'relevance',
            'include_over_18': False
        }

        try:
            response = requests.get(
                f'{self.API_BASE_URL}/subreddits/search',
                headers=headers,
                params=params,
                timeout=self.timeout
            )

            if response.status_code != 200:
                print(f"Error Response Content: {response.text}")
                self._raise_for_unexpected_status(response)

            data = response.json()
            subreddits = []

            for subreddit in data['data']['children']:
                if 'data' not in subreddit:
                    continue
                sub_data = subreddit['data']
                # A null subscriber count is treated as 0.
                subscribers = sub_data.get('subscribers', 0) or 0
                if subscribers >= min_subscribers:
                    subreddits.append(self._summarize_subreddit(sub_data, subscribers))

            return sorted(subreddits, key=lambda x: x['subscribers'], reverse=True)

        except Exception as e:
            print(f"Exception occurred: {str(e)}")
            raise


In [8]:
# Build the client from REDDIT_CONFIG. Constructing it sends no request;
# the OAuth token is fetched on the first API call.
reddit = RedditAPI()

In [11]:
# Request up to 5 posts from the "hot" listing of r/tech.
posts = reddit.get_subreddit_posts('tech', limit=5)
# One row per post; columns come from the keys of each post's data dict.
posts_df = pd.DataFrame(posts)
# Kept as the last expression so the notebook renders the table.
posts_df.head()

Unnamed: 0,approved_at_utc,subreddit,selftext,author_fullname,saved,mod_reason_title,gilded,clicked,title,link_flair_richtext,...,author_patreon_flair,author_flair_text_color,permalink,stickied,url,subreddit_subscribers,created_utc,num_crossposts,media,is_video
0,,tech,,t2_2uwit82z,False,,0,False,"Finland’s 100MW sand battery turns 2,000 tons ...",[],...,False,,/r/tech/comments/1grv3rt/finlands_100mw_sand_b...,False,https://interestingengineering.com/energy/sand...,657449,1731673000.0,1,,False
1,,tech,,t2_h92owkyw,False,,0,False,New Twitch policy bans the word ‘Zionist’ amid...,[],...,False,,/r/tech/comments/1gs4dna/new_twitch_policy_ban...,False,https://www.dexerto.com/twitch/new-twitch-poli...,657449,1731699000.0,0,,False
2,,tech,,t2_cc0n0rs5,False,,0,False,Wave-busting tech from Edinburgh keeps robots ...,[],...,False,,/r/tech/comments/1grwr8n/wavebusting_tech_from...,False,https://interestingengineering.com/energy/robo...,657449,1731679000.0,0,,False
3,,tech,,t2_2uwit82z,False,,0,False,Fatbergs fighter: New self-healing zinc coatin...,[],...,False,,/r/tech/comments/1gr3dkv/fatbergs_fighter_new_...,False,https://interestingengineering.com/science/new...,657449,1731586000.0,0,,False
4,,tech,,t2_cc0n0rs5,False,,0,False,New microreactor converts CO2 to methanol with...,[],...,False,,/r/tech/comments/1gr5pai/new_microreactor_conv...,False,https://interestingengineering.com/energy/co2-...,657449,1731594000.0,0,,False
