# Import Necessary Libraries

In [23]:
import praw
import configparser
import pandas as pd
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import matplotlib.pyplot as plt
import seaborn as sns

# Set up configuration

In [24]:
# Set up search conditions
# NOTE: the collection cell further down re-assigns SEARCH_TERM, SUBREDDITS and
# POST_LIMIT before it searches, so the values defined here are superseded there.

SEARCH_TERM = '(Google Layoffs OR Google Layoff OR Alphabet Layoffs OR Google Fired)' 
SUBREDDITS = ['cscareerquestions', 'FinancialIndependence', 'recruitinghell', 'technology']
POST_LIMIT = 1000 # Request up to 1000 posts per subreddit initially

# Read config.ini (expects a [reddit] section holding the five credentials used below)
CONFIG_PATH = 'config.ini'
config = configparser.ConfigParser()

# ConfigParser.read() does not raise on a missing/unreadable file; it returns the
# list of files it actually parsed. Check it so a missing file is reported as
# such instead of surfacing later as a confusing "missing key 'reddit'" error.
if not config.read(CONFIG_PATH):
    raise FileNotFoundError(
        f"FATAL ERROR: Could not read '{CONFIG_PATH}'. "
        "Create it with a [reddit] section before running this notebook."
    )

# Initialise Praw Client
# Failures are re-raised rather than calling exit(): raising stops the cell (and
# a "Run All") with a visible error and keeps the kernel alive for debugging.
try:
    reddit_credentials = config['reddit']

    # user authentication
    reddit = praw.Reddit(
        client_id=reddit_credentials['client_id'],
        client_secret=reddit_credentials['client_secret'],
        user_agent=reddit_credentials['user_agent'],
        username=reddit_credentials['username'],
        password=reddit_credentials['password']
    )
    # Fetch the logged-in account's name to confirm the login succeeded.
    print(f"PRAW client initialized. Logged in as: {reddit.user.me().name}")

except KeyError as e:
    # Either the [reddit] section or one of its keys is absent from config.ini.
    raise RuntimeError(
        f"FATAL ERROR: Missing Reddit credential in config.ini. Check key: {e}"
    ) from e
except Exception as e:
    # Anything else raised while building the client or fetching the account.
    raise RuntimeError(
        f"An error occurred during PRAW initialization or login: {e}"
    ) from e

PRAW client initialized. Logged in as: Afraid-Medicine4769


# Retrieve and Collect Submissions

In [25]:
# Define the broad search term (replaces the 'search_phrases' list).
# Each phrase is double-quoted inside the query string.
SEARCH_TERM = '("Google Layoffs" OR "Google Layoff" OR "Alphabet Layoffs")' 

# Search conditions. These re-assign (and therefore override) the values set in
# the configuration cell above.
SUBREDDITS = ['cscareerquestions', 'FinancialIndependence', 'recruitinghell'] 
POST_LIMIT = 1000 

# Fields stored per submission; also used to build an empty frame when the
# search returns nothing, so `df` has the same columns either way.
POST_COLUMNS = ['id', 'subreddit', 'title', 'text', 'score',
                'num_comments', 'created_utc', 'url']

collected_posts = []

print(f"\n--- Starting data collection across {len(SUBREDDITS)} subreddits ---")

for subreddit_name in SUBREDDITS:
    print(f"Searching r/{subreddit_name}...")
    try:
        subreddit = reddit.subreddit(subreddit_name)
        
        # Search the subreddit for SEARCH_TERM, restricted to the past year
        # (time_filter='year') and ordered newest first (sort='new').
        for submission in subreddit.search(SEARCH_TERM, limit=POST_LIMIT, time_filter='year', sort='new'):
            # No manual 'if' filter is applied here; every hit returned by the search is kept.
            collected_posts.append({
                'id': submission.id,
                'subreddit': subreddit_name,
                'title': submission.title,
                'text': submission.selftext,
                'score': submission.score,
                'num_comments': submission.num_comments,
                'created_utc': submission.created_utc,
                'url': submission.url
            })
            
    except Exception as e:
        # Best-effort collection: report the failure and keep whatever was
        # gathered so far, then carry on with the remaining subreddits.
        print(f"Skipping r/{subreddit_name} due to an error: {e}")
        continue # Move to the next subreddit

print(f"\nData Collection Complete. Total submissions collected: {len(collected_posts)}")

# CONVERT TO DATAFRAME AND DISPLAY SAMPLE
if collected_posts:
    df = pd.DataFrame(collected_posts)
    
    # Convert the epoch-seconds timestamp into a datetime column.
    # (No rows are filtered here; posts with empty text are kept.)
    df['created_at'] = pd.to_datetime(df['created_utc'], unit='s')
    
    print("\n--- Sample of Retrieved Data (First 5 Rows) ---")
    print(df[['subreddit', 'title', 'score', 'num_comments', 'created_at']].head())
else:
    # Still bind `df` (empty, same columns) so later cells that reference it
    # do not fail with NameError when the search finds nothing.
    df = pd.DataFrame(columns=POST_COLUMNS + ['created_at'])
    print("\nNo posts were found matching the search criteria.")


--- Starting data collection across 3 subreddits ---
Searching r/cscareerquestions...
Searching r/FinancialIndependence...
Searching r/recruitinghell...

Data Collection Complete. Total submissions collected: 1

--- Sample of Retrieved Data (First 5 Rows) ---
           subreddit                                              title  \
0  cscareerquestions  Google Layoffs: Hundreds reportedly fired from...   

   score  num_comments          created_at  
0   1564           318 2025-04-12 05:27:56  
