In [1]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# ✅ Selectors
# By.CLASS_NAME is meant for a single class name, so the compound utility-class
# chains are written as explicit CSS selectors instead. The brackets in
# `rounded-[8px]` must be escaped: unescaped, the CSS parser reads `[8px]` as an
# attribute selector and rejects the whole selector.
# NOTE(review): these class chains mirror the page markup at the time of writing
# and are likely to change — confirm against the live DOM.
USERNAME_SELECTOR = '.truncate.font-bold.text-neutral-content-strong.text-12.undefined.hover\\:underline'
CONTENT_SELECTOR = '.md.text-14.rounded-\\[8px\\].pb-2xs.overflow-hidden'
UPVOTES_XPATH = './/shreddit-comment-action-row//span/span/faceplate-number'


def _text_or_default(parent, by, selector, default):
    """Return the stripped text of the first descendant of `parent` matching
    (`by`, `selector`), or `default` when no such element can be read.

    Only "element missing" / "element went stale" are treated as recoverable;
    any other error (e.g. an invalid selector) propagates so it is not hidden.
    """
    try:
        return parent.find_element(by, selector).text.strip()
    except (NoSuchElementException, StaleElementReferenceException):
        return default


# ✅ Storage
comments_data = []
max_comments = 10

# ✅ Setup
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
try:
    driver.maximize_window()

    # ✅ Open Reddit Thread
    url = "https://www.reddit.com/r/AskReddit/comments/1j735u5/what_is_ruining_your_mental_health/"
    driver.get(url)
    time.sleep(5)  # Allow page to load

    # ✅ Scroll to load more comments
    for _ in range(5):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

    # ✅ Get all comment containers (wait up to 10 s for at least one to appear)
    try:
        comment_blocks = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'shreddit-comment'))
        )
    except TimeoutException as e:
        print("Could not find comment blocks:", e)
        comment_blocks = []

    print(f"Found {len(comment_blocks)} total comments on page")

    # ✅ Extract data from each comment block (at most `max_comments`).
    # Every block yields exactly one row, so slicing is equivalent to counting.
    for block in comment_blocks[:max_comments]:
        comments_data.append({
            "username": _text_or_default(block, By.CSS_SELECTOR, USERNAME_SELECTOR, "Unknown"),
            "content": _text_or_default(block, By.CSS_SELECTOR, CONTENT_SELECTOR, "No content"),
            "upvotes": _text_or_default(block, By.XPATH, UPVOTES_XPATH, "0"),
        })
finally:
    # Always close the browser, even if scraping fails part-way through.
    driver.quit()

# ✅ Save to CSV
df = pd.DataFrame(comments_data)
df.to_csv("reddit_mental_health_10comments.csv", index=False, encoding='utf-8')
# Report the real row count; fewer than `max_comments` may have been found.
print(f"✅ Saved {len(df)} comments to reddit_mental_health_10comments.csv")


Found 10 total comments on page
✅ Saved 10 comments to reddit_mental_health_10comments.csv


In [2]:
import requests
import pandas as pd

# ✅ Reddit thread URL (convert to JSON endpoint)
url = "https://www.reddit.com/r/AskReddit/comments/1j735u5/what_is_ruining_your_mental_health/.json"

# ✅ Set a user-agent or Reddit may block your request
headers = {'User-Agent': 'Mozilla/5.0'}

# ✅ Fetch the data (with a timeout so a stalled connection cannot hang the cell)
response = requests.get(url, headers=headers, timeout=10)
if response.status_code != 200:
    # Raise instead of calling exit(): exit() would terminate the
    # kernel/interpreter rather than just stopping this cell.
    raise RuntimeError(f"Failed to retrieve data: {response.status_code}")

# ✅ Extract the comments data (2nd object in the list)
data = response.json()
if not isinstance(data, list) or len(data) < 2:
    # Fail with a readable message instead of an opaque KeyError/IndexError below.
    raise RuntimeError("Unexpected response shape: expected a list with the comment listing as its 2nd item")
comments = data[1]['data']['children']

# ✅ Storage
results = []

# ✅ Parse top-level comments
for comment in comments:
    if comment['kind'] != 't1':
        continue  # skip if it's not a comment (e.g., "more comments")

    fields = comment['data']
    author = fields.get('author', 'Unknown')
    content = fields.get('body', 'No content')
    upvotes = fields.get('score', 0)

    results.append({
        'username': author,
        'content': content,
        'upvotes': upvotes
    })

# ✅ Save to CSV
df = pd.DataFrame(results)
df.to_csv("reddit_mental_health_comments_api.csv", index=False, encoding='utf-8')
print(f"✅ Saved {len(df)} comments to reddit_mental_health_comments_api.csv")


✅ Saved 62 comments to reddit_mental_health_comments_api.csv
