# In [None]:
import requests
import time
import pandas as pd

def fetch_posts(subreddit, limit=75000):
    """Collect the top posts of the past week from a subreddit.

    Pages through the subreddit's public ``top`` JSON listing, 100 posts per
    request, following the ``after`` cursor until ``limit`` posts have been
    gathered, the listing runs out, or a request fails.

    Args:
        subreddit: Subreddit name without the ``r/`` prefix.
        limit: Maximum number of posts to return.

    Returns:
        A list with at most ``limit`` raw post ``data`` dicts, in listing
        order. If a request fails part-way, the posts gathered so far are
        returned instead of raising.
    """
    headers = {"User-Agent": "custom-script"}
    url = f"https://www.reddit.com/r/{subreddit}/top/.json"
    posts = []
    after = None

    while len(posts) < limit:
        params = {"t": "week", "limit": 100}
        if after:
            params["after"] = after

        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(
                url, headers=headers, params=params, timeout=30
            )
        except requests.RequestException as exc:
            print(f"❌ Error: {exc}")
            break

        if response.status_code != 200:
            print(f"❌ Error: {response.status_code}")
            break

        try:
            listing = response.json()["data"]
            children = listing["children"]
        except (ValueError, KeyError, TypeError) as exc:
            # Body was not JSON or not shaped like a listing; keep what we have.
            print(f"❌ Error: unexpected response ({exc!r})")
            break

        if not children:
            break

        # Take only as many posts as are still needed to reach the limit.
        remaining = limit - len(posts)
        posts.extend(child["data"] for child in children[:remaining])

        after = listing.get("after")
        if not after:
            print("✅ No more posts to fetch.")
            break

        if len(posts) >= limit:
            # Nothing left to request, so skip the rate-limit pause.
            break

        time.sleep(1)  # Avoid rate limits

    print(f"✅ Fetched {len(posts)} posts.")
    return posts

def fetch_comments(post_permalink, headers):
    """Return the bodies of the top-level comments on a post.

    Requests ``https://www.reddit.com{post_permalink}.json?limit=500`` and
    reads the second element of the response, which is expected to be the
    comment listing. Replies nested under a comment are not traversed, and
    entries without a ``body`` field are skipped.

    Args:
        post_permalink: Site-relative permalink of the post.
        headers: HTTP headers to send with the request.

    Returns:
        A list of comment body strings in response order. The list is empty
        when the request fails, the status is not 200, or the payload does
        not contain a comment listing.
    """
    comments_url = f"https://www.reddit.com{post_permalink}.json?limit=500"

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(comments_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"❌ Error fetching comments: {exc}")
        return []

    if response.status_code != 200:
        return []

    try:
        payload = response.json()
    except ValueError as exc:
        print(f"❌ Error fetching comments: invalid JSON ({exc!r})")
        return []

    # A usable payload is a list of [post listing, comment listing]; an error
    # payload may be a dict, which must not be indexed with [1].
    if not isinstance(payload, list) or len(payload) < 2:
        return []

    try:
        children = payload[1]["data"]["children"]
    except (KeyError, TypeError) as exc:
        print(f"❌ Error fetching comments: unexpected response ({exc!r})")
        return []

    return [
        child["data"]["body"]
        for child in children
        if "body" in child.get("data", {})
    ]

def scrape_reddit(subreddit, post_limit=75000):
    """Gather post bodies and their top-level comments from a subreddit.

    Fetches up to ``post_limit`` posts with ``fetch_posts`` and, for each one,
    appends its ``selftext`` (``"[No text]"`` when the key is absent) followed
    by the comment bodies returned by ``fetch_comments``. Progress is printed
    every 100 posts, and the loop pauses one second after each post.

    Args:
        subreddit: Subreddit name without the ``r/`` prefix.
        post_limit: Maximum number of posts to process.

    Returns:
        A flat list of strings: each post's text followed by its comments.
    """
    request_headers = {"User-Agent": "custom-script"}
    submissions = fetch_posts(subreddit, post_limit)
    total = len(submissions)
    collected = []

    for position, submission in enumerate(submissions):
        collected.append(submission.get("selftext", "[No text]"))
        collected += fetch_comments(submission["permalink"], request_headers)

        if not position % 100:
            print(f"📥 Processed {position}/{total} posts...")

        time.sleep(1)  # Avoid rate limits

    print(f"✅ Fetched total {len(collected)} posts & comments.")
    return collected

# Scrape r/AskSpain (up to 100 posts) and export every collected text to CSV,
# labelling each row with its country and source.
subreddit = "AskSpain"
data = scrape_reddit(subreddit, post_limit=100)

# Build all three columns in one go; the label columns repeat a constant
# value once per collected text.
df = pd.DataFrame({
    'TEXTO': data,
    'PAÍS': ['Spain'] * len(data),
    'FUENTE': ['r/AskSpain'] * len(data),
})
df.to_csv('AskSpain2.csv')

