In [None]:
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
BASE_URL = 'https://unmethours.com'
PAGE_URL = 'https://unmethours.com/questions/?sort=newest&page={}'

In [None]:
# Get all the question url
def get_question_links(page_num):
    url = PAGE_URL.format(page_num)
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')

    question_div = soup.find('div', id='question-list')
    if not question_div:
        return []  # No questions found, stop crawling

    questions = []
    for h2 in question_div.find_all('h2'):
        a_tag = h2.find('a')
        if a_tag and a_tag.get('href'):
            href = BASE_URL + a_tag.get('href')
            title = a_tag.get_text(strip=True)
            questions.append((title, href))
    return questions

In [None]:
# Get all the answers of each question from href
def extract_qna(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    all_editables = soup.find_all('div', class_='js-editable-content')

    question = all_editables[0].get_text(strip=True) if all_editables else ''
    answers = [div.get_text(strip=True) for div in all_editables[1:]]
    return question, "\n---\n".join(answers)


In [None]:
# Loop through all pages
page = 1
data = []

while True:
    print(f"Scraping page {page}...")
    question_links = get_question_links(page)
    if not question_links:
        print("No more questions found. Stopping.")
        break

    for title, url in question_links:
        try:
            question, answer = extract_qna(url)
            data.append({
                'question_url': url,
                'question': question,
                'answer': answer or 'No answer'
            })
            time.sleep(1)  # Be respectful
        except Exception as e:
            print(f"Failed to scrape {url}: {e}")

    page += 1

In [None]:
# Create Pandas DataFrame
df = pd.DataFrame(data)
df.to_csv('unmethours_qna_all_pages.csv', index=False)
print("Saved to CSV.")