In [1]:
!pip install emoji
!pip install emot
!pip install selenium
!pip install webdriver_manager



In [2]:
# importing necessary libraries
import os
import re
import time
import calendar
import json


import emoji
import emot
import pandas as pd
import hashlib
import unicodedata

from datetime import datetime, timedelta
from tqdm import tqdm
from bs4 import BeautifulSoup

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [3]:
# Output Directory Check
OUTPUT_DIR = "Data"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [4]:
KAOMOJI_FILE = "kaomoji_to_text.json"
if os.path.exists(KAOMOJI_FILE):
    with open(KAOMOJI_FILE, "r", encoding="utf-8") as f:
        kaomoji_to_text = json.load(f)
else:0
    kaomoji_to_text = {}



def clean_text(s):
    if not s:
        return ""
    s = ''.join(c for c in s if unicodedata.category(c)[0] != 'C' and c != '\uFFFD')
    return s.replace('\xa0', ' ').replace('\u200e', '').strip()

def standardize_date(raw_date: str) -> str:
    s = clean_text(raw_date)
    today = datetime.now()
    weekdays = [d.lower() for d in calendar.day_name]
    s_lower = s.lower()

    # Handle relative dates
    if s_lower in weekdays:
        delta = (today.weekday() - weekdays.index(s_lower)) % 7
        return (today - timedelta(days=delta)).strftime("%d-%m-%Y")
    if "yesterday" in s_lower:
        return (today - timedelta(days=1)).strftime("%d-%m-%Y")
    if "today" in s_lower:
        return today.strftime("%d-%m-%Y")
    if "week" in s_lower:
        n = int(re.search(r"(\d+)", s_lower).group(1)) if re.search(r"\d+", s_lower) else 1
        return (today - timedelta(weeks=n)).strftime("%d-%m-%Y")
    if "month" in s_lower:
        n = int(re.search(r"(\d+)", s_lower).group(1)) if re.search(r"\d+", s_lower) else 1
        return (today - timedelta(days=30*n)).strftime("%d-%m-%Y")

    # Try most relevant date formats
    fmts = [
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%d %H:%M",
        "%d-%m-%Y %H:%M",
        "%d-%m-%Y %I:%M %p",
        "%d/%m/%Y %H:%M",
        "%d/%m/%Y %I:%M %p",
        "%d-%m-%Y%H:%M",
        "%Y-%m-%d%H:%M",
        "%d/%m/%Y%H:%M",
    ]
    for fmt in fmts:
        try:
            dt = datetime.strptime(s, fmt)
            return dt.strftime("%d-%m-%Y")
        except Exception:
            continue
    # Date only formats (no time info)
    date_fmts = [
        "%d-%m-%Y", "%Y-%m-%d", "%d/%m/%Y"
    ]
    for fmt in date_fmts:
        try:
            dt = datetime.strptime(s, fmt)
            return dt.strftime("%d-%m-%Y")
        except Exception:
            continue
    # Fallback: regex for non-standard
    m = re.match(r"(\d{2})-(\d{2})-(\d{4})", s)
    if m:
        return f"{m.group(1)}-{m.group(2)}-{m.group(3)}"
    return s

def convert_emojis_emoticons(text: str) -> str:
    text = clean_text(text)
    for k, v in kaomoji_to_text.items():
        text = text.replace(k, f" {v} ")
    e = emot.core.emot()
    emo = e.emoticons(text)
    for orig, mean in zip(emo["value"], emo["mean"]):
        text = text.replace(orig, f" {mean} ")
    text = emoji.demojize(text)
    return text.strip().lower()

def get_existing_ids(filepath, id_column):
    if os.path.exists(filepath):
        try:
            df = pd.read_csv(filepath)
            return set(df[id_column].astype(str))
        except Exception:
            return set()
    else:
        return set()

def make_comment_id(msg, post_id, comment_content):
    comment_id = (msg.get('data-message-id') or msg.get('id') or '').strip()
    if comment_id and comment_id.lower() not in ['lineardisplaymessageviewwrapper', '']:
        return comment_id
    hash_part = hashlib.sha256(comment_content.encode('utf-8')).hexdigest()[:10]
    return f"{post_id}_c{hash_part}"

def extract_comment_date(msg):
    time_elem = msg.find("time")
    if time_elem:
        if time_elem.has_attr('datetime'):
            return standardize_date(time_elem['datetime'])
        elif time_elem.has_attr('title'):
            return standardize_date(time_elem['title'])
        elif time_elem.text.strip():
            return standardize_date(time_elem.text)
    date_elem = msg.find("span", class_="local-friendly-date")
    if date_elem:
        if date_elem.has_attr('title') and date_elem['title'].strip():
            return standardize_date(date_elem['title'])
        elif date_elem.text.strip():
            return standardize_date(date_elem.text)
    datetime_elem = msg.find("span", class_="DateTime")
    if datetime_elem and datetime_elem.text.strip():
        return standardize_date(datetime_elem.text)
    if msg.has_attr('data-message-timestamp'):
        try:
            ts = int(msg['data-message-timestamp'])
            return datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M')
        except Exception:
            pass
    return ''

def comment_scraping(driver, post_url, post_id, category, max_comments=200, retry=3, polite_delay=1):
    comments = []
    comments_csv = os.path.join(OUTPUT_DIR, f"comments_{category}.csv")
    existing_comment_ids = get_existing_ids(comments_csv, "Comment ID")
    url = post_url
    scraped = 0
    for page in range(1, 100):  # will break on next not found
        if scraped >= max_comments:
            break
        success = False
        for attempt in range(retry):
            try:
                driver.get(url)
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CLASS_NAME, "linear-message-list"))
                )
                soup = BeautifulSoup(driver.page_source, "html.parser")
                success = True
                break
            except Exception:
                time.sleep(2)
        if not success:
            break

        section = soup.find("div", class_="linear-message-list")
        if not section:
            break

        msgs = section.find_all("div", class_="lia-message-view-display")
        for msg in msgs:
            if scraped >= max_comments:
                break
            content_elem = msg.find("div", class_="lia-message-body-content")
            comment_content = convert_emojis_emoticons(content_elem.get_text("\n", strip=True)) if content_elem else ""
            comment_id = make_comment_id(msg, post_id, comment_content)
            if comment_id in existing_comment_ids:
                continue
            author_elem = msg.find("a", class_="lia-user-name-link")
            comment_author = clean_text(author_elem.get_text(strip=True)) if author_elem else ""
            comment_date = extract_comment_date(msg)
            support_elem = msg.find("span", {"id": re.compile(r"^kudos-count-")})
            if not support_elem:
                support_elem = msg.find("span", class_="lia-component-kudos-widget-message-kudos-count")
            comment_support = support_elem.text.strip() if support_elem else "0"
            comment_row = {
                "Comment ID": comment_id,
                "Post ID": post_id,
                "Category": category,
                "Comment Author": comment_author,
                "Comment Date": comment_date,
                "Comment Content": comment_content,
                "Comment Support": comment_support,
                "Post URL": post_url
            }
            comments.append(comment_row)
            existing_comment_ids.add(comment_id)
            scraped += 1
        nxt = soup.find("a", rel="next")
        if not nxt or not nxt.get("href"):
            break
        url = "https://forums.beyondblue.org.au" + nxt["href"] if nxt["href"].startswith("/") else nxt["href"]
        time.sleep(polite_delay)
    comments = sorted(comments, key=lambda c: c["Comment Date"])
    return comments

def beyondblue_scraping(tag: str, start_url: str, pages: int = 20, polite_delay=2):
    posts_csv = os.path.join(OUTPUT_DIR, f"posts_{tag}.csv")
    comments_csv = os.path.join(OUTPUT_DIR, f"comments_{tag}.csv")
    existing_post_ids = get_existing_ids(posts_csv, "Post ID")
    existing_comment_ids = get_existing_ids(comments_csv, "Comment ID")
    all_posts = []
    all_comments = []

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    driver.set_page_load_timeout(30)
    url = start_url
    try:
        for p in tqdm(range(1, pages + 1), desc=f"Scraping {tag}"):
            for attempt in range(3):
                try:
                    driver.get(url)
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CLASS_NAME, "custom-message-list"))
                    )
                    soup = BeautifulSoup(driver.page_source, "html.parser")
                    break
                except Exception:
                    if attempt == 2:
                        print(f"Failed to load page {url}")
                        return
                    time.sleep(2)
            container = soup.find("div", class_="custom-message-list all-discussions")
            if not container:
                break
            for art in container.find_all("article"):
                aside = art.find("aside")
                cat_div = aside.find("div", class_="custom-tile-category-content") if aside else None
                post_cat = clean_text(cat_div.find("a").text.strip()) if cat_div and cat_div.find("a") else tag
                # --- DATE ---
                time_el = cat_div.find("time") if cat_div else None
                if time_el and time_el.has_attr('datetime'):
                    raw_date = time_el['datetime']
                elif time_el and time_el.has_attr('title'):
                    raw_date = time_el['title']
                elif time_el:
                    raw_date = time_el.text.strip()
                else:
                    raw_date = ""
                date = standardize_date(raw_date)
                # --- SUPPORT AND REPLIES ---
                support_li = art.find("li", class_="custom-tile-kudos")
                support_span = support_li.find("span") if support_li else None
                post_support = support_span.text.strip() if support_span else "0"

                replies_li = art.find("li", class_="custom-tile-replies")
                replies_b = replies_li.find("b") if replies_li else None
                total_comment_count = replies_b.text.strip() if replies_b else "0"
                # --- POST TITLE/URL/CONTENT ---
                h3 = art.find("h3")
                link_els = h3.find_all("a") if h3 else []
                link_el = link_els[1] if len(link_els) > 1 else (link_els[0] if link_els else None)
                post_link = link_el["href"] if link_el and link_el.has_attr("href") else ""
                post_id = post_link.rstrip("/").split("/")[-1] if post_link else ""
                if not post_id or post_id in existing_post_ids:
                    continue
                full_link = "https://forums.beyondblue.org.au" + post_link if post_link.startswith("/") else post_link
                title = convert_emojis_emoticons(link_el.text.strip()) if link_el else ""
                body = art.find("p", class_="body-text")
                content = convert_emojis_emoticons(body.text.strip()) if body else ""
                auth_div = aside.find("div", class_="custom-tile-author-info") if aside else None
                auth_a = auth_div.find("a") if auth_div else None
                author = clean_text(auth_a.get_text(strip=True)) if auth_a else ""
                post_content = content
                # --- Get real post support from detail page ---
                for attempt in range(3):
                    try:
                        driver.execute_script("window.open('');")
                        driver.switch_to.window(driver.window_handles[1])
                        driver.get(full_link)
                        WebDriverWait(driver, 10).until(
                            EC.presence_of_element_located((By.CLASS_NAME, "lia-message-body-content"))
                        )
                        post_soup = BeautifulSoup(driver.page_source, "html.parser")
                        content_elems = post_soup.find_all("div", class_="lia-message-body-content")
                        if content_elems:
                            post_content = "\n".join([convert_emojis_emoticons(elem.get_text("\n", strip=True)) for elem in content_elems if elem.get_text(strip=True)])
                        # Get support count from first post in thread
                        support_span_detail = post_soup.find("span", {"id": re.compile(r"^kudos-count-")})
                        if not support_span_detail:
                            support_span_detail = post_soup.find("span", class_="lia-component-kudos-widget-message-kudos-count")
                        if support_span_detail:
                            post_support = support_span_detail.text.strip()
                        # Get post date from detail if missing
                        time_elem = post_soup.find("time")
                        if not date and time_elem:
                            if time_elem.has_attr("datetime"):
                                date = standardize_date(time_elem["datetime"])
                            elif time_elem.has_attr("title"):
                                date = standardize_date(time_elem["title"])
                            else:
                                date = standardize_date(time_elem.text.strip())
                        # Scrape all comments (not for count, but real data)
                        comments = comment_scraping(driver, full_link, post_id, tag, max_comments=200)
                        new_comments = []
                        for c in comments:
                            if c["Comment ID"] not in existing_comment_ids:
                                new_comments.append(c)
                                existing_comment_ids.add(c["Comment ID"])
                        all_comments.extend(new_comments)
                        driver.close()
                        driver.switch_to.window(driver.window_handles[0])
                        break
                    except Exception as e:
                        if attempt == 2:
                            print(f"Failed to extract post page {full_link}: {e}")
                            try:
                                driver.close()
                                driver.switch_to.window(driver.window_handles[0])
                            except: pass
                        else:
                            time.sleep(2)
                post_data = {
                    "Post ID": post_id,
                    "Category": post_cat,
                    "Post Title": title,
                    "Post Author": author,
                    "Post Date": date,
                    "Post Content": post_content,
                    "Support Count": post_support,
                    "Total Number of Comments": total_comment_count,
                    "Post URL": full_link
                }
                all_posts.append(post_data)
                existing_post_ids.add(post_id)
                time.sleep(polite_delay)
            nxt_li = soup.find("li", class_="lia-paging-page-next")
            if nxt_li and nxt_li.find("a"):
                next_href = nxt_li.find("a")["href"]
                url = "https://forums.beyondblue.org.au" + next_href if next_href.startswith("/") else next_href
            else:
                break
            if p % 5 == 0:
                dfp = pd.DataFrame(all_posts)
                dfp.sort_values(by="Post Date", inplace=True)
                dfp.to_csv(posts_csv, index=False)
                dfc = pd.DataFrame(all_comments)
                dfc.sort_values(by="Comment Date", inplace=True)
                dfc.to_csv(comments_csv, index=False)
    finally:
        driver.quit()
    if all_posts:
        dfp = pd.DataFrame(all_posts)
        if os.path.exists(posts_csv):
            dfp_existing = pd.read_csv(posts_csv)
            dfp = pd.concat([dfp_existing, dfp], ignore_index=True)
            dfp.drop_duplicates(subset=["Post ID"], inplace=True)
        dfp.sort_values(by="Post Date", inplace=True)
        dfp.to_csv(posts_csv, index=False)
        print(f"Saved {len(dfp)} posts to {posts_csv}")
    if all_comments:
        dfc = pd.DataFrame(all_comments)
        if os.path.exists(comments_csv):
            dfc_existing = pd.read_csv(comments_csv)
            dfc = pd.concat([dfc_existing, dfc], ignore_index=True)
            dfc.drop_duplicates(subset=["Comment ID"], inplace=True)
        dfc.sort_values(by="Comment Date", inplace=True)
        dfc.to_csv(comments_csv, index=False)
        print(f"Saved {len(dfc)} comments to {comments_csv}")

if __name__ == "__main__":
    mental_health_urls = {
        #"anxiety":      "https://forums.beyondblue.org.au/t5/anxiety/bd-p/c1-sc2-b1?&sort=recent",
        #"depression":   "https://forums.beyondblue.org.au/t5/depression/bd-p/c1-sc2-b2?&sort=recent",
        "ptsd_trauma":  "https://forums.beyondblue.org.au/t5/ptsd-and-trauma/bd-p/c1-sc2-b3?&sort=recent",
        "suicidal_selfharm": "https://forums.beyondblue.org.au/t5/suicidal-thoughts-and-self-harm/bd-p/c1-sc2-b4?&sort=recent",
        "staying_well": "https://forums.beyondblue.org.au/t5/staying-well/bd-p/c1-sc3-b1?&sort=recent",
        "treatments":   "https://forums.beyondblue.org.au/t5/treatments-health-professionals/bd-p/c1-sc3-b2?&sort=recent",
        "relationships":"https://forums.beyondblue.org.au/t5/relationship-and-family-issues/bd-p/c1-sc3-b3?&sort=recent",
        "supporting_friends": "https://forums.beyondblue.org.au/t5/supporting-family-and-friends/bd-p/c1-sc3-b4?&sort=recent",
        "long_term_support":  "https://forums.beyondblue.org.au/t5/long-term-support-over-the/bd-p/c1-sc3-b5?&sort=recent",
        "young_people": "https://forums.beyondblue.org.au/t5/young-people/bd-p/c1-sc4-b1?&sort=recent",
        "Sex_identity": "https://forums.beyondblue.org.au/t5/sexuality-and-gender-identity/bd-p/c1-sc4-b2?&sort=recent",
        "Multiculture":  "https://forums.beyondblue.org.au/t5/multicultural-experiences/bd-p/c1-sc4-b3?&sort=recent",
        "Grief_loss":    "https://forums.beyondblue.org.au/t5/grief-and-loss/bd-p/c1-sc4-b4?&sort=recent"
    }
    for tag, addr in mental_health_urls.items():
        try:
            beyondblue_scraping(tag, addr, pages=100)
        except Exception as e:
            print(f"Error scraping {tag}: {e}")
            continue

Scraping ptsd_trauma:  64%|████████████████████████████████████▍                    | 64/100 [1:28:24<47:13, 78.71s/it]

Failed to extract post page https://forums.beyondblue.org.au/t5/ptsd-and-trauma/reaching-out-to-abuser/td-p/20973: Message: timeout: Timed out receiving message from renderer: 29.245
  (Session info: chrome=139.0.7258.66)
Stacktrace:
	GetHandleVerifier [0x0xbefd33+62915]
	GetHandleVerifier [0x0xbefd74+62980]
	(No symbol) [0x0xa23e13]
	(No symbol) [0x0xa14aab]
	(No symbol) [0x0xa147e1]
	(No symbol) [0x0xa12784]
	(No symbol) [0x0xa1324d]
	(No symbol) [0x0xa1fa99]
	(No symbol) [0x0xa30f75]
	(No symbol) [0x0xa36956]
	(No symbol) [0x0xa1388d]
	(No symbol) [0x0xa30d1e]
	(No symbol) [0x0xab29cc]
	(No symbol) [0x0xa911d6]
	(No symbol) [0x0xa60833]
	(No symbol) [0x0xa616a4]
	GetHandleVerifier [0x0xe58d23+2590131]
	GetHandleVerifier [0x0xe53f6a+2570234]
	GetHandleVerifier [0x0xc159ea+217722]
	GetHandleVerifier [0x0xc06058+153832]
	GetHandleVerifier [0x0xc0c4bd+179533]
	GetHandleVerifier [0x0xbf7738+94152]
	GetHandleVerifier [0x0xbf78c2+94546]
	GetHandleVerifier [0x0xbe2bda+9322]
	BaseThreadInitT

Scraping ptsd_trauma:  65%|███████████████████████████████████                   | 65/100 [1:32:38<1:16:28, 131.11s/it]

Failed to extract post page https://forums.beyondblue.org.au/t5/ptsd-and-trauma/my-anger-and-living-with-ptsd/td-p/11763: Message: timeout: Timed out receiving message from renderer: 29.266
  (Session info: chrome=139.0.7258.66)
Stacktrace:
	GetHandleVerifier [0x0xbefd33+62915]
	GetHandleVerifier [0x0xbefd74+62980]
	(No symbol) [0x0xa23e13]
	(No symbol) [0x0xa14aab]
	(No symbol) [0x0xa147e1]
	(No symbol) [0x0xa12784]
	(No symbol) [0x0xa1324d]
	(No symbol) [0x0xa1fa99]
	(No symbol) [0x0xa30f75]
	(No symbol) [0x0xa36956]
	(No symbol) [0x0xa1388d]
	(No symbol) [0x0xa30d1e]
	(No symbol) [0x0xab29cc]
	(No symbol) [0x0xa911d6]
	(No symbol) [0x0xa60833]
	(No symbol) [0x0xa616a4]
	GetHandleVerifier [0x0xe58d23+2590131]
	GetHandleVerifier [0x0xe53f6a+2570234]
	GetHandleVerifier [0x0xc159ea+217722]
	GetHandleVerifier [0x0xc06058+153832]
	GetHandleVerifier [0x0xc0c4bd+179533]
	GetHandleVerifier [0x0xbf7738+94152]
	GetHandleVerifier [0x0xbf78c2+94546]
	GetHandleVerifier [0x0xbe2bda+9322]
	BaseThre

Scraping ptsd_trauma:  68%|██████████████████████████████████████                  | 68/100 [1:37:40<55:17, 103.68s/it]

Failed to extract post page https://forums.beyondblue.org.au/t5/ptsd-and-trauma/dictator/td-p/16682: Message: timeout: Timed out receiving message from renderer: 29.069
  (Session info: chrome=139.0.7258.66)
Stacktrace:
	GetHandleVerifier [0x0xbefd33+62915]
	GetHandleVerifier [0x0xbefd74+62980]
	(No symbol) [0x0xa23e13]
	(No symbol) [0x0xa14aab]
	(No symbol) [0x0xa147e1]
	(No symbol) [0x0xa12784]
	(No symbol) [0x0xa1324d]
	(No symbol) [0x0xa1fa99]
	(No symbol) [0x0xa30f75]
	(No symbol) [0x0xa36956]
	(No symbol) [0x0xa1388d]
	(No symbol) [0x0xa30d1e]
	(No symbol) [0x0xab29cc]
	(No symbol) [0x0xa911d6]
	(No symbol) [0x0xa60833]
	(No symbol) [0x0xa616a4]
	GetHandleVerifier [0x0xe58d23+2590131]
	GetHandleVerifier [0x0xe53f6a+2570234]
	GetHandleVerifier [0x0xc159ea+217722]
	GetHandleVerifier [0x0xc06058+153832]
	GetHandleVerifier [0x0xc0c4bd+179533]
	GetHandleVerifier [0x0xbf7738+94152]
	GetHandleVerifier [0x0xbf78c2+94546]
	GetHandleVerifier [0x0xbe2bda+9322]
	BaseThreadInitThunk [0x0x75f7

Scraping ptsd_trauma:  75%|██████████████████████████████████████████▊              | 75/100 [1:47:59<30:12, 72.49s/it]

Failed to extract post page https://forums.beyondblue.org.au/t5/ptsd-and-trauma/i-have-no-one/td-p/506919: Message: timeout: Timed out receiving message from renderer: 29.429
  (Session info: chrome=139.0.7258.66)
Stacktrace:
	GetHandleVerifier [0x0xbefd33+62915]
	GetHandleVerifier [0x0xbefd74+62980]
	(No symbol) [0x0xa23e13]
	(No symbol) [0x0xa14aab]
	(No symbol) [0x0xa147e1]
	(No symbol) [0x0xa12784]
	(No symbol) [0x0xa1324d]
	(No symbol) [0x0xa1fa99]
	(No symbol) [0x0xa30f75]
	(No symbol) [0x0xa36956]
	(No symbol) [0x0xa1388d]
	(No symbol) [0x0xa30d1e]
	(No symbol) [0x0xab29cc]
	(No symbol) [0x0xa911d6]
	(No symbol) [0x0xa60833]
	(No symbol) [0x0xa616a4]
	GetHandleVerifier [0x0xe58d23+2590131]
	GetHandleVerifier [0x0xe53f6a+2570234]
	GetHandleVerifier [0x0xc159ea+217722]
	GetHandleVerifier [0x0xc06058+153832]
	GetHandleVerifier [0x0xc0c4bd+179533]
	GetHandleVerifier [0x0xbf7738+94152]
	GetHandleVerifier [0x0xbf78c2+94546]
	GetHandleVerifier [0x0xbe2bda+9322]
	BaseThreadInitThunk [0x

Scraping ptsd_trauma: 100%|████████████████████████████████████████████████████████| 100/100 [2:26:11<00:00, 87.72s/it]


Saved 2000 posts to Data\posts_ptsd_trauma.csv
Saved 994 comments to Data\comments_ptsd_trauma.csv


Scraping suicidal_selfharm:  70%|███████████████████████████████████▋               | 70/100 [1:29:01<44:34, 89.15s/it]

Failed to extract post page https://forums.beyondblue.org.au/t5/suicidal-thoughts-and-self-harm/frustrated-and-stupid-idiot-that-sabotaging-life-by-being-a-joke/td-p/22777: Message: timeout: Timed out receiving message from renderer: 29.074
  (Session info: chrome=139.0.7258.66)
Stacktrace:
	GetHandleVerifier [0x0xbefd33+62915]
	GetHandleVerifier [0x0xbefd74+62980]
	(No symbol) [0x0xa23e13]
	(No symbol) [0x0xa14aab]
	(No symbol) [0x0xa147e1]
	(No symbol) [0x0xa12784]
	(No symbol) [0x0xa1324d]
	(No symbol) [0x0xa1fa99]
	(No symbol) [0x0xa30f75]
	(No symbol) [0x0xa36956]
	(No symbol) [0x0xa1388d]
	(No symbol) [0x0xa30d1e]
	(No symbol) [0x0xab29cc]
	(No symbol) [0x0xa911d6]
	(No symbol) [0x0xa60833]
	(No symbol) [0x0xa616a4]
	GetHandleVerifier [0x0xe58d23+2590131]
	GetHandleVerifier [0x0xe53f6a+2570234]
	GetHandleVerifier [0x0xc159ea+217722]
	GetHandleVerifier [0x0xc06058+153832]
	GetHandleVerifier [0x0xc0c4bd+179533]
	GetHandleVerifier [0x0xbf7738+94152]
	GetHandleVerifier [0x0xbf78c2+94

Scraping suicidal_selfharm:  82%|█████████████████████████████████████████▊         | 82/100 [1:47:49<28:10, 93.93s/it]

Failed to extract post page https://forums.beyondblue.org.au/t5/suicidal-thoughts-and-self-harm/the-covert-narcissist-is-the-weak-one/td-p/31617: Message: timeout: Timed out receiving message from renderer: 29.392
  (Session info: chrome=139.0.7258.66)
Stacktrace:
	GetHandleVerifier [0x0xbefd33+62915]
	GetHandleVerifier [0x0xbefd74+62980]
	(No symbol) [0x0xa23e13]
	(No symbol) [0x0xa14aab]
	(No symbol) [0x0xa147e1]
	(No symbol) [0x0xa12784]
	(No symbol) [0x0xa1324d]
	(No symbol) [0x0xa1fa99]
	(No symbol) [0x0xa30f75]
	(No symbol) [0x0xa36956]
	(No symbol) [0x0xa1388d]
	(No symbol) [0x0xa30d1e]
	(No symbol) [0x0xab29cc]
	(No symbol) [0x0xa911d6]
	(No symbol) [0x0xa60833]
	(No symbol) [0x0xa616a4]
	GetHandleVerifier [0x0xe58d23+2590131]
	GetHandleVerifier [0x0xe53f6a+2570234]
	GetHandleVerifier [0x0xc159ea+217722]
	GetHandleVerifier [0x0xc06058+153832]
	GetHandleVerifier [0x0xc0c4bd+179533]
	GetHandleVerifier [0x0xbf7738+94152]
	GetHandleVerifier [0x0xbf78c2+94546]
	GetHandleVerifier [0x

Scraping suicidal_selfharm:  90%|█████████████████████████████████████████████▉     | 90/100 [2:00:46<16:01, 96.11s/it]

Failed to extract post page https://forums.beyondblue.org.au/t5/suicidal-thoughts-and-self-harm/what-to-do-if-person-who-saved-your-life-no-longer-talks-to-you/td-p/14350: Message: timeout: Timed out receiving message from renderer: 29.288
  (Session info: chrome=139.0.7258.66)
Stacktrace:
	GetHandleVerifier [0x0xbefd33+62915]
	GetHandleVerifier [0x0xbefd74+62980]
	(No symbol) [0x0xa23e13]
	(No symbol) [0x0xa14aab]
	(No symbol) [0x0xa147e1]
	(No symbol) [0x0xa12784]
	(No symbol) [0x0xa1324d]
	(No symbol) [0x0xa1fa99]
	(No symbol) [0x0xa30f75]
	(No symbol) [0x0xa36956]
	(No symbol) [0x0xa1388d]
	(No symbol) [0x0xa30d1e]
	(No symbol) [0x0xab29cc]
	(No symbol) [0x0xa911d6]
	(No symbol) [0x0xa60833]
	(No symbol) [0x0xa616a4]
	GetHandleVerifier [0x0xe58d23+2590131]
	GetHandleVerifier [0x0xe53f6a+2570234]
	GetHandleVerifier [0x0xc159ea+217722]
	GetHandleVerifier [0x0xc06058+153832]
	GetHandleVerifier [0x0xc0c4bd+179533]
	GetHandleVerifier [0x0xbf7738+94152]
	GetHandleVerifier [0x0xbf78c2+945

Scraping suicidal_selfharm:  92%|██████████████████████████████████████████████▉    | 92/100 [2:14:33<11:42, 87.75s/it]

Failed to load page https://forums.beyondblue.org.au/t5/suicidal-thoughts-and-self-harm/bd-p/c1-sc2-b4/page/93?sort=recent





Error scraping staying_well: Could not reach host. Are you offline?


Scraping treatments: 100%|█████████████████████████████████████████████████████████| 100/100 [1:42:04<00:00, 61.24s/it]


Saved 2000 posts to Data\posts_treatments.csv
Saved 1000 comments to Data\comments_treatments.csv


Scraping relationships: 100%|██████████████████████████████████████████████████████| 100/100 [1:39:30<00:00, 59.71s/it]


Saved 2000 posts to Data\posts_relationships.csv
Saved 1000 comments to Data\comments_relationships.csv


Scraping supporting_friends: 100%|█████████████████████████████████████████████████| 100/100 [1:41:02<00:00, 60.63s/it]


Saved 2000 posts to Data\posts_supporting_friends.csv
Saved 1000 comments to Data\comments_supporting_friends.csv


Scraping long_term_support:  17%|███████▉                                       | 17/100 [3:15:00<15:52:04, 688.25s/it]


Saved 321 posts to Data\posts_long_term_support.csv
Saved 171 comments to Data\comments_long_term_support.csv


Scraping young_people: 100%|███████████████████████████████████████████████████████| 100/100 [1:34:17<00:00, 56.57s/it]


Saved 2000 posts to Data\posts_young_people.csv
Saved 1000 comments to Data\comments_young_people.csv


Scraping Sex_identity:  78%|███████████████████████████████████████████▋            | 78/100 [1:25:15<24:02, 65.58s/it]


Saved 1532 posts to Data\posts_Sex_identity.csv
Saved 782 comments to Data\comments_Sex_identity.csv


Scraping Multiculture:  25%|██████████████                                          | 25/100 [29:40<1:29:02, 71.24s/it]


Saved 508 posts to Data\posts_Multiculture.csv
Saved 258 comments to Data\comments_Multiculture.csv


Scraping Grief_loss:  87%|██████████████████████████████████████████████████▍       | 87/100 [1:25:48<12:49, 59.18s/it]


Saved 1729 posts to Data\posts_Grief_loss.csv
Saved 879 comments to Data\comments_Grief_loss.csv
