In [1]:
%pip install selenium beautifulsoup4

Collecting selenium
  Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)
Collecting beautifulsoup4
  Using cached beautifulsoup4-4.13.4-py3-none-any.whl.metadata (3.8 kB)
Collecting trio~=0.30.0 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting websocket-client~=1.8.0 (from selenium)
  Using cached websocket_client-1.8.0-py3-none-any.whl.metadata (8.0 kB)
Collecting sortedcontainers (from trio~=0.30.0->selenium)
  Downloading sortedcontainers-2.4.0-py2.py3-none-any.whl.metadata (10 kB)
Collecting outcome (from trio~=0.30.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting sniffio>=1.3.0 (from trio~=0.30.0->selenium)
  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting cffi>=1.14 (from trio~=0.30.0->selenium)
  Downloading cffi-1.17.1-cp312-cp312-win_am

In [2]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
import json

In [3]:
# Setup headless browser
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)

In [4]:
# Go to the specific Cyborg Manifesto page
url = "https://cyberfeminismindex.com/#/a-cyborg-manifesto-science-technology-and-socialist-feminism-in-the-late-20th-century"
driver.get(url)
time.sleep(30)  # Wait for JS to load

In [5]:
# Expand all entries (click each index_entry)
index_entries = driver.find_elements('css selector', '.index_entry')
for entry in index_entries:
    try:
        entry.click()
        time.sleep(0.1)
    except:
        continue

time.sleep(2)  # Let drawers expand
soup = BeautifulSoup(driver.page_source, 'html.parser')
driver.quit()

# Extract data
results = []
entries = soup.select('.index_entry')
drawers = soup.select('.index_drawer')

for entry, drawer in zip(entries, drawers):
    result = {}

    # Year
    year_tag = entry.select_one('.sm p')
    result['year'] = year_tag.get_text(strip=True) if year_tag else None

    # Author
    author_tag = entry.select_one('.md p')
    result['author'] = author_tag.get_text(strip=True) if author_tag else None

    # Excerpt
    para_tags = drawer.select('p')
    if len(para_tags) > 1:
        result['excerpt'] = para_tags[1].get_text(strip=True)

    # Link
    link_tag = drawer.select_one('a[href]')
    result['link'] = link_tag['href'] if link_tag else None

    results.append(result)

# Save as JSON
with open('cyberfeminism_manifesto_refs.json', 'w') as f:
    json.dump(results, f, indent=2)

print(f"Scraped {len(results)} entries.")

Scraped 860 entries.


In [6]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import json

# Setup Chrome options
options = Options()
options.add_argument('--headless')  # Comment this out if you want to see the browser
driver = webdriver.Chrome(options=options)

url = "https://cyberfeminismindex.com/#/a-cyborg-manifesto-science-technology-and-socialist-feminism-in-the-late-20th-century"
driver.get(url)

# Wait for at least one entry to load
WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CLASS_NAME, "index_entry")))

# Find all index entries
entries = driver.find_elements(By.CLASS_NAME, "index_entry")
actions = ActionChains(driver)

# Scroll into view and click each entry to expand drawer content
for entry in entries:
    try:
        driver.execute_script("arguments[0].scrollIntoView(true);", entry)
        actions.move_to_element(entry).perform()
        entry.click()
        time.sleep(0.05)  # Small delay to allow drawer to open
    except Exception:
        continue

# Wait a moment for all content to render
time.sleep(2)

# Parse the loaded HTML with BeautifulSoup
soup = BeautifulSoup(driver.page_source, "html.parser")
driver.quit()

# Select all entries and their corresponding drawers
entry_blocks = soup.select(".index_entry")
drawer_blocks = soup.select(".index_drawer")

results = []

for entry, drawer in zip(entry_blocks, drawer_blocks):
    result = {}

    # Extract year
    year_tag = entry.select_one('.sm p')
    if year_tag:
        year_text = year_tag.get_text(strip=True)
        # Confirm it's a number to avoid picking up counts like (5)
        result['year'] = year_text if year_text.isdigit() else None
    else:
        result['year'] = None

    # Extract title (inside .lg p)
    title_tag = entry.select_one('.lg p')
    result['title'] = title_tag.get_text(strip=True) if title_tag else None

    # Extract authors (non-empty <p> inside .md)
    author_tags = entry.select('.md p')
    authors = [p.get_text(strip=True) for p in author_tags if p.get_text(strip=True)]
    # Optional: split authors if they are all in one string separated by commas
    if authors:
        result['authors'] = authors[0].split(', ') if len(authors) == 1 else authors
    else:
        result['authors'] = None

    # Extract excerpt: skip first <p> in drawer, find first non-empty after that
    para_tags = drawer.select('p')
    excerpt = None
    for p in para_tags[1:]:
        text = p.get_text(strip=True)
        if text:
            excerpt = text
            break
    result['excerpt'] = excerpt

    # Extract link from drawer <a href>
    link_tag = drawer.select_one('a[href]')
    result['link'] = link_tag['href'] if link_tag else None

    results.append(result)

# Save results to JSON file
with open("cyberfeminism_manifesto_refs.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2, ensure_ascii=False)

print(f"Scraped {len(results)} entries")


✅ Scraped 860 entries and saved to cyberfeminism_manifesto_refs.json


In [7]:
import re
import json
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

def scrape_cyberfeminism_index():
    # Setup headless Chrome browser
    options = Options()
    options.add_argument('--headless')  # Comment out this line if you want to watch the browser
    driver = webdriver.Chrome(options=options)

    try:
        url = "https://cyberfeminismindex.com/#/a-cyborg-manifesto-science-technology-and-socialist-feminism-in-the-late-20th-century"
        driver.get(url)

        # Wait until the entries are loaded
        WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.CLASS_NAME, "index_entry")))

        # Find all entries
        entries = driver.find_elements(By.CLASS_NAME, "index_entry")
        actions = ActionChains(driver)

        # Scroll and click each entry to open its drawer content
        for entry in entries:
            try:
                driver.execute_script("arguments[0].scrollIntoView(true);", entry)
                actions.move_to_element(entry).perform()
                entry.click()
                time.sleep(0.05)  # small delay for content to open
            except Exception as e:
                print(f"Warning: Could not click entry: {e}")
                continue

        time.sleep(2)  # Wait for all drawers to fully render

        # Parse HTML with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, "html.parser")

    finally:
        driver.quit()

    # Extract entries and corresponding drawers
    entry_blocks = soup.select(".index_entry")
    drawer_blocks = soup.select(".index_drawer")

    results = []

    for i, (entry, drawer) in enumerate(zip(entry_blocks, drawer_blocks)):
        try:
            result = {}

            # Extract year from .sm p using regex for robustness
            year_tag = entry.select_one('.sm p')
            year = None
            if year_tag:
                text = year_tag.get_text(strip=True)
                match = re.search(r'\b(19|20)\d{2}\b', text)
                if match:
                    year = match.group(0)
            result['year'] = year

            # Extract project/group name from .md p (first non-empty p)
            md_ps = entry.select('.md p')
            name = None
            for p in md_ps:
                t = p.get_text(strip=True)
                if t:
                    name = t
                    break
            result['project'] = name

            # Extract authors from .lg p (split by commas if single p)
            lg_ps = entry.select('.lg p')
            authors = [p.get_text(strip=True) for p in lg_ps if p.get_text(strip=True)]
            if authors:
                result['authors'] = authors[0].split(', ') if len(authors) == 1 else authors
            else:
                result['authors'] = None

            # Extract excerpt from drawer paragraphs (skip first empty)
            para_tags = drawer.select('p')
            excerpt = None
            for p in para_tags[1:]:
                t = p.get_text(strip=True)
                if t:
                    excerpt = t
                    break
            result['excerpt'] = excerpt

            # Extract link from drawer
            link_tag = drawer.select_one('a[href]')
            result['link'] = link_tag['href'] if link_tag else None

            results.append(result)

        except Exception as e:
            print(f"Error processing entry {i}: {e}")

    # Save results to JSON file
    filename = "cyberfeminism_manifesto_stuff.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"✅ Scraped {len(results)} entries and saved to {filename}")

if __name__ == "__main__":
    scrape_cyberfeminism_index()


  (Session info: chrome=138.0.7204.169); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#elementclickinterceptedexception
Stacktrace:
	GetHandleVerifier [0x0x7ff7ad77e935+77845]
	GetHandleVerifier [0x0x7ff7ad77e990+77936]
	(No symbol) [0x0x7ff7ad539cda]
	(No symbol) [0x0x7ff7ad5986a9]
	(No symbol) [0x0x7ff7ad596042]
	(No symbol) [0x0x7ff7ad593081]
	(No symbol) [0x0x7ff7ad591f81]
	(No symbol) [0x0x7ff7ad5836c8]
	(No symbol) [0x0x7ff7ad5b88ca]
	(No symbol) [0x0x7ff7ad582f76]
	(No symbol) [0x0x7ff7ad5b8ae0]
	(No symbol) [0x0x7ff7ad5e0b07]
	(No symbol) [0x0x7ff7ad5b86a3]
	(No symbol) [0x0x7ff7ad581791]
	(No symbol) [0x0x7ff7ad582523]
	GetHandleVerifier [0x0x7ff7ada5684d+3059501]
	GetHandleVerifier [0x0x7ff7ada50c0d+3035885]
	GetHandleVerifier [0x0x7ff7ada70400+3164896]
	GetHandleVerifier [0x0x7ff7ad798c3e+185118]
	GetHandleVerifier [0x0x7ff7ad7a054f+216111]
	GetHandleVerifier [0x0x7ff7ad7872e4+113092]
	GetHandleVerifier

In [None]:
import re
import json
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

def scrape_cyberfeminism_index():
    # Setup headless Chrome
    options = Options()
    options.add_argument('--headless')  # Remove this line to watch browser action
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=options)

    try:
        url = "https://cyberfeminismindex.com/#/a-cyborg-manifesto-science-technology-and-socialist-feminism-in-the-late-20th-century"
        driver.get(url)

        # Wait for index entries to load
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "index_entry"))
        )

        entries = driver.find_elements(By.CLASS_NAME, "index_entry")
        actions = ActionChains(driver)

        # Scroll and click each entry to open drawer content
        for entry in entries:
            try:
                driver.execute_script("arguments[0].scrollIntoView(true);", entry)
                actions.move_to_element(entry).perform()
                entry.click()
                # Reduced sleep for speed, but still allow drawer to open
                time.sleep(0.03)
            except Exception as e:
                print(f"Warning: Could not click entry: {e}")
                continue

        # Small pause to let all content fully render
        time.sleep(1)

        # Parse with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, "html.parser")

    finally:
        driver.quit()

    entry_blocks = soup.select(".index_entry")
    drawer_blocks = soup.select(".index_drawer")

    results = []

    for i, (entry, drawer) in enumerate(zip(entry_blocks, drawer_blocks)):
        try:
            result = {}

            # Extract year from div.sm but exclude .num class
            sm_divs = entry.select('div.sm:not(.num)')
            year = None
            if sm_divs:
                for sm_div in sm_divs:
                    text = sm_div.get_text(strip=True)
                    match = re.search(r'\b(19|20)\d{2}\b', text)
                    if match:
                        year = match.group(0)
                        break
            result['year'] = year

            # Extract project/group name from .md p (first non-empty p)
            md_ps = entry.select('.md p')
            name = None
            for p in md_ps:
                t = p.get_text(strip=True)
                if t:
                    name = t
                    break
            result['project'] = name

            # Extract authors from .lg p
            lg_ps = entry.select('.lg p')
            authors = [p.get_text(strip=True) for p in lg_ps if p.get_text(strip=True)]
            if authors:
                result['authors'] = authors[0].split(', ') if len(authors) == 1 else authors
            else:
                result['authors'] = None

            # Extract excerpt (skip first empty <p>)
            para_tags = drawer.select('p')
            excerpt = None
            for p in para_tags[1:]:
                t = p.get_text(strip=True)
                if t:
                    excerpt = t
                    break
            result['excerpt'] = excerpt

            # Extract link from drawer <a href>
            link_tag = drawer.select_one('a[href]')
            result['link'] = link_tag['href'] if link_tag else None

            results.append(result)

        except Exception as e:
            print(f"Error processing entry {i}: {e}")

    # Save results to JSON file
    filename = "cyberfeminism_manifesto_refs.json"
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)

    print(f"✅ Scraped {len(results)} entries and saved to {filename}")

if __name__ == "__main__":
    scrape_cyberfeminism_index()


  (Session info: chrome=138.0.7204.169); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#elementclickinterceptedexception
Stacktrace:
	GetHandleVerifier [0x0x7ff7ad77e935+77845]
	GetHandleVerifier [0x0x7ff7ad77e990+77936]
	(No symbol) [0x0x7ff7ad539cda]
	(No symbol) [0x0x7ff7ad5986a9]
	(No symbol) [0x0x7ff7ad596042]
	(No symbol) [0x0x7ff7ad593081]
	(No symbol) [0x0x7ff7ad591f81]
	(No symbol) [0x0x7ff7ad5836c8]
	(No symbol) [0x0x7ff7ad5b88ca]
	(No symbol) [0x0x7ff7ad582f76]
	(No symbol) [0x0x7ff7ad5b8ae0]
	(No symbol) [0x0x7ff7ad5e0b07]
	(No symbol) [0x0x7ff7ad5b86a3]
	(No symbol) [0x0x7ff7ad581791]
	(No symbol) [0x0x7ff7ad582523]
	GetHandleVerifier [0x0x7ff7ada5684d+3059501]
	GetHandleVerifier [0x0x7ff7ada50c0d+3035885]
	GetHandleVerifier [0x0x7ff7ada70400+3164896]
	GetHandleVerifier [0x0x7ff7ad798c3e+185118]
	GetHandleVerifier [0x0x7ff7ad7a054f+216111]
	GetHandleVerifier [0x0x7ff7ad7872e4+113092]
	GetHandleVerifier