In [8]:
import requests
from bs4 import BeautifulSoup

# Step 1: Fetch the webpage
url = 'https://www.thebottomline.org.uk/summaries/icm/bling-iii/'
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Step 2: Parse the webpage content
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # Step 3: Extract the title
    title_tag = soup.find('h1', class_='meta-article-title')
    if title_tag:
        title = title_tag.get_text(strip=True)
        print(f"Title: {title}")
    
    # Step 4: Find all <h3> tags
    headings = soup.find_all('h3')
    
    # Step 5: For each <h3> tag, find the next <ul> tag and extract the list items
    for heading in headings:
        print(f"\nHeading: {heading.get_text(strip=True)}")
        ul_tag = heading.find_next('ul')
        if ul_tag:
            list_items = [li.get_text(strip=True) for li in ul_tag.find_all('li')]
            print("List Items:")
            for item in list_items:
                print(f"- {item}")
        else:
            print("No list items found under this heading.")
else:
    print(f"Failed to retrieve the webpage. Status code: {response.status_code}")

Title: Continuous vs Intermittent β-Lactam Antibiotic Infusions in Critically Ill Patients with Sepsis TheBLING III Randomized Clinical Trial

Heading: Clinical Question
List Items:
- In critically ill adult patients with presumed sepsis, does the administration of β-lactam antibiotics via a continuous infusion compared to an intermittent infusion reduce 90-day mortality?

Heading: Background
List Items:
- β-lactam antibiotics have time-dependent bactericidal activity
- It is hypothesised that improved antibiotic exposure with continuous infusion will lead to longer time above MIC, with potentially improved bacterial eradication
- Additionally, underdosing may occur in sepsis due to:Increased cardiac output with potentially increased drug clearanceCapillary leak and increased volume of distribution
- Increased cardiac output with potentially increased drug clearance
- Capillary leak and increased volume of distribution
- BLING II(2015) showed no difference in alive ICU-free days at day

In [1]:
import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

class BottomlineSpider(CrawlSpider):
    name = 'bottomline'
    allowed_domains = ['thebottomline.org.uk']
    start_urls = ['https://www.thebottomline.org.uk/summaries/icm/']

    custom_settings = {
        'CLOSESPIDER_PAGECOUNT': 2  # Limit to 2 pages
    }

    rules = (
        Rule(LinkExtractor(allow=r'/summaries/icm/'), callback='parse_item', follow=True),
    )

    def parse_item(self, response):
        title = response.xpath('//h1[@class="meta-article-title "]/text()').get()
        sections = response.xpath('//h3')
        
        data = {
            'title': title,
            'sections': []
        }

        for section in sections:
            heading = section.xpath('text()').get()
            items = section.xpath('following-sibling::ul[1]/li/text()').getall()
            data['sections'].append({
                'heading': heading,
                'items': items
            })

        yield data