In [8]:
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

In [None]:
def get_soup(link, timeout=30):
    """Fetch a page and parse it into a BeautifulSoup object.

    Args:
        link: URL of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely on a stalled
            connection and hang the whole scrape.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend, or ``None`` if the request failed (the error
        is printed rather than raised).
    """
    try:
        response = requests.get(link, timeout=timeout)
        response.raise_for_status()  # Raises stored HTTPError, if one occurred.
    except requests.HTTPError as http_err:
        print(f'HTTP error occurred: {http_err}')
        return None
    except Exception as err:
        # Deliberately broad: timeouts, connection failures and malformed URLs
        # all end up here so that callers only have to check for None.
        print(f'Other error occurred: {err}')
        return None
    else:
        return BeautifulSoup(response.text, 'html.parser')

In [6]:
# Crawl the paginated speech listing: for every listing page, download each
# linked speech, collect its title/link/date/content into `speeches`, then
# follow the "Next →" pagination link until there is none.
page_link = "https://www.ovp.gov.ph/category/1/vp-sara-speeches"
speeches = []
page_count = 1
visited_pages = set()  # Guards against a pagination cycle re-scraping pages forever.

while page_link and page_link not in visited_pages:
    visited_pages.add(page_link)
    print(f"Scraping page {page_count}...")
    page_soup = get_soup(page_link)

    if page_soup is None:
        # The "Next →" link lives on the page that failed to load, so there is
        # no way to advance past it. Using `continue` here would retry the same
        # URL forever; stop and keep what has been collected so far.
        print(f"Error scraping page {page_count}. Stopping...")
        break

    for item in page_soup.find_all("div", class_="media-post-item"):
        title_tag = item.find("a", class_="media-post-title")
        if title_tag is None or not title_tag.get("href"):
            # Without a title link there is no speech page to fetch.
            print("Found a listing item without a title link. Skipping...")
            continue

        title = title_tag.text.strip()
        # urljoin leaves absolute URLs untouched and resolves relative ones
        # against the listing page they were found on.
        link = urljoin(page_link, title_tag["href"])

        # The date is optional metadata: record None instead of crashing when
        # the tag is missing.
        date_tag = item.find("p", class_="media-post-date")
        date = date_tag.text.strip() if date_tag is not None else None

        print(f"Scraping speech '{title}'...")
        speech_soup = get_soup(link)

        if speech_soup is None:
            print(f"Error scraping speech '{title}'. Skipping...")
            continue

        content_tag = speech_soup.find("div", class_="media-post-page-content")
        if content_tag is None:
            print(f"No content found for speech '{title}'. Skipping...")
            continue

        speeches.append(
            {
                "title": title,
                "link": link,
                "date": date,
                "content": content_tag.text.strip(),
            }
        )

    # Take the href of the first pagination anchor labelled exactly "Next →";
    # the crawl ends when no such anchor (or no href on it) exists.
    next_href = next(
        (
            anchor.get("href")
            for anchor in page_soup.find_all("a", class_="page-link")
            if anchor.text == "Next →"
        ),
        None,
    )
    page_link = urljoin(page_link, next_href) if next_href else None

    page_count += 1

Scraping page 1...
Scraping speech 'ISO Recertification of the Office of the Vice President'...
Scraping speech 'VPSD Speech on Go Negosyo's 18th anniversary'...
Scraping speech 'VPSD Speech for National Children's Month Culminating Activity'...
Scraping speech 'VPSD Speech for the Gift-Giving in Binalonan'...
Scraping speech 'VP Sara speech for the Renaming of Agham Road and BIR Road to Senator Miriam P. Defensor-Santiago Avenue'...
Scraping speech 'VPSD Speech During the Signing Ceremony for the Records of Discussion of the JICA-assisted Formulation of the Comprehensive and Sustainable Urban Development Masterplan for Metropolitan Davao'...
Scraping speech 'VPSD Speech for 18th PSECE'...
Scraping speech 'VPSD Speech for the 49th Philippine Business Conference and Expo'...
Scraping speech 'VPSD Speech for Public Attorney’s Office 5th National Convention for the Rank-and-File Employee'...
Scraping speech 'VPSD Speech for the 2023 National Literacy Conference'...
Scraping page 2...
Scra

In [9]:
# Persist the scraped speeches as pretty-printed JSON.
# The encoding is set explicitly so the result does not depend on the
# platform's default, and ensure_ascii=False keeps non-ASCII characters
# (e.g. the ’ that appears in some titles) readable instead of \uXXXX-escaped.
with open("speeches.json", "w", encoding="utf-8") as f:
    json.dump(speeches, f, indent=4, ensure_ascii=False)