In [16]:
# Install Playwright, Firefox, and other required packages
!pip install playwright nest_asyncio beautifulsoup4 pandas
!playwright install firefox




In [17]:
import nest_asyncio
nest_asyncio.apply()  # Patch Jupyter's asyncio loop

import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd


In [18]:
async def fetch_page(
    url="https://dps.arkansas.gov/law-enforcement/arkansas-state-police/directors-office/asp-commission/asp-commission-meeting-minutes/",
    screenshot_path="meeting_minutes_page.png",
    headless=False,
):
    """Load a page in Firefox via Playwright, screenshot it, and return its HTML.

    Called with no arguments, this fetches the ASP Commission meeting minutes
    page with a visible browser window and writes the screenshot to
    ``meeting_minutes_page.png``.

    Args:
        url: Address of the page to load.
        screenshot_path: File path the page screenshot is written to.
        headless: Passed to ``firefox.launch``; ``False`` shows the browser
            window, ``True`` runs without one.

    Returns:
        The page's HTML as returned by ``page.content()``.

    Raises:
        Any exception raised by Playwright during launch, navigation,
        screenshot, or content retrieval is propagated to the caller; the
        browser is closed first.
    """
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=headless)
        try:
            page = await browser.new_page()

            # Wait for network activity to settle before reading the page, so
            # content that is loaded after the initial response is included.
            await page.goto(url, wait_until="networkidle")

            await page.screenshot(path=screenshot_path)
            print(f"Screenshot saved as {screenshot_path}")

            return await page.content()
        finally:
            # Close the browser even when navigation or the screenshot fails,
            # so a failed run does not leave a Firefox window/process behind.
            await browser.close()

# Jupyter/IPython cells accept a top-level `await`, so the coroutine is awaited
# directly here. The returned HTML is kept in `html` for the parsing cell below.
html = await fetch_page()


Screenshot saved as meeting_minutes_page.png


In [19]:
# Parse the fetched HTML
soup = BeautifulSoup(html, "html.parser")

# Find the first <ul class="wp-block-list">.
# NOTE(review): assumed to be the list of meeting links — confirm against the
# live page if it ever gains another list with the same class.
meeting_list = soup.find("ul", class_="wp-block-list")
if meeting_list is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError(
        'No <ul class="wp-block-list"> element found in the fetched HTML; '
        "the page layout may have changed or the page did not load completely."
    )

# Keep only <a> tags that carry an href, so a bare anchor cannot raise KeyError
links = meeting_list.find_all("a", href=True)

# One record per link: the link text is the meeting date, the href is the PDF
data = [
    {"meeting_date": a.text.strip(), "pdf_url": a["href"]}
    for a in links
]

# Convert to DataFrame; explicit columns keep the schema even with no links
df = pd.DataFrame(data, columns=["meeting_date", "pdf_url"])

# Preview the first few rows
df.head()


Unnamed: 0,meeting_date,pdf_url
0,"September 11, 2025",https://dps.arkansas.gov/wp-content/uploads/AS...
1,"August 14, 2025",https://dps.arkansas.gov/wp-content/uploads/AS...
2,"July 10, 2025",https://dps.arkansas.gov/wp-content/uploads/AS...
3,"June 12, 2025",https://dps.arkansas.gov/wp-content/uploads/AS...
4,"May 8, 2025",https://dps.arkansas.gov/wp-content/uploads/AS...


In [20]:
# Write the meeting table to disk (row index omitted) and report the file name
csv_path = "asp_meeting_minutes.csv"
df.to_csv(csv_path, index=False)
print(f"CSV saved as {csv_path}")


CSV saved as asp_meeting_minutes.csv
