In [0]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import pandas as pd
import time


import logging

# Route ERROR-and-above records to a file so failed page fetches leave a
# timestamped trace on disk.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.ERROR,
    filename="scraping_errors.log",
)


# Base against which the links found in the scraped pages are resolved.
main_site = "https://www.gov.uk"

# First results page of the guidance-and-regulation search, filtered to the
# marine-management-organisation organisation and ordered by most viewed.
start_url = (
    "https://www.gov.uk/search/guidance-and-regulation"
    "?organisations[]=marine-management-organisation"
    "&order=most-viewed"
)

def get_next_page(soup):
    """Return the absolute URL of the next results page, or ``None``.

    Args:
        soup: Parsed page (a BeautifulSoup tree) to search for the
            "next" pagination control.

    Returns:
        The next page's URL resolved against ``main_site``, or ``None`` when
        the page has no "next" control or that control carries no usable
        ``href`` (which the caller treats as "stop crawling").
    """
    nav = soup.find("div", class_="govuk-pagination__next")
    if nav is None:
        return None

    # Match on the single pagination class. Passing the whole
    # "govuk-link govuk-pagination__link" string only matches when the class
    # attribute is exactly that string, so any added or reordered class
    # would silently end pagination early.
    page = nav.find("a", class_="govuk-pagination__link")
    if page is None:
        return None

    href = page.get('href')
    if not href:
        # urljoin(base, None) / urljoin(base, "") returns the base itself,
        # which would send the crawler to the site root instead of stopping.
        return None

    return urljoin(main_site, href)

def _extract_publications(soup):
    """Return one ``{"Title", "URL", "Last Updated"}`` dict per result item in ``soup``."""
    records = []
    for article in soup.find_all("li", class_="gem-c-document-list__item"):
        title_div = article.find('div', class_="gem-c-document-list__item-title")
        link = title_div.find("a") if title_div else None
        if not link:
            # Items without a titled link carry nothing worth recording.
            continue

        href = link.get('href')
        date_tag = article.find('time')
        records.append({
            "Title": link.text.strip(),
            # A missing href would make urljoin return the bare site root;
            # record the same "N/A" placeholder used for a missing date.
            "URL": urljoin(main_site, href) if href else "N/A",
            "Last Updated": date_tag.text.strip() if date_tag else "N/A",
        })
    return records


def fetch_publication(url, delay=0.125, verify=True):
    """Scrape every page of a search listing, following "next" links.

    Starting at ``url``, each page is downloaded and parsed, its result items
    are collected, and the crawl continues with the URL returned by
    ``get_next_page`` until there is none.

    Args:
        url: URL of the first results page.
        delay: Seconds to wait between consecutive page requests. The default
            of 0.125 caps the crawl at roughly 8 requests per second.
        verify: Passed to ``requests`` as the TLS verification setting.
            Verification is on by default; pass ``False`` only when the
            certificate genuinely cannot be validated in your environment.

    Returns:
        A list of dicts with keys ``"Title"``, ``"URL"`` and
        ``"Last Updated"``. If a request fails, the error is printed and
        logged, and the records gathered so far are returned.
    """
    all_data = []
    page_count = 0
    visited = set()

    # One session for the whole crawl so the connection is reused across
    # pages and closed when the crawl ends.
    with requests.Session() as session:
        while url:
            if url in visited:
                # A "next" link pointing back at an earlier page would
                # otherwise keep the loop running forever.
                error_message = f"Pagination loop detected at {url}; stopping."
                print(error_message)
                logging.error(error_message)
                break
            visited.add(url)

            try:
                response = session.get(url, verify=verify, timeout=10)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                error_message = f"Error fetching page {page_count + 1}: {e}"
                print(error_message)
                logging.error(error_message)
                break

            soup = BeautifulSoup(response.text, 'html.parser')
            all_data.extend(_extract_publications(soup))

            page_count += 1
            print(f"Page {page_count} scraped successfully.")

            url = get_next_page(soup)
            if url:
                # Only pause when another request is actually coming.
                time.sleep(delay)

    print("Scraping complete.")
    return all_data

if __name__ == "__main__":
    # Crawl the listing from its first page and tabulate the results.
    data = fetch_publication(start_url)
    # Name the columns explicitly so that a scrape which returned no records
    # still produces a frame (and later a CSV) with the expected headers.
    df = pd.DataFrame(data, columns=["Title", "URL", "Last Updated"])
    print(df)



Page 1 scraped successfully.




Page 2 scraped successfully.




Page 3 scraped successfully.




Page 4 scraped successfully.




Page 5 scraped successfully.




Page 6 scraped successfully.




Page 7 scraped successfully.




Page 8 scraped successfully.




Page 9 scraped successfully.




Page 10 scraped successfully.




Page 11 scraped successfully.




Page 12 scraped successfully.




Page 13 scraped successfully.




Page 14 scraped successfully.




Page 15 scraped successfully.




Page 16 scraped successfully.
Scraping complete.
                                                 Title  \
0                        Create a UK catch certificate   
1                          Do I need a marine licence?   
2    EU IUU Regulation Changes 2026: Catch Certific...   
3                         Fisheries and Seafood Scheme   
4                     Create a UK processing statement   
..                                                 ...   
304         Demersal landing obligation 2018: Guidance   
305  Fish Producer Organisation (PO) compliance gui...   
306                    Fisheries quota allocation 2019   
307   Accessibility statement for Explore Marine Plans   
308        Contact the marine planning team at the MMO   

                                                   URL     Last Updated  
0    https://www.gov.uk/guidance/create-a-uk-catch-...      2 July 2025  
1    https://www.gov.uk/guidance/do-i-need-a-marine...  6 November 2025  
2    https://www.gov.uk/guidance

In [0]:
# Persist the scraped table to disk, leaving out the positional row index.
df.to_csv(path_or_buf="published.csv", index=False)