# In [None]:  (Jupyter notebook cell marker left over from export)
# Import the required libraries
import re
import httpx
from selectolax.parser import HTMLParser
from dataclasses import dataclass, asdict
import csv

# Base URL of the volume being scraped. It is fetched as-is to discover the
# page links, and a page number is appended directly to it to build each
# individual page URL (see get_html), so the trailing slash matters.
base_url = "https://scotlandsplaces.gov.uk/digital-volumes/ordnance-survey-name-books/lanarkshire-os-name-books-1858-1861/lanarkshire-volume-01/"

# Placeholder used for every column when no value is supplied.
_NO_TABLE = "No table on page"


@dataclass
class placename_information:
    """Record holding the five text columns of one name-book table row.

    All fields are optional; any field left out takes the shared
    placeholder text instead.
    """

    # NOTE: an optional ``title`` field (``str | None``) was sketched here
    # in a comment but is not enabled.
    placename: str = _NO_TABLE
    various_spellings: str = _NO_TABLE
    authority: str = _NO_TABLE
    situation: str = _NO_TABLE
    description: str = _NO_TABLE

# Create a function to get the page numbers
def get_page_numbers(url):
    """Collect the page numbers linked from the page at *url*.

    Fetches ``url``, inspects every ``<a>`` element, and treats any ``href``
    that ends in a run of digits as a link to a numbered page.

    Args:
        url: Address of the page whose links are scanned.

    Returns:
        The distinct trailing numbers found, as a list of ints in ascending
        order. Empty if no link ends in digits.

    Raises:
        httpx.HTTPStatusError: If the response status is not a success code.
    """
    response = httpx.get(url)
    # Fail loudly instead of silently scraping links out of an error page.
    response.raise_for_status()
    html = HTMLParser(response.text)

    trailing_digits = re.compile(r'\d+$')
    page_numbers = set()
    for link in html.css('a'):
        href = link.attributes.get('href')
        if not href:
            # Anchors without a usable href cannot point at a page.
            continue
        match = trailing_digits.search(href)
        if match:
            page_numbers.add(int(match.group()))

    # Sorted so pages are visited (and CSV rows appended) in a stable,
    # ascending order rather than in arbitrary set-iteration order.
    return sorted(page_numbers)

# Create a function to get the html
def get_html(page):
    """Download a single page of the volume and return it parsed.

    Args:
        page: Page identifier; it is appended directly to ``base_url``.

    Returns:
        An ``HTMLParser`` built from the text of the HTTP response.
    """
    page_url = "{}{}".format(base_url, page)
    return HTMLParser(httpx.get(page_url).text)

# Create a function to parse the placename information
def parse_placename(html):
    """Extract the name-book entries from a parsed page.

    Every ``<tr>`` under ``div.well`` is inspected. Only rows with exactly
    five ``<td>`` cells are treated as entries, which skips the table header
    rows and any row that does not follow the five-column layout.

    Args:
        html: Parsed document exposing a ``css(selector)`` method.

    Returns:
        A list of dicts, one per entry row in document order, with the keys
        ``placename``, ``various_spellings``, ``authority``, ``situation``
        and ``description``. Empty when the page has no matching rows.
    """
    results = []
    for row in html.css("div.well tr"):
        cells = row.css("td")
        if len(cells) != 5:
            continue
        placename, spellings, authority, situation, description = (
            cell.text().strip() for cell in cells
        )
        entry = placename_information(
            placename=placename,
            # The cell text is stripped *before* newlines are turned into
            # '|' separators; doing it the other way round converts leading
            # and trailing newlines into stray '|' that strip() cannot remove.
            various_spellings=spellings.replace('\n', '|'),
            authority=authority.replace('\n', '|'),
            situation=situation,
            description=description,
        )
        results.append(asdict(entry))
    return results

# Create a function to write the results to a csv
def to_csv(res):
    """Append rows to ``name_books.csv`` in the current working directory.

    The file is opened in append mode, so repeated calls accumulate rows.
    No header row is written.

    Args:
        res: Iterable of dicts keyed by ``placename``, ``various_spellings``,
            ``authority``, ``situation`` and ``description``. An empty
            iterable leaves the file content unchanged (the file is still
            created if missing).
    """
    fieldnames = [
        "placename",
        "various_spellings",
        "authority",
        "situation",
        "description",
    ]
    # newline="" hands line-ending control to the csv module, as its
    # documentation requires; without it, platforms that translate "\n"
    # produce "\r\r\n" row endings (blank rows) and mangle newlines embedded
    # in quoted fields. The explicit UTF-8 encoding keeps non-ASCII place
    # names writable regardless of the platform's default encoding.
    with open("name_books.csv", "a", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writerows(res)

# Create a main function to run the code
def main():
    """Scrape every linked page of the volume and append its rows to the CSV.

    For each page number found via ``base_url``: fetch the page, print its
    title and the parsed rows, then hand the rows to ``to_csv``.
    """
    for page_number in get_page_numbers(base_url):
        document = get_html(page_number)
        print(document.css_first("title").text())
        rows = parse_placename(document)
        print(rows)
        to_csv(rows)

# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()