### Extrahera ut personer på Norra Begravningsplatsen från kulturgravar

* https://github.com/salgo60/Gravstensinventeringen-Wikidata/issues/39

In [1]:
import sys
!{sys.executable} -m pip install wikibaseintegrator



In [3]:
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag 
# URL of the webpage to scrape
urllist = {
           "https://www.kulturgravar.se/6_14_500_SV.html",
           "https://www.kulturgravar.se/6_14_500_SO.html"} #NorraBegravningsplatsen

#urllist = {           "https://www.kulturgravar.se/6_14_263_karta.html",
#           "https://www.kulturgravar.se/6_14_500_SV.html",
#           "https://www.kulturgravar.se/6_14_500_SO.html"}
all_points = []

# Send a GET reaquest to fetch the page content
for url in urllist:     
    print(url)
    response = requests.get(url)
    if response.status_code == 200:
        print("Page fetched successfully!")
    else:
        print(f"Failed to fetch the page. Status code: {response.status_code}")
        exit()
    
    # Parse the HTML content
    soup = BeautifulSoup(response.content, "html.parser")
    
    # Locate the specific elements containing the list of people
    # Based on the structure of the page source, adjust the selector
    people = [] 
    
    # Find all the required elements
    points = soup.find_all('div', class_='point')
    if points:  # Ensure current_results is not None
        all_points.extend(points)


# Extract details
results = []
#print(all_points)
for points in  all_points:
    #print(len(points), points)

    for point in points:
        # Ensure the current element is a bs4.element.Tag
        if isinstance(point, Tag):
            try:
                # Extract the name and dates (from abbr title)
                label_title = point.find('abbr')['title']
            
                # Append the result as a tuple
                results.append(label_title)
            except:
                print("\tError title")
                print(point)

# Print the extracted data
for person in results:
    print(f"Name and Dates: {person}")


https://www.kulturgravar.se/6_14_500_SO.html
Page fetched successfully!
https://www.kulturgravar.se/6_14_500_SV.html
Page fetched successfully!
Name and Dates: Ingrid Bergman (1915-1982)
Name and Dates: Vilhelm Moberg (1898-1973)
Name and Dates: Familjegrav Schmelzkopf, Ekebohm & Stålbo
Name and Dates: Tatjana Angelini (1923-2006)
Name and Dates: Lovisa Bellman (1755-1847)
Name and Dates: Hinke Berggren (1861-1936)
Name and Dates: Nils Ericson (1802-1870)
Name and Dates: Waldemar Feith (1852-1918)
Name and Dates: James Haarlef Haasum (1791-1871)
Name and Dates: Siri von Essen (1850-1912)
Name and Dates: Johan Peter Molin (1814-1873)
Name and Dates: Ture Nerman (1886-1969)
Name and Dates: Jenny Nyström (1854-1946)
Name and Dates: Hanna Ouchterlony (1838-1924)
Name and Dates: Samuel Owen (1774-1854)
Name and Dates: Karl Staaf (1860-1915)
Name and Dates: Per Albin Hansson (1885-1946)
Name and Dates: Helge Palmcrantz (1842-1880)
Name and Dates: Viktor Balck (1844-1928)
Name and Dates: Fer

In [4]:
from wikibaseintegrator.wbi_helpers import execute_sparql_query
from wikibaseintegrator import WikibaseIntegrator 
from wikibaseintegrator.wbi_config import config as wbi_config

In [5]:
wbi_config['USER_AGENT'] = 'WikibaseIntegrator in PAWS by salgo60'
wbi = WikibaseIntegrator()

def get_qnumber(name):
        
    # Query to search for the person by label
    query = f"""
    SELECT ?person ?personLabel WHERE {{
        ?person ?label "{name}"@sv. # Swedish label
        SERVICE wikibase:label {{ bd:serviceParam wikibase:language "sv,en". }}
    }}
    LIMIT 1
    """
    
    # Execute the query
    try:
        results = execute_sparql_query(query)
        bindings = results["results"]["bindings"]
        print(f"Found {len(bindings)} results")

        if bindings:
            return bindings[0]['person']['value'].split('/')[-1]  # Extract Qnumber
        else:
            return None
    except Exception as e:
        print(f"Error searching for {name}: {e}")
        return None

# Your list of results


for person in results:
    name_and_dates = person
    name = name_and_dates.split(' (')[0]  # Extract name only
    qnumber = get_qnumber(name)
    
    print(f"File Link: {person[0]}")
    print(f"Grave Title: {person[1]}")
    print(f"Name and Dates: {name_and_dates}")
    print(f"Wikidata Qnumber: {qnumber if qnumber else 'Not found'}")
    print("-" * 40)


Found 1 results
File Link: I
Grave Title: n
Name and Dates: Ingrid Bergman (1915-1982)
Wikidata Qnumber: Q43247-454c1062-4e50-910b-f896-fe1aa3daf08f
----------------------------------------
Found 1 results
File Link: V
Grave Title: i
Name and Dates: Vilhelm Moberg (1898-1973)
Wikidata Qnumber: Q365406-3c2db577-43ec-ed63-feaa-8f494e175d41
----------------------------------------
Found 0 results
File Link: F
Grave Title: a
Name and Dates: Familjegrav Schmelzkopf, Ekebohm & Stålbo
Wikidata Qnumber: Not found
----------------------------------------
Found 1 results
File Link: T
Grave Title: a
Name and Dates: Tatjana Angelini (1923-2006)
Wikidata Qnumber: Tatjana_Angelini
----------------------------------------
Found 0 results
File Link: L
Grave Title: o
Name and Dates: Lovisa Bellman (1755-1847)
Wikidata Qnumber: Not found
----------------------------------------
Found 1 results
File Link: H
Grave Title: i
Name and Dates: Hinke Berggren (1861-1936)
Wikidata Qnumber: Q2632538
------------