In [7]:
def ensure(package, import_name=None):
    """Make sure a package is importable, installing it with pip if it is not.

    Args:
        package: Requirement handed to ``pip install`` (e.g. ``"rdflib"``).
        import_name: Module name to try importing. Defaults to ``package``;
            pass it explicitly when the distribution name and the module
            name differ, or when ``package`` carries a version specifier.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status.
    """
    import importlib
    import subprocess
    import sys

    module_name = import_name or package
    try:
        importlib.import_module(module_name)
    except ImportError:
        # Use the running interpreter so the package lands in the same
        # environment the kernel imports from.
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        # Import finders cache directory listings; without this, a module
        # installed while the interpreter is running may not be found by
        # the next import statement.
        importlib.invalidate_caches()
# Third-party packages the rest of the notebook imports
for required_package in ("rdflib", "requests"):
    ensure(required_package)


In [None]:
import requests
from rdflib import Graph, URIRef, Literal, Namespace

# Namespace for the schema.org terms used as predicates
schema = Namespace("https://schema.org/")

# Read the N-Triples file holding the artist data into a graph
input_path = "wikidata-personen-attributen.nt"
g = Graph()
g.parse(source=input_path, format="nt")

# Fetch the summary and page URL of an article from Wikipedia
def get_wikipedia_info(name, timeout=10):
    """Look up an English Wikipedia page summary by title.

    Args:
        name: Page title; spaces are replaced by underscores and the result
            is percent-encoded before being placed in the URL path.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(summary, page_url)`` tuple taken from the ``extract`` and
        ``content_urls.desktop.page`` fields of the response. Either element
        is ``None`` when the field is absent; both are ``None`` when the name
        is empty, the request fails, the status is not 200, or the body is
        not valid JSON.
    """
    from urllib.parse import quote

    if not name or not name.strip():
        return None, None

    # safe="" also encodes "/" so a title such as "AC/DC" stays one path segment.
    title = quote(name.replace(" ", "_"), safe="")
    url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None, None

    if response.status_code != 200:
        return None, None

    try:
        data = response.json()
    except ValueError:
        # Body was not JSON; treat it like any other failed lookup.
        return None, None

    summary = data.get("extract")
    page_url = data.get("content_urls", {}).get("desktop", {}).get("page", None)
    return summary, page_url

# Output graph: receives only the triples derived from Wikipedia
g_out = Graph()

# For every English schema:name in the input graph, look the name up on
# Wikipedia and attach the results to the same subject as new triples.
# Querying by predicate avoids walking every triple in the graph.
for subject, name_value in g.subject_objects(schema.name):
    # Only Literals carry a language tag; a URIRef/BNode object would raise
    # AttributeError on `.language`, so read it defensively and skip.
    if getattr(name_value, "language", None) != "en":
        continue

    artist_name = str(name_value)
    summary, page_url = get_wikipedia_info(artist_name)

    # Add description to graph
    if summary:
        g_out.add((subject, schema.description, Literal(summary, lang="en")))

    # Add mainEntityOfPage
    if page_url:
        g_out.add((subject, schema.mainEntityOfPage, URIRef(page_url)))

# Save to a new .nt file
output_path = "artist_descriptions_with_wikipedia.nt"
g_out.serialize(destination=output_path, format="nt")
print(f"File saved to {output_path}")


In [12]:
# Preview: print at most the first ten triples of the output graph in N3 form
for _, (s, p, o) in zip(range(10), g_out):
    print(f"{s.n3()} {p.n3()} {o.n3()}")


<http://www.wikidata.org/entity/Q765465> <https://schema.org/mainEntityOfPage> <https://en.wikipedia.org/wiki/Dirck_Coornhert>
<http://www.wikidata.org/entity/Q891199> <https://schema.org/description> "Johannes Marius (\"Bok\") de Korver was a football player from the Netherlands, who twice won a bronze medal with the Netherlands national football team at the Summer Olympics: in 1908 (London) and in 1912 (Stockholm)."@en
<http://www.wikidata.org/entity/Q2138933> <https://schema.org/description> "Anna Barbara van Meerten-Schilperoort, was a Dutch women's rights activist."@en
<http://www.wikidata.org/entity/Q5669> <https://schema.org/description> "Alessandro di Mariano di Vanni Filipepi, better known as Sandro Botticelli or simply Botticelli, was an Italian painter of the Early Renaissance. Botticelli's posthumous reputation suffered until the late 19th century, when he was rediscovered by the Pre-Raphaelites who stimulated a reappraisal of his work. Since then, his paintings have been s