In [1]:
import pandas as pd

from SPARQLWrapper import SPARQLWrapper, JSON
from sqlalchemy import create_engine, MetaData, text
#from sqlalchemy.orm import sessionmaker

In [2]:
# Create an in-memory SQLite engine and build the schema from create_schema.sql.
engine = create_engine("sqlite:///:memory:")

# Read the whole DDL script first so the file handle is closed before any
# database work starts.
with open("create_schema.sql") as file:
    schema_sql = file.read()

# Only one statement is accepted per execute() call, so the script is split
# on ';'. NOTE: this naive split breaks if a ';' ever appears inside a string
# literal or a trigger body in create_schema.sql.
# Whitespace-only fragments (e.g. whatever follows the final ';') are dropped
# instead of being sent to the database as empty statements.
query_list = [stmt.strip() for stmt in schema_sql.split(';') if stmt.strip()]

# engine.begin() commits when the block exits without error (and rolls back
# otherwise), rather than relying on implicit driver behaviour for the DDL.
with engine.begin() as con:
    for query_text in query_list:
        con.execute(text(query_text))

In [3]:
# Reflect the live database and list every table name followed by its column
# names (one per line) to confirm the schema script created everything.
m = MetaData()
m.reflect(bind=engine)
for table in m.tables.values():
    print(table.name, *(column.name for column in table.columns), sep="\n")

t_art_objects
ID
TITLE
DESCRIPTION
PAINTER_ID
t_art_owners
ID
OWNER_ID
START_OWNERSHIP
END_OWNERSHIP


In [5]:
# Wikidata SPARQL endpoint
sparql = SPARQLWrapper(
    "https://query.wikidata.org/sparql"
)

# Query for the art object list; the label service uses the fixed languages
# "de, en" in place of the AUTO_LANGUAGE placeholder.
sparql.setQuery("""
    SELECT ?artObject ?artObjectLabel ?artObjectDescription ?ownedby ?ownedbyLabel
    WHERE {
        ?artObject wdt:P31 wd:Q3305213 ; # Get items that are instances of painting
         wdt:P127 ?ownedby. #at least one owner is known in wikidata
        ?ownedby wdt:P1840 wd:Q30335959 . #the owner was ALIU Red Flag   
    
        SERVICE wikibase:label { bd:serviceParam wikibase:language "de, en"}
    }
    GROUP BY ?artObject ?artObjectLabel ?artObjectDescription ?ownedby ?ownedbyLabel
    """
)

sparql.setReturnFormat(JSON)  # request JSON

# Run the query and flatten the bindings: every SPARQL variable becomes a
# group of "<variable>.<field>" columns, of which only "<variable>.value"
# carries the actual data.
results = sparql.query().convert()
bindings_df = pd.json_normalize(results['results']['bindings'])

# Map each SPARQL variable to its db column BY NAME. The previous positional
# rename stored the label under DESCRIPTION and the description under TITLE,
# and it broke whenever a variable was missing from the bindings.
# NOTE(review): the query selects the owner (P127), not a painter or a
# creation year. PAINTER_ID / CREATION_YEAR are kept so existing consumers of
# this frame keep working, but they hold the owner's entity URI and the
# owner's label respectively - confirm the intended target columns.
column_map = {
    "artObject.value": "ID",
    "artObjectLabel.value": "TITLE",
    "artObjectDescription.value": "DESCRIPTION",
    "ownedby.value": "PAINTER_ID",
    "ownedbyLabel.value": "CREATION_YEAR",
}

# reindex() selects the value columns in a fixed order and creates an empty
# (NaN) column for any variable the endpoint did not return at all.
results_df = bindings_df.reindex(columns=list(column_map)).rename(columns=column_map)

In [8]:
# Write the complete DataFrame into t_art_objects. if_exists='replace' drops
# any existing table of that name first and recreates it from the DataFrame,
# so the column definitions from the schema script do not survive this step.
results_df.to_sql(
    name='t_art_objects',
    con=engine,
    if_exists='replace',
    index=False,
)

630

In [83]:
#m.tables['t_art_objects']

Table('t_art_objects', MetaData(), Column('ID', TEXT(), table=<t_art_objects>), Column('DESCRIPTION', TEXT(), table=<t_art_objects>), Column('TITLE', TEXT(), table=<t_art_objects>), Column('PAINTER_ID', TEXT(), table=<t_art_objects>), Column('CREATION_YEAR', TEXT(), table=<t_art_objects>), schema=None)

In [27]:
# Check that the data is actually in the db by fetching a single row.
# The connection is opened in a `with` block so it is released when the block
# ends instead of staying open for the rest of the kernel session.
with engine.connect() as con:
    first_row = con.execute(text("SELECT * FROM t_art_objects")).fetchone()

# Last expression of the cell, so the notebook displays the fetched row.
first_row

('http://www.wikidata.org/entity/Q1114220', 'Mohnblumenfeld bei Vétheuil', 'Gemälde von Claude Monet, 1880', 'http://www.wikidata.org/entity/Q96297', 'Emil Georg Bührle')