In [1]:
import os

from rdfframes.knowledge_graph import KnowledgeGraph
from rdfframes.dataset.rdfpredicate import RDFPredicate
from rdfframes.utils.constants import JoinType
from rdfframes.client.http_client import HttpClientDataFormat, HttpClient

In [2]:
# Handle on the DBpedia graph; the prefixes are the short names used in the
# predicate strings below.
graph = KnowledgeGraph(
    graph_uri='http://dbpedia.org',
    prefixes={
        'dcterms': 'http://purl.org/dc/terms/',
        'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
        'dbpprop': 'http://dbpedia.org/property/',
        'dbpr': 'http://dbpedia.org/resource/',
    },
)

# (movie, actor) pairs linked by dbpprop:starring, expanded with the actor's
# birthplace and label and the movie's label, subject and country.
# Parentheses replace the backslash continuations: the original left a dangling
# `\` after the first call, which would have silently absorbed whatever was
# written on the following line.
dataset = (
    graph.feature_domain_range('dbpprop:starring', domain_col_name='movie', range_col_name='actor')
    .expand('actor', [('dbpprop:birthPlace', 'actor_country'), ('rdfs:label', 'actor_name')])
    .expand('movie', [('rdfs:label', 'movie_name'), ('dcterms:subject', 'subject'),
                      ('dbpprop:country', 'movie_country')])
    .cache()
)

# Rows whose actor birthplace, as a string, matches the regex "USA".
american_actors = dataset.filter({'actor_country': ['regex(str(?actor_country), "USA")']})

# Actors with between 20 and 30 distinct movies (inclusive on both ends).
prolific_actors = (
    dataset.group_by(['actor'])
    .count('movie', 'movie_count', unique=True)
    .filter({'movie_count': ['>= 20', '<=30']})
)

# Outer-join the two actor sets on `actor` (American OR prolific), join the
# result back to the full dataset on `actor`, and keep only the three output
# columns.
films = (
    american_actors.join(prolific_actors, join_col_name1='actor', join_type=JoinType.OuterJoin)
    .join(dataset, join_col_name1='actor')
    .select_cols(["movie_name", "actor_name", "subject"])
)

In [3]:
# Render the `films` pipeline as SPARQL text and print it so the generated
# query can be inspected before it is sent to an endpoint.
sparql_query = films.to_sparql()

print(sparql_query)

PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dbpprop: <http://dbpedia.org/property/>
PREFIX dbpr: <http://dbpedia.org/resource/>
SELECT ?movie_name ?actor_name ?subject 
FROM <http://dbpedia.org>
WHERE {
	?movie dbpprop:starring ?actor .
	?actor dbpprop:birthPlace ?actor_country .
	?actor rdfs:label ?actor_name .
	?movie rdfs:label ?movie_name .
	?movie dcterms:subject ?subject .
	?movie dbpprop:country ?movie_country .
		{
		SELECT * 
		WHERE {
			?movie dbpprop:starring ?actor .
			?actor dbpprop:birthPlace ?actor_country .
			?actor rdfs:label ?actor_name .
			?movie rdfs:label ?movie_name .
			?movie dcterms:subject ?subject .
			?movie dbpprop:country ?movie_country .
			FILTER ( regex(str(?actor_country), "USA") ) 
			}
		}
		UNION
		{
		SELECT ?actor  (COUNT(DISTINCT ?movie) AS ?movie_count) 
		WHERE {
			?movie dbpprop:starring ?actor .
			?actor dbpprop:birthPlace ?actor_country .
			?actor rdfs:label ?actor_name .
			

In [None]:
# SPARQL endpoint the query is executed against. Set the SPARQL_ENDPOINT
# environment variable to target a different server; when it is unset the
# original host is used, so existing runs behave exactly as before.
endpoint = os.environ.get('SPARQL_ENDPOINT', 'http://10.161.202.101:8890/sparql/')
output_format = HttpClientDataFormat.PANDAS_DF

# The same return format is passed to both the client and the execute call.
client = HttpClient(endpoint_url=endpoint, return_format=output_format)

# Run the `films` query through the client and keep the result for inspection.
df = films.execute(client, return_format=output_format)


In [None]:
# Preview the first rows. Ending the cell with the bare expression lets Jupyter
# use its rich table display instead of the plain-text dump that print() gives.
# NOTE(review): assumes `df` is a pandas DataFrame, as requested via PANDAS_DF.
df.head()