In [1]:
import time
from rdfframes.knowledge_graph import KnowledgeGraph
from rdfframes.utils.constants import JoinType
from rdfframes.client.http_client import HttpClientDataFormat, HttpClient

In [2]:
# Handle on the 'dbpedia' knowledge graph; the query in the next cell is built from it.
graph = KnowledgeGraph(graph_name='dbpedia')

# SPARQL endpoint and result format used when the query is executed further down.
# The host is in the 10.0.0.0/8 private range, so it is reachable only from inside that network.
endpoint = 'http://10.161.202.101:8890/sparql/'
output_format = HttpClientDataFormat.PANDAS_DF
client = HttpClient(
    endpoint_url=endpoint,
    return_format=output_format,
)

In [3]:
# Start from every (movie, actor) pair linked by dbpprop:starring, then attach
# the actor's birth place and label and the movie's label, subject, country and
# genre. The trailing True on the genre entry makes that column optional (it is
# rendered inside an OPTIONAL clause in the generated SPARQL).
#
# The whole chain lives in one parenthesised expression: the previous version
# ended its first statement with a backslash that continued onto a blank line,
# which only parsed for as long as that blank line was left in place.
dataset = (
    graph.feature_domain_range('dbpprop:starring', domain_col_name='movie', range_col_name='actor')
    .expand('actor', [('dbpprop:birthPlace', 'actor_country'), ('rdfs:label', 'actor_name')])
    .expand('movie', [('rdfs:label', 'movie_name'), ('dcterms:subject', 'subject'),
                      ('dbpprop:country', 'movie_country'), ('dbpo:genre', 'genre', True)])
    .cache()
)

# Rows whose actor birth place, as a string, matches the regex "USA".
american_actors = dataset.filter({'actor_country': ['regex(str(?actor_country), "USA")']})

# Actors that star in at least 20 distinct movies.
prolific_actors = (
    dataset.group_by(['actor'])
    .count('movie', 'movie_count', unique=True)
    .filter({'movie_count': ['>= 20']})
)

# Outer-join the two actor sets on 'actor', join the result back to the full
# dataset on 'actor', and keep only the four columns of interest.
movies = (
    american_actors.join(prolific_actors, join_col_name1='actor', join_type=JoinType.OuterJoin)
    .join(dataset, join_col_name1='actor')
    .select_cols(["movie_name", "actor_name", "subject", "genre"])
)

  "join columns".format(warn_cols))


In [4]:
# Send the composed query to the endpoint; the result comes back in `output_format`.
df = movies.execute(
    client,
    return_format=output_format,
)

time of the query preparation 0.002777576446533203


In [5]:
# Preview the first five result rows as plain text.
print(df.head(n=5))

                   movie_name  actor_name  \
0  Hell on Wheels (TV series)  Tim Guinee   
1  Hell on Wheels (TV series)  Tim Guinee   
2  Hell on Wheels (TV series)  Tim Guinee   
3  Hell on Wheels (TV series)  Tim Guinee   
4  Hell on Wheels (TV series)  Tim Guinee   

                                             subject  \
0  http://dbpedia.org/resource/Category:Serial_dr...   
1  http://dbpedia.org/resource/Category:American_...   
2  http://dbpedia.org/resource/Category:Televisio...   
3  http://dbpedia.org/resource/Category:2011_Amer...   
4  http://dbpedia.org/resource/Category:Televisio...   

                                               genre  
0  http://dbpedia.org/resource/Historical_period_...  
1  http://dbpedia.org/resource/Historical_period_...  
2  http://dbpedia.org/resource/Historical_period_...  
3  http://dbpedia.org/resource/Historical_period_...  
4        http://dbpedia.org/resource/Western_(genre)  


In [6]:
# Render the same pipeline as SPARQL text and print it so the generated query can be inspected.
sparql_query = movies.to_sparql()
print(sparql_query)

PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dbpprop: <http://dbpedia.org/property/>
PREFIX dbpr: <http://dbpedia.org/resource/>
PREFIX dbpo: <http://dbpedia.org/ontology/>
SELECT DISTINCT ?movie_name ?actor_name ?subject ?genre 
FROM <http://dbpedia.org>
WHERE {
	?movie dbpprop:starring ?actor .
	?actor dbpprop:birthPlace ?actor_country .
	?actor rdfs:label ?actor_name .
	?movie rdfs:label ?movie_name .
	?movie dcterms:subject ?subject .
	?movie dbpprop:country ?movie_country .
	OPTIONAL {
		?movie dbpo:genre ?genre .
			}
	{
	SELECT * 
	WHERE {
		?movie dbpprop:starring ?actor .
		?actor dbpprop:birthPlace ?actor_country .
		?actor rdfs:label ?actor_name .
		?movie rdfs:label ?movie_name .
		?movie dcterms:subject ?subject .
		?movie dbpprop:country ?movie_country .
		FILTER ( regex(str(?actor_country), "USA") ) 
	
		OPTIONAL {
			?movie dbpo:genre ?genre .
				}
		}
	}
	UNION
	{
	SELECT DISTINCT ?actor  (COUNT(DISTINCT ?movi