In [2]:
from question_types.sparql import SparqlQueries
from tabulate import tabulate
import json
import os

In [3]:
sparql = SparqlQueries("../dataset/14_graph.nt")

2024-10-10 18:35:04,551 | INFO | __init__ | [92mParsing graph[0m
2024-10-10 18:35:57,356 | INFO | __init__ | [92mGraph parsed[0m


In [4]:
def execute_query(query):
    sparql_result = sparql.execute_query(query)
    result_lst = [
        [str(item) for item in (row if isinstance(row, tuple) else [row])]
        for row in sparql_result
    ]
    if not len(result_lst):
        return print("Results were empty")
    headers = [f"Col {idx}" for idx in range(len(result_lst[0]))]
    print(tabulate(result_lst[:min(3, len(result_lst))], headers=headers, tablefmt="grid"))
    return result_lst

In [5]:
def get_movie_id_by_name(name: str) -> str:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>

        SELECT ?movie WHERE {{
            ?movie wdt:P31/wdt:P279* wd:Q11424 ;   # Match film or its subclasses
                   rdfs:label ?movieLabel .        # Get the label of the movie
            FILTER(CONTAINS(LCASE(?movieLabel), LCASE("{name}"))) .  # Substring match
            FILTER(LANG(?movieLabel) = "en")       # Only English labels
        }}
        LIMIT 5
    """
    result_list = execute_query(query)
    if result_list:
        return [result[0] for result in result_list][0]
    return result_list


def get_actor_id_by_name(name: str) -> str:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>

        SELECT ?actor ?actorLabel ?imdbID WHERE {{
            ?actor wdt:P31 wd:Q5 ;                         # Ensure the entity is a human
                   wdt:P106/wdt:P279* wd:Q33999 ;          # Occupation is actor or any subclass
                   rdfs:label ?actorLabel .                # Get actor label

            FILTER(CONTAINS(LCASE(?actorLabel), LCASE("{name}"))) .  # Match name substring in any label
            FILTER(LANG(?actorLabel) = "en")  # English labels only
            
            OPTIONAL {{ ?actor wdt:P345 ?imdbID . }}  # Optional: Include IMDb ID if present
        }}
        LIMIT 5
    """
    result_list = execute_query(query)
    return result_list if result_list else None

def get_film_professional_id_by_name(name: str) -> str:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>

        SELECT ?professional ?label ?roleLabel WHERE {{
            ?professional rdfs:label ?label ;        # Get any label
                          wdt:P31 wd:Q5 ;            # Instance of human
                          wdt:P106 ?role .           # Occupation property

            # Include relevant film industry professions and subclasses
            ?role rdfs:label ?roleLabel .            # Get role label (for debugging)
            FILTER(LANG(?roleLabel) = "en") .        # English labels for roles
            FILTER(CONTAINS(LCASE(?label), LCASE("{name}"))) .  # Match name substring in any label
            FILTER(LANG(?label) = "en")              # English labels only
        }}
        LIMIT 10
    """
    result_list = execute_query(query)
    if result_list and result_list[0]:
        return result_list[0][0]
    return result_list

def get_imdb_id_by_name(name: str) -> str:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>

        SELECT ?imdbId ?entity  WHERE {{
            ?entity rdfs:label ?label ;
                    wdt:P345 ?imdbId .                # IMDb ID property
            FILTER(CONTAINS(LCASE(?label), LCASE("{name}"))) .  # Match name substring
            FILTER(LANG(?label) = "en")               # English labels only
        }}
        LIMIT 1
    """
    result_list = execute_query(query)
    if result_list and result_list[0]:
        return result_list[0][0]
    return result_list



In [6]:
def get_all_predicates_by_id(entity_id: str) -> list:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        
        SELECT ?predicate ?predicateLabel ?object ?objectLabel WHERE {{
            wd:{entity_id} ?predicate ?object .  # Retrieve all properties and values for the given entity
            OPTIONAL {{ ?predicate rdfs:label ?predicateLabel . FILTER(LANG(?predicateLabel) = "en") }}  # Get predicate label
            OPTIONAL {{ ?object rdfs:label ?objectLabel . FILTER(LANG(?objectLabel) = "en") }}           # Get object label
        }}
        ORDER BY ?predicate
    """
    return execute_query(query)



In [7]:
def get_all_of_type_film():
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX ddis: <http://ddis.ch/atai/>
        
        SELECT ?movieLabel ?movie ?imdbID WHERE {
            ?movie wdt:P31/wdt:P279* wd:Q11424 .  # instance of film or any subclass of film
            ?movie rdfs:label ?movieLabel .  # only films with a label
            OPTIONAL { ?movie wdt:P345 ?imdbID . }  # IMDb ID
            FILTER(LANG(?movieLabel) = "en") .
        }
        ORDER BY ?movieLabel
    """
    return execute_query(query)


def get_all_of_type_actor():
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX ddis: <http://ddis.ch/atai/>

        SELECT ?actorLabel ?actor ?imdbID WHERE {
            ?actor wdt:P31 wd:Q5 ;  # instance of human
                   wdt:P106/wdt:P279* wd:Q33999 ;  # occupation is an actor or any subclass of actor
                   rdfs:label ?actorLabel .
            FILTER(LANG(?actorLabel) = "en")
            OPTIONAL { ?actor wdt:P345 ?imdbID . }  # IMDb ID
        }
        ORDER BY ?actorLabel
    """
    return execute_query(query)

def get_all_of_type_film_professionals():
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX ddis: <http://ddis.ch/atai/>

        SELECT ?personLabel ?person ?imdbID ?roleLabel WHERE {
            ?person wdt:P31 wd:Q5 ;  # instance of human
                   wdt:P106 ?role ;  # occupation
                   rdfs:label ?personLabel .
            FILTER(LANG(?personLabel) = "en")
            
            # Include relevant film industry professions
            VALUES ?role { 
                wd:Q2526255  # film director
                wd:Q947873   # film producer
                wd:Q28389    # screenwriter
                wd:Q488111   # cinematographer
                wd:Q3455803  # film editor
                wd:Q36479    # composer
                wd:Q3282637  # voice actor
            }
            
            OPTIONAL { ?role rdfs:label ?roleLabel FILTER(LANG(?roleLabel) = "en") . }  # Get occupation label
            OPTIONAL { ?person wdt:P345 ?imdbID . }  # IMDb ID
        }
        ORDER BY ?personLabel
    """
    return execute_query(query)


In [8]:
def ensure_directory_exists(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

def export_actor_json():
    try:
        ensure_directory_exists('exports')
        actor_lst = get_all_of_type_actor()
        db = {row[0]: (row[1], row[2]) for row in actor_lst}  
        with open('exports/actors_db.json', 'w') as file:
            json.dump(db, file)
        print(f"Actor data exported successfully. Stored {len(db)} actors.")
    except Exception as e:
        print(f"An error occurred: {e}")

def export_professionals_json():
    try:
        ensure_directory_exists('exports')
        actor_lst = get_all_of_type_film_professionals()
        db = {row[0]: (row[1], row[2]) for row in actor_lst}  
        with open('exports/professionals_db.json', 'w') as file:
            json.dump(db, file)
        print(f"Actor data exported successfully. Stored {len(db)} actors.")
    except Exception as e:
        print(f"An error occurred: {e}")

def export_film_json():
    try:
        ensure_directory_exists('exports')
        film_lst = get_all_of_type_film()
        db = {row[0]: (row[1], row[2]) for row in film_lst}
        with open('exports/film_db.json', 'w') as file:
            json.dump(db, file)
        print(f"Film data exported successfully. Stored {len(db)} films.")
    except Exception as e:
        print(f"An error occurred: {e}")


In [9]:
def search_anything_by_name(name: str):
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX schema: <http://schema.org/>

        SELECT ?entity ?label ?description WHERE {{
            ?entity rdfs:label ?label .        # Get any entity with a label
            OPTIONAL {{ ?entity schema:description ?description FILTER(LANG(?description) = "en") }}  # Get descriptions if available
            FILTER(CONTAINS(LCASE(?label), LCASE("{name}"))) .  # Match name substring, case-insensitive
            FILTER(LANG(?label) = "en")  # English labels only
        }}
        LIMIT 10
    """
    return execute_query(query)


In [10]:
export_film_json()

+-----------------------+-----------------------------------------+-----------+
| Col 0                 | Col 1                                   | Col 2     |
| "Crocodile" Dundee    | http://www.wikidata.org/entity/Q615254  | tt0090555 |
+-----------------------+-----------------------------------------+-----------+
| "Crocodile" Dundee II | http://www.wikidata.org/entity/Q1140745 | tt0092493 |
+-----------------------+-----------------------------------------+-----------+
| #1 Cheerleader Camp   | http://www.wikidata.org/entity/Q2299302 | tt1637976 |
+-----------------------+-----------------------------------------+-----------+
Film data exported successfully. Stored 25523 films.


In [11]:
export_actor_json()

+---------------------+-----------------------------------------+-----------+
| Col 0               | Col 1                                   | Col 2     |
| "Weird Al" Yankovic | http://www.wikidata.org/entity/Q8349    | nm0946148 |
+---------------------+-----------------------------------------+-----------+
| "Weird Al" Yankovic | http://www.wikidata.org/entity/Q8349    | nm0946148 |
+---------------------+-----------------------------------------+-----------+
| 40                  | http://www.wikidata.org/entity/Q3599072 | nm0790520 |
+---------------------+-----------------------------------------+-----------+
Actor data exported successfully. Stored 56177 actors.


In [12]:
export_professionals_json()

Results were empty
An error occurred: 'NoneType' object is not iterable


In [13]:
search_anything_by_name("Quentin Tarantino")

+--------------------------------------+-------------------+-----------------------------------------------------------+
| Col 0                                | Col 1             | Col 2                                                     |
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | American film director, screenwriter, producer, and actor |
+--------------------------------------+-------------------+-----------------------------------------------------------+


[['http://www.wikidata.org/entity/Q3772',
  'Quentin Tarantino',
  'American film director, screenwriter, producer, and actor']]

In [14]:
get_imdb_id_by_name("Avatar")

+-----------+---------------------------------------+
| Col 0     | Col 1                                 |
| tt0499549 | http://www.wikidata.org/entity/Q24871 |
+-----------+---------------------------------------+


'tt0499549'

In [15]:
get_movie_id_by_name("Avatar")

+------------------------------------------+
| Col 0                                    |
| http://www.wikidata.org/entity/Q24871    |
+------------------------------------------+
| http://www.wikidata.org/entity/Q29580929 |
+------------------------------------------+
| http://www.wikidata.org/entity/Q3604746  |
+------------------------------------------+


'http://www.wikidata.org/entity/Q24871'

In [16]:
get_film_professional_id_by_name("Quentin Tarantino")

+--------------------------------------+-------------------+---------+
| Col 0                                | Col 1             | Col 2   |
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | actor   |
+--------------------------------------+-------------------+---------+
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | writer  |
+--------------------------------------+-------------------+---------+


'http://www.wikidata.org/entity/Q3772'

In [17]:
get_film_professional_id_by_name("Hans Zimmer")

+---------------------------------------+-------------+----------+
| Col 0                                 | Col 1       | Col 2    |
| http://www.wikidata.org/entity/Q76364 | Hans Zimmer | musician |
+---------------------------------------+-------------+----------+


'http://www.wikidata.org/entity/Q76364'

In [18]:
get_actor_id_by_name("Angelina Jolie")
get_all_predicates_by_id("Q13909")

+---------------------------------------+----------------+-----------+
| Col 0                                 | Col 1          | Col 2     |
| http://www.wikidata.org/entity/Q13909 | Angelina Jolie | nm0001401 |
+---------------------------------------+----------------+-----------+
+--------------------------------------------+------------------+--------------------------------------------------------+------------------+
| Col 0                                      | Col 1            | Col 2                                                  | Col 3            |
| http://schema.org/description              | node description | American actress, filmmaker, screenwriter and producer | None             |
+--------------------------------------------+------------------+--------------------------------------------------------+------------------+
| http://www.w3.org/2000/01/rdf-schema#label | node label       | Angelina Jolie                                         | None             |
+-----

[['http://schema.org/description',
  'node description',
  'American actress, filmmaker, screenwriter and producer',
  'None'],
 ['http://www.w3.org/2000/01/rdf-schema#label',
  'node label',
  'Angelina Jolie',
  'None'],
 ['http://www.wikidata.org/prop/direct/P103',
  'native language',
  'http://www.wikidata.org/entity/Q7976',
  'American English'],
 ['http://www.wikidata.org/prop/direct/P1038',
  'relative',
  'http://www.wikidata.org/entity/Q962604',
  'Chip Taylor'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q1053574',
  'executive producer'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q1414443',
  'filmmaker'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q33999',
  'actor'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q36180',
  'writer'],
 ['http://www.wikidata.org/prop/

In [19]:
get_movie_id_by_name("The Naked Gun")

+----------------------------------------+
| Col 0                                  |
| http://www.wikidata.org/entity/Q502979 |
+----------------------------------------+
| http://www.wikidata.org/entity/Q832047 |
+----------------------------------------+


'http://www.wikidata.org/entity/Q502979'

In [20]:
get_imdb_id_by_name("The Naked Gun")

+-----------+----------------------------------------+
| Col 0     | Col 1                                  |
| tt0102510 | http://www.wikidata.org/entity/Q502979 |
+-----------+----------------------------------------+


'tt0102510'

In [21]:
# Get movie ID
print(get_movie_id_by_name("Inception"))

# Get actor IDs for Brad Pitt
print(get_actor_id_by_name("Brad Pitt"))

# Get film professional ID for Quentin Tarantino
print(get_film_professional_id_by_name("Quentin Tarantino"))

# Get IMDb ID for a name
print(get_imdb_id_by_name("Brad Pitt"))


+---------------------------------------+
| Col 0                                 |
| http://www.wikidata.org/entity/Q25188 |
+---------------------------------------+
http://www.wikidata.org/entity/Q25188
+---------------------------------------+-----------+-----------+
| Col 0                                 | Col 1     | Col 2     |
| http://www.wikidata.org/entity/Q35332 | Brad Pitt | nm0000093 |
+---------------------------------------+-----------+-----------+
[['http://www.wikidata.org/entity/Q35332', 'Brad Pitt', 'nm0000093']]
+--------------------------------------+-------------------+---------+
| Col 0                                | Col 1             | Col 2   |
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | actor   |
+--------------------------------------+-------------------+---------+
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | writer  |
+--------------------------------------+-------------------+---------+
http://www.wikidata.org/entity/Q37

In [22]:
get_all_predicates_by_id("Q3772") # Quentin Tarantino

+--------------------------------------------+------------------+-----------------------------------------------------------+---------+
| Col 0                                      | Col 1            | Col 2                                                     | Col 3   |
| http://schema.org/description              | node description | American film director, screenwriter, producer, and actor | None    |
+--------------------------------------------+------------------+-----------------------------------------------------------+---------+
| http://www.w3.org/2000/01/rdf-schema#label | node label       | Quentin Tarantino                                         | None    |
+--------------------------------------------+------------------+-----------------------------------------------------------+---------+
| http://www.wikidata.org/prop/direct/P106   | occupation       | http://www.wikidata.org/entity/Q33999                     | actor   |
+--------------------------------------------+--

[['http://schema.org/description',
  'node description',
  'American film director, screenwriter, producer, and actor',
  'None'],
 ['http://www.w3.org/2000/01/rdf-schema#label',
  'node label',
  'Quentin Tarantino',
  'None'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q33999',
  'actor'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q36180',
  'writer'],
 ['http://www.wikidata.org/prop/direct/P1343',
  'described by source',
  'http://www.wikidata.org/entity/Q67311526',
  'Obalky knih.cz'],
 ['http://www.wikidata.org/prop/direct/P136',
  'genre',
  'http://www.wikidata.org/entity/Q188473',
  'action film'],
 ['http://www.wikidata.org/prop/direct/P136',
  'genre',
  'http://www.wikidata.org/entity/Q459290',
  'independent film'],
 ['http://www.wikidata.org/prop/direct/P140',
  'religion',
  'http://www.wikidata.org/entity/Q5043',
  'Christianity'],
 ['http://www.wikidata.org/prop/d

In [23]:
get_all_predicates_by_id("Q35332") # Brad Pitt

+--------------------------------------------+------------------+--------------------------------------+------------------+
| Col 0                                      | Col 1            | Col 2                                | Col 3            |
| http://schema.org/description              | node description | American actor and filmmaker         | None             |
+--------------------------------------------+------------------+--------------------------------------+------------------+
| http://www.w3.org/2000/01/rdf-schema#label | node label       | Brad Pitt                            | None             |
+--------------------------------------------+------------------+--------------------------------------+------------------+
| http://www.wikidata.org/prop/direct/P103   | native language  | http://www.wikidata.org/entity/Q7976 | American English |
+--------------------------------------------+------------------+--------------------------------------+------------------+


[['http://schema.org/description',
  'node description',
  'American actor and filmmaker',
  'None'],
 ['http://www.w3.org/2000/01/rdf-schema#label',
  'node label',
  'Brad Pitt',
  'None'],
 ['http://www.wikidata.org/prop/direct/P103',
  'native language',
  'http://www.wikidata.org/entity/Q7976',
  'American English'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q1053574',
  'executive producer'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q33999',
  'actor'],
 ['http://www.wikidata.org/prop/direct/P106',
  'occupation',
  'http://www.wikidata.org/entity/Q4610556',
  'model'],
 ['http://www.wikidata.org/prop/direct/P1066',
  'student of',
  'http://www.wikidata.org/entity/Q7373081',
  'Roy London'],
 ['http://www.wikidata.org/prop/direct/P1343',
  'described by source',
  'http://www.wikidata.org/entity/Q67311526',
  'Obalky knih.cz'],
 ['http://www.wikidata.org/prop/direct/P1411

In [24]:
get_all_predicates_by_id("Q25188") # Inception

+-------------------------+---------+-------------------+---------+
| Col 0                   | Col 1   | Col 2             | Col 3   |
| http://ddis.ch/atai/tag | None    | action            | None    |
+-------------------------+---------+-------------------+---------+
| http://ddis.ch/atai/tag | None    | alternate_reality | None    |
+-------------------------+---------+-------------------+---------+
| http://ddis.ch/atai/tag | None    | boring            | None    |
+-------------------------+---------+-------------------+---------+


[['http://ddis.ch/atai/tag', 'None', 'action', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'alternate_reality', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'boring', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'clever', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'dramatic', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'fantasy', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'flashback', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'murder', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'mystery', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'neo_noir', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'philosophical', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'plot_twist', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'psychedelic', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'psychological', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'sci-fi', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'sentimental', 'None'],
 ['http://ddis.ch/atai/tag', 'None', 'suspenseful', 'Non