In [191]:
from question_types.sparql import SparqlQueries
from tabulate import tabulate
import json
import os

In [4]:
sparql = SparqlQueries("../dataset/14_graph.nt")

2024-10-10 08:10:17,656 | INFO | __init__ | [92mParsing graph[0m
2024-10-10 08:11:14,989 | INFO | __init__ | [92mGraph parsed[0m


In [195]:
def execute_query(query):
    sparql_result = sparql.execute_query(query)
    result_lst = [
        [str(item) for item in (row if isinstance(row, tuple) else [row])]
        for row in sparql_result
    ]
    if not len(result_lst):
        return print("Results were empty")
    headers = [f"Col {idx}" for idx in range(len(result_lst[0]))]
    print(tabulate(result_lst[:min(3, len(result_lst))], headers=headers, tablefmt="grid"))
    return result_lst

In [260]:
def get_movie_id_by_name(name: str) -> str:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>

        SELECT ?movie WHERE {{
            ?movie rdfs:label "{name}"@en ;
                   wdt:P31/wdt:P279* wd:Q11424 .  # Match film or its subclasses
        }}
        LIMIT 1
    """
    result_list = execute_query(query)
    if result_list and result_list[0]:
        return result_list[0][0]
    return result_list

def get_film_professional_id_by_name(name: str) -> str:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>

        SELECT ?professional ?label ?roleLabel WHERE {{
            ?professional rdfs:label ?label ;        # Get any label
                          wdt:P31 wd:Q5 ;            # Instance of human
                          wdt:P106 ?role .           # Occupation property

            # Include relevant film industry professions and subclasses
            ?role rdfs:label ?roleLabel .            # Get role label (for debugging)
            FILTER(LANG(?roleLabel) = "en") .        # English labels for roles
            FILTER(CONTAINS(LCASE(?label), LCASE("{name}"))) .  # Match name substring in any label
            FILTER(LANG(?label) = "en")  # English labels only
        }}
        LIMIT 10
    """
    result_list = execute_query(query)
    if result_list and result_list[0]:
        return result_list[0][0]
    return result_list

def get_imdb_id_by_name(name: str) -> int:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX ddis: <http://ddis.ch/atai/>
        SELECT ?imdbId
        WHERE {{
            ?entity rdfs:label "{name}"@en ;
                    wdt:P345 ?imdbId .
        }}
    """
    result_list = execute_query(query)
    if result_list and result_list[0]:
        return result_list[0][0]
    return result_list

def get_actor_info_by_id(actor_id: str) -> dict:
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        
        SELECT ?predicate ?predicateLabel ?object ?objectLabel WHERE {{
          wd:{actor_id} ?predicate ?object .
          OPTIONAL {{ ?predicate rdfs:label ?predicateLabel . FILTER(LANG(?predicateLabel) = "en") }}
          OPTIONAL {{ ?object rdfs:label ?objectLabel . FILTER(LANG(?objectLabel) = "en") }}
        }}
        ORDER BY ?predicate
    """
    return execute_query(query)

def get_movie_info_by_id(movie_id: str) -> dict:
    query = f"""
        PREFIX wdt: <http://www.wikidata.org/prop/direct/> 
        PREFIX wd: <http://www.wikidata.org/entity/> 
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        
        SELECT ?item ?itemLabel ?typeLabel WHERE {{
          wd:{movie_id} rdfs:label ?itemLabel ;
                           wdt:P31 ?type .
          ?type rdfs:label ?typeLabel .
          FILTER(LANG(?itemLabel) = "en").
          FILTER(LANG(?typeLabel) = "en").
        }}
    """
    return execute_query(query)


In [252]:
def get_all_of_type_film():
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX ddis: <http://ddis.ch/atai/>
        
        SELECT ?movieLabel ?movie ?imdbID WHERE {
            ?movie wdt:P31/wdt:P279* wd:Q11424 .  # instance of film or any subclass of film
            ?movie rdfs:label ?movieLabel .  # only films with a label
            OPTIONAL { ?movie wdt:P345 ?imdbID . }  # IMDb ID
            FILTER(LANG(?movieLabel) = "en") .
        }
        ORDER BY ?movieLabel
    """
    return execute_query(query)


def get_all_of_type_actor():
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX ddis: <http://ddis.ch/atai/>

        SELECT ?actorLabel ?actor ?imdbID WHERE {
            ?actor wdt:P31 wd:Q5 ;  # instance of human
                   wdt:P106/wdt:P279* wd:Q33999 ;  # occupation is an actor or any subclass of actor
                   rdfs:label ?actorLabel .
            FILTER(LANG(?actorLabel) = "en")
            OPTIONAL { ?actor wdt:P345 ?imdbID . }  # IMDb ID
        }
        ORDER BY ?actorLabel
    """
    return execute_query(query)

def get_all_of_type_film_professionals():
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX ddis: <http://ddis.ch/atai/>

        SELECT ?personLabel ?person ?imdbID ?roleLabel WHERE {
            ?person wdt:P31 wd:Q5 ;  # instance of human
                   wdt:P106 ?role ;  # occupation
                   rdfs:label ?personLabel .
            FILTER(LANG(?personLabel) = "en")
            
            # Include relevant film industry professions
            VALUES ?role { 
                wd:Q2526255  # film director
                wd:Q947873   # film producer
                wd:Q28389    # screenwriter
                wd:Q488111   # cinematographer
                wd:Q3455803  # film editor
                wd:Q36479    # composer
                wd:Q3282637  # voice actor
            }
            
            OPTIONAL { ?role rdfs:label ?roleLabel FILTER(LANG(?roleLabel) = "en") . }  # Get occupation label
            OPTIONAL { ?person wdt:P345 ?imdbID . }  # IMDb ID
        }
        ORDER BY ?personLabel
    """
    return execute_query(query)


In [253]:
def ensure_directory_exists(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

def export_actor_json():
    try:
        ensure_directory_exists('exports')
        actor_lst = get_all_of_type_actor()
        db = {row[0]: (row[1], row[2]) for row in actor_lst}  
        with open('exports/actors_db.json', 'w') as file:
            json.dump(db, file)
        print(f"Actor data exported successfully. Stored {len(db)} actors.")
    except Exception as e:
        print(f"An error occurred: {e}")

def export_professionals_json():
    try:
        ensure_directory_exists('exports')
        actor_lst = get_all_of_type_film_professionals()
        db = {row[0]: (row[1], row[2]) for row in actor_lst}  
        with open('exports/professionals_db.json', 'w') as file:
            json.dump(db, file)
        print(f"Actor data exported successfully. Stored {len(db)} actors.")
    except Exception as e:
        print(f"An error occurred: {e}")

def export_film_json():
    try:
        ensure_directory_exists('exports')
        film_lst = get_all_of_type_film()
        db = {row[0]: (row[1], row[2]) for row in film_lst}
        with open('exports/film_db.json', 'w') as file:
            json.dump(db, file)
        print(f"Film data exported successfully. Stored {len(db)} films.")
    except Exception as e:
        print(f"An error occurred: {e}")


In [258]:
def search_anything_by_name(name: str):
    query = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        PREFIX schema: <http://schema.org/>

        SELECT ?entity ?label ?description WHERE {{
            ?entity rdfs:label ?label .        # Get any entity with a label
            OPTIONAL {{ ?entity schema:description ?description FILTER(LANG(?description) = "en") }}  # Get descriptions if available
            FILTER(CONTAINS(LCASE(?label), LCASE("{name}"))) .  # Match name substring, case-insensitive
            FILTER(LANG(?label) = "en")  # English labels only
        }}
        LIMIT 10
    """
    return execute_query(query)


In [243]:
export_film_json()

+-----------------------+-----------------------------------------+-----------+
| Col 0                 | Col 1                                   | Col 2     |
| "Crocodile" Dundee    | http://www.wikidata.org/entity/Q615254  | tt0090555 |
+-----------------------+-----------------------------------------+-----------+
| "Crocodile" Dundee II | http://www.wikidata.org/entity/Q1140745 | tt0092493 |
+-----------------------+-----------------------------------------+-----------+
| #1 Cheerleader Camp   | http://www.wikidata.org/entity/Q2299302 | tt1637976 |
+-----------------------+-----------------------------------------+-----------+
Film data exported successfully. Stored 25523 films.


In [244]:
export_actor_json()

+---------------------+-----------------------------------------+-----------+
| Col 0               | Col 1                                   | Col 2     |
| "Weird Al" Yankovic | http://www.wikidata.org/entity/Q8349    | nm0946148 |
+---------------------+-----------------------------------------+-----------+
| "Weird Al" Yankovic | http://www.wikidata.org/entity/Q8349    | nm0946148 |
+---------------------+-----------------------------------------+-----------+
| 40                  | http://www.wikidata.org/entity/Q3599072 | nm0790520 |
+---------------------+-----------------------------------------+-----------+
Actor data exported successfully. Stored 56177 actors.


In [245]:
export_professionals_json()

Results were empty
An error occurred: 'NoneType' object is not iterable


In [259]:
search_anything_by_name("Quentin Tarantino")

+--------------------------------------+-------------------+-----------------------------------------------------------+
| Col 0                                | Col 1             | Col 2                                                     |
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | American film director, screenwriter, producer, and actor |
+--------------------------------------+-------------------+-----------------------------------------------------------+


[['http://www.wikidata.org/entity/Q3772',
  'Quentin Tarantino',
  'American film director, screenwriter, producer, and actor']]

In [246]:
get_imdb_id_by_name("Avatar")

+-----------+
| Col 0     |
| tt0499549 |
+-----------+


'tt0499549'

In [247]:
get_movie_id_by_name("Avatar")

+---------------------------------------+
| Col 0                                 |
| http://www.wikidata.org/entity/Q24871 |
+---------------------------------------+


'http://www.wikidata.org/entity/Q24871'

In [262]:
get_film_professional_id_by_name("Quentin Tarantino")

+--------------------------------------+-------------------+---------+
| Col 0                                | Col 1             | Col 2   |
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | actor   |
+--------------------------------------+-------------------+---------+
| http://www.wikidata.org/entity/Q3772 | Quentin Tarantino | writer  |
+--------------------------------------+-------------------+---------+


'http://www.wikidata.org/entity/Q3772'

In [261]:
get_film_professional_id_by_name("Hans Zimmer")

+---------------------------------------+-------------+----------+
| Col 0                                 | Col 1       | Col 2    |
| http://www.wikidata.org/entity/Q76364 | Hans Zimmer | musician |
+---------------------------------------+-------------+----------+


'http://www.wikidata.org/entity/Q76364'