In [1]:
from rdflib import Graph
from SPARQLWrapper import SPARQLWrapper, JSON, N3
from pprint import pprint

In [13]:
sparql = SPARQLWrapper('https://dbpedia.org/sparql')
sparql.setQuery('''
SELECT ?property ?value
WHERE {
  dbr:Matrix ?property ?value.
}
''')
sparql.setReturnFormat(JSON)
qres = sparql.query().convert()

# pprint(qres)
for result in qres['results']['bindings']:
    # print(result['object'])
    
    lang, value = result['property'], result['value']
    print(f'Property: {lang}\tValue: {value}')
    # if lang == 'en':
        # print(value)


Property: {'type': 'uri', 'value': 'http://www.w3.org/2000/01/rdf-schema#label'}	Value: {'type': 'literal', 'xml:lang': 'en', 'value': 'Matrix'}
Property: {'type': 'uri', 'value': 'http://www.w3.org/2000/01/rdf-schema#label'}	Value: {'type': 'literal', 'xml:lang': 'ar', 'value': 'ماتريكس (توضيح)'}
Property: {'type': 'uri', 'value': 'http://www.w3.org/2000/01/rdf-schema#label'}	Value: {'type': 'literal', 'xml:lang': 'ca', 'value': 'Matriu'}
Property: {'type': 'uri', 'value': 'http://www.w3.org/2000/01/rdf-schema#label'}	Value: {'type': 'literal', 'xml:lang': 'cs', 'value': 'Matrix (rozcestník)'}
Property: {'type': 'uri', 'value': 'http://www.w3.org/2000/01/rdf-schema#label'}	Value: {'type': 'literal', 'xml:lang': 'de', 'value': 'Matrix'}
Property: {'type': 'uri', 'value': 'http://www.w3.org/2000/01/rdf-schema#label'}	Value: {'type': 'literal', 'xml:lang': 'es', 'value': 'Matrix (desambiguación)'}
Property: {'type': 'uri', 'value': 'http://www.w3.org/2000/01/rdf-schema#label'}	Value: {'t

In [3]:
query = """
SELECT ?property ?value
WHERE {
  dbr:Matrix ?property ?value.
}
"""

sparql.setQuery(query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Print the results
for result in results["results"]["bindings"]:
    print(f"Country: {result['country']['value']}")
    print(f"Capital: {result['capital']['value']}")
    print(f"Population: {result['population']['value']}\n")


In [2]:
from SPARQLWrapper import SPARQLWrapper, JSON

In [7]:


def get_related_films(wikidata_id):
  endpoint_url = "https://query.wikidata.org/sparql"
    
  # SPARQL query to get films based on the specified rules
  query = f"""
  SELECT DISTINCT ?z
  WHERE {{
      {{
        ?x wdt:P136 ?genre.
        ?z wdt:P136 ?genre.
        
        ?x wdt:P577 ?publicationDateX.
        ?z wdt:P577 ?publicationDateZ.
        FILTER(?publicationDateZ >= ?publicationDateX - 10 && ?publicationDateZ <= ?publicationDateX + 10)
        FILTER(?x != ?z)
      }}
      UNION
      {{
        ?x wdt:P57 ?director.
        ?z wdt:P57 ?director.
        
        FILTER(?x != ?z)
      }}
      UNION
      {{
        ?x wdt:P921 ?mainSubject.
        ?z wdt:P921 ?mainSubject.
        
        FILTER(?publicationDateZ >= ?publicationDateX - 10 && ?publicationDateZ <= ?publicationDateX + 10)
        FILTER(?x != ?z)
      }}

    VALUES ?x {{ wd:{wikidata_id} }}
  }}
  """

    
  # Set up SPARQL query and execute it
  sparql = SPARQLWrapper(endpoint_url)
  sparql.setQuery(query)
  sparql.setReturnFormat(JSON)
  results = sparql.query().convert()
  
  # Extract and return the list of related film IDs
  related_films = [result['z']['value'].split('/')[-1] for result in results['results']['bindings']]
  return related_films

# Example usage
wikidata_id = "Q83495"  # Replace with the actual Wikidata film ID
result = get_related_films(wikidata_id)
print(result)


['Q189600', 'Q207536', 'Q312078', 'Q736528', 'Q864930', 'Q959892', 'Q1203242', 'Q1210827', 'Q12125299', 'Q17080059', 'Q80322391', 'Q28936']


In [8]:
len(result)

12

In [6]:
from SPARQLWrapper import SPARQLWrapper, JSON

def get_main_subjects(film_ids):
    endpoint_url = "https://query.wikidata.org/sparql"
    
    main_subjects = []
    
    for film_id in film_ids:
        # SPARQL query to get main subjects of a film
        query = f"""
        SELECT DISTINCT ?mainSubjectLabel
        WHERE {{
          wd:{film_id} wdt:P921 ?mainSubject.
          SERVICE wikibase:label {{ bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }}
        }}
        """
        
        # Set up SPARQL query and execute it
        sparql = SPARQLWrapper(endpoint_url)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()
        
        # Extract main subjects and add them to the main_subjects list
        subjects = {
            "film_id":film_id, 
            "main_subjects": [result['mainSubjectLabel']['value'] for result in results['results']['bindings']]
        }
        main_subjects.append(subjects)
    
    return main_subjects

# Example usage:  # Example film IDs
main_subjects = get_main_subjects(result)
print(main_subjects)


[{'film_id': 'Q189600', 'main_subjects': ['telepresence']}, {'film_id': 'Q207536', 'main_subjects': ['telepresence']}, {'film_id': 'Q312078', 'main_subjects': ['motor car']}, {'film_id': 'Q736528', 'main_subjects': []}, {'film_id': 'Q864930', 'main_subjects': ['organized crime']}, {'film_id': 'Q959892', 'main_subjects': []}, {'film_id': 'Q1203242', 'main_subjects': []}, {'film_id': 'Q1210827', 'main_subjects': ['artificial intelligence']}, {'film_id': 'Q12125299', 'main_subjects': []}, {'film_id': 'Q17080059', 'main_subjects': ['connection', 'LGBT']}, {'film_id': 'Q80322391', 'main_subjects': ['free will', 'virtual reality', 'love', 'actuality', 'human-machine relationship', 'dream and reality', 'autonomy', 'human nature']}, {'film_id': 'Q28936', 'main_subjects': ['cloning', 'slavery', 'seamanship']}]


In [None]:
# Query to link film from dbpedia to wikidata
"""PREFIX wd: <http://www.wikidata.org/entity/>

SELECT DISTINCT ?film
WHERE {
  ?film rdf:type dbo:Film ;
        owl:sameAs wd:Q83495.
}
LIMIT 100"""

In [1]:
import pandas as pd 

# Read the data
df = pd.read_csv('data/IMDB-Movie-Data.csv')
titles = df['Title'].tolist()

In [12]:
from SPARQLWrapper import SPARQLWrapper, JSON

def get_wikidata_id(film_title):
    endpoint_url = "https://query.wikidata.org/sparql"
    
    # SPARQL query to get the Wikidata ID of a film
    query = f"""
    SELECT DISTINCT ?film
    WHERE {{
      ?film rdf:type dbo:Film ;
            rdfs:label "{film_title}"@en.
    }}
    LIMIT 1
    """
    
    # Set up SPARQL query and execute it
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    
    # Extract and return the Wikidata ID
    wikidata_id = results['results']['bindings'][0]['film']['value'].split('/')[-1]
    return wikidata_id

wikidata_ids = []
for title in titles:
    try:
        wikidata_id = get_wikidata_id(title)
        wikidata_ids.append(wikidata_id)
    except:
        print(f"{title}: No Wikidata ID found")

Guardians of the Galaxy: No Wikidata ID found
Prometheus: No Wikidata ID found
Split: No Wikidata ID found
Sing: No Wikidata ID found
Suicide Squad: No Wikidata ID found
The Great Wall: No Wikidata ID found
La La Land: No Wikidata ID found
Mindhorn: No Wikidata ID found
The Lost City of Z: No Wikidata ID found
Passengers: No Wikidata ID found
Fantastic Beasts and Where to Find Them: No Wikidata ID found
Hidden Figures: No Wikidata ID found
Rogue One: No Wikidata ID found
Moana: No Wikidata ID found
Colossal: No Wikidata ID found
The Secret Life of Pets: No Wikidata ID found
Hacksaw Ridge: No Wikidata ID found
Jason Bourne: No Wikidata ID found
Lion: No Wikidata ID found
Arrival: No Wikidata ID found
Gold: No Wikidata ID found
Manchester by the Sea: No Wikidata ID found
Hounds of Love: No Wikidata ID found
Trolls: No Wikidata ID found
Independence Day: Resurgence: No Wikidata ID found
Paris pieds nus: No Wikidata ID found
Bahubali: The Beginning: No Wikidata ID found
Dead Awake: No Wiki

In [6]:
from SPARQLWrapper import SPARQLWrapper, JSON

def query_films_from_wikidata():
    print("Querying films from Wikidata...")
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    sparql.setQuery("""
        SELECT DISTINCT ?item ?title ?year ?director ?image ?idwikidata WHERE {
            ?item wdt:P31 wd:Q11424.
            ?item wdt:P1476 ?title. 
            OPTIONAL { ?item wdt:P577 ?date. BIND(YEAR(?date) AS ?year) }
            OPTIONAL { ?item wdt:P57 ?director_item. ?director_item rdfs:label ?director. FILTER(LANG(?director) = "en") }
            OPTIONAL { ?item wdt:P18 ?image }
            BIND(REPLACE(STR(?item), ".*Q", "Q") AS ?idwikidata)
            FILTER(LANG(?title) = "en")
        }
        LIMIT 10
    """)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    films = []
    print("Processing results...")
    for result in results["results"]["bindings"]:
        film = {
            "title": result["title"]["value"],
            "year": result.get("year", {}).get("value"),
            "director": result.get("director", {}).get("value") if "director" in result else None,
            "image": result.get("image", {}).get("value") if "image" in result else None,
            "idwikidata": result["idwikidata"]["value"]
        }
        if film["idwikidata"] not in [f["idwikidata"] for f in films]:
            films.append(film)
        else:
            index = next((i for i, f in enumerate(films) if f["idwikidata"] == film["idwikidata"]), None)
            if film["director"] and film["director"] not in films[index]["director"]:
                films[index]["director"] += ", " + film["director"]
            if film["year"] and film["year"] not in films[index]["year"]:
                films[index]["year"] = min(films[index]["year"], film["year"])

    return films

In [7]:
films = query_films_from_wikidata()

Querying films from Wikidata...
Processing results...


In [8]:
films

[{'title': 'Africa Screams',
  'year': '1950',
  'director': 'Charles Barton',
  'image': 'http://commons.wikimedia.org/wiki/Special:FilePath/Africa%20screams%20title%20screen.JPG',
  'idwikidata': 'Q23892'},
 {'title': 'Whale Rider',
  'year': '2002',
  'director': 'Niki Caro',
  'image': None,
  'idwikidata': 'Q19393'},
 {'title': 'Paradise Alley',
  'year': '1978',
  'director': 'Sylvester Stallone',
  'image': None,
  'idwikidata': 'Q19268'},
 {'title': 'Dances with Wolves',
  'year': '1990',
  'director': 'Kevin Costner',
  'image': 'http://commons.wikimedia.org/wiki/Special:FilePath/Kevin%20Costner%20-Dances%20with%20Wolves.jpg',
  'idwikidata': 'Q20456'},
 {'title': 'Alligator II - The Mutation',
  'year': '1991',
  'director': 'Jon Hess',
  'image': None,
  'idwikidata': 'Q24151'},
 {'title': 'Signs',
  'year': '2002',
  'director': 'M. Night Shyamalan',
  'image': None,
  'idwikidata': 'Q22432'},
 {'title': 'Visions of Light',
  'year': '1992',
  'director': 'Todd McCarthy',
 