In [2]:
# %pip (rather than !pip) installs into the environment of the running kernel;
# the version is pinned to the release this notebook was executed with.
%pip install SPARQLWrapper==2.0.0

Collecting SPARQLWrapper
  Using cached SPARQLWrapper-2.0.0-py3-none-any.whl (28 kB)
Installing collected packages: SPARQLWrapper
Successfully installed SPARQLWrapper-2.0.0


In [3]:
from SPARQLWrapper import SPARQLWrapper, JSON

In [29]:
# IRI (already wrapped in angle brackets) that the `lmdb:` prefix expands to
# in every query built by this notebook.
LMDB_PREFIX = "<https://triplydb.com/Triply/linkedmdb/vocab/>"

# URL of the SPARQL service that all queries are sent to.
SPARQL_ENDPOINT = (
    "https://api.triplydb.com/datasets/Triply/linkedmdb/services/linkedmdb/sparql"
)

# Film properties used for comparison. Each name is emitted as `lmdb:<name>`,
# so it must exist in the lmdb namespace.
PROPERTY_NAMES = [
    "actor",
    "director",
    "genre",
]

In [115]:
def get_movie_properties(movie):
    """Look up the values of every property in PROPERTY_NAMES for one film.

    A single SELECT query is sent to SPARQL_ENDPOINT. Each property is asked
    for in its own UNION branch, so a film that lacks one property (for
    example, no genre) still yields the values of the others, and the number
    of result rows is the sum rather than the cross product of the
    per-property value counts.

    Args:
        movie: The film's IRI already wrapped in angle brackets, e.g.
            "<https://triplydb.com/Triply/linkedmdb/id/film/3247>". It is
            interpolated verbatim into the query as the triple subject.

    Returns:
        A dict mapping each name in PROPERTY_NAMES to the set of `value`
        strings bound for that property (an empty set when there are none).
    """
    properties = {prop: set() for prop in PROPERTY_NAMES}
    if not PROPERTY_NAMES:
        # Nothing to ask for; an empty SELECT list would not be valid SPARQL.
        return properties

    select_line = " ".join(f"?{prop}" for prop in PROPERTY_NAMES)

    # One independent branch per property: each result row binds exactly one
    # of the selected variables and leaves the others unbound.
    union_branches = [
        f"      {{ {movie} lmdb:{prop} ?{prop} . }}" for prop in PROPERTY_NAMES
    ]
    where_clause = "\n      UNION\n".join(union_branches)

    query_str = f"""
    PREFIX lmdb: {LMDB_PREFIX}

    SELECT {select_line}
    WHERE {{
{where_clause}
    }}
    """

    sparql = SPARQLWrapper(SPARQL_ENDPOINT)
    sparql.setQuery(query_str)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    for result in results["results"]["bindings"]:
        for prop in PROPERTY_NAMES:
            # Unbound variables are simply absent from the binding dict.
            if prop in result:
                properties[prop].add(result[prop]["value"])

    return properties

In [120]:
def get_movie_recommendations(properties, limit=15, exclude=None):
    """Rank films by how many of the given property values they share.

    Builds one UNION branch per (property, value) pair, tags each branch with
    a distinct marker, and counts the distinct markers matched per film.

    Args:
        properties: Mapping of property name (from the lmdb namespace) to an
            iterable of value IRIs without angle brackets, i.e. the shape
            returned by get_movie_properties.
        limit: Maximum number of films to return. Defaults to 15.
        exclude: Optional film IRI, with or without angle brackets, to leave
            out of the ranking. Typically the seed film, which otherwise
            ranks first because it matches all of its own property values.

    Returns:
        A list of dicts ordered by descending score. "movie_id" is the last
        path segment of the film IRI. "score" is the count exactly as found
        in the binding's `value` field; SPARQL JSON results carry literals as
        strings, so convert with int() before comparing numerically.
        An empty list is returned when `properties` holds no values at all.
    """
    property_lines = []
    for prop, values in properties.items():
        # sorted() keeps the branch numbering, and therefore the query text,
        # deterministic; plain set iteration order is not.
        for i, value in enumerate(sorted(values)):
            property_lines.append(f"""
          {{
            ?movie lmdb:{prop} <{value}> .
            BIND("{prop}{i}" as ?matchedProperty)
          }}""")

    if not property_lines:
        # Without any branch the WHERE clause would only require
        # `?movie a lmdb:Film`, returning arbitrary films with score 0.
        return []

    property_lines_combined = "\nUNION\n".join(property_lines)

    exclude_filter = ""
    if exclude:
        # Accept both "<iri>" and "iri" spellings.
        exclude_iri = exclude.strip().strip("<>")
        exclude_filter = f"FILTER(?movie != <{exclude_iri}>)"

    # int() guarantees that only a number is interpolated after LIMIT.
    limit = int(limit)

    query_str = f"""
    PREFIX lmdb: {LMDB_PREFIX}

    SELECT ?movie (COUNT(DISTINCT ?matchedProperty) as ?score)
    WHERE {{
      ?movie a lmdb:Film .
      {property_lines_combined}
      {exclude_filter}
    }}
    GROUP BY ?movie
    ORDER BY DESC(?score)
    LIMIT {limit}
    """

    sparql = SPARQLWrapper(SPARQL_ENDPOINT)
    sparql.setQuery(query_str)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    recommendations = []
    for result in results["results"]["bindings"]:
        # The numeric film id is the last path segment of the film IRI.
        movie_id = result["movie"]["value"].split('/')[-1]
        score = result["score"]["value"]
        recommendations.append({"movie_id": movie_id, "score": score})

    return recommendations

In [118]:
# Seed film, written as a bracketed IRI because get_movie_properties
# interpolates it directly into the SPARQL query as the triple subject.
movie = "<https://triplydb.com/Triply/linkedmdb/id/film/3247>"
properties = get_movie_properties(movie)
# Bare last expression so the notebook displays the fetched property sets.
properties

{'actor': {'https://triplydb.com/Triply/linkedmdb/id/actor/27722',
  'https://triplydb.com/Triply/linkedmdb/id/actor/40971',
  'https://triplydb.com/Triply/linkedmdb/id/actor/59750'},
 'director': {'https://triplydb.com/Triply/linkedmdb/id/director/2623',
  'https://triplydb.com/Triply/linkedmdb/id/director/2624'},
 'genre': {'https://triplydb.com/Triply/linkedmdb/id/film_genre/65'}}

In [121]:
# Rank films that share property values with the seed film. The seed film is
# part of the ranking too, since it matches all of its own property values.
similar_movies = get_movie_recommendations(properties)

print("Similar Movies:")
# Dedicated loop name: iterating with `movie` would overwrite the seed IRI
# stored under that name by an earlier cell.
for recommendation in similar_movies:
    print(f"Movie ID: {recommendation['movie_id']}, Score: {recommendation['score']}")

Similar Movies:
Movie ID: 3247, Score: 6
Movie ID: 9002, Score: 3
Movie ID: 26542, Score: 1
Movie ID: 9646, Score: 1
Movie ID: 9514, Score: 1
Movie ID: 36461, Score: 1
Movie ID: 21954, Score: 1
Movie ID: 22307, Score: 1
Movie ID: 8415, Score: 1
Movie ID: 22561, Score: 1
Movie ID: 23314, Score: 1
Movie ID: 13118, Score: 1
Movie ID: 29274, Score: 1
Movie ID: 13653, Score: 1
Movie ID: 11236, Score: 1
