In [18]:
import os

import pandas as pd
from py2neo import authenticate, Graph 
from py2neo import Node, Relationship, NodeSelector

In [5]:
# Neo4j service authentication and connection.
# Connection details are read from environment variables so that real credentials
# never have to be saved in the notebook. The fallbacks are the original
# placeholder strings, so behaviour is unchanged when the variables are not set.
neo4j_host_port = os.environ.get("NEO4J_HOST_PORT", "host_port")
neo4j_user = os.environ.get("NEO4J_USER", "user")
neo4j_password = os.environ.get("NEO4J_PASSWORD", "password")

# NOTE(review): the same host_port value feeds both calls, exactly as in the
# original placeholders - confirm whether the Graph URI also needs a scheme prefix.
authenticate(neo4j_host_port, neo4j_user, neo4j_password)
graph = Graph(neo4j_host_port + '/db/data/', bolt = False)

In [69]:
# Load the Movielens dataset: movies first, then ratings
movies, ratings = (pd.read_csv(csv_name) for csv_name in ("movies.csv", "ratings.csv"))

# Only a small slice of the dataset is loaded into the graph
# (to satisfy requirements of GrapheneDB free tier)
num_movies, num_users = 50, 100

In [70]:
# Create Genre nodes
# NOTE(review): confirm these spellings match the values in the `genres` column of
# movies.csv (e.g. "Children's" may appear as "Children" in some Movielens releases);
# genre names missing from this list are skipped when movies are linked to genres.
genres = [
    "Action", "Adventure", "Animation", "Children's", "Comedy", "Crime",
    "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical",
    "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
]

# genre name -> Node, built locally first and written to the graph afterwards
genre_nodes = dict((genre_name, Node('Genre', name=genre_name)) for genre_name in genres)

for genre_node in genre_nodes.values():
    graph.create(genre_node)

In [71]:
# Create Movie nodes, restricted to the first `num_movies` movie ids
movie_subset = movies[movies['movieId'] <= num_movies]

for movie_title, genre_field in zip(movie_subset['title'], movie_subset['genres']):
    movie = Node("Movie", title=movie_title)
    graph.create(movie)

    # Create Movie->Genre relationships; the genres column is a '|'-separated list
    for genre_name in genre_field.split('|'):
        if genre_name not in genres:
            # no Genre node exists for this name, so there is nothing to link to
            continue
        graph.create(Relationship(movie, "OF_GENRE", genre_nodes[genre_name]))

In [72]:
# Create User nodes and User->Movie RATED relationships

# Build the selector once instead of once per rating.
selector = NodeSelector(graph)

# movieId -> title lookup, built once instead of scanning `movies` for every rating.
title_by_movie_id = dict(zip(movies['movieId'], movies['title']))

# Keep only the ratings that fall inside the configured subset.
ratings_subset = ratings[(ratings['userId'] <= num_users) & (ratings['movieId'] <= num_movies)]

# Nodes already resolved during this run, so each one is looked up in the graph only once.
user_nodes = {}   # userId -> User node
movie_nodes = {}  # movieId -> Movie node

for user_id, movie_id, rating in zip(ratings_subset['userId'],
                                     ratings_subset['movieId'],
                                     ratings_subset['rating']):
    # Iterating the columns (rather than iterrows(), which upcasts a mixed
    # int/float row to float) keeps the ids integral; convert to plain Python
    # scalars before handing them to py2neo.
    user_id, movie_id, rating = int(user_id), int(movie_id), float(rating)

    user = user_nodes.get(user_id)
    if user is None:
        # check if User node exists in the graph, and if not - create a new User node
        user = selector.select("User", id=user_id).first()
        if user is None:
            user = Node("User", id=user_id)
            graph.create(user)
        user_nodes[user_id] = user

    movie = movie_nodes.get(movie_id)
    if movie is None:
        # Movie nodes are stored with their title, so resolve the title first
        title = title_by_movie_id[movie_id]
        movie = list(selector.select("Movie", title=title))[0]
        movie_nodes[movie_id] = movie

    # create User->Movie relationship carrying the rating value
    graph.create(Relationship(user, "RATED", movie, rating=rating))

In [99]:
# How much does a specific User like a specific Genre?
# The query returns the average rating this User assigned to Movies of that Genre.
# {user_id} and {genre} are query parameters, supplied when the query is run.
# The string literal must start on the assignment line: a bare `name =` followed
# by a newline is a SyntaxError.
user_to_genre_query = """
MATCH (u:User)-[r:RATED]->(m:Movie)-[:OF_GENRE]->(g:Genre)
WHERE u.id = {user_id} AND g.name = {genre}
RETURN AVG(r.rating) AS avg_rating
"""

In [111]:
# Average rating that user 8 gave to Thriller movies (first value of the first record)
graph.evaluate(user_to_genre_query, user_id=8, genre='Thriller')

4.375