In [1]:
# :play intro-neo4j-exercises
# Exercise 3

In [2]:
# Imports
import os

import pandas as pd
from neo4j import GraphDatabase

In [3]:
# Connection to Graph Database
# Connection settings are read from the environment when present so that
# credentials do not have to be committed with the notebook. The fallbacks are
# the original local test values, so an unconfigured run behaves as before.
user = os.environ.get('NEO4J_USER', 'neo4j')
pw = os.environ.get('NEO4J_PASSWORD', 'test')
uri = os.environ.get('NEO4J_URI', 'bolt://192.168.1.135:7687')
neo4j_driver = GraphDatabase.driver(uri, auth=(user, pw), encrypted=False)

In [4]:
# Same Database as Exercise 1
with open('./Exercise1Database.cypher', 'r') as f_read:
    create_db_cypher = f_read.read()

# Statements in the file are separated by ';'. Splitting leaves whitespace-only
# fragments (for example after the final ';'); those are not statements, so
# they are dropped instead of being sent to the server.
# NOTE(review): this naive split would also break a statement whose string
# literal contains ';' -- confirm the .cypher file has none.
statements = [fragment.strip() for fragment in create_db_cypher.split(';')]

with neo4j_driver.session() as session:
    for cypher in statements:
        if cypher:
            session.run(cypher)

Exercise 3.1: Display the schema of the database (Instructions)

You will use schema information to help you specify relationships in your queries.

Display the schema of the database.

In [5]:
# The answer in the exercise is 'CALL db.schema', but that has been superseded
# by 'CALL db.schema.visualization'. The visualization procedure works great in
# Neo4j Browser but would need some formatting to be useful here, so two other
# schema procedures are used instead because they are easier to view in Python.
for cypher in ('CALL db.schema.relTypeProperties()',
               'CALL db.schema.nodeTypeProperties()'):
    # One session per procedure; every returned record is printed as-is.
    with neo4j_driver.session() as session:
        for record in session.run(cypher):
            print(record)
        


<Record relType=':`ACTED_IN`' propertyName='roles' propertyTypes=['StringArray'] mandatory=True>
<Record relType=':`DIRECTED`' propertyName=None propertyTypes=None mandatory=False>
<Record relType=':`PRODUCED`' propertyName=None propertyTypes=None mandatory=False>
<Record relType=':`WROTE`' propertyName=None propertyTypes=None mandatory=False>
<Record relType=':`FOLLOWS`' propertyName=None propertyTypes=None mandatory=False>
<Record relType=':`REVIEWED`' propertyName='summary' propertyTypes=['String'] mandatory=True>
<Record relType=':`REVIEWED`' propertyName='rating' propertyTypes=['Long'] mandatory=True>
<Record nodeType=':`Person`' nodeLabels=['Person'] propertyName='born' propertyTypes=['Long'] mandatory=False>
<Record nodeType=':`Person`' nodeLabels=['Person'] propertyName='name' propertyTypes=['String'] mandatory=True>
<Record nodeType=':`Movie`' nodeLabels=['Movie'] propertyName='tagline' propertyTypes=['String'] mandatory=False>
<Record nodeType=':`Movie`' nodeLabels=['Movie'] 

Exercise 3.2: Retrieve all people who wrote the movie Speed Racer (Instructions)

Retrieve all people who wrote the movie Speed Racer.

In [20]:
# doing it as a function
def get_writers(title):
    """Return the names of the people who wrote the movie called `title`.

    Args:
        title: Movie title, bound to the `$title` query parameter.

    Returns:
        list: The `p.name` value of every row the query returns.
    """
    query = 'MATCH (p:Person)-[:WROTE]->(:Movie {title: $title}) RETURN p.name'
    with neo4j_driver.session() as session:
        # .value() yields the single returned column, one entry per row.
        names = [name for name in session.run(query, title=title).value()]
    return names

In [21]:
# Example call: the cell displays the list returned for 'Speed Racer'.
get_writers('Speed Racer')

['Lana Wachowski', 'Lilly Wachowski']

Exercise 3.2: Retrieve all people who wrote the movie Speed Racer (Taking it further - optional)

Retrieve all people who have written other movies.

Retrieve people who have acted in a particular movie.

Retrieve people who have directed a particular movie.

In [6]:
# Retrieve all people who have written other movies.
def get_all_writers():
    """Return the name of every person with a WROTE relationship to a movie.

    The query returns one row per (person, movie) match, so a person who wrote
    several movies appears several times in the result.

    Returns:
        list: The `p.name` value of every row the query returns.
    """
    query = 'MATCH (p:Person)-[:WROTE]->(:Movie) RETURN p.name'
    names = []
    with neo4j_driver.session() as session:
        names.extend(session.run(query).value())
    return names

# Retrieve people who have acted in a particular movie.
def get_actors(title):
    """Return the names of the people who acted in the movie called `title`.

    Args:
        title: Movie title, bound to the `$title` query parameter.

    Returns:
        list: The `p.name` value of every row the query returns.
    """
    query = 'MATCH (p:Person)-[:ACTED_IN]->(:Movie {title: $title}) RETURN p.name'
    with neo4j_driver.session() as session:
        actor_names = list(session.run(query, title=title).value())
    return actor_names

# Retrieve people who have directed a particular movie.
def get_directors(title):
    """Return the names of the people who directed the movie called `title`.

    Args:
        title: Movie title, bound to the `$title` query parameter.

    Returns:
        list: The `p.name` value of every row the query returns.
    """
    query = 'MATCH (p:Person)-[:DIRECTED]->(:Movie {title: $title}) RETURN p.name'
    director_names = []
    with neo4j_driver.session() as session:
        rows = session.run(query, title=title)
        director_names += rows.value()
    return director_names

In [7]:
# Run each of the three optional helpers once and print the list it returns.
writers = get_all_writers()
print(writers)
actors = get_actors('Speed Racer')
print(actors)
directors = get_directors('Speed Racer')
print(directors)

['Aaron Sorkin', 'Jim Cash', 'Cameron Crowe', 'Nora Ephron', 'David Mitchell', 'Lana Wachowski', 'Lilly Wachowski', 'Lana Wachowski', 'Lilly Wachowski', 'Nancy Meyers']
['Rain', 'Susan Sarandon', 'Christina Ricci', 'Matthew Fox', 'Emile Hirsch', 'Ben Miles', 'John Goodman']
['Lana Wachowski', 'Lilly Wachowski']


Exercise 3.3: Retrieve all movies that are connected to the person, Tom Hanks (Instructions)

Retrieve all movies connected with Tom Hanks.

Hint: Tom Hanks has multiple relationships with a movie so you should not specify a relationship type in the query.

In [8]:
def get_all_movies(name):
    """Return the titles of all movies the person `name` points to.

    No relationship type is specified, so every outgoing relationship from the
    person to a movie produces a row; a movie reached through more than one
    relationship is listed once per relationship.

    Args:
        name: Person name, bound to the `$name` query parameter.

    Returns:
        list: The `title` field of every returned record.
    """
    # The node carrying the variable (m) is written first in the pattern,
    # which the original author notes as best practice.
    query = 'MATCH (m:Movie)<--(:Person {name: $name}) RETURN m.title AS title'
    with neo4j_driver.session() as session:
        titles = [record['title'] for record in session.run(query, name=name)]
    return titles

In [9]:
# Movies connected to Tom Hanks through any relationship type.
get_all_movies('Tom Hanks')

['A League of Their Own',
 'Cloud Atlas',
 'The Da Vinci Code',
 'Sleepless in Seattle',
 'The Polar Express',
 'The Green Mile',
 'Cast Away',
 "Charlie Wilson's War",
 'That Thing You Do',
 'That Thing You Do',
 'Joe Versus the Volcano',
 'Apollo 13',
 "You've Got Mail"]

Exercise 3.3: Retrieve all movies that are connected to the person, Tom Hanks (Taking it further - optional)

Retrieve all movies connected with another actor.

Retrieve all people connected with a particular movie.

In [6]:
# Reuse get_all_movies for a different person and print the resulting titles.
best_movies = get_all_movies('Keanu Reeves')
print(best_movies)

['The Matrix Revolutions', 'The Matrix Reloaded', "Something's Gotta Give", "The Devil's Advocate", 'The Replacements', 'Johnny Mnemonic', 'The Matrix']


In [16]:
def get_all_people(title):
    """Return the names of all people who point to the movie called `title`.

    No relationship type is specified, so any relationship from a person to
    the movie produces a row.

    Args:
        title: Movie title, bound to the `$title` query parameter.

    Returns:
        list: The `name` field of every returned record.
    """
    query = 'MATCH (p:Person)-->(:Movie {title: $title}) RETURN p.name as name'
    with neo4j_driver.session() as session:
        records = session.run(query, title=title)
        names = [record['name'] for record in records]
    return names

In [17]:
# Everyone with a relationship pointing at 'The Matrix'.
get_all_people('The Matrix')

<Record name='Joel Silver'>
<Record name='Emil Eifrem'>
<Record name='Laurence Fishburne'>
<Record name='Lana Wachowski'>
<Record name='Hugo Weaving'>
<Record name='Lilly Wachowski'>
<Record name='Carrie-Anne Moss'>
<Record name='Keanu Reeves'>


['Joel Silver',
 'Emil Eifrem',
 'Laurence Fishburne',
 'Lana Wachowski',
 'Hugo Weaving',
 'Lilly Wachowski',
 'Carrie-Anne Moss',
 'Keanu Reeves']

Exercise 3.4: Retrieve information about the relationships Tom Hanks has with the set of movies retrieved earlier (Instructions)

Modify the query that you just executed to return the type information about the relationships between Tom Hanks and the movies.

In [37]:
def get_all_movies_and_rel(name):
    """Return each movie related to the person `name` with the relationship type.

    Args:
        name: Person name, bound to the `$name` query parameter.

    Returns:
        list[dict]: One dict per returned record, with keys 'title' (the
        movie title) and 'position' (the `.type` of the matched relationship).
    """
    # The pattern is undirected, ()-[]-(), even though the results in this
    # data set only run one way, ()<-[]-(). The node carrying the variable (m)
    # comes first, which the original author notes as best practice.
    query = 'MATCH (m:Movie)-[rel]-(:Person {name: $name}) RETURN m.title AS title, rel'
    with neo4j_driver.session() as session:
        movies = [
            {'title': record['title'], 'position': record['rel'].type}
            for record in session.run(query, name=name)
        ]
    return movies

In [38]:
# Movies related to Tom Hanks, each with the type of the relationship.
get_all_movies_and_rel('Tom Hanks')

[{'title': 'A League of Their Own', 'position': 'ACTED_IN'},
 {'title': 'Cloud Atlas', 'position': 'ACTED_IN'},
 {'title': 'The Da Vinci Code', 'position': 'ACTED_IN'},
 {'title': 'Sleepless in Seattle', 'position': 'ACTED_IN'},
 {'title': 'The Polar Express', 'position': 'ACTED_IN'},
 {'title': 'The Green Mile', 'position': 'ACTED_IN'},
 {'title': 'Cast Away', 'position': 'ACTED_IN'},
 {'title': "Charlie Wilson's War", 'position': 'ACTED_IN'},
 {'title': 'That Thing You Do', 'position': 'ACTED_IN'},
 {'title': 'That Thing You Do', 'position': 'DIRECTED'},
 {'title': 'Joe Versus the Volcano', 'position': 'ACTED_IN'},
 {'title': 'Apollo 13', 'position': 'ACTED_IN'},
 {'title': "You've Got Mail", 'position': 'ACTED_IN'}]

Exercise 3.4: Retrieve information about the relationships Tom Hanks has with the set of movies retrieved earlier (Taking it further - optional)

Retrieve the relationship information about a different actor.

In [39]:
# Same relationship query for a different actor.
get_all_movies_and_rel('Keanu Reeves')

[{'title': 'The Matrix Revolutions', 'position': 'ACTED_IN'},
 {'title': 'The Matrix Reloaded', 'position': 'ACTED_IN'},
 {'title': "Something's Gotta Give", 'position': 'ACTED_IN'},
 {'title': "The Devil's Advocate", 'position': 'ACTED_IN'},
 {'title': 'The Replacements', 'position': 'ACTED_IN'},
 {'title': 'Johnny Mnemonic', 'position': 'ACTED_IN'},
 {'title': 'The Matrix', 'position': 'ACTED_IN'}]

Exercise 3.5: Retrieve information about the roles that Tom Hanks acted in (Instructions)

As an actor, a Person node in the database connects to a Movie node using the ACTED_IN relationship. One of the properties of the ACTED_IN relationship is roles.

Retrieve information about the roles that Tom Hanks played.

In [44]:
def get_roles(name):
    """Return the roles the person `name` played, one entry per movie acted in.

    Args:
        name: Person name, bound to the `$name` query parameter.

    Returns:
        list[dict]: One dict per returned record, with keys 'title' (the
        movie title) and 'roles' (the `roles` property of the ACTED_IN
        relationship).
    """
    query = 'MATCH (m:Movie)<-[rel:ACTED_IN]-(:Person {name: $name}) return m.title AS title, rel.roles AS roles'
    collected = []
    with neo4j_driver.session() as session:
        for record in session.run(query, name=name):
            entry = {'title': record['title'], 'roles': record['roles']}
            collected.append(entry)
    return collected

In [45]:
# Roles Tom Hanks played, grouped by movie title.
get_roles('Tom Hanks')

[{'title': 'A League of Their Own', 'roles': ['Jimmy Dugan']},
 {'title': 'Cloud Atlas',
  'roles': ['Zachry', 'Dr. Henry Goose', 'Isaac Sachs', 'Dermot Hoggins']},
 {'title': 'The Da Vinci Code', 'roles': ['Dr. Robert Langdon']},
 {'title': 'Sleepless in Seattle', 'roles': ['Sam Baldwin']},
 {'title': 'The Polar Express',
  'roles': ['Hero Boy',
   'Father',
   'Conductor',
   'Hobo',
   'Scrooge',
   'Santa Claus']},
 {'title': 'The Green Mile', 'roles': ['Paul Edgecomb']},
 {'title': 'Cast Away', 'roles': ['Chuck Noland']},
 {'title': "Charlie Wilson's War", 'roles': ['Rep. Charlie Wilson']},
 {'title': 'That Thing You Do', 'roles': ['Mr. White']},
 {'title': 'Joe Versus the Volcano', 'roles': ['Joe Banks']},
 {'title': 'Apollo 13', 'roles': ['Jim Lovell']},
 {'title': "You've Got Mail", 'roles': ['Joe Fox']}]

Exercise 3.5: Retrieve information about the roles that Tom Hanks acted in (Taking it further - optional)

Retrieve all roles for a different actor.

Retrieve all roles played for a particular movie.

In [46]:
# Same roles query for a different actor.
get_roles('Keanu Reeves')

[{'title': 'The Matrix Revolutions', 'roles': ['Neo']},
 {'title': 'The Matrix Reloaded', 'roles': ['Neo']},
 {'title': "Something's Gotta Give", 'roles': ['Julian Mercer']},
 {'title': "The Devil's Advocate", 'roles': ['Kevin Lomax']},
 {'title': 'The Replacements', 'roles': ['Shane Falco']},
 {'title': 'Johnny Mnemonic', 'roles': ['Johnny Mnemonic']},
 {'title': 'The Matrix', 'roles': ['Neo']}]

In [47]:
def get_roles_in_movie(title):
    """Return the roles played in the movie called `title`, one entry per actor.

    Args:
        title: Movie title, bound to the `$title` query parameter.

    Returns:
        list[dict]: One dict per returned record, with keys 'name' (the
        person's name) and 'roles' (the `roles` property of the ACTED_IN
        relationship).
    """
    query = 'MATCH (p:Person)-[rel:ACTED_IN]->(:Movie {title: $title}) return p.name AS name, rel.roles AS roles'
    with neo4j_driver.session() as session:
        cast = [
            {'name': record['name'], 'roles': record['roles']}
            for record in session.run(query, title=title)
        ]
    return cast

In [48]:
# Actors in 'The Matrix' with the roles each one played.
get_roles_in_movie('The Matrix')

[{'name': 'Emil Eifrem', 'roles': ['Emil']},
 {'name': 'Laurence Fishburne', 'roles': ['Morpheus']},
 {'name': 'Hugo Weaving', 'roles': ['Agent Smith']},
 {'name': 'Carrie-Anne Moss', 'roles': ['Trinity']},
 {'name': 'Keanu Reeves', 'roles': ['Neo']}]