Workshop
Below are queries on a sample IMDB Movie dataset that you will use to load and query data to/from Neo4j
//how many rows in all?
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/priya-jacob/pinkprogramming/main/movies.csv' AS row
RETURN COUNT(row)
//what does a row contain?
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/priya-jacob/pinkprogramming/main/movies.csv' AS row
RETURN row
LIMIT 1
//any missing values?
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/priya-jacob/pinkprogramming/main/movies.csv' AS row
WITH row
WHERE row.gross = "null"
RETURN COUNT(row)
//lock & load - create constraints!
//earlier than neo4j ver 4.4 syntax
CREATE CONSTRAINT c_movie_title IF NOT EXISTS ON (m:Movie) ASSERT m.title IS UNIQUE;
CREATE CONSTRAINT c_movie_genre IF NOT EXISTS ON (g:Genre) ASSERT g.genre IS UNIQUE;
CREATE CONSTRAINT c_person_name IF NOT EXISTS ON (p:Person) ASSERT p.name IS UNIQUE;
//revised neo4j ver 4.4 syntax
CREATE CONSTRAINT c_movie_title IF NOT EXISTS FOR (n:Movie) REQUIRE n.title IS UNIQUE;
CREATE CONSTRAINT c_movie_genre IF NOT EXISTS FOR (n:Genre) REQUIRE n.genre IS UNIQUE;
CREATE CONSTRAINT c_person_name IF NOT EXISTS FOR (n:Person) REQUIRE n.name IS UNIQUE;
//lock & load - create nodes & relationships
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/priya-jacob/pinkprogramming/main/movies.csv' AS row
WITH row
MERGE (n:Movie {title: row.title})
SET
n.year = row.year,
n.summary = row.summary,
n.runtime = toInteger(row.runtime),
n.certificate = row.certificate,
n.rating = toFloat(row.rating),
n.votes = toInteger(row.votes),
n.gross = toFloat(row.gross)
FOREACH (x IN split(row.genre, ',') | MERGE (g:Genre {genre: trim(x)}) MERGE (n)-[:GENRE]->(g))
FOREACH (x IN split(row.actors, ',') | MERGE (p:Person {name: trim(x)}) MERGE (p)-[:ACTOR]->(n))
FOREACH (x IN split(row.directors, ',') | MERGE (p:Person {name: trim(x)}) MERGE (p)-[:DIRECTOR]->(n))
//check graph model
CALL db.schema.visualization()
//check graph model - alternative method
CALL apoc.meta.graph()
//check schema - constraints
CALL db.constraints()
//check schema - indexes
CALL db.indexes()
//check graph data statistics
CALL apoc.meta.stats()
//fetch all Person nodes
MATCH (n:Person)
RETURN n
//fetch all Movie nodes
MATCH (n:Movie)
RETURN n
//look for Tom Hanks
MATCH (n:Person {name:'Tom Hanks'})
RETURN n
//look for Tom Hanks - any difference between the two queries here?
MATCH (n:Person)
WHERE n.name = 'Tom Hanks'
RETURN n
//and one more with the new neo4j 4.4 cypher syntax!
MATCH (n:Person WHERE n.name = 'Tom Hanks')
RETURN n
//try string conditional filtering
MATCH (n:Movie)
WHERE n.title CONTAINS 'Knight'
RETURN n
//how many called Tom?
MATCH (n:Person)
WHERE n.name STARTS WITH 'Tom'
RETURN n
//did Tom act with Tom? notice something strange?
MATCH (p1:Person)-[a1:ACTOR]->(m:Movie)<-[a2:ACTOR]-(p2:Person)
WHERE p1.name STARTS WITH 'Tom'
AND p2.name STARTS WITH 'Tom'
RETURN p1.name, p2.name, m.title;
//break the symmetry!
MATCH (p1:Person)-[a1:ACTOR]->(m:Movie)<-[a2:ACTOR]-(p2:Person)
WHERE p1.name STARTS WITH 'Tom'
AND p2.name STARTS WITH 'Tom'
AND ID(p1) < ID(p2)
RETURN p1.name, p2.name, m.title;
//fetch all actors and their movies
MATCH p=(:Person)-[:ACTOR]->(:Movie)
RETURN p
//fetch all actors &/OR directors and their movies
MATCH p=(:Person)-[:ACTOR|DIRECTOR]->(:Movie)
RETURN p
//fetch those acted-in & directed a movie
MATCH p=(n:Person)-[:ACTOR]->(m:Movie)<-[:DIRECTOR]-(n)
RETURN p
//fetch those acted-in & directed a movie with their information
MATCH (n:Person)-[:ACTOR]->(m:Movie)<-[:DIRECTOR]-(n)
RETURN n.name AS person, COUNT(m) AS cnt, COLLECT(m.title) AS movies
ORDER BY cnt DESC
//movies with the most directors
MATCH (n:Movie)
WITH n, size(()-[:DIRECTOR]->(n)) AS cnt
WHERE cnt > 1
RETURN n.title AS movie, [(p)-[:DIRECTOR]->(n)|p.name] AS directors
ORDER BY cnt DESC
//top actors by #movies
MATCH (a:Person)-[:ACTOR]->(m:Movie)
WITH a, COLLECT(m.title) AS movies
WHERE size(movies) > 1
RETURN a.name AS actor, movies, size(movies) AS cnt
ORDER BY cnt DESC
//who’ve worked together the most?
MATCH (actor:Person)-[:ACTOR]->(m)<-[:ACTOR]-(coactor:Person)
WHERE ID(actor) < ID(coactor)
WITH actor, coactor, COUNT(m) AS cnt, COLLECT(m.title) AS movies
WHERE cnt > 1
RETURN actor.name, coactor.name, movies, cnt
ORDER BY cnt DESC
LIMIT 5
//who must Tom act with? (a recommendation!)
MATCH (tom:Person {name: "Tom Hanks"})-[:ACTOR]->(m:Movie)<-[:ACTOR]-(coActors:Person)-[:ACTOR]->(oth:Movie)<-[:ACTOR]-(cocoActors:Person)
WHERE NOT EXISTS {(tom)-[:ACTOR]->()<-[:ACTOR]-(cocoActors)}
AND tom <> cocoActors
RETURN cocoActors.name AS recommended, collect(DISTINCT coActors.name) AS coactors, COUNT(DISTINCT coActors.name) AS strength
ORDER BY strength DESC;
//how to get to Keanu Reeves from Tom Hanks?
MATCH path=shortestPath((:Person {name:'Tom Hanks'})-[:ACTOR|DIRECTOR*]-(:Person {name:'Keanu Reeves'}))
RETURN path
//or from Brad Pitt to Clint Eastwood? - feel free to find connections between your favorite pair of actors!
MATCH p=shortestPath((:Person {name:'Brad Pitt'})-[:ACTOR|DIRECTOR*]-(:Person {name:'Clint Eastwood'}))
RETURN p
//Tom Hanks’ connections
MATCH (tom:Person {name:"Tom Hanks"})
MATCH (oth:Person)
WHERE tom <> oth
WITH tom, oth, shortestPath((tom)-[:ACTOR|DIRECTOR*..]-(oth)) AS path
WHERE length(path) > 0
WITH tom, oth, length(path) AS hops
ORDER BY hops ASC
RETURN hops/2 AS level, COLLECT(oth.name) AS connections
//Tom Hanks’ connections & mutual connections
MATCH (tom:Person {name:"Tom Hanks"})
MATCH (oth:Person)
WHERE tom <> oth
WITH tom, oth, shortestPath((tom)-[:ACTOR|DIRECTOR*]-(oth)) AS path
WHERE length(path) > 0
WITH tom, oth, length(path) AS hops,
[x IN NODES(path) WHERE x <> tom AND x <> oth AND LABELS(x)=['Person'] |x.name] AS mutualConnections
RETURN oth.name AS connection, hops/2 AS level, mutualConnections
ORDER BY level ASC
//best years for movie-buffs?
MATCH (n:Movie)
RETURN n.year AS year, COUNT(n) AS cnt, AVG(n.rating) AS avgRating, MAX(n.votes) AS maxVotes, COLLECT(n.title)[..5] AS sampleTitles
ORDER BY cnt DESC, avgRating DESC
//to aid with node rule styling for neo4j bloom
MATCH (n:Movie)
RETURN min(n.gross), avg(n.gross), max(n.gross), percentileCont(n.gross, 0.5), percentileDisc(n.gross, 0.5)
MATCH (n:Movie)
RETURN min(n.votes), avg(n.votes), max(n.votes), percentileCont(n.votes, 0.5), percentileDisc(n.votes, 0.5)
MATCH (n:Movie)
RETURN min(n.rating), avg(n.rating), max(n.rating), percentileCont(n.rating, 0.5), percentileDisc(n.rating, 0.5)
//if you wish to delete what we just loaded!
//delete relationships first followed by nodes!
MATCH (:Person)-[r:ACTOR]->(:Movie)
DELETE r;
MATCH (:Person)-[r:DIRECTOR]->(:Movie)
DELETE r;
MATCH (:Movie)-[r:GENRE]->(:Genre)
DELETE r;
MATCH (n:Genre)
DELETE n;
MATCH (n:Person)
DELETE n;
MATCH (n:Movie)
DELETE n;
//or choose to delete everything in one go - use with caution - this will delete **everything** from your database!
MATCH (n)
DETACH DELETE n