## Graph Database

1. Install Neo4j Desktop from https://neo4j.com/download/
2. Install the official Neo4j Python driver: `pip install neo4j`
3. Install py2neo: `pip install py2neo`

**Query a graph database using the neo4j package for Python**

In [2]:
from neo4j import GraphDatabase
import pandas as pd

In [3]:
import os

# Connection settings are read from the environment so that credentials do not
# have to be stored in the notebook. The defaults reproduce the original local
# setup exactly (bolt://localhost:7687, user "neo4j", empty password).
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "")

driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [18]:
# Which employee had the highest cross-selling count of 'Chocolade', and with which product?
# For every employee who sold an order containing Chocolade, count the distinct
# orders (o2) in which that employee sold each product, and keep the top 5.
with driver.session() as session:
    result = session.run("""MATCH (choc:Product {productName:'Chocolade'})
    <-[:PRODUCT]-(:Order)<-[:SOLD]-(employee),
     (employee)-[:SOLD]->(o2)-[:PRODUCT]->(other:Product)
RETURN employee.employeeID, other.productName, count(distinct o2) as count
ORDER BY count DESC
LIMIT 5;""")
    # Read the records while the session is still open: once the `with` block
    # closes the session the result can no longer be iterated (current driver
    # versions discard the records or raise ResultConsumedError).
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())

df


Unnamed: 0,employee.employeeID,other.productName,count
0,4,Gnocchi di nonna Alice,14
1,4,Pâté chinois,12
2,1,Flotemysost,12
3,3,Gumbär Gummibärchen,12
4,1,Pavlova,11


In [12]:
# How are employees organized? Who reports to whom?
# Returns one row per direct REPORTS_TO relationship (manager, employee).
with driver.session() as session:
    result = session.run("""MATCH path = (e:Employee)<-[:REPORTS_TO]-(sub)
RETURN e.employeeID AS manager, sub.employeeID AS employee;""")
    # Read the records while the session is still open: once the `with` block
    # closes the session the result can no longer be iterated (current driver
    # versions discard the records or raise ResultConsumedError).
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())

df


# SQL
#SELECT e2.employee_id AS ManagerID, e1.employee_id AS EmployeeID
#FROM employees e1
#INNER JOIN employees e2
#ON e1.reports_to = e2.employee_id
#order by e2.employee_id

Unnamed: 0,manager,employee
0,2,8
1,2,4
2,2,3
3,2,1
4,2,5
5,5,7
6,5,9
7,5,6


In [13]:
# Which employees report to each other indirectly?
# Follows REPORTS_TO chains of any length; `via` lists the employee IDs strictly
# between manager and employee, or "Direct Report" when there are none.
# SIZE() is used on the intermediate-ID list because LENGTH() only accepts
# paths on Neo4j 4.0 and later.
with driver.session() as session:
    result = session.run("""MATCH path = (e:Employee)<-[:REPORTS_TO*]-(sub)
WITH e, sub, [person in NODES(path) | person.employeeID][1..-1] AS path
RETURN e.employeeID AS manager, sub.employeeID AS employee, CASE WHEN SIZE(path) = 0 THEN "Direct Report" ELSE path END AS via
ORDER BY SIZE(path);""")
    # Read the records while the session is still open: once the `with` block
    # closes the session the result can no longer be iterated (current driver
    # versions discard the records or raise ResultConsumedError).
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())

df


Unnamed: 0,manager,employee,via
0,2,5,Direct Report
1,2,1,Direct Report
2,2,3,Direct Report
3,2,4,Direct Report
4,2,8,Direct Report
5,5,6,Direct Report
6,5,9,Direct Report
7,5,7,Direct Report
8,2,6,[5]
9,2,9,[5]


In [14]:
# How many orders were made by each part of the hierarchy?
# `*0..` includes the employee themselves, so totalOrders counts their own
# orders plus those of everyone below them; `reports` lists only the others.
with driver.session() as session:
    result = session.run("""MATCH (e:Employee)
OPTIONAL MATCH (e)<-[:REPORTS_TO*0..]-(sub)-[:SOLD]->(order)
RETURN e.employeeID, [x IN COLLECT(DISTINCT sub.employeeID) WHERE x <> e.employeeID] AS reports, COUNT(distinct order) AS totalOrders
ORDER BY totalOrders DESC;""")
    # Read the records while the session is still open: once the `with` block
    # closes the session the result can no longer be iterated (current driver
    # versions discard the records or raise ResultConsumedError).
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())

df

Unnamed: 0,e.employeeID,reports,totalOrders
0,2,"[5, 6, 9, 7, 1, 3, 4, 8]",830
1,5,"[6, 9, 7]",224
2,4,[],156
3,3,[],127
4,1,[],123
5,8,[],104
6,7,[],72
7,6,[],67
8,9,[],43


In [27]:
# Find who bought Chocolade
# Walks Product <- Order <- Customer and returns each distinct company name.
with driver.session() as session:
    result = session.run("""MATCH (:Product {productName:"Chocolade"})
    <-[:PRODUCT]-(:Order)<-[:PURCHASED]-(c:Customer)
    RETURN DISTINCT c.companyName as Company""")
    # Read the records while the session is still open: once the `with` block
    # closes the session the result can no longer be iterated (current driver
    # versions discard the records or raise ResultConsumedError).
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())

df


# SQL
#Select distinct c.company_name 
#from customers as c
#JOIN orders as o on c.customer_id = o.customer_id
#JOIN order_details as od on o.order_id = od.order_id
#JOIN products as p on (od.product_id = p.product_id)
#where p.product_name ='Chocolade'

Unnamed: 0,Company
0,Victuailles en stock
1,Antonio Moreno Taquería
2,Around the Horn
3,Ernst Handel
4,Queen Cozinha
5,Furia Bacalhau e Frutos do Mar


In [29]:
# What have I bought and paid in total?
# For the customer "Drachenblut Delikatessen", sums unitPrice * quantity (stored
# on the PRODUCT relationship) per product, largest volume first.
with driver.session() as session:
    result = session.run("""MATCH (c:Customer {companyName:"Drachenblut Delikatessen"})
    OPTIONAL MATCH (p:Product) <- [pu:PRODUCT]-(:Order)<-[:PURCHASED]-(c)
    RETURN p.productName, sum(pu.unitPrice * pu.quantity) as volume
    order by volume desc""")
    # Read the records while the session is still open: once the `with` block
    # closes the session the result can no longer be iterated (current driver
    # versions discard the records or raise ResultConsumedError).
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())

df


# SQL
#SELECT p.product_name, SUM(od.unit_price * od.quantity) as Volume
#from customers as c
#Left outer join orders as o on c.customer_id = o.customer_id
#left outer join order_details as od on o.order_id = od.order_id
#left outer join products as p on od.product_id = p.product_id
#where c.company_name = 'Drachenblut Delikatessen'
#group by p.product_name
#order by Volume desc

Unnamed: 0,p.productName,volume
0,Raclette Courdavault,1650.0
1,Perth Pasties,656.0
2,Queso Cabrales,420.0
3,Gumbär Gummibärchen,374.76
4,Gorgonzola Telino,200.0
5,Lakkalikööri,172.8
6,Konbu,128.4
7,Jack's New England Clam Chowder,86.85
8,Rhönbräu Klosterbier,74.4


## Create a graph from scratch in a database

In [36]:
import os

from py2neo import Graph

# The password is taken from the NEO4J_PASSWORD environment variable so it does
# not have to be written into the notebook; the fallback is the original literal.
graph = Graph(password=os.environ.get("NEO4J_PASSWORD", "<password>"))

In [37]:
# Start this section from an empty graph. delete_all() is destructive: it is
# expected to remove every node and relationship in the connected database.
# NOTE(review): Graph() is created without a URI, so it presumably targets the
# default local instance — if that is the same database queried in the cells
# above, this call erases that data and those cells will not re-run; confirm.
graph.delete_all()

In [38]:
from py2neo import Node

# Two people ...
nicole, drew = (
    Node("Person", name="Nicole", age=24),
    Node("Person", name="Drew", age=20),
)

# ... two drinks ...
mtdew, cokezero = (
    Node("Drink", name="Mountain Dew", calories=9000),
    Node("Drink", name="Coke Zero", calories=0),
)

# ... and two manufacturers.
coke, pepsi = (
    Node("Manufacturer", name="Coca Cola"),
    Node("Manufacturer", name="Pepsi"),
)

# `|` unions the nodes into one subgraph, which is written with a single create() call.
graph.create(nicole | drew | mtdew | cokezero | coke | pepsi)

In [35]:
# Options for the (currently disabled) `draw` helper from scripts.vis: every
# node label maps to the property "name" — presumably the property shown as the
# node caption; confirm against draw() before re-enabling.
options = dict.fromkeys(("Person", "Drink", "Manufacturer"), "name")
# from scripts.vis import draw
# draw(graph, options)

In [39]:
from py2neo import Relationship

# (start node, relationship type, end node), created one at a time in this order.
edges = (
    (nicole, "LIKES", cokezero),
    (nicole, "LIKES", mtdew),
    (drew, "LIKES", mtdew),
    (coke, "MAKES", cokezero),
    (pepsi, "MAKES", mtdew),
)
for start, rel_type, end in edges:
    graph.create(Relationship(start, rel_type, end))

In [41]:
# Every (person, drink) pair connected by a LIKES relationship.
query = """
MATCH (person:Person)-[:LIKES]->(drink:Drink)
RETURN person.name AS name, drink.name AS drink
"""

result = graph.run(query)

# One DataFrame row per returned record, columns named after the RETURN aliases.
df = pd.DataFrame(
    data=[record.values() for record in result],
    columns=result.keys(),
)
df

Unnamed: 0,name,drink
0,Drew,Mountain Dew
1,Nicole,Mountain Dew
2,Nicole,Coke Zero


In [45]:
# Average calories of the drinks liked by one person, selected through a query
# parameter. Parameters are written as $name: the older {name} form is rejected
# by Neo4j 4.0 and later.
query = """
MATCH (p:Person)-[:LIKES]->(drink:Drink)
WHERE p.name = $name
RETURN p.name AS name, AVG(drink.calories) AS avg_calories
"""

# Keyword arguments to graph.run() are passed to the query as parameters.
result = graph.run(query, name="Nicole")

df = pd.DataFrame([r.values() for r in result], columns=result.keys())
df

Unnamed: 0,name,avg_calories
0,Nicole,4500.0


In [46]:
# Each LIKES pair together with the calories of the liked drink.
query = """
MATCH (person:Person)-[:LIKES]->(drink:Drink)
RETURN person.name, drink.name, drink.calories
"""

result = graph.run(query)

# One DataFrame row per returned record, columns named after the RETURN expressions.
df = pd.DataFrame(
    data=[record.values() for record in result],
    columns=result.keys(),
)
df

Unnamed: 0,person.name,drink.name,drink.calories
0,Nicole,Mountain Dew,9000
1,Nicole,Coke Zero,0
2,Drew,Mountain Dew,9000
