<br>
<font size=18>Lecture 7 - Graph Databases</font>
   
    
# Load required modules

In [None]:
import sys
#!{sys.executable} -m pip install names
#!{sys.executable} -m pip install neo4j
#!{sys.executable} -m pip install py2neo

In [None]:
import psycopg2  # handles database connection to postgres db
import pandas.io.sql as sqlio  # we'll use this for improved query presentation
import names
import numpy as np
import time
import neo4j
import py2neo

# Connect to the relational database

In [None]:
# "XXX" is presumably a placeholder for the real connection string -- TODO confirm;
# make sure real credentials are removed before pushing this notebook.
# GET THE CONNECTION OBJECT (SEE LECTURE 1)
conn = psycopg2.connect("XXX")
conn

# Create new tables 'person' and 'friendship'

In [None]:
# Create the 'person' table: one row per user, keyed by an integer user_id.
sql = """CREATE TABLE person (
    user_id integer primary key,
    name varchar(50)
)
"""

try:
    # The cursor context manager closes the cursor even if execute() fails.
    with conn.cursor() as cur:
        cur.execute(sql)
except psycopg2.Error as e:
    # A failed statement leaves the transaction in an aborted state; roll it
    # back explicitly so the connection stays usable for the following cells.
    conn.rollback()
    print("Error creating the table: {}".format(e))
else:
    conn.commit()

In [None]:
# Create the 'friendship' table: each row is a directed pair (user_id1, user_id2)
# whose two columns both reference person.user_id.
sql = """
CREATE TABLE friendship (
    user_id1 integer references person(user_id),
    user_id2 integer references person(user_id),
    primary key (user_id1, user_id2)
)
"""

try:
    # The cursor context manager closes the cursor even if execute() fails.
    with conn.cursor() as cur:
        cur.execute(sql)
except psycopg2.Error as e:
    # A failed statement leaves the transaction in an aborted state; roll it
    # back explicitly so the connection stays usable for the following cells.
    conn.rollback()
    print("Error creating the table: {}".format(e))
else:
    conn.commit()

### Delete table contents (for later use)

In [None]:
# Empty both tables. 'friendship' has to be emptied first because its rows
# reference 'person' through foreign keys.
for table_name in ("friendship", "person"):
    sql = "DELETE FROM {}".format(table_name)

    try:
        # The cursor context manager closes the cursor even if execute() fails.
        with conn.cursor() as cur:
            cur.execute(sql)
    except psycopg2.Error as e:
        # Roll back the aborted transaction so the next statement can run.
        conn.rollback()
        print("Error deleting table: {}".format(e))
    else:
        # Commit after each table, as before, so one failure does not undo the other.
        conn.commit()

# Insert persons

In [None]:
n = 20  # number of persons to insert into the person table
no_of_friends = 2  # number of friends drawn per person (a self-pick is dropped later, so a person may end up with fewer)

In [None]:
# names.get_full_name() gives us a random full name; the cells below use it
# to generate the persons
names.get_full_name()

In [None]:
# Insert n persons with consecutive user ids 0..n-1 and random full names.
# The values are passed as query parameters instead of being formatted into
# the SQL text, so a name containing a quote character cannot break the
# statement, and n == 0 simply inserts nothing.
sql = "INSERT INTO person VALUES (%s, %s)"
person_rows = [(i, names.get_full_name()) for i in range(n)]

try:
    # The cursor context manager closes the cursor even if the insert fails.
    with conn.cursor() as cur:
        cur.executemany(sql, person_rows)
except psycopg2.Error as e:
    # Undo any rows inserted before the failure so the insert stays all-or-nothing.
    conn.rollback()
    print("Error inserting into table: {}".format(e))
else:
    conn.commit()

In [None]:
# Show the first 10 rows of table person
sql = """
SELECT * FROM person LIMIT 10
"""

dat = sqlio.read_sql_query(sql, conn)
dat

# Insert friendship relationships

In [None]:
# Insert random friendship pairs: for every person draw no_of_friends distinct
# user ids and store one (person, friend) row per remaining draw.
sql = "INSERT INTO friendship VALUES (%s, %s)"

friendship_rows = []
for i in range(n):
    friends = np.random.choice(range(n), size=no_of_friends, replace=False)
    friends = friends[friends != i]  # delete friendships to the same person

    # Collect the pairs instead of gluing SQL text together: the old separator
    # logic assumed exactly no_of_friends entries per person and produced a
    # trailing comma (a syntax error) when the last person had drawn themselves.
    # int() converts the numpy integers to plain Python ints for the DB driver.
    friendship_rows.extend((i, int(friend_no)) for friend_no in friends)

try:
    # The cursor context manager closes the cursor even if the insert fails.
    with conn.cursor() as cur:
        cur.executemany(sql, friendship_rows)
except psycopg2.Error as e:
    # Undo any rows inserted before the failure so the insert stays all-or-nothing.
    conn.rollback()
    print("Error inserting into table: {}".format(e))
else:
    conn.commit()

In [None]:
# Show the first 100 rows of table friendship
sql = """
SELECT * FROM friendship LIMIT 100
"""

dat = sqlio.read_sql_query(sql, conn)
dat

## Get all friends with degree 4 for a given person

In [None]:
user_from = 16  # starting user
user_via = 11  # user that has to appear on the path (as an intermediate or as the final person)

# Follow four friendship hops starting at user_from by joining the friendship
# table with itself four times:
#   from_person -> intermediate_person_1 -> intermediate_person_2
#               -> intermediate_person_3 -> final_person
# The where clause keeps the start user from re-appearing later in the path,
# requires the three intermediate persons to be pairwise distinct, and keeps
# only paths on which user_via occurs.
sql = """select p1.name as from_person_name, f1.user_id1 as from_person, f1.user_id2 as intermediate_person_1, f2.user_id2 as intermediate_person_2, f3.user_id2 as intermediate_person_3, f4.user_id2 as final_person, p4.name as final_person_name
from friendship f1 
inner join friendship f2 on f1.user_id2 = f2.user_id1
inner join friendship f3 on f2.user_id2 = f3.user_id1
inner join friendship f4 on f3.user_id2 = f4.user_id1
inner join person p1 on f1.user_id1 = p1.user_id
inner join person p4 on f4.user_id2 = p4.user_id
where f1.user_id1 = {user_from} and f2.user_id1 <> {user_from} and f3.user_id1 <> {user_from} and f4.user_id1 <> {user_from} and f4.user_id2 <> {user_from}
and f3.user_id1 <> f2.user_id1 and f4.user_id1 <> f2.user_id1
and f4.user_id1 <> f3.user_id1
and (f2.user_id1={user_via} or f3.user_id1={user_via} or f4.user_id1={user_via} or f4.user_id2={user_via})""".format(
    user_from=user_from, user_via=user_via
)

dat = sqlio.read_sql_query(sql, conn)
dat

In [None]:
user_from = 16  # starting user

# List the direct friends of user_from: every friendship row whose user_id1 is
# user_from, joined twice with person to add the names of both ends.
sql = """select p1.name as from_person_name, f1.user_id1 as from_person, 
f1.user_id2 as to_person, p2.name as final_person_name
from friendship f1 
inner join person p1 on f1.user_id1 = p1.user_id
inner join person p2 on f1.user_id2 = p2.user_id
where f1.user_id1={}
""".format(user_from)

dat = sqlio.read_sql_query(sql, conn)
dat

# Neo4j Graph Database

In [None]:
from py2neo import Database, Graph, Node, Relationship
# Graph handle used by the py2neo cells below; "XXX" values are presumably
# placeholders for the database URI, user name and password -- TODO confirm.
g = Graph("XXX", auth=("XXX", "XXX"))

## Example Insertion of a Relationship

In [None]:
# Two example nodes carrying the label "Person" and the properties name and age
a = Node("Person", name="Alice", age=33)
b = Node("Person", name="Bob", age=44)
# Relationship type used to connect them
KNOWS = Relationship.type("KNOWS")
# Merge the relationship a-KNOWS->b into the graph, with label "Person" and
# property "name" passed as the merge key (see py2neo Graph.merge)
g.merge(KNOWS(a, b), "Person", "name")

## Insertion of the Social Network

In [None]:
n = 20  # how many Person nodes to create
no_of_friends = 2  # how many friends to draw for each person

# Build one Person node per user id, each with a random full name.
persons = [
    Node("Person", user_id=user_id, name=names.get_full_name())
    for user_id in range(n)
]

# Every edge uses the same relationship type, so it is created once.
KNOWS = Relationship.type("KNOWS")

for i, person in enumerate(persons):
    drawn = np.random.choice(range(n), size=no_of_friends, replace=False)
    # Keep only draws that point to somebody else (no self-friendships).
    for friend_index in drawn[drawn != i]:
        g.merge(KNOWS(person, persons[friend_index]), "Person", "user_id")

In [None]:
# Display the list of Person nodes built in the previous cell
persons

## Example Query Using "py2neo"

In [None]:
# Print the name of the end node of every KNOWS relationship matched for persons[1]
for rel in g.match((persons[1], ), r_type="KNOWS"):
    print(rel.end_node["name"])

## Queries Using the neo4j package and neo4j-native queries
### Connect to the Cloud Database

In [None]:
from neo4j import GraphDatabase

# "XXX" values are presumably placeholders for the database URI, user name
# and password -- TODO confirm. The driver is reused by the query cells below.
uri = "XXX"
driver = GraphDatabase.driver(uri, auth=("XXX", "XXX"))

### Query all relations between two persons

In [None]:
from pprint import pprint

# Names of the two persons to connect; change them to names that exist in your graph.
start_name = "Joan Phelps"
end_name = "Douglas Gokey"

# KNOWS*3 asks for paths made of exactly three KNOWS relationships; the pattern
# has no arrow, so the direction of the relationships is ignored.
# The names are passed as Cypher parameters ($start_name, $end_name) rather than
# being written into the query text, and the two string pieces are separated by
# a space (previously "...})" and "RETURN" were glued together).
query = (
    "MATCH (start:Person {name: $start_name})-[f:KNOWS*3]-(end:Person {name: $end_name}) "
    "RETURN start,end,f"
)

with driver.session() as session:
    result = session.run(query, {"start_name": start_name, "end_name": end_name})
    pprint(result.data())

### Get the shortest path between two persons
This is not easily implementable with standard SQL.
<br>Please change the start and end persons' names to names that exist in your data.

In [None]:
# Names of the two persons to connect; change them to names that exist in your graph.
start_name = "Joan Phelps"
end_name = "Douglas Gokey"

# Look up both persons by name and ask for the shortest path between them that
# uses at most 15 KNOWS relationships (direction ignored).
# The names are passed as Cypher parameters ($start_name, $end_name) so they
# only have to be changed in one place and never end up inside the query text.
query = (
    "MATCH (start:Person {name: $start_name}), (end:Person {name: $end_name}), "
    "p = shortestPath((start)-[:KNOWS*..15]-(end)) "
    "RETURN p"
)

with driver.session() as session:
    result = session.run(query, {"start_name": start_name, "end_name": end_name})
    print(result.data())