# Basic Graph Queries with ArangoDB

<a href="https://colab.research.google.com/github/joerg84/Graph_Powered_ML_Workshop/blob/master/Graphs_Queries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook we explore some basic graph queries using [ArangoDB](https://www.arangodb.com), including simple traversals and shortest path queries. 

The first step is to set up our environment:

In [None]:
%%capture
!git clone https://github.com/joerg84/Graph_Powered_ML_Workshop.git
!rsync -av Graph_Powered_ML_Workshop/ ./ --exclude=.git
!pip3 install pyarango

In [None]:
import json
import requests
import sys
import oasis

from pyArango.connection import *
from pyArango.collection import Collection, Edges, Field
from pyArango.graph import Graph, EdgeDefinition
from pyArango.collection import BulkOperation as BulkOperation

Next, create a temporary database instance backed by ArangoDB's Managed Cloud Service Oasis:

In [None]:
# Retrieve temporary credentials from the ArangoDB tutorial service.
# `login` is indexed by key below; its "dbName" entry names the database to open.
login = oasis.getTempCredentials()

# Connect with those credentials and keep a handle to the temporary
# database in `db`.
conn = oasis.connect(login)
db = conn[login["dbName"]] 

In [None]:
# Show the endpoint URL and the temporary credentials stored in `login`.
endpoint = "".join(["https://", login["hostname"], ":", str(login["port"])])
print(endpoint)

# Each remaining line is a fixed prefix followed by one entry of `login`.
for prefix, field in (
    ("Username: ", "username"),
    ("Password: ", "password"),
    ("Database: ", "dbName"),
):
    print(prefix + login[field])

# First Graph

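Recall the following small family data set, written as RDF triples in Turtle syntax. We will model the same data as a graph: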

```
@prefix : <http://www.snee.com/ns/demo#> .
:Jane :hasParent :Gene .
:Gene :hasParent :Pat ;
      :gender    :female .
:Joan :hasParent :Pat ;
      :gender    :female .
:Pat  :gender    :male .
:Mike :hasParent :Joan .
```



In [None]:
# `Edges` is imported here as well so that this cell names everything its
# class definitions use.
from pyArango.collection import Collection, Edges, Field
from pyArango.graph import Graph, EdgeDefinition


class Person(Collection):
    """Vertex collection with one document per family member."""

    _fields = {
        "gender": Field(),
    }


class Parent(Edges):
    """Edge collection; each edge points from a person to one of their parents."""

    _fields = {}


class Family(Graph):
    """Graph that connects Person vertices through Parent edges."""

    _edgeDefinitions = [
        EdgeDefinition("Parent", fromCollections=["Person"], toCollections=["Person"])
    ]
    # Declared explicitly for consistency with the RailNetwork graph definition.
    _orphanedCollections = []


# Create the two collections, then the graph defined on top of them.
db.createCollection("Person")
db.createCollection("Parent")
familyGraph = db.createGraph("Family")

# Create one vertex per person; "gender" is set only where the data provides it.
p1 = familyGraph.createVertex('Person', {"_key": "Jane"})
p2 = familyGraph.createVertex('Person', {"_key": "Gene", "gender": "female"})
p3 = familyGraph.createVertex('Person', {"_key": "Joan", "gender": "female"})
p4 = familyGraph.createVertex('Person', {"_key": "Pat", "gender": "male"})
p5 = familyGraph.createVertex('Person', {"_key": "Mike"})

# Link every child to its parent (edge direction: child -> parent).
e1 = familyGraph.link('Parent', p1, p2, {})  # Jane -> Gene
e2 = familyGraph.link('Parent', p2, p4, {})  # Gene -> Pat
e3 = familyGraph.link('Parent', p3, p4, {})  # Joan -> Pat
# Own name for the fourth edge: previously this reused `e3` and dropped the
# handle to the Joan -> Pat edge.
e4 = familyGraph.link('Parent', p5, p3, {})  # Mike -> Joan

# Train Network

Let's define a structure for a simple train network.

![trainNetwork](https://github.com/joerg84/Graph_Powered_ML_Workshop/blob/master/img/train_network.png?raw=1)

In [None]:
# `Edges` is imported here as well so that this cell names everything its
# class definitions use.
from pyArango.collection import Collection, Edges, Field
from pyArango.graph import Graph, EdgeDefinition


class Cities(Collection):
    """Vertex collection with one document per city."""

    _fields = {
        "country": Field()
    }


class Connection(Edges):
    """Edge collection with one edge per directed connection between two cities."""

    _fields = {
        "travel_time": Field()
    }


class RailNetwork(Graph):
    """Graph that connects Cities vertices through Connection edges."""

    _edgeDefinitions = [
        EdgeDefinition("Connection", fromCollections=["Cities"], toCollections=["Cities"])
    ]
    _orphanedCollections = []


# Create the two collections, then the graph defined on top of them.
db.createCollection("Cities")
db.createCollection("Connection")
railNetworkGraph = db.createGraph("RailNetwork", replicationFactor=3)

# Create two example city documents.
h1 = railNetworkGraph.createVertex('Cities', {"_key": "Berlin", "country": "Germany"})
h2 = railNetworkGraph.createVertex('Cities', {"_key": "Boston", "country": "USA"})

# Link them. Note that "distance_km" is not one of the fields declared on
# Connection; this demo edge is removed again below.
e1 = railNetworkGraph.link('Connection', h1, h2, {"distance_km": 6077})

# Look up the Berlin vertex by collection name and key.
berlin = db["Cities"]["Berlin"]

# As we unfortunately cannot travel from Berlin to Boston by train,
# remove the connection between the two cities again.
railNetworkGraph.unlink('Connection', berlin, h2)

Next let us add more cities and connections: 

In [None]:

# Keys of the city vertices to add to the rail network.
cities = [
    "Inverness",
    "Aberdeen",
    "Leuchars",
    "StAndrews",
    "Edinburgh",
    "Glasgow",
    "York",
    "Cologne",
    "Carlisle",
    "Birmingham",
    "London",
    "Brussels",
    "Toronto",
    "Winnipeg",
    "Saskatoon",
    "Edmonton",
    "Jasper",
    "Vancouver",
]

# Each entry is (city1, city2, travel time city1 -> city2, travel time city2 -> city1).
connections = [
    ("Inverness", "Aberdeen", 3, 2.5),
    ("Aberdeen", "Leuchars", 1.5, 1),
    ("Leuchars", "Edinburgh", 1.5, 3),
    ("Edinburgh", "Glasgow", 1, 1),
    ("Edinburgh", "York", 3.5, 4),
    ("Glasgow", "Carlisle", 1, 1),
    ("Carlisle", "York", 2.5, 3.5),
    ("Carlisle", "Birmingham", 2.0, 1),
    ("Birmingham", "London", 1.5, 2.5),
    ("Leuchars", "StAndrews", 0.2, 0.2),
    ("York", "London", 1.8, 2.0),
    ("London", "Brussels", 2.5, 3.5),
    ("Brussels", "Cologne", 2, 1.5),
    ("Toronto", "Winnipeg", 36, 35),
    ("Winnipeg", "Saskatoon", 12, 5),
    ("Saskatoon", "Edmonton", 12, 17),
    ("Edmonton", "Jasper", 6, 5),
    ("Jasper", "Vancouver", 12, 13),
]

# Keep the vertex returned for each city so the edge loop below can reuse it
# instead of looking up both endpoints in the database for every edge.
city_vertices = {}
for city in cities:
    city_vertices[city] = railNetworkGraph.createVertex('Cities', {"_key": city})

for city1, city2, time1, time2 in connections:
    # The graph is directed, so every connection is stored as two edges,
    # one per direction, each carrying its own travel time.
    origin = city_vertices[city1]
    destination = city_vertices[city2]
    railNetworkGraph.link('Connection', origin, destination, {"travel_time": time1})
    railNetworkGraph.link('Connection', destination, origin, {"travel_time": time2})


We can now log in to the database using the login credentials printed above and view the RailNetwork graph from the Graphs tab.

![trainNetwork](https://github.com/joerg84/Graph_Powered_ML_Workshop/blob/master/img/arango_train_graph.png?raw=1)

While we are here, to understand the underlying format let us also visit the collections tab.

![trainNetwork](https://github.com/joerg84/Graph_Powered_ML_Workshop/blob/master/img/arango_collections.png?raw=1)

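Finally, our first graph traversal: which cities can be reached from London within at most five connections and with a total travel time below 5?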

In [None]:
# Traverse 1 to 5 edges outbound from London in the RailNetwork graph and keep
# only the paths whose summed travel_time is below 5. For every such path the
# query returns the vertex reached and the edge targets along the path.
reachabilty_query = """WITH Cities
FOR vertex, edge, path
  IN 1..5 
  OUTBOUND 'Cities/London'
  GRAPH 'RailNetwork'
  FILTER SUM(path.edges[*].travel_time) < 5
  return 
  { 'city': vertex._key,
    'path': CONCAT_SEPARATOR(" -> ", path.edges[*]._to)
  }"""

queryResult = db.AQLQuery(reachabilty_query, rawResults=True)
for row in queryResult:
    # The returned path holds only the edge targets, so the start vertex is
    # prepended here. The trailing empty item yields the blank separator line.
    print(
        "city: " + row["city"],
        "path: Cities/London -> " + row["path"],
        "",
        sep="\n",
    )

Next: Shortest Path

In [None]:
# Ask for a single path (LIMIT 1) from Aberdeen to London and return the
# stations visited, the travel time of each leg and their sum.
# NOTE(review): no OPTIONS are passed here, so the ranking presumably does not
# take travel_time into account -- confirm against the AQL documentation.
shortest_path_query = """FOR p IN OUTBOUND K_SHORTEST_PATHS 'Cities/Aberdeen' TO 'Cities/London'
  GRAPH 'RailNetwork'
      LIMIT 1
      RETURN {
          places: p.vertices[*]._key,
          travelTimes: p.edges[*].travel_time,
          travelTimeTotal: SUM(p.edges[*].travel_time)
      }"""

queryResult = db.AQLQuery(shortest_path_query, rawResults=True)
for row in queryResult:
    print(f"places: {row['places']}")
    print(f"intermediate travel times: {row['travelTimes']}")
    print(f"total travel time: {row['travelTimeTotal']}")
    print()

In [None]:
# Alternative shortest-path query with more options: the edges' travel_time is
# passed as weightAttribute (with a defaultWeight of 100), and up to three
# paths are returned instead of one.
shortest_path_query = """FOR p IN OUTBOUND K_SHORTEST_PATHS'Cities/Aberdeen' TO 'Cities/London'
  GRAPH 'RailNetwork'
      OPTIONS {
      weightAttribute: "travel_time",
      defaultWeight: 100
      }
      LIMIT 3
      RETURN {
          places: p.vertices[*]._key,
          travelTimes: p.edges[*].travel_time,
          travelTimeTotal: SUM(p.edges[*].travel_time)
      }"""
queryResult = db.AQLQuery(shortest_path_query, rawResults=True)

# Output label paired with the result field it reports, in print order.
report_lines = (
    ("places: ", 'places'),
    ("intermediate travel times: ", 'travelTimes'),
    ("total travel time: ", 'travelTimeTotal'),
)
for row in queryResult:
    for label, field in report_lines:
        print(label + str(row[field]))
    print()

Next steps: To explore more of the graph capabilities of ArangoDB, visit the [Training Center](https://www.arangodb.com/arangodb-training-center/graphs/), which includes many examples and the free Graph Course.

In [None]:
# Clean up: delete the collections created in this notebook from the
# temporary database.
db.dropAllCollections() 
# NOTE(review): presumably refreshes the client-side view of the database
# after the drop -- confirm against the pyArango documentation.
db.reload()