In [5]:
from graphframes import *

In [4]:
# Vertex DataFrame: one row per person; the "id" column holds the
# unique identifier of each vertex.
vertex_schema = ["id", "name", "age"]
vertex_rows = [
    ("a", "Alice", 34),
    ("b", "Bob", 36),
    ("c", "Charlie", 30),
]
v = sqlContext.createDataFrame(vertex_rows, vertex_schema)

In [6]:
# Edge DataFrame: each row links a "src" vertex id to a "dst" vertex id
# and labels the link with its relationship type.
edge_schema = ["src", "dst", "relationship"]
edge_rows = [
    ("a", "b", "friend"),
    ("b", "c", "follow"),
    ("c", "b", "follow"),
]
e = sqlContext.createDataFrame(edge_rows, edge_schema)
# The GraphFrame itself is assembled from `v` and `e` in the next cell.

In [7]:
# Build the graph from the vertex DataFrame `v` and the edge DataFrame `e`.
g = GraphFrame(v, e)

# Query: Get in-degree of each vertex.
g.inDegrees.show()

# Query: Count the number of "follow" connections in the graph.
# The count is not the last expression of the cell, so a bare expression
# would be evaluated and silently discarded; bind it and print it instead.
follow_count = g.edges.filter("relationship = 'follow'").count()
print("follow edges:", follow_count)

# Run PageRank algorithm, and show the score computed for each vertex.
results = g.pageRank(resetProbability=0.01, maxIter=20)
results.vertices.select("id", "pagerank").show()

                                                                                

+---+--------+
| id|inDegree|
+---+--------+
|  b|       2|
|  c|       1|
+---+--------+

+---+------------------+
| id|          pagerank|
+---+------------------+
|  a|              0.01|
|  b|1.0905890109440908|
|  c|1.8994109890559092|
+---+------------------+



In [8]:
# Load the "friends" example graph provided by graphframes.examples.
# Note: this rebinds `g`, replacing the graph built by hand earlier.
from graphframes.examples import Graphs
example_graphs = Graphs(sqlContext)
g = example_graphs.friends()

In [9]:
# Print the vertex table of the sample graph.
vertices_df = g.vertices
vertices_df.show()

+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
|  f|  Fanny| 36|
+---+-------+---+



In [10]:
# Print the edge table of the sample graph.
edges_df = g.edges
edges_df.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  b|  c|      follow|
|  c|  b|      follow|
|  f|  c|      follow|
|  e|  f|      follow|
|  e|  d|      friend|
|  d|  a|      friend|
+---+---+------------+



In [16]:
# Minimum "age" across all vertices. The result is a single aggregate row;
# it does not identify which vertex has that age.
age_summary = g.vertices.groupby().min('age')
age_summary.show()

+--------+
|min(age)|
+--------+
|      29|
+--------+



In [20]:
# Column names of the edge DataFrame; left as the last expression so the
# notebook displays the list.
edge_column_names = g.edges.columns
edge_column_names

['src', 'dst', 'relationship']

In [None]:
## Motif finding - search for a structural pattern in a graph
# Uses a simple Domain-Specific Language (DSL) for expressing structural queries
# e.g. g.find("(a)-[e]->(b); (b)-[e2]->(a)") searches for pairs of vertices that are connected by edges in both directions.

In [23]:
# Search for pairs of vertices that have an edge in each direction
# between them. Each such pair is matched once per ordering of (a, b).
bidirectional_pattern = "(a)-[e]->(b); (b)-[e2]->(a)"
motifs = g.find(bidirectional_pattern)
motifs.show()

+----------------+--------------+----------------+--------------+
|               a|             e|               b|            e2|
+----------------+--------------+----------------+--------------+
|{c, Charlie, 30}|{c, b, follow}|    {b, Bob, 36}|{b, c, follow}|
|    {b, Bob, 36}|{b, c, follow}|{c, Charlie, 30}|{c, b, follow}|
+----------------+--------------+----------------+--------------+

