In [5]:
from graphframes import *

In [4]:
# Create a Vertex DataFrame with unique ID column "id"
v = sqlContext.createDataFrame([
  ("a", "Alice", 34),
  ("b", "Bob", 36),
  ("c", "Charlie", 30),
], ["id", "name", "age"])

In [6]:
# Create an Edge DataFrame with "src" and "dst" columns
e = sqlContext.createDataFrame([
  ("a", "b", "friend"),
  ("b", "c", "follow"),
  ("c", "b", "follow"),
], ["src", "dst", "relationship"])
# Create a GraphFrame

In [7]:
g = GraphFrame(v, e)

# Query: Get in-degree of each vertex.
g.inDegrees.show()

# Query: Count the number of "follow" connections in the graph.
g.edges.filter("relationship = 'follow'").count()

# Run PageRank algorithm, and show results.
results = g.pageRank(resetProbability=0.01, maxIter=20)
results.vertices.select("id", "pagerank").show()

                                                                                

+---+--------+
| id|inDegree|
+---+--------+
|  b|       2|
|  c|       1|
+---+--------+

+---+------------------+
| id|          pagerank|
+---+------------------+
|  a|              0.01|
|  b|1.0905890109440908|
|  c|1.8994109890559092|
+---+------------------+



In [8]:
### Fetching a sample graph implicitly available in the library
from graphframes.examples import Graphs
g = Graphs(sqlContext).friends()

In [9]:
# displaying all the vertices
g.vertices.show()

+---+-------+---+
| id|   name|age|
+---+-------+---+
|  a|  Alice| 34|
|  b|    Bob| 36|
|  c|Charlie| 30|
|  d|  David| 29|
|  e| Esther| 32|
|  f|  Fanny| 36|
+---+-------+---+



In [10]:
g.edges.show()

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  a|  b|      friend|
|  b|  c|      follow|
|  c|  b|      follow|
|  f|  c|      follow|
|  e|  f|      follow|
|  e|  d|      friend|
|  d|  a|      friend|
+---+---+------------+



In [16]:
# getting the entity with the minimum edge
g.vertices.groupby().min('age').show()

+--------+
|min(age)|
+--------+
|      29|
+--------+



In [15]:
### Rendering a graphframe using networkx and igraph

# import the modules
import networkx as nx
from igraph import *

# create your networkx graph object 
graph_list=[]
graph_list.append(['start','end',{'edge_label':'myLabel'}])
G=nx.MultiDiGraph()
G.add_edges_from(graph_list)

# then create the label lists
# you can either use the edge/vertices values as label or add custom labels as attributes

vertex_label=[str(e[0]) for e in g.edges.data()]
edge_label=[int(e[2]['edge_label']) for e in g.edges.data()]

ig = Graph.TupleList(g.edges, directed=True)
plot(ig,vertex_label=vertex_label,edge_label=edge_label)

AttributeError: 'DataFrame' object has no attribute 'data'

In [20]:
g.edges.columns

['src', 'dst', 'relationship']

In [21]:
dir(g.edges)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_collect_as_arrow',
 '_ipython_key_completions_',
 '_jcols',
 '_jdf',
 '_jmap',
 '_joinAsOf',
 '_jseq',
 '_lazy_rdd',
 '_repr_html_',
 '_sc',
 '_schema',
 '_session',
 '_sort_cols',
 '_sql_ctx',
 '_support_repr_html',
 'agg',
 'alias',
 'approxQuantile',
 'cache',
 'checkpoint',
 'coalesce',
 'colRegex',
 'collect',
 'columns',
 'corr',
 'count',
 'cov',
 'createGlobalTempView',
 'createOrReplaceGlobalTempView',
 'createOrReplaceTempView',
 'createTempView',
 'crossJoin',
 'crosstab',
 'cube',
 'describe',
 'distinct',
 'drop',
 'dropDuplicates',
 'dropDuplicatesWithinWatermark',
 'drop_du