In [None]:
from pyspark.sql import SparkSession

# Start (or reuse) the Spark session used throughout this notebook.
# The GraphFrames package is requested via the spark.jars.packages setting.
spark = (
    SparkSession.builder
    .appName("GraphFramesExample")
    .config("spark.jars.packages", "graphframes:graphframes:0.8.4-spark3.5-s_2.12")
    .getOrCreate()
)

In [None]:
# Import GraphFrame
from graphframes import GraphFrame

In [None]:
# Vertex DataFrame: one row per person, keyed by the unique "id" column,
# with "name" and "age" as additional vertex attributes.
v = spark.createDataFrame(
    [
        ("a", "Alice", 34),
        ("b", "Bob", 36),
        ("c", "Charlie", 30)
    ],
    schema=["id", "name", "age"]
)

In [None]:

# Edge DataFrame: each row is a directed edge from "src" to "dst" (both hold
# vertex ids), labelled with a "relationship" attribute.
e = spark.createDataFrame(
    [
        ("a", "b", "friend"),
        ("b", "c", "follow"),
        ("c", "b", "follow")
    ],
    schema=["src", "dst", "relationship"]
)

In [None]:
# Create a GraphFrame from the vertex DataFrame `v` and edge DataFrame `e`.
# GraphFrame is the only graphframes name this cell needs, so it is imported
# explicitly instead of via a wildcard: the origin of the name stays obvious
# and nothing else is silently pulled into the notebook namespace.
from graphframes import GraphFrame
g = GraphFrame(v, e)

In [None]:
# Query: in-degree of every vertex in the graph, printed as a table.
in_degrees = g.inDegrees
in_degrees.show()

In [None]:

# Query: how many edges in the graph have relationship "follow"?
# The count is left as the final expression so the notebook displays it.
follow_edges = g.edges.filter("relationship = 'follow'")
follow_edges.count()


In [None]:

# Run PageRank on the graph (maxIter=20, resetProbability=0.01), then print
# the "id" and "pagerank" columns of the resulting vertices.
results = g.pageRank(maxIter=20, resetProbability=0.01)
pagerank_by_vertex = results.vertices.select("id", "pagerank")
pagerank_by_vertex.show()