In [1]:
#Step 1: Install Dependencies
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://archive.apache.org/dist/spark/spark-3.3.0/spark-3.3.0-bin-hadoop3.tgz
!tar xf spark-3.3.0-bin-hadoop3.tgz
!pip install -q findspark

#Step 2: Add environment variables
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "spark-3.3.0-bin-hadoop3"
os.environ["HADOOP_HOME"] = os.environ["SPARK_HOME"]

#these are new enviromental variables
os.environ["PYSPARK_DRIVER_PYTHON"] = "jupyter"
os.environ["PYSPARK_DRIVER_PYTHON_OPTS"] = "notebook"
os.environ["PYSPARK_SUBMIT_ARGS"] = "--packages graphframes:graphframes:0.8.1-spark3.0-s_2.12 pyspark-shell"
#Step 3: Initialize Pyspark
import findspark
findspark.init()

In [None]:
# Build (or reuse) a SparkSession running in local mode ("local[*]") and
# expose its SparkContext as `sc`.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .master("local[*]")
    .getOrCreate()
)
sc = spark.sparkContext
sc  # last expression of the cell: display the context

In [14]:
# Vertex table: one row per node with columns (id, value, name).
vertex_rows = [
    ("A", 350, "ARON"),
    ("B", 360, "BILL"),
    ("C", 195, "CLAIR"),
    ("D", 90, "DANIEL"),
    ("E", 90, "ERIC"),
    ("F", 215, "FRANK"),
    ("G", 30, "GRAHAM"),
    ("H", 25, "HENRY"),
    ("I", 25, "INNA"),
    ("J", 20, "JEN"),
]
vertices = spark.createDataFrame(vertex_rows, schema=["id", "value", "name"])

In [15]:
# Edge table: one row per directed edge with columns (src, dst, wt).
edge_rows = [
    ("A", "B", 60),
    ("B", "A", 50),
    ("A", "C", 50),
    ("C", "A", 100),
    ("A", "D", 90),
    ("C", "I", 25),
    ("C", "J", 20),
    ("B", "F", 50),
    ("F", "B", 110),
    ("F", "G", 30),
    ("F", "H", 25),
    ("B", "E", 90),
]
edges = spark.createDataFrame(edge_rows, schema=["src", "dst", "wt"])

In [16]:
# Import only the name that is used; a wildcard import hides where names come from.
from graphframes import GraphFrame

# Build the graph from the vertex and edge DataFrames created in the earlier cells.
g = GraphFrame(vertices, edges)



In [17]:
# Total degree of each vertex; in the output below it equals in-degree plus
# out-degree (e.g. B: 2 incoming + 3 outgoing = 5).
g.degrees.show()



+---+------+
| id|degree|
+---+------+
|  B|     5|
|  D|     1|
|  C|     4|
|  A|     5|
|  I|     1|
|  F|     4|
|  E|     1|
|  J|     1|
|  G|     1|
|  H|     1|
+---+------+



In [18]:
# Number of incoming edges per vertex.
g.inDegrees.show()

+---+--------+
| id|inDegree|
+---+--------+
|  B|       2|
|  D|       1|
|  C|       1|
|  A|       2|
|  I|       1|
|  F|       1|
|  E|       1|
|  J|       1|
|  G|       1|
|  H|       1|
+---+--------+



In [19]:
# Number of outgoing edges per vertex. Only A, B, C and F appear in the output
# below: the other vertices never occur as an edge source.
g.outDegrees.show()

+---+---------+
| id|outDegree|
+---+---------+
|  B|        3|
|  C|        3|
|  A|        3|
|  F|        3|
+---+---------+



In [20]:
# Display the vertex DataFrame held by the graph (columns: id, value, name).
g.vertices.show()

+---+-----+------+
| id|value|  name|
+---+-----+------+
|  A|  350|  ARON|
|  B|  360|  BILL|
|  C|  195| CLAIR|
|  D|   90|DANIEL|
|  E|   90|  ERIC|
|  F|  215| FRANK|
|  G|   30|GRAHAM|
|  H|   25| HENRY|
|  I|   25|  INNA|
|  J|   20|   JEN|
+---+-----+------+



In [21]:
# Display the edge DataFrame held by the graph (columns: src, dst, wt).
g.edges.show()

+---+---+---+
|src|dst| wt|
+---+---+---+
|  A|  B| 60|
|  B|  A| 50|
|  A|  C| 50|
|  C|  A|100|
|  A|  D| 90|
|  C|  I| 25|
|  C|  J| 20|
|  B|  F| 50|
|  F|  B|110|
|  F|  G| 30|
|  F|  H| 25|
|  B|  E| 90|
+---+---+---+



In [22]:
# For every vertex, compute the distance to each landmark vertex ("A" and "J").
# In the output below the distances read as hop counts along directed edges
# (e.g. B -> A is 1 although that edge has wt 50), and a vertex that cannot
# reach a landmark has no entry for it (e.g. D, which has no outgoing edges).
g.shortestPaths(landmarks=["A", "J"]).show()

+---+-----+------+----------------+
| id|value|  name|       distances|
+---+-----+------+----------------+
|  F|  215| FRANK|{A -> 2, J -> 4}|
|  E|   90|  ERIC|              {}|
|  B|  360|  BILL|{A -> 1, J -> 3}|
|  D|   90|DANIEL|              {}|
|  C|  195| CLAIR|{A -> 1, J -> 1}|
|  J|   20|   JEN|        {J -> 0}|
|  A|  350|  ARON|{A -> 0, J -> 2}|
|  G|   30|GRAHAM|              {}|
|  I|   25|  INNA|              {}|
|  H|   25| HENRY|              {}|
+---+-----+------+----------------+



In [23]:
# Keep the shortest-path result in a DataFrame, then show only the vertex name
# and its distance map instead of every vertex column.
results = g.shortestPaths(landmarks=["A", "J"])
results.select("name", "distances").show()

+------+----------------+
|  name|       distances|
+------+----------------+
| FRANK|{A -> 2, J -> 4}|
|  ERIC|              {}|
|  BILL|{A -> 1, J -> 3}|
|DANIEL|              {}|
| CLAIR|{A -> 1, J -> 1}|
|   JEN|        {J -> 0}|
|  ARON|{A -> 0, J -> 2}|
|GRAHAM|              {}|
|  INNA|              {}|
| HENRY|              {}|
+------+----------------+

