In [1]:
#Step 1: Install Dependencies
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://archive.apache.org/dist/spark/spark-3.3.0/spark-3.3.0-bin-hadoop3.tgz
!tar xf spark-3.3.0-bin-hadoop3.tgz
!pip install -q findspark

#Step 2: Add environment variables
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "spark-3.3.0-bin-hadoop3"
os.environ["HADOOP_HOME"] = os.environ["SPARK_HOME"]

os.environ["PYSPARK_DRIVER_PYTHON"] = "jupyter"
os.environ["PYSPARK_DRIVER_PYTHON_OPTS"] = "notebook"
os.environ["PYSPARK_SUBMIT_ARGS"] = "--packages graphframes:graphframes:0.8.1-spark3.0-s_2.12 pyspark-shell"
#Step 3: Initialize Pyspark
import findspark
findspark.init()

In [2]:
#creating spark context
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local[*]").getOrCreate()
sc = spark.sparkContext
sc

Vertices and Edges

In [47]:
vertices = spark.createDataFrame([
				    ('A','ARON', 350), 
                                  ('B','BILL',360),
                                  ('C','CLAIR',195),
                                  ('D','DANIEL',90),
                                  ('E','ERIC',90),
                                  ('F','FRANK',215),
                                  ('G','GRAHAM',30),
                                  ('H','HENRY',25),
                                  ('I','INNA',25),
                                  ('J','JEN',20)
				   ],['id','name','total_seconds'])

In [48]:
edges = spark.createDataFrame([
				('A','B',60),
				('B','A',50),
				('A','C',50),
				('C','A',100),
				('A','D',90),
				('C','I',25),
				('C','J',20),
				('B','F',50),
				('F','B',110),
				('F','G',30),
				('F','H',25),
				('B','E',90)
			     ],['src', 'dst', 'relationship'])


1. Creating a Graph G using the provided edges and vertices

In [50]:
from graphframes import *
g = GraphFrame(vertices, edges)

  "DataFrame.sql_ctx is an internal property, and will be removed "


2. Vertices and Edges

In [51]:
g.vertices.show()
g.edges.show()

+---+------+-------------+
| id|  name|total_seconds|
+---+------+-------------+
|  A|  ARON|          350|
|  B|  BILL|          360|
|  C| CLAIR|          195|
|  D|DANIEL|           90|
|  E|  ERIC|           90|
|  F| FRANK|          215|
|  G|GRAHAM|           30|
|  H| HENRY|           25|
|  I|  INNA|           25|
|  J|   JEN|           20|
+---+------+-------------+

+---+---+------------+
|src|dst|relationship|
+---+---+------------+
|  A|  B|          60|
|  B|  A|          50|
|  A|  C|          50|
|  C|  A|         100|
|  A|  D|          90|
|  C|  I|          25|
|  C|  J|          20|
|  B|  F|          50|
|  F|  B|         110|
|  F|  G|          30|
|  F|  H|          25|
|  B|  E|          90|
+---+---+------------+



3. In-degrees and Out-degrees


In [29]:
g.inDegrees.show()
g.outDegrees.show()


+---+--------+
| id|inDegree|
+---+--------+
|  B|       2|
|  D|       1|
|  C|       1|
|  A|       2|
|  I|       1|
|  F|       1|
|  E|       1|
|  J|       1|
|  G|       1|
|  H|       1|
+---+--------+

+---+---------+
| id|outDegree|
+---+---------+
|  B|        3|
|  C|        3|
|  A|        3|
|  F|        3|
+---+---------+



4. Shortest Distance between ARON and JEN 

In [39]:
g.shortestPaths(landmarks=["A", "J"]).show()
results.select("id", "distances").show()

+---+------+-------------+----------------+
| id|  name|total_seconds|       distances|
+---+------+-------------+----------------+
|  F| FRANK|          215|{A -> 2, J -> 4}|
|  E|  ERIC|           90|              {}|
|  B|  BILL|          360|{A -> 1, J -> 3}|
|  D|DANIEL|           90|              {}|
|  C| CLAIR|          195|{A -> 1, J -> 1}|
|  J|   JEN|           20|        {J -> 0}|
|  A|  ARON|          350|{A -> 0, J -> 2}|
|  G|GRAHAM|           30|              {}|
|  I|  INNA|           25|              {}|
|  H| HENRY|           25|              {}|
+---+------+-------------+----------------+

+---+----------------+
| id|       distances|
+---+----------------+
|  F|{D -> 3, A -> 2}|
|  E|              {}|
|  B|{D -> 2, A -> 1}|
|  D|        {D -> 0}|
|  C|{D -> 2, A -> 1}|
|  J|              {}|
|  A|{D -> 1, A -> 0}|
|  G|              {}|
|  I|              {}|
|  H|              {}|
+---+----------------+

