# Use GraphX (via GraphFrames) to solve real-world problems



In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from pyspark.sql import SparkSession
import os
from pyspark.sql import Row
from graphframes import GraphFrame

In [None]:
def buildEdgesLabels(InGraph:GraphFrame):
    """Map every edge of a GraphFrame to the value of its ``type`` field.

    All edge rows are collected to the driver, so this helper is intended
    for graphs small enough to be plotted.

    Args:
        InGraph: graph whose ``edges`` rows are read through their ``src``,
            ``dst`` and ``type`` fields.

    Returns:
        A dict keyed by ``(src, dst)`` tuples holding the matching ``type``
        value. When several rows share one ``(src, dst)`` pair, the row
        collected last wins.
    """
    # collect() returns the edge rows as a plain Python list on the driver.
    return {
        (row["src"], row["dst"]): row["type"]
        for row in InGraph.edges.collect()
    }

def drawGraph(InGraph:GraphFrame):
    """Plot a GraphFrame with networkx and matplotlib, labelling each edge.

    Vertex ids and edges are collected to the driver, so this is only
    suitable for graphs small enough to be displayed.

    Args:
        InGraph: graph to draw. Vertices are read through their ``id``
            field and edges through ``src`` and ``dst``; edge labels come
            from ``buildEdgesLabels``, which reads the ``type`` field.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # NOTE: nx.Graph is undirected, so edge direction is not shown.
    nxGraph = nx.Graph()

    # Register every vertex first: building the graph from edges alone would
    # silently leave out vertices that have no incident edge.
    nxGraph.add_nodes_from(
        vertex["id"] for vertex in InGraph.vertices.select("id").collect()
    )
    nxGraph.add_edges_from(
        (edge["src"], edge["dst"]) for edge in InGraph.edges.collect()
    )

    # A fixed seed keeps the layout identical from one run to the next.
    pos = nx.spring_layout(nxGraph, seed=42)

    # Draw nodes and edges, then overlay the edge labels at the same positions.
    nx.draw(nxGraph, pos, with_labels=True, node_size=500, node_color='lightblue', font_size=10, font_color='black')
    nx.draw_networkx_edge_labels(nxGraph, pos, edge_labels=buildEdgesLabels(InGraph))

    plt.show()

In [None]:

# Switch between a local Spark session and one submitted to Kubernetes.
local = True

if local:
    # Local mode with 4 worker threads.
    # NOTE(review): in local mode the executors presumably run inside the
    # driver JVM, so spark.executor.memory may have no effect here and
    # spark.driver.memory may be the relevant setting -- confirm.
    spark = (
        SparkSession.builder
        .master("local[4]")
        .appName("GraphX")
        .config("spark.executor.memory", "4g")
        # Pull the GraphFrames package when the session starts.
        .config('spark.jars.packages', 'graphframes:graphframes:0.8.2-spark3.2-s_2.12')
        .getOrCreate()
    )
else:
    # Cluster mode: the service account and namespace are read from the
    # environment; a missing variable raises KeyError before the session
    # is created.
    spark = (
        SparkSession.builder
        .master("k8s://https://kubernetes.default.svc:443")
        .appName("GraphX")
        .config("spark.kubernetes.container.image", "inseefrlab/jupyter-datascience:master")
        .config(
            "spark.kubernetes.authenticate.driver.serviceAccountName",
            os.environ['KUBERNETES_SERVICE_ACCOUNT'],
        )
        # Pull the GraphFrames package when the session starts.
        .config('spark.jars.packages', 'graphframes:graphframes:0.8.2-spark3.2-s_2.12')
        .config("spark.executor.instances", "4")
        .config("spark.executor.memory", "2g")
        .config("spark.kubernetes.namespace", os.environ['KUBERNETES_NAMESPACE'])
        .getOrCreate()
    )

# Enable eager evaluation so DataFrames display as formatted tables in the notebook.
spark.conf.set("spark.sql.repl.eagerEval.enabled", True)